使用结构数组计算文本文件中单词的出现次数C++
Using an array of struct counting the number of occurrence of a word in a text file C++
大家好,这是我第一次使用 Stack Overflow。我有一个关于用 C++ 统计文本文件中单词出现次数的问题。下面是我目前的代码。我需要创建一个结构体数组,其中每个结构体保存单词的索引和该单词的计数器,然后把它们全部存入 AVL 树。打开文件并读入一个单词后,我在 AVL 树或 trie 中查找它。如果单词已存在,就用节点中的索引递增该单词的 Cnt;如果不存在,就把它添加到单词数组中,把它的位置放入下一个结构体,再把该结构体的位置放入 AVL 树,并将该结构体的 Cnt 设为 1。我现在遇到的问题是,程序似乎没有正确处理计数,因此只打印出 0。请给我一些如何修复该错误的建议。我的代码如下:
#include <iostream>
#include <fstream>
#include <string>
#include <cstdlib>
#include <cstring>
#include <ctype.h>
#include <stdio.h>
#include <string>
#include <cctype>
#include <stdlib.h>
#include <stdbool.h>
using namespace std;
// Forward declarations for functions that are defined after main().

// Inserts key into the AVL subtree rooted at node and returns the root of
// the resulting subtree.
struct Node* insert(struct Node* node, int key) ;
// Prints the keys of the tree rooted at root in pre-order.
void preOrder(struct Node *root) ;
// Normalises a word in place: drops punctuation/whitespace and lower-cases it.
void removePunct(char str[]);
// Returns 0 when the two strings are equal and -1 otherwise.
int compareWord(char word1[], char word2[] );
// Bookkeeping kept for one distinct word.
struct Stats {
    int wordPos;  // offset of the word's first character in the word storage
    int wordCnt;  // number of times the word has been counted
};
// Statistics table; main() fills in one entry each time it stores a new word.
Stats record[50000];
// Index of the next unused entry of record[] (main() increments it per new word).
int indexRec = 0;
// Scratch buffer for the word currently being processed by main().
char word[50000*10] ;
// Offset in main()'s word storage at which the next new word is copied.
int indexWord = 0;
int main() {
ifstream fin;
string fname;
char line[200], wordArray[500000];
cout << "Enter the text file name:" << endl;
cin >> fname;
fin.open(fname.c_str());
if (!fin) {
cerr << "Unable to open file" << endl;
exit(1);
}
struct Node *root = NULL;
while (!fin.eof() && fin >> line) { //use getline
for(int n=0,m=0; m!=strlen(line); m+=n) {
sscanf(&line[m],"%s%n",word,&n);
removePunct(word);
//strcpy(&wordArray[indexWord],word);
int flag = compareWord(wordArray, word);
if(flag==-1) {
strcpy(&wordArray[indexWord],word);
record[indexRec].wordPos = indexWord;
record[indexRec].wordCnt = 1;
root = insert(root, record[indexRec].wordPos);
indexWord+=strlen(word)+1;
// indexes of the word array
indexRec++;
cout << wordArray[indexWord] << " ";
} else
record[flag].wordCnt++;
cout << record[indexRec].wordCnt;
cout << endl;
}
/*for(int x = 0; x <= i; x++)
{
cout << record[x].wordPos << record[x].wordCnt << endl;
}*/
}
fin.close();
return 0;
}
/*
 * Normalises the NUL-terminated string `str` in place: every punctuation and
 * whitespace character is dropped and the remaining characters are converted
 * to lower case.  The result is never longer than the input and is always
 * NUL-terminated; it may be the empty string.
 */
void removePunct(char str[]) {
    int keep = 0;   /* next position to write a kept character */

    /* The scan stops at the terminating NUL of the input. */
    for (int cur = 0; str[cur] != '\0'; ++cur) {
        /* <ctype.h> functions require a value representable as unsigned char. */
        unsigned char ch = (unsigned char)str[cur];
        if (ispunct(ch) || isspace(ch))
            continue;
        str[keep++] = (char)tolower(ch);
    }
    str[keep] = '\0';
}
/*
 * Tests two NUL-terminated strings for equality.
 * Returns 0 when they are identical and -1 when they differ.
 */
int compareWord(char word1[], char word2[] ) {
    return (strcmp(word1, word2) == 0) ? 0 : -1;
}
// One node of the AVL tree.
struct Node {
    int key;             // value the tree is ordered by
    struct Node *left;   // subtree holding the smaller keys
    struct Node *right;  // subtree holding the larger keys
    int height;          // height of the subtree rooted here; a new leaf has 1
};
// A utility function to get maximum of two integers
int max(int a, int b);
// Height stored in node N; an empty subtree (NULL) counts as height 0.
int height(struct Node *N) {
    return N ? N->height : 0;
}
// Returns the larger of the two integers a and b.
int max(int a, int b) {
    if (a > b)
        return a;
    return b;
}
/*
 * Allocates a new leaf node holding `key`, with both child pointers NULL and
 * height 1.
 *
 * Returns the new node, or NULL when the allocation fails.  The memory comes
 * from malloc(), so it has to be released with free().
 */
struct Node* newNode(int key) {
    /* The cast is needed because this file is compiled as C++. */
    struct Node *node = (struct Node *)malloc(sizeof *node);
    if (node == NULL)
        return NULL;    /* do not write through a failed allocation */

    node->key = key;
    node->left = NULL;
    node->right = NULL;
    node->height = 1;   /* a new node always starts out as a leaf */
    return node;
}
/*
 * Right rotation of the subtree rooted at y.
 *
 * y's left child becomes the root of the subtree, y becomes that node's
 * right child, and the node's former right subtree is re-attached as y's
 * left subtree.  The heights of both moved nodes are recomputed.
 * Returns the new subtree root.  y and y->left are dereferenced unchecked.
 */
struct Node *rightRotate(struct Node *y) {
    struct Node *pivot = y->left;
    struct Node *orphan = pivot->right;

    pivot->right = y;
    y->left = orphan;

    /* y now hangs below pivot, so its height must be refreshed first. */
    y->height = 1 + max(height(y->left), height(y->right));
    pivot->height = 1 + max(height(pivot->left), height(pivot->right));

    return pivot;
}
/*
 * Left rotation of the subtree rooted at x.
 *
 * x's right child becomes the root of the subtree, x becomes that node's
 * left child, and the node's former left subtree is re-attached as x's
 * right subtree.  The heights of both moved nodes are recomputed.
 * Returns the new subtree root.  x and x->right are dereferenced unchecked.
 */
struct Node *leftRotate(struct Node *x) {
    struct Node *pivot = x->right;
    struct Node *orphan = pivot->left;

    pivot->left = x;
    x->right = orphan;

    /* x now hangs below pivot, so its height must be refreshed first. */
    x->height = 1 + max(height(x->left), height(x->right));
    pivot->height = 1 + max(height(pivot->left), height(pivot->right));

    return pivot;
}
// Balance factor of N: height of its left subtree minus height of its right
// subtree.  A NULL node has balance 0.
int getBalance(struct Node *N) {
    return (N == NULL) ? 0 : height(N->left) - height(N->right);
}
/*
 * Inserts `key` into the AVL subtree rooted at `node` and returns the root
 * of the resulting subtree.  A key that is already present leaves the
 * subtree unchanged.
 */
struct Node* insert(struct Node* node, int key) {
    /* Empty spot reached: the new key becomes a leaf here. */
    if (node == NULL)
        return newNode(key);

    /* Ordinary binary-search-tree descent. */
    if (key == node->key)
        return node;                    /* duplicates are not stored */
    if (key < node->key)
        node->left = insert(node->left, key);
    else
        node->right = insert(node->right, key);

    /* The subtree below may have grown; refresh this node's height. */
    node->height = 1 + max(height(node->left), height(node->right));

    int balance = getBalance(node);

    if (balance > 1) {
        /* Left-heavy. */
        if (key < node->left->key)
            return rightRotate(node);               /* left-left case */
        if (key > node->left->key) {
            node->left = leftRotate(node->left);    /* left-right case */
            return rightRotate(node);
        }
    } else if (balance < -1) {
        /* Right-heavy. */
        if (key > node->right->key)
            return leftRotate(node);                /* right-right case */
        if (key < node->right->key) {
            node->right = rightRotate(node->right); /* right-left case */
            return leftRotate(node);
        }
    }

    /* No rotation was needed: this node stays the subtree root. */
    return node;
}
// Prints the keys of the tree rooted at root in pre-order (node, then left
// subtree, then right subtree), each followed by a space.
void preOrder(struct Node *root) {
    if (root == NULL)
        return;

    printf("%d ", root->key);
    preOrder(root->left);
    preOrder(root->right);
}
一个问题(我看不出这是否是唯一的问题)是:去掉所有中间行之后,你的代码实际上是这样的:
record[indexRec].wordCnt = 1;
if find word fails
indexRec++;
cout << record[indexRec].wordCnt;
因此,当遇到一个新单词时(如果我对代码的理解正确的话!),你打印的是下一条记录。一种解决方法是:
if (flag==-1)
cout << record[indexRec-1].wordCnt;
else
cout << record[indexRec].wordCnt;
还有很多其他问题,比如 compareWord() 错得很离谱;你应该决定是真的想使用 C++,还是只想用 C 加上 std::cout;文件读取代码很奇怪;你同时包含了标准头文件的 C 和 C++ 版本,等等。但这些都是另一个问题要讨论的内容了!
相关文章:
- 文本文件中的单词链表
- 在指针的帮助下,文本文件中单词的频率
- 如何根据单词在文本中出现的概率输出单词
- 从字符串变量中逐字符读取单词
- 使用std::mt19937从字符串中返回一个随机单词
- 如何用for循环在c++中生成单词三角形
- 将数字转换为单词
- 使用if-else将数字转换为单词
- C++-字符串是否包含一个带有简单循环的单词
- 使用单词"not"作为C ++类的名称会导致VS2019错误
- 为什么这个程序返回最后一个单词而不是最长的单词?
- 3-3. 编写一个程序来计算每个不同单词在其输入中出现的次数
- C++为一串单词添加空格
- 在C++中查找(奇怪的)字符串中的单词
- 当字符串是某个单词时给出输出?
- 当您在此单词中搜索单词时调整字符数组的大小?
- 数每个单词的元音
- 我们如何在文本文件中找到C++中的几个单词?
- 使用 BFS 的单词梯 2
- 替换字符串中的单词,但忽略引号中的单词