使用排序C++进行字数统计

Word Count with Sorting C++

本文关键字：统计排序 C++ 更新时间：2023-10-16

这是我数据结构课上的一道题。我完全不知道该如何下手，有人可以给一些提示吗？

如何停止程序并确保输出可以正确输出？
我是否必须处理映射？

我从教授那里得到的试卷

以下是我的编码示例：

#include <iostream>
#include <string>
using namespace std;
// Asker's original attempt, kept verbatim: reads 100 words into a fixed-size
// array and tries to sort and count them by comparing each slot with the next.
// NOTE(review): Sort and count are defined after main in this snippet and no
// prototype is visible before the calls below.
int main()
{
string s [100];
for (int i = 0; i < 100; i++) {
cin >> s[i];
// NOTE(review): s[i+1] has not been read yet at this point, and for i == 99
// it indexes one past the end of the 100-element array.
s[i] = Sort(s[i], s[i+1]);
}

// Print every slot followed by the result of count() for that slot and the
// next one. NOTE(review): nothing separates the word from the number, and
// s[i+1] is again out of bounds when i == 99.
for (int i = 0; i < 100; i++) {
cout << s[i] << count (s[i],s[i+1]) <<endl;
}
return 0;
}

/*
 * Returns the lexicographically smaller of the two strings.
 *
 * The original swapped its by-value parameters when current > next and then
 * fell off the end of the function without returning, which is undefined
 * behaviour. Both paths now return a value. Because the parameters are
 * copies, the caller's strings are never modified.
 */
std::string Sort(std::string current, std::string next) {
    if (next < current) {
        return next;
    }
    return current;
}
/*
 * Returns 1 when word and Nextword are equal and 0 otherwise.
 *
 * The original incremented an uninitialized local and had no return statement
 * on the "equal" path, so its result was undefined in every case.
 * NOTE(review): 1/0 is the most literal reading of the original "count++ from
 * nothing" logic; confirm against how the caller is meant to use it.
 */
int count(std::string word, std::string Nextword) {
    return (word == Nextword) ? 1 : 0;
}

与其尝试使用基本的字符串数组，不如使用某种方法来跟踪每个单词的出现次数。您可以使用简单的struct或std::map。在任何一种情况下，您都可以关联一个单词以及将其视为单个对象的次数。如果随后将包含单词和 count 的所有结构收集到std::vector而不是基本数组中，则可以提供一个简单的比较函数，以使用std::sort按单词对向量进行排序，同时保留计数与每个单词的关联。

采用使用 struct 的方法，您可以创建一个包含 std::string 和计数器的结构体，例如：

/* Associates one word with the number of times it has been seen.
 * Field order matters: entries are created with brace initialization
 * of the form {word, 1}. */
struct wordcount {      /* struct holding word and count */
std::string word;       /* the word's text */
size_t count;           /* occurrences counted so far */
};

对于一个比较函数，要按word对wordcount的向量进行排序，可以使用一个简单的：

/* ordering predicate for std::sort: true when a's word sorts before b's word */
bool cmp (const wordcount& a, const wordcount& b)
{
    const bool aComesFirst = a.word.compare (b.word) < 0;
    return aComesFirst;
}

使用结构体时，您需要遍历到目前为止已经见过的单词，以确定是只需递增现有单词的 count，还是需要向向量中添加一个 count = 1 的新 wordcount 结构体。为了让这个函数更有用，如果单词已经存在，可以让它返回该单词在向量中的索引（大致相当于数组中的下标）；如果不存在，则返回 -1。

/* interate over each struct in vector words to find word */
int findword (const std::vector<wordcount>& words, 
const std::string& word)
{
for (auto w = words.begin(); w != words.end(); w++)
if (w->word == word)            /* if word found */
return w - words.begin();   /* return index */
return -1;  /* return word not found */
}

根据返回值，您可以递增该索引处的 count，也可以向向量中添加一个新的 wordcount。使用上述方法的简短实现如下：

/*
 * Counts how often each whitespace-separated word occurs in the file named by
 * argv[1] and prints one "word count" line per distinct word, sorted by word.
 * Returns 1 when no filename is given or the file cannot be opened.
 */
int main (int argc, char **argv) {

    if (argc < 2) { /* validate filename given as argument */
        std::cerr << "error: insufficient input.\n"
                  << "usage: " << argv[0] << " <filename>\n";
        return 1;
    }

    std::string word;                   /* string to hold word */
    std::vector<wordcount> words {};    /* vector of struct wordcount */
    std::ifstream f (argv[1]);          /* input-only stream: works on read-only files */

    if (!f.is_open()) {                 /* validate file open for reading */
        std::cerr << "error: file open failed '" << argv[1] << "'.\n";
        return 1;
    }

    while (f >> word) {                 /* read each word from file */
        int idx = findword (words, word);   /* index of existing entry or -1 */
        if (idx != -1) {                /* word already seen */
            words[idx].count++;         /* increment count */
        }
        else {  /* otherwise new word */
            wordcount tmp = {word, 1};  /* initialize struct */
            words.push_back (tmp);      /* add to vector */
        }
    }

    std::sort (words.begin(), words.end(), cmp);    /* sort by words */

    for (const auto& w : words)   /* output results */
        std::cout << w.word << " " << w.count << '\n';

    return 0;
}

如果你把上面的所有部分放在一起，你会有：

#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <algorithm>
/* Associates one word with the number of times it has been seen.
 * Field order matters: main creates entries with the brace initializer
 * {word, 1}. */
struct wordcount {      /* struct holding word and count */
std::string word;       /* the word's text */
size_t count;           /* occurrences counted so far */
};
/* ordering predicate for std::sort: true when a's word sorts before b's word */
bool cmp (const wordcount& a, const wordcount& b)
{
    const bool aComesFirst = a.word.compare (b.word) < 0;
    return aComesFirst;
}
/* interate over each struct in vector words to find word */
int findword (const std::vector<wordcount>& words, 
const std::string& word)
{
for (auto w = words.begin(); w != words.end(); w++)
if (w->word == word)            /* if word found */
return w - words.begin();   /* return index */
return -1;  /* return word not found */
}
/*
 * Counts how often each whitespace-separated word occurs in the file named by
 * argv[1] and prints one "word count" line per distinct word, sorted by word.
 * Returns 1 when no filename is given or the file cannot be opened.
 */
int main (int argc, char **argv) {

    if (argc < 2) { /* validate filename given as argument */
        std::cerr << "error: insufficient input.\n"
                  << "usage: " << argv[0] << " <filename>\n";
        return 1;
    }

    std::string word;                   /* string to hold word */
    std::vector<wordcount> words {};    /* vector of struct wordcount */
    std::ifstream f (argv[1]);          /* input-only stream: works on read-only files */

    if (!f.is_open()) {                 /* validate file open for reading */
        std::cerr << "error: file open failed '" << argv[1] << "'.\n";
        return 1;
    }

    while (f >> word) {                 /* read each word from file */
        int idx = findword (words, word);   /* index of existing entry or -1 */
        if (idx != -1) {                /* word already seen */
            words[idx].count++;         /* increment count */
        }
        else {  /* otherwise new word */
            wordcount tmp = {word, 1};  /* initialize struct */
            words.push_back (tmp);      /* add to vector */
        }
    }

    std::sort (words.begin(), words.end(), cmp);    /* sort by words */

    for (const auto& w : words)   /* output results */
        std::cout << w.word << " " << w.count << '\n';

    return 0;
}

示例使用/输出

针对示例输入运行，您将得到如下输出：

$ ./bin/wordcount dat/webpage.txt
Computer 1
algorithm 1
analysis 1
and 1
computer 3
department 1
design 2
quantum 1
science 1
system 1

解决这类问题的方法有很多很多。也可以只用普通的数组来完成，但那样您需要在若干个单独的数组中分别记录单词和计数，然后编写自己的排序（或者对保存单词的那个数组使用 C 的 qsort，再借助原始单词数组和计数数组的副本，把计数映射回排序后的输出）。无论采用哪种方法，关键在于：您必须有办法保留排序前每个单词与其出现次数之间的关联，并在排序之后把计数映射回正确的单词。使用把单词和计数关联为一个整体的对象，就可以解决这个关联问题。

请仔细阅读以上内容，把它当作解决这个问题的一种思路。如果您还有其他问题，请告诉我。

std::map 可以同时为您完成排序和计数：

#include <map>
#include <iostream>
using std::cin;
using std::cout;
using std::endl;
using std::string;
// Reads whitespace-separated words from standard input until extraction fails,
// tallies each one in an ordered map, then prints "word count" per distinct
// word in the map's key order.
int main() {
    std::map<string, size_t> tally;

    string token;
    while (cin >> token) {
        tally[token] += 1;   // operator[] creates a zero entry on first sight
    }

    for (const auto& entry : tally) {
        cout << entry.first << " " << entry.second << endl;
    }
}

echo -ne "Computer system\ncomputer design\nalgorithm design and analysis\nquantum computer\ncomputer science department" | ./a.out
Computer 1
algorithm 1
analysis 1
and 1
computer 3
department 1
design 2
quantum 1
science 1
system 1

复制粘贴此代码，并用多个字符串进行测试。解决方案：

#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
using namespace std;
// Splits i_str into the pieces that lie between occurrences of i_delim.
//
// - Adjacent delimiters produce an empty piece between them.
// - A delimiter at the very end does not produce a trailing empty piece.
// - An empty i_delim would match at every position without advancing (the
//   original looped forever); it is treated as "no delimiter", so the whole
//   string is returned as a single piece (or nothing for an empty string).
//
// Returns the pieces in the order they appear in i_str.
std::vector<std::string> split(const std::string& i_str, const std::string& i_delim)
{
    std::vector<std::string> result;

    if (i_delim.empty())
    {
        if (!i_str.empty())
            result.push_back(i_str);
        return result;
    }

    std::string::size_type startIndex = 0;
    std::string::size_type found = i_str.find(i_delim);
    while (found != std::string::npos)
    {
        result.push_back(i_str.substr(startIndex, found - startIndex));
        startIndex = found + i_delim.size();    // resume just past this delimiter
        found = i_str.find(i_delim, startIndex);
    }

    // Whatever follows the last delimiter (if anything) is the final piece.
    if (startIndex != i_str.size())
        result.push_back(i_str.substr(startIndex));

    return result;
}
// Prints a two-column table ("String<TAB>Count") listing every distinct
// non-empty string in cal with the number of times it occurs, in order of
// first appearance.
//
// cal is taken by value on purpose: duplicates are blanked out in the local
// copy so that they are skipped when the outer loop reaches them.
void countFunc(std::vector<std::string> cal) {
    std::vector<std::pair<std::string, int> > result;

    for (std::vector<std::string>::size_type i = 0; i < cal.size(); i++)
    {
        if (cal[i].empty())     // empty input or an already-counted duplicate
        {
            continue;
        }
        const std::string temp = cal[i];
        int ncount = 1;
        for (std::vector<std::string>::size_type j = i + 1; j < cal.size(); j++)
        {
            if (temp == cal[j])
            {
                ncount++;
                cal[j].clear(); // mark as counted
            }
        }
        result.push_back(std::make_pair(temp, ncount));
    }

    std::cout << "String\tCount\n";
    for (std::vector<std::pair<std::string, int> >::size_type i = 0; i < result.size(); i++)
    {
        std::cout << result[i].first << "\t" << result[i].second << '\n';
    }
}
// Asks for a number of lines, reads that many lines of text, splits them on
// single spaces, sorts all words and prints how often each one occurs.
// Returns 1 when the line count cannot be read or is negative.
int main()
{
    std::vector<std::string> str;
    std::vector<std::string> res;

    std::cout << "Enter the Number Line :";
    int size = 0;
    if (!(std::cin >> size) || size < 0)
    {
        std::cerr << "error: invalid number of lines.\n";
        return 1;
    }

    // One extra getline is needed because "cin >> size" leaves the rest of the
    // first input line (normally just the newline) in the stream.
    for (int i = 0; i <= size; i++)
    {
        std::string s;
        if (!std::getline(std::cin, s))
        {
            break;      // end of input before all lines were supplied
        }
        if (!s.empty())
        {
            str.push_back(s);
        }
    }

    // Iterate over the lines actually stored: blank lines are skipped above,
    // so str may hold fewer than size entries and indexing up to size would
    // run past the end of the vector.
    for (std::vector<std::string>::size_type i = 0; i < str.size(); i++)
    {
        const std::vector<std::string> temp = split(str[i], " ");
        res.insert(res.end(), temp.begin(), temp.end());
    }

    std::sort(res.begin(), res.end());
    countFunc(res);
    return 0;
}

如果无法使用 STL，这意味着如果您不能使用 vector 等，那么这个简单的解决方案可能会有所帮助。同样在您的代码中，您已将字符串数组的大小声明为 100，我认为您这样做是因为根据您的问题陈述，数组的最大大小不能作为输入。

您需要一种排序方法（我使用了冒泡排序）。对数组排序之后，只需遍历数组并统计单词：把下一个单词与当前单词进行比较，就能轻松完成计数。请注意，一旦您输入空行，下面的代码就会停止接受输入。

另外，我建议您在C++中学习STL，它也将帮助您进行竞争性编码。

#include <iostream>
using namespace std;
//Forward declaration of functions 
void sortStringArray(int index,string* s);
void printWordCount(int index,string array[]);
// Stores word in words[count] and advances count when there is still room.
// Returns false (storing nothing) once count has reached capacity.
static bool storeWord(std::string words[], int& count, int capacity, const std::string& word)
{
    if (count >= capacity) {
        return false;
    }
    words[count++] = word;
    return true;
}

// Reads lines from standard input until an empty line or end of input, splits
// them into words on spaces and tabs, then sorts the words and prints each
// distinct word with its number of occurrences.
int main()
{
    const int kMaxWords = 100;      //capacity of the fixed word array
    std::string s[kMaxWords];
    std::string input;              //current input line
    int index = 0;                  //number of array slots filled so far
    bool hasRoom = true;            //becomes false when a word no longer fits

    while (hasRoom && std::getline(std::cin, input) && !input.empty()) {
        std::string temp;           //word currently being assembled
        for (std::string::size_type i = 0; hasRoom && i < input.size(); i++) {
            const char c = input[i];
            if (c != ' ' && c != '\t') {
                temp += c;
            }
            else if (!temp.empty()) {   //separator ends the current word; runs of separators are skipped
                hasRoom = storeWord(s, index, kMaxWords, temp);
                temp.clear();
            }
        }
        if (hasRoom && !temp.empty()) { //last word of the line has no trailing separator
            hasRoom = storeWord(s, index, kMaxWords, temp);
        }
    }

    if (!hasRoom) {
        std::cerr << "warning: more than " << kMaxWords
                  << " words entered; the extra words were ignored\n";
    }

    //Step 1: sort the filled part of the array in place
    sortStringArray(index, s);

    //Step 2: print each word with its count, in sorted order
    printWordCount(index, s);

    return 0;
}
/*
Sorts the first "index" entries of the string array s into ascending order,
in place, using bubble sort. Entries at position "index" and beyond are not
touched. Nothing is returned because the caller's array is modified directly.
*/
void sortStringArray(int index,string* s){
    int pass = 0;
    while (pass < index - 1) {
        // After each pass the largest remaining string has settled at the end,
        // so one pair fewer needs to be compared.
        const int pairsToCheck = index - pass - 1;
        for (string* left = s; left != s + pairsToCheck; ++left) {
            string* right = left + 1;
            if (*right < *left) {
                string held = *left;
                *left = *right;
                *right = held;
            }
        }
        ++pass;
    }
}

/*
Prints one "word count" line for every run of equal adjacent strings in the
first "index" entries of array. The array is expected to be sorted already so
that equal words are adjacent.

Fixes over the original:
 - the final run was printed with count+1 although count already included
   every element of the run (e.g. "b b" was reported as 3);
 - index values of 0 and 1 read array[-1];
 - the last line is now terminated with a newline like all the others.
*/
void printWordCount(int index,std::string array[]){
    if(index<=0){                   //nothing stored, nothing to print
        return;
    }
    int count=1;                    //length of the run that ends at the previous element
    for(int i=1;i<index;i++){
        if(array[i]==array[i-1]){   //same word again: extend the current run
            count++;
        }
        else{                       //word changed: report the finished run and start a new one
            std::cout<<array[i-1]<<" "<<count<<'\n';
            count=1;
        }
    }
    //the last run is never closed inside the loop, so report it here
    std::cout<<array[index-1]<<" "<<count<<'\n';
}

输出：

Computer system
computer design
algorithm design and analysis
quantum computer
computer science department
Computer 1
algorithm 1
analysis 1
and 1
computer 3
department 1
design 2
quantum 1
science 1
system 1
--------------------------------
Process exited after 1.89 seconds with return value 0