C++：使用fgetc读取csv文件，并在分号上";"分隔单词

C++ : read csv file with fgetc and separate words on semicolon ";"

本文关键字：单词分隔 fgetc 使用读取 csv 文件 C++ 更新时间：2023-10-16

我必须在包含 5 个字段（int 、char[]、char[]、char[]、char[]、float）的 csv 文件中读取

，如下所示：

2345678;Meier;Hans;12.10.1985;2.4;      
1234567;Müller;Fritz;17.05.1990;1.9;

我必须将字段放在结构中，然后在一行完成后将结构放入结构类型的数组中......

为了学习效果，我们只允许使用低级编码，并且只使用像 fgetc、strcpy 这样的函数，没有字符串，只有 char[]...现在，我使算法可以逐个字符读取文本文件，但是在正确分隔它们，将它们再次放在一起并将它们正确分配给结构字段时遇到了问题。这是我的代码：

  #include <cstdlib>
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <string.h>
using namespace std;
int main(int argc, char **argv)
{
    struct Stud{
        long matrnr;
        char vorname[30];
        char name[30];
        char datum[30];
        float note;
    };
    const int MAX = 30;
    Stud stud;  
    Stud mystud[30]; // <<-- Array of "Stud" type
    //memset((void*)mystud,0,sizeof(mystud) * sizeof(Stud));
    int wordCounter(0);
    int i(0); //thats the charCounter or index
    int studentCounter(0);
    char wort[MAX];
    //int matrnr;
    //char vorname[MAX];
    //char name[MAX];
    //char datum[MAX];
    //float note;

  FILE * pFile;
  int cnr(0); 

  pFile=fopen("studentendaten.txt","r");  
  if (pFile==nullptr) 
  {
      perror ("Fehler beim öffnen der Datei");
  }
  else
  {       
    while (cnr != EOF) 
    {       
        (cnr=fgetc(pFile)) ;

        if ((char)cnr == 'n') {
            mystud[studentCounter] = stud;
            studentCounter++;                       
            continue;           
        }
        if ((char)cnr == ';') { 
            wort[i] = ''; 
            switch (wordCounter % 5) {
                case 0:             
                stud.matrnr = atol(wort);
                break;
                case 1:
                strcpy(stud.name, wort);
                break;
                case 2:
                strcpy(stud.vorname, wort);
                break;
                case 3:
                strcpy(stud.datum,wort);
                break;
                case 4:
                stud.note = atof(wort); 
                break;
            }       
            wordCounter++;          
            i = 0;
            continue;
        }
        if (wordCounter %  5 == 0 && (char)cnr != ';') {        
        wort[i] = (char)cnr;
        i++;
        //stud.matrnr = atol(wort);
        }           
        if (wordCounter % 5 == 1) {
            wort[i] =  (char)cnr;
            i++;
        //strcpy(stud.name, wort);
        }
        if (wordCounter % 5 == 2) {
            wort[i] = (char)cnr;
            i++;
            //strcpy(stud.vorname, wort);
        }
        if (wordCounter % 5 == 3) {
            wort[i] = (char)cnr;
            i++;
            //strcpy(stud.datum,wort);
        }
        if (wordCounter % 5 == 4) {
            wort[i] = (char)cnr;
            i++;
            //stud.note = atof(wort);                       
        }
    }   

    fclose (pFile);
}
for (int i(0) ; i <= studentCounter; i++) {
cout <<mystud[i].matrnr << "    " << mystud[i].name << "    " << mystud[i].vorname <<"    " 
<< mystud[i].datum <<"    " << mystud[i].note << endl;
  //printf("%5ld        %5s      %5s     %5s     %5f     n",mystud[i].matrnr,mystud[i].name,mystud[i].vorname,mystud[i].datum,mystud[i].note);
}
    return 0;
}

我不确定这是否与错误的增量变量有关，或者我没有在我的 wort[] 数组末尾放置"\0"的事实。因此无法识别我的数组的末尾？如果是这样，我该怎么做，不知道终点到底在哪里......？（我不知道单词的长度。

编辑：我再次更新了我的代码，唯一让我感到困惑的是最后一行没有被正确解析，它显示了一些垃圾，而且我在我的代码中看不到错误......

2345678;Meier;Hans;12.10.1985;2.4;      
1234567;Müller;Fritz;17.05.1990;1.9;
8392019;Thomas;Kretschmer;28.3.1920;2.5;
3471144;Mensch;Arbeit;29.2.2013;4.5;
2039482;Test;Test;30.20.2031;2.0;
7584932;Bau;Maschine;02.02.2010;2.3;
2345678;Meier;Hans;12.10.1985;2.4;      
1234567;Müller;Fritz;17.05.1990;1.9;
8392019;Thomas;Kretschmer;28.3.1920;2.5;
3471144;Mensch;Arbeit;29.2.2013;4.5;
2039482;Test;Test;30.20.2031;2.0;
7584932;Bau;Maschine;02.02.2010;2.3;
2345678;Meier;Hans;12.10.1985;2.4;      
1234567;Müller;Fritz;17.05.1990;1.9;
8392019;Thomas;Kretschmer;28.3.1920;2.5;
3471144;Mensch;Arbeit;29.2.2013;4.5;
2039482;Test;Test;30.20.2031;2.0;
7584932;Bau;Maschine;02.02.2010;2.3;
2345678;Meier;Hans;12.10.1985;2.4;      
1234567;Müller;Fritz;17.05.1990;1.9;
8392019;Thomas;Kretschmer;28.3.1920;2.5;
3471144;Mensch;Arbeit;29.2.2013;4.5;
2039482;Test;Test;30.20.2031;2.0;
7584932;Bau;Maschine;02.02.2010;2.3;

建议：使用case结构进行解析，并让自己成为一个"copyToSemicolon"函数：然后你可以写这样的东西

sIndexCount = 0;
char temp[50];
while((cnr=fgetc(pFile)) != EOF) {
  offset = 0;
  for(var = 0; var < 5; var++ {
    switch(var) {
    case 0:
      offset = copyToSemicolon(temp, cnr, offset) + 1;
      stud.matrnr = atoi(temp);
      break;
    case 1:
      offset = copyToSemicolon(mystud[sIndexCount].vorname, cnr, offset) + 1;
      break;
    ... etc
    }
  }
  sIndexCount++;
  if(sIndexCount == 50) break;  // in case the input file is longer than our structure
}

你需要一个函数copyToSemicolon，它将两个char*指针作为输入，并从第二个字符串（从 offset 开始）复制字符，直到它到达分号或行尾 - 并返回它到达的偏移量（最后一个字符读取）。

int copyToSemicolon(char* dest, char* source, int offset) {
  while(source[offset] != ';' && source[offset] != 'n') {
    *dest = source[offset++];
    dest++;
  }
  return offset;
}

编辑strtok方法：

sIndexCount = 0;
char temp[50];
while((cnr=fgetc(pFile)) != EOF) {
  offset = 0;
  temp = strtok(cnr, ';');
  for(var = 0; var < 5; var++ {
    switch(var) {
    case 0:
      stud.matrnr = atoi(temp);
      break;
    case 1:
      strcpy(mystud[sIndexCount].vorname, strtok(NULL, ';'));
      break;
    ... etc
    case 4:
      mystud[sIndexCount].note = atof(strtok(NULL, 'n'));
    }
  }
  sIndexCount++;
  if(sIndexCount == 50) break;  // in case the input file is longer than our structure
}

我看到的一个问题是，您的代码一次复制或解析一个字符，这样当您读取2345678;Meier;Hans;12.10.1985;2.4;时，您首先将stud.matrnr设置为 2，然后是 23，然后是 234，然后是 2345，然后是 23456，然后是 234567，然后是 2345678。同样，对于stud.name，您首先将其设置为 M，然后是 Me，然后是 Mei，依此类推。我建议你以不同的方式思考问题。我会给你一些伪代码：

while (!eof) {
    get character from file
    if (character isn't ';' and isn't 'n') {
        copy character into buffer (increment buffer index)
    } else if (character is ';') {
        it's the end of a word.  Put it in its place - turn it to an int, copy it, whatever
        reset the buffer
    } else if (character is 'n') {
        it's the end of the last word, and the end of the line.  Handle the last word
        reset the buffer
        copy the structure
    }
}

这应该会让你的生活更轻松。您不会对数据进行太多更改，如果需要调试，可以单独关注每个部分。

一般来说，在编程中，第一步是确保你可以用你的母语说出你想做什么，然后更容易将其翻译成代码。你与你的实施很接近，你可以让它工作。只要确保您可以解释当您看到";"或""时应该发生什么。

由于您已将其标记为C++，因此应考虑使用 std::getline 从文件中读取行，使用 std::getline(file, text_before_semicolon, ';') 解析字段。

您还可以使用 std::istringstream 将文本行中的文本表示形式转换为内部数字格式。