C和C++文件读取性能的比较

Comparison of C and C++ file read performance

本文关键字：性能比较、读取文件、C++　更新时间：2023-10-16

我最近需要逐行读取一个规模不小的文件。为了提高性能，我决定遵循我得到的一些建议，即fstream比C风格的I/O慢得多。然而，尽管我尽了最大的努力，还是没能重现那种悬殊的差异（约25%，差距不小，但并不夸张）。我还试用了fscanf，发现它慢了一个数量级。

我的问题是：究竟是什么在底层导致了这种性能差异？为什么fscanf的表现如此糟糕？

以下是我的代码（使用TDM GCC 5.1.0编译）：

// Minimal owning wrapper around a C stdio stream.
// Opens the file in the constructor and closes it in the destructor.
// `fp` is null when fopen fails; callers must check it before use.
struct file
{
    // str:  path handed to fopen.
    // mode: fopen mode string (e.g. "r").
    file(const char* str, const char* mode)
        : fp(fopen(str, mode)){}

    // Passing a null stream to fclose is undefined behavior, so only close
    // a stream that was actually opened.
    ~file()
    {
        if(fp != nullptr)
            fclose(fp);
    }

    // The handle is closed exactly once: copying would make two objects
    // close the same FILE*.
    file(const file&) = delete;
    file& operator=(const file&) = delete;

    FILE* fp;
};
// Number of bytes processed per chunk; readWord never reads at or beyond
// this index.
constexpr size_t bufsize = 256;

// Copies characters from buf, starting at index pos, to the location that
// word points at, advancing word by one for every character copied.
//
// pos:  index in buf at which to start scanning.
// word: write cursor into the caller's word storage (updated in place).
// buf:  chunk of at least bufsize readable bytes.
//
// Returns the index just past the terminating '\n' when one is found before
// index bufsize; the copied text is then NUL-terminated (the cursor is left
// on the terminator). Returns 0 when index bufsize is reached without a
// newline; the cursor then stays past the partial text so that a later call
// on the next chunk can append to it.
//
// NOTE: the capacity of the word storage is not known here, so the caller
// must guarantee it is large enough for the longest line.
auto readWord(int pos, char*& word, char* const buf)
{
    // Test the bound before touching buf[pos] so the scan never reads past
    // the end of the chunk.
    for(; pos < static_cast<int>(bufsize); ++word, ++pos)
    {
        if(buf[pos] == '\n')
        {
            *word = '\0';
            return pos + 1;
        }
        *word = buf[pos];
    }
    return 0;
}
// Benchmark body: reads "inC.txt" in bufsize-byte chunks with fread and
// splits every chunk into newline-terminated words via readWord.
//
// A word that straddles a chunk boundary is carried over: readWord returns 0
// and leaves the write cursor past the partial text, so the next chunk keeps
// appending to it.
//
// NOTE: word holds 40 bytes and readWord does not bound its writes, so the
// input must not contain lines of 40 characters or more.
void readFileC()
{
    file in{"inC.txt", "r"};
    if(in.fp == nullptr)
        return; // nothing to read; fread on a null stream is undefined

    // One spare byte holds a sentinel just past the valid data.
    char buf[bufsize + 1];
    char word[40];
    char* pw = word;

    for(;;)
    {
        const size_t sz = fread(buf, 1, bufsize, in.fp);
        const bool lastChunk = sz < bufsize;

        // Short (final) chunk: a '\n' sentinel stops the scan at the end of
        // the valid bytes and terminates an unterminated last line.
        // Full chunk: a non-newline byte makes the slot at index bufsize
        // harmless even if the scanner peeks at it.
        buf[sz] = lastChunk ? '\n' : '\0';

        int pos = 0;
        while(pos < static_cast<int>(sz))
        {
            const int nextPos = readWord(pos, pw, buf);
            if(nextPos == 0)
                break; // word continues in the next chunk; keep pw as is
            //use word here
            pw = word;
            pos = nextPos;
        }

        if(lastChunk)
        {
            // Only reachable with pending text when the file size is an
            // exact multiple of bufsize and the file lacks a final newline.
            if(pw != word)
            {
                *pw = '\0';
                //use word here
                pw = word;
            }
            break;
        }
    }
}
// Benchmark body: reads "inCline.txt" with fscanf, one whitespace-delimited
// token per call.
void readFileCline()
{
    file in{"inCline.txt", "r"};
    if(in.fp == nullptr)
        return; // nothing to read; fscanf on a null stream is undefined

    char word[40];
    // The field width keeps fscanf from writing past word: at most 39
    // characters plus the terminating NUL. Longer tokens are split across
    // iterations instead of overflowing the buffer.
    while(fscanf(in.fp, "%39s", word) == 1)
    {
        //use word here
    }
}
// Benchmark body: reads "inCpp.txt" line by line through an ifstream using
// getline until extraction fails.
void readFileCpp()
{
    std::ifstream input{"inCpp.txt"};
    for(std::string line; std::getline(input, line);)
    {
        //use word here
    }
}
// Calls fn `runs` times and returns the total elapsed steady_clock time in
// whole milliseconds. Durations are summed at the clock's native resolution
// and converted once, so sub-millisecond parts of individual runs are not
// discarded.
static long long timeRuns(void (*fn)(), int runs)
{
    std::chrono::steady_clock::duration total{};
    for(int i = 0; i < runs; ++i)
    {
        const auto start = std::chrono::steady_clock::now();
        fn();
        total += std::chrono::steady_clock::now() - start;
    }
    return std::chrono::duration_cast<std::chrono::milliseconds>(total).count();
}

// Times the three readers in turn and prints one total (in milliseconds)
// per reader.
int main()
{
    static constexpr int runs = 1;

    const auto countC = timeRuns(readFileC, runs);
    std::cout << "countC: " << countC << '\n';

    const auto countCline = timeRuns(readFileCline, runs);
    std::cout << "countCline: " << countCline << '\n';

    const auto countCpp = timeRuns(readFileCpp, runs);
    std::cout << "countCpp: " << countCpp << '\n';
}

用1070KB大小的文件运行，结果如下：

countC: 7
countCline: 61
countCpp: 9

编辑：三个测试用例现在分别读取不同的文件，并且各只运行一次。结果正好是此前读取同一文件20次所得结果的1/20。即使调换它们的执行顺序，countC也始终优于countCpp。

fscanf必须解析格式字符串参数，找出其中所有可能的%符号并加以解释，还要处理宽度说明符、转义字符、表达式等。它基本上必须逐个字符地遍历格式参数，并应对一大批可能出现的格式。即使你的格式像"%s"一样简单，与其他技术相比，它仍然有大量的开销；那些技术只需抓取一堆字节，几乎没有解释或转换方面的开销。