一个文件被读取了多少

How much of a file has been read

本文关键字:读取 多少 文件 一个      更新时间:2023-10-16

我有一个程序,它读取一个10M字节的文件,并以4K为一块边读取边处理数据。测试通常需要1到2分钟。但在某些情况下,程序运行会超过10分钟,此时测试会被终止并生成核心转储(core dump)。以下是读取文件的代码:

    // Reads "data.out" in binary mode in 4096-byte chunks and passes each
    // chunk, together with the number of bytes actually read, to process_data().
    string filename("data.out");
    ifstream ifs;
    vector<char> buf(4096);  // read buffer; its size is the chunk size requested per read()
    ifs.open(filename,  ios::in | ios::binary);
    if (!ifs.is_open()) {
            cout << "ERROR : " << filename << "can't be opened." << endl;
            // VERIFY is defined elsewhere; presumably it aborts when the condition is false — TODO confirm.
            VERIFY(ifs.is_open());
    }
    // NOTE(review): this loop exits only once eofbit is set. If a read fails
    // with failbit/badbit but without eofbit, eof() stays false, every later
    // read() returns immediately with gcount() == 0, and the loop never ends.
    while (!ifs.eof()) {
            // Attempts to read buf.size() bytes; the stream state is not checked here.
            ifs.read(buf.data(), buf.size());     <======== Line 1
            // gcount() is the byte count of the read above; it may be 0.
            process_data (buf.data(), ifs.gcount());   <======== Line 2
    }
    ifs.close();

我有两个核心转储(core),分别显示程序卡在上面标记的 Line 1 和 Line 2。

核心转储1(卡在 Line 1)的回溯(bt)顶部:

#0  0x00007f942a462175 in std::istream::read (this=0x7fff4ce69de0,
__s=0x9120000 "324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324"..., __n=4096) at /home/packages/gcc/4.7/w/gcc-4.7-4.7.2/build/x86_64-linux-gnu/libstdc++-v3/include/bits/istream.tcc:651

核心转储2(卡在 Line 2)的回溯(bt)顶部:

#0  0x00000000004375f3 in std::__addressof<char> (__r=@0x7fa3176391a6: -128 '200') at /usr/include/c++/4.7/bits/move.h:47
#1  0x0000000000436cd4 in std::vector<char, std::allocator<char> >::data (this=0x7fff346ad770)
at /usr/include/c++/4.7/bits/stl_vector.h:859

最初,根据核心转储1,我认为问题在于ifs.read()花费了很长时间。但看到第二个核心转储之后,我认为这个问题可能与vector::data()有关。

有没有一种方法,可以通过检查ifstream中存储的某些字段(例如文件偏移量)来判断文件已经被读取了多少?

我本不想贴出这么大的结构体转储内容,但希望有人能告诉我,如何从这份转储中判断这10MB的文件已经读取了多少。

(gdb) p ifs
$3 = warning: can't find linker symbol for virtual table for `std::basic_ifstream<char, std::char_traits<char> >' value
{
  <std::basic_istream<char, std::char_traits<char> >> = {
    <std::basic_ios<char, std::char_traits<char> >> = {
      <std::ios_base> = {
        _vptr.ios_base = 0xfbfcc0,
        static boolalpha = std::_S_boolalpha,
        static dec = std::_S_dec,
        static fixed = std::_S_fixed,
        static hex = std::_S_hex,
        static internal = std::_S_internal,
        static left = std::_S_left,
        static oct = std::_S_oct,
        static right = std::_S_right,
        static scientific = std::_S_scientific,
        static showbase = std::_S_showbase,
        static showpoint = std::_S_showpoint,
        static showpos = std::_S_showpos,
        static skipws = std::_S_skipws,
        static unitbuf = std::_S_unitbuf,
        static uppercase = std::_S_uppercase,
        static adjustfield = std::_S_adjustfield,
        static basefield = std::_S_basefield,
        static floatfield = std::_S_floatfield,
        static badbit = std::_S_badbit,
        static eofbit = std::_S_eofbit,
        static failbit = std::_S_failbit,
        static goodbit = std::_S_goodbit,
        static app = std::_S_app,
        static ate = std::_S_ate,
        static binary = std::_S_bin,
        static in = std::_S_in,
        static out = std::_S_out,
        static trunc = std::_S_trunc,
        static beg = std::_S_beg,
        static cur = std::_S_cur,
        static end = std::_S_end,
        _M_precision = 6,
        _M_width = 0,
        _M_flags = 4098,
        _M_exception = std::_S_goodbit,
        _M_streambuf_state = 5,
        _M_callbacks = 0x0,
        _M_word_zero = {
          _M_pword = 0x0,
          _M_iword = 0
        },
        _M_local_word = {{
            _M_pword = 0x0,
            _M_iword = 0
          }, {
            _M_pword = 0x0,
            _M_iword = 0
          }, {
            _M_pword = 0x0,
            _M_iword = 0
          }, {
            _M_pword = 0x0,
            _M_iword = 0
          }, {
            _M_pword = 0x0,
            _M_iword = 0
          }, {
            _M_pword = 0x0,
            _M_iword = 0
          }, {
            _M_pword = 0x0,
            _M_iword = 0
          }, {
            _M_pword = 0x0,
            _M_iword = 0
          }},
        _M_word_size = 8,
        _M_word = 0x7fff4ce69f20,
        _M_ios_locale = {
          static none = 0,
          static ctype = 1,
          static numeric = 2,
          static collate = 4,
          static time = 8,
          static monetary = 16,
          static messages = 32,
          static all = 63,
          _M_impl = 0x7f942a6e3aa0,
          static _S_classic = 0x7f942a6e3aa0,
          static _S_global = 0x7f942a6e3aa0,
          static _S_categories = 0x7f942a6c86a0,
          static _S_once = 2
        }
      },
      members of std::basic_ios<char, std::char_traits<char> >:
      _M_tie = 0x0,
      _M_fill = 0 '00',
      _M_fill_init = false,
      _M_streambuf = 0x7fff4ce69df0,
      _M_ctype = 0x7f942a6e3d20,
      _M_num_put = 0x7f942a6e4040,
      _M_num_get = 0x7f942a6e4030
    },
    members of std::basic_istream<char, std::char_traits<char> >:
    _vptr.basic_istream = 0xfbfc98,
    _M_gcount = 0
  },
  members of std::basic_ifstream<char, std::char_traits<char> >:
  _M_filebuf = warning: can't find linker symbol for virtual table for `std::basic_filebuf<char, std::char_traits<char> >' value
{
    <std::basic_streambuf<char, std::char_traits<char> >> = {
      _vptr.basic_streambuf = 0xfc0a70,
      _M_in_beg = 0x6306000 "317317317......320320320320"...,
      _M_in_cur = 0x6307fff "",
      _M_in_end = 0x6307fff "",
      _M_out_beg = 0x0,
      _M_out_cur = 0x0,
      _M_out_end = 0x0,
      _M_buf_locale = {
        static none = 0,
        static ctype = 1,
        static numeric = 2,
        static collate = 4,
        static time = 8,
        static monetary = 16,
        static messages = 32,
        static all = 63,
        _M_impl = 0x7f942a6e3aa0,
        static _S_classic = 0x7f942a6e3aa0,
        static _S_global = 0x7f942a6e3aa0,
        static _S_categories = 0x7f942a6c86a0,
        static _S_once = 2
      }
    },
    members of std::basic_filebuf<char, std::char_traits<char> >:
    _M_lock = {
      __data = {
        __lock = 0,
        __count = 0,
        __owner = 0,
        __nusers = 0,
        __kind = 0,
        __spins = 0,
        __list = {
          __prev = 0x0,
          __next = 0x0
        }
      },
      __size = '00' <repeats 39 times>,
      __align = 0
    },
    _M_file = {
      _M_cfile = 0x70186c0,
      _M_cfile_created = true
    },
    _M_mode = 12,
    _M_state_beg = {
      __count = 0,
      __value = {
        __wch = 0,
        __wchb = "000000"
      }
    },
    _M_state_cur = {
      __count = 0,
      __value = {
        __wch = 0,
        __wchb = "000000"
      }
    },
    _M_state_last = {
      __count = 0,
      __value = {
        __wch = 0,
        __wchb = "000000"
      }
    },
    _M_buf = 0x6306000 "317317317317317......320320320320320"...,
    _M_buf_size = 8192,
    _M_buf_allocated = true,
    _M_reading = true,
    _M_writing = false,
    _M_pback = 0 '00',
    _M_pback_cur_save = 0x0,
    _M_pback_end_save = 0x0,
    _M_pback_init = false,
    _M_codecvt = 0x7f942a6e3f60,
    _M_ext_buf = 0x0,
    _M_ext_buf_size = 0,
    _M_ext_next = 0x0,
    _M_ext_end = 0x0
  }
}
(gdb)

谢谢,艾哈迈德。

不要用 eof() 作为循环条件。

while (ifs.read(buf.data(), buf.size())) {
  size_t read = ifs.gcount();
  if(read==0) break; // don't trust passing `0` to `process_data`:
  process_data(buf.data(), read);
  if (read<buf.size()) break; // if we finished, end.
}

判断是否到达输入末尾的最好方法,是直接尝试执行I/O操作,并观察它是否失败。在这种情况下,我们先读取,再统计实际读到的字节数;当读到0个字节,或读到的字节数少于请求的字节数时,我们就认为没有更多数据了。

如果I/O操作在ifs上设置了任何失败标志位(failbit或badbit),循环同样会结束。