从二进制文件中读取 double 值和 int 值

Reading double values and int values from binary file

本文关键字:int 读取 二进制文件      更新时间:2023-10-16

我有一个二进制文件,其格式如下:

# vtk DataFile Version 4.0
vtk output
BINARY
DATASET POLYDATA
POINTS 10000 double
?�T�����?����h�?�T�����?���� <-- 10000 double values (in binary format) follow separated by space and new line after every 9 values.

我想逐字节读取这个文件,以便把这些 double 值存储到数组中。下面的代码将这个文件加载到一个 char* 缓冲区中。现在我想知道接下来该怎么做?

#include<iostream>     
#include<fstream> 
#include<sstream>
#include<stdlib.h>     
#include<string>
using namespace std;
int main () {
  // Binary mode: the payload is raw bytes, so no newline translation may occur.
  std::ifstream is ("Data_binary.vtk", std::ifstream::binary);
  if (!is)
    return 0;  // the file could not be opened: nothing to do

  // Determine the file length by seeking to the end and asking for the offset.
  is.seekg (0, is.end);
  const std::streamoff end_pos = is.tellg();
  if (end_pos < 0) {
    // tellg() signals failure with -1; never size a buffer from that value.
    std::cout << "error: could not determine file length" << std::endl;
    return 1;
  }
  is.seekg (0, is.beg);

  // std::string owns the storage (released automatically) and keeps the
  // contents null-terminated, so no manual new[]/delete[] or terminator
  // bookkeeping is needed.
  const std::string::size_type length =
      static_cast<std::string::size_type>(end_pos);
  std::string buffer (length, '\0');

  std::cout << "Reading " << length << " characters... ";
  // Read the whole file as one block.
  is.read (&buffer[0], static_cast<std::streamsize>(length));
  if (is)
    std::cout << "all characters read successfully." << std::endl;
  else
    std::cout << "error: only " << is.gcount() << " could be read";

  // The stream is closed by its destructor when main returns.
  return 0;
}

在ASCII格式中,示例文件如下所示:

# vtk DataFile Version 4.0
vtk output
ASCII
DATASET POLYDATA
POINTS 18 double
.1 .2 .3 1.4 11.55 1 0 8e-03 5.6
1.02 2.2 3.3 .1 .5 0.001 4e-07 4.2 1.55

对于二进制文件,double 值以二进制形式存储。我想从二进制格式中还原出这些 double 值。

Use this function.

/*
 * Read one IEEE 754 double-precision (binary64) value from a stream and
 * decode it arithmetically, so the result does not depend on the host's
 * own floating-point representation or byte order.
 *
 *  fp        - the stream to read 8 bytes from
 *  bigendian - non-zero if the stream stores the most significant byte
 *              first, zero if it stores the least significant byte first
 *
 * Returns the decoded value. If fewer than 8 bytes can be read (end of
 * file or read error) a NaN is returned; use feof()/ferror() on fp to tell
 * that case apart from a NaN stored in the data.
 */
double freadieee754(FILE *fp, int bigendian)
{
    unsigned char buff[8];
    int i;
    double fnorm = 0.0;
    unsigned char temp;
    int sign;
    int exponent;
    double bitval;
    int maski, mask;
    int expbits = 11;
    int significandbits = 52;
    int shift;

    /* read the data, refusing to decode a truncated value */
    for (i = 0; i < 8; i++)
    {
        int c = fgetc(fp);
        if (c == EOF)
            return sqrt(-1.0);
        buff[i] = static_cast<unsigned char>(c);
    }

    /* the decoding below expects big-endian order: reverse if needed */
    if (!bigendian)
    {
        for (i = 0; i < 4; i++)
        {
            temp = buff[i];
            buff[i] = buff[8 - i - 1];
            buff[8 - i - 1] = temp;
        }
    }

    /* top bit of the first byte is the sign */
    sign = buff[0] & 0x80 ? -1 : 1;
    /* exponent in raw (biased) format: 7 bits of byte 0, 4 bits of byte 1 */
    exponent = ((buff[0] & 0x7F) << 4) | ((buff[1] & 0xF0) >> 4);

    /*
     * read in the mantissa, starting at the low nibble of byte 1.
     * The top bit is worth 0.5 and each successive bit half the previous.
     */
    bitval = 0.5;
    maski = 1;
    mask = 0x08;
    for (i = 0; i < significandbits; i++)
    {
        if (buff[maski] & mask)
            fnorm += bitval;
        bitval /= 2.0;
        mask >>= 1;
        if (mask == 0)
        {
            mask = 0x80;
            maski++;
        }
    }

    /* handle zero specially, keeping the sign so -0.0 round-trips */
    if (exponent == 0 && fnorm == 0)
        return sign * 0.0;

    shift = exponent - ((1 << (expbits - 1)) - 1); /* exponent = shift + bias */

    /* nans have exp 1024 and non-zero mantissa */
    if (shift == 1024 && fnorm != 0)
        return sqrt(-1.0);

    /* infinity has exp 1024 and a zero mantissa */
    if (shift == 1024 && fnorm == 0)
    {
#ifdef INFINITY
        return sign == 1 ? INFINITY : -INFINITY;
#else
        return (sign * 1.0) / 0.0;
#endif
    }

    /* normalised numbers carry an implicit leading 1 */
    if (shift > -1023)
        return ldexp(fnorm + 1.0, shift) * sign;

    /*
     * denormalised numbers (raw exponent 0, mantissa non-zero because zero
     * was returned above): no implicit leading 1 and a fixed scale of
     * 2^-1022.
     */
    return ldexp(fnorm, -1022) * sign;
}

代码看起来很长,但它只是一个可以直接复制粘贴的片段,用了它就不必再操心二进制浮点格式。无论主机采用何种浮点格式,它都能读取 IEEE 754 双精度值。另外还有一个与之配套的写入函数。

与其读入 char * 缓冲区,不如直接读入 double * 缓冲区。标准专门允许为此目的把指针转换为 char *。

vector<double> buffer;
buffer.resize(n);
is.read(reinterpret_cast<char *>(&buffer[0]), n * sizeof(buffer[0]));

您需要首先读取非二进制数据,以便文件指针位于二进制数据的开头。它被定义为紧跟在标头中最后一个字段的换行符之后。

规范似乎没有强制要求使用小端或大端格式,它希望您根据文件的来源知道。如果你幸运的话,格式将与你用来读取文件的机器匹配,无需转换。否则,您将需要进行字节交换:

// Reverse the byte order of the double that p points to, in place.
// Walks inward from both ends of the 8-byte representation, exchanging
// the outermost pair each step.
void ByteSwap(double * p)
{
    char * bytes = reinterpret_cast<char *>(p);
    for (int lo = 0, hi = 7; lo < hi; ++lo, --hi)
    {
        std::swap(bytes[lo], bytes[hi]);
    }
}