使用boost::mapped_region增长文件以供进一步写入?

Grow a file for further writing to using boost::mapped_region?

本文关键字:进一步 文件 mapped boost region 使用      更新时间:2023-10-16

我需要创建并写入内存映射文件。有时需要增大文件。

我创建了以下小测试,我创建了一个文件,使用boost::mapped_region映射它并写入其中。

这一切都按预期工作:

#include <fstream>
#include <boost/interprocess/file_mapping.hpp>
#include <boost/interprocess/mapped_region.hpp>
namespace bip = boost::interprocess;
// Creates (or truncates) the file `fn` and sizes it to hold one 64-bit header
// slot followed by `num` 64-bit value slots.
// The size is established by seeking to the last byte and writing a single zero
// there. Failures to open or write are not reported.
void createFile(const char* fn, std::uint64_t num)
{
    const std::uint64_t totalBytes = (num + 1) * sizeof(std::uint64_t);

    std::filebuf file;
    file.open(fn, std::ios_base::binary | std::ios_base::trunc | std::ios_base::out);
    file.pubseekoff(totalBytes - 1, std::ios_base::beg); // position on the final byte
    file.sputc(0);                                       // writing it fixes the file size
    file.close();
}
void writeToFile(const char* fn, std::uint64_t pos, std::uint64_t val)
{
bip::file_mapping  fm(fn, bip::read_write);
bip::mapped_region rg(fm, bip::read_write);
std::uint64_t* p = reinterpret_cast<std::uint64_t*>(rg.get_address());
*p = std::max(*p, pos); // store max num values
*(p + pos) = val;       // write value into position
}
// Creates a file with room for three values and fills slots 1..3.
int main()
{
    const char* const path = "/tmp/test.dat";
    createFile(path, 3);
    // Slot k receives the byte 0xkk repeated eight times (0x1111..., 0x2222..., ...).
    for (std::uint64_t slot = 1; slot <= 3; ++slot)
        writeToFile(path, slot, slot * 0x1111111111111111ULL);
    return 0;
}

运行此程序会按预期生成一个输出文件,当我转储其内容时,我可以看到其中正确写入的值:

$ xxd -p /tmp/test.dat 
030000000000000011111111111111112222222222222222333333333333
3333

但是,现在我想调整文件的大小,以便我可以在最后写入其他数据。

我添加了以下函数 growFile(按照下面 timrau 的建议使用 ios_base::app):

// Attempts to grow the existing file `fn` so that it holds one 64-bit header
// slot plus `num` 64-bit value slots, by seeking to the last byte of the new
// size and writing a single zero byte there.
// NOTE(review): the file is opened with std::ios_base::app. In append mode each
// write goes to the current end of the file, so the seek below does not decide
// where the byte lands. This matches the dump shown for this version, where the
// file gains just one byte instead of reaching the requested size.
void growFile(const char* fn, std::uint64_t num)
{
std::filebuf fbuf;
// out | app keeps the existing contents (no truncation on open).
fbuf.open(fn, std::ios_base::out|std::ios_base::binary|std::ios_base::app);
// Intended new size in bytes: header slot + num value slots.
const std::uint64_t size = sizeof(std::uint64_t) * (num + 1);
fbuf.pubseekoff(size - 1, std::ios_base::beg);
fbuf.sputc(0);
fbuf.close();
}

我现在在增长文件后添加更多值:

// Creates a file with room for three values, fills slots 1..3, asks growFile
// for room for six values, then writes slots 4..6.
int main()
{
    const char* const path = "/tmp/test.dat";
    createFile(path, 3);
    // Slot k receives the byte 0xkk repeated eight times (0x1111..., 0x2222..., ...).
    for (std::uint64_t slot = 1; slot <= 3; ++slot)
        writeToFile(path, slot, slot * 0x1111111111111111ULL);
    growFile(path, 6);
    for (std::uint64_t slot = 4; slot <= 6; ++slot)
        writeToFile(path, slot, slot * 0x1111111111111111ULL);
    return 0;
}

当我转储文件时,它缺少大多数新值。

$ xxd -p /tmp/test.dat 
060000000000000011111111111111112222222222222222333333333333
333344

请注意,如果我不增长文件,而只是最初创建具有足够空间的文件,它会按预期工作:

// Creates the file with room for six values up front and fills slots 1..6.
int main()
{
    const char* const path = "/tmp/test.dat";
    createFile(path, 6);
    // Slot k receives the byte 0xkk repeated eight times (0x1111..., 0x2222..., ...).
    for (std::uint64_t slot = 1; slot <= 6; ++slot)
        writeToFile(path, slot, slot * 0x1111111111111111ULL);
    return 0;
}

当我转储文件时,所有值都在那里:

$ xxd -p /tmp/test.dat 
060000000000000011111111111111112222222222222222333333333333
3333444444444444444455555555555555556666666666666666

如何在创建后增大文件,以便我可以在超出其初始大小的位置继续写入?

我又在 Coliru 上试了一下。

您必须以读/写方式打开文件 - 相当于 ::fopen(..., "r+")

这些选项都有效:

// Extends the existing file `fn` to six 64-bit slots (48 bytes) by opening it
// for both reading and writing, seeking to the last byte of that size and
// writing one zero byte. Existing contents are kept.
// Two alternative implementations are left commented out for reference.
void resizeFile(const char* fn)
{
    // Index of the final byte of a file made of six 64-bit slots.
    constexpr auto lastByte = 6 * sizeof(std::uint64_t) - 1;

    // Alternative 1: C stdio, opened with "r+".
    /*
    FILE* fp = ::fopen(fn, "r+");
    ::fseek(fp, lastByte, SEEK_SET);
    ::fputc(0, fp);
    ::fclose(fp);
    */

    // Alternative 2: std::fstream opened with in | out, throwing on failure.
    /*
    std::fstream f;
    f.exceptions(std::ios::failbit | std::ios::badbit);
    f.open(fn, std::ios_base::in | std::ios_base::out | std::ios_base::binary);
    f.seekp(lastByte, std::ios_base::beg);
    f.put(0);
    f.flush();
    */

    // Variant in use: std::filebuf opened with in | out.
    std::filebuf file;
    file.open(fn, std::ios_base::binary | std::ios_base::in | std::ios_base::out);
    file.pubseekoff(lastByte, std::ios_base::beg);
    file.sputc(0);
    file.close();
}

http://coliru.stacked-crooked.com/a/ae224032dd036639

在 resizeFile() 中,您应该以追加模式打开文件。否则,该文件会在 open() 时被截断为空文件。

fbuf.open(fn, std::ios_base::out | std::ios_base::binary | std::ios_base::app);

看起来,先使用 boost::iostreams::mapped_file 截断/创建一个文件,然后用 std::ofstream 追加,可以在 Windows 上获得非常快的写入速度。只是猜测,但似乎 mapped_file::close() 被推迟了,因此 ofstream 能够写入仍处于打开状态的 mapped_file。

  • 左:创建 = std::ofstream,追加 = std::ofstream
  • 中间:创建 = mapped_file,追加 = mapped_file
  • 右:创建 = mapped_file,追加 = std::ofstream

一些计时结果(MSVC 15.9.19;Boost 1.72):

Time [ms] = 5.6878 ; 0.774 ; 0.7593
Time [ms] = 6.7207 ; 8.2712 ; 0.3294
Time [ms] = 5.8094 ; 4.7558 ; 0.439
Time [ms] = 3.3206 ; 4.7963 ; 0.324
Time [ms] = 5.2561 ; 3.9712 ; 0.3331
Time [ms] = 3.9206 ; 4.0262 ; 0.3952
Time [ms] = 3.0896 ; 3.9835 ; 0.3359
Time [ms] = 9.9593 ; 4.9418 ; 0.3266
Time [ms] = 3.7967 ; 4.9202 ; 0.3138
Time [ms] = 3.1793 ; 3.8531 ; 0.3195
Time [ms] = 3.0293 ; 3.7158 ; 0.3453
Time [ms] = 2.885 ; 3.6458 ; 0.3262
Time [ms] = 2.9635 ; 3.8436 ; 0.321
Time [ms] = 3.0339 ; 3.8216 ; 0.3427
Time [ms] = 2.8762 ; 3.7251 ; 0.3334
Time [ms] = 2.9138 ; 4.4343 ; 0.3165
Same 1 = 1
Same 2 = 1
Same 3 = 1

使用以下代码生成:

#include <boost/iostreams/device/mapped_file.hpp>
#include <boost/filesystem.hpp>
#include <vector>
#include <iostream>
#include <random>
#include <chrono>
#include <stdint.h>
#include <stdio.h>
/// Writes the object representation of every element of `data` to `filename`
/// through std::ofstream.
///
/// @tparam Type    element type; its bytes are written verbatim
/// @tparam append  true: add the bytes at the end of the file (std::ios::app);
///                 false: discard any existing contents first (std::ios::trunc)
/// @param filename path of the file to write
/// @param data     elements to write; may be empty
///
/// Failures to open or write are not reported.
template <class Type, bool append>
void writer_std(const std::string& filename, const std::vector<Type>& data)
{
    const size_t bytes = sizeof(Type) * data.size();
    std::ofstream writer;
    writer.open(filename, std::ios::binary | std::ios::out | (append ? std::ios::app : std::ios::trunc));
    // data.data() is well defined for an empty vector, unlike &data[0].
    writer.write(reinterpret_cast<const char *>(data.data()), static_cast<std::streamsize>(bytes));
    writer.close();
}
template <class Type>
void writer_boost_trunc(const std::string& filename, const std::vector<Type>& data)
{
size_t bytes = sizeof(Type) * data.size();
boost::iostreams::mapped_file_params params(filename);
params.new_file_size = bytes; // overwrites filename if non-zero
params.flags = boost::iostreams::mapped_file::mapmode::readwrite;
boost::iostreams::mapped_file mf;
mf.open(params);
char * buffer = reinterpret_cast<char *>(mf.data());
memcpy(buffer, reinterpret_cast<const char *>(&data[0]), bytes);
mf.close();
}
/// Appends the bytes of `data` to the existing file `filename` using a memory
/// mapping: the file is first enlarged with C stdio, then the tail of the file
/// is mapped read/write and the bytes are copied into it.
///
/// @param filename path of an existing file
/// @param data     elements to append; an empty vector leaves the file untouched
/// @throws std::ios_base::failure if the file cannot be opened, measured or
///         enlarged; mf.open() may throw as well
///
/// NOTE: the stdio calls take/return `long`, so sizes are limited to what a
/// `long` can hold on the target platform.
template <class Type>
void writer_boost_append(const std::string& filename, const std::vector<Type>& data)
{
    const size_t bytes = sizeof(Type) * data.size();
    // Nothing to append. Without this guard the grow step below would write its
    // zero byte over the existing last byte (or seek before offset 0 on an
    // empty file).
    if (bytes == 0)
        return;

#pragma warning(push)
#pragma warning(disable: 4996)
    FILE * file = fopen(filename.c_str(), "r+");
#pragma warning(pop)
    if (file == nullptr)
        throw std::ios_base::failure("writer_boost_append: cannot open " + filename);

    // Current size = position after seeking to the end.
    long end = -1L;
    if (fseek(file, 0, SEEK_END) == 0)
        end = ftell(file);
    if (end < 0)
    {
        fclose(file);
        throw std::ios_base::failure("writer_boost_append: cannot determine size of " + filename);
    }
    const size_t current = static_cast<size_t>(end);
    const size_t larger  = current + bytes;

    // Enlarge the file by writing one byte at the last position of the new size.
    const bool grown  = fseek(file, static_cast<long>(larger - 1), SEEK_SET) == 0 && fputc(0, file) != EOF;
    const bool closed = fclose(file) == 0;
    if (!grown || !closed)
        throw std::ios_base::failure("writer_boost_append: cannot grow " + filename);

    // Start the mapping at the largest multiple of the mapping alignment that
    // does not exceed the old end of the file, and map up to the new end.
    const size_t alignment = boost::iostreams::mapped_file::alignment();
    const size_t map_start = alignment * (current / alignment);
    boost::iostreams::mapped_file_params params(filename);
    params.flags  = boost::iostreams::mapped_file::mapmode::readwrite;
    params.offset = map_start;
    params.length = larger - map_start;
    boost::iostreams::mapped_file mf;
    mf.open(params);
    // The new bytes begin where the old file ended, relative to the mapping start.
    char * buffer = reinterpret_cast<char *>(mf.data()) + (current - map_start);
    memcpy(buffer, reinterpret_cast<const char *>(data.data()), bytes);
    mf.close();
}
template <class Type>
std::vector<Type> read(const std::string& filename, const size_t begin, const size_t number)
{
const size_t begin_byte =  begin * sizeof(Type);
const size_t  size_byte = number * sizeof(Type);
const size_t   end_byte = begin_byte + size_byte;
std::vector<Type> result(number);
size_t alignment = boost::iostreams::mapped_file::alignment();
boost::iostreams::mapped_file_params params(filename);
params.flags  = boost::iostreams::mapped_file::mapmode::readonly;
params.offset = alignment * (begin_byte / alignment);
params.length = end_byte - params.offset;
boost::iostreams::mapped_file mf;
mf.open(params);
const char * buffer = reinterpret_cast<const char *>(mf.const_data()) + (begin_byte - params.offset);
memcpy(reinterpret_cast<char *>(&result[0]), buffer, size_byte);
mf.close();
return result;
}
int main()
{
std::random_device rd;
std::mt19937_64 gen(rd());
std::uniform_int_distribution<uint64_t> dis(0, UINT64_MAX);
constexpr size_t num_batches = 16;
constexpr size_t batch_size  = 65536;
std::vector<std::vector<uint64_t>> data(num_batches);
for (size_t batch = 0; batch < num_batches; ++batch)
{
data[batch].reserve(batch_size);
for (size_t item = 0; item < batch_size; ++item)
data[batch].push_back(dis(gen));
}
const std::string f1 = "dump1.bin";
const std::string f2 = "dump2.bin";
const std::string f3 = "dump3.bin";
double test1[num_batches];
for (size_t batch = 0; batch < num_batches; ++batch)
{
auto t1 = std::chrono::system_clock::now();
if (batch == 0)
writer_std<uint64_t, false>(f1, data[batch]);
else
writer_std<uint64_t, true>(f1, data[batch]);
auto t2 = std::chrono::system_clock::now();
test1[batch] = 1e-6 * std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count();
}
double test2[num_batches];
for (size_t batch = 0; batch < num_batches; ++batch)
{
auto t1 = std::chrono::system_clock::now();
if (batch == 0)
writer_boost_trunc<uint64_t>(f2, data[batch]);
else
writer_boost_append<uint64_t>(f2, data[batch]);
auto t2 = std::chrono::system_clock::now();
test2[batch] = 1e-6 * std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count();
}
double test3[num_batches];
for (size_t batch = 0; batch < num_batches; ++batch)
{
auto t1 = std::chrono::system_clock::now();
if (batch == 0)
writer_boost_trunc<uint64_t>(f3, data[batch]);
else
writer_std<uint64_t, true>(f3, data[batch]);
auto t2 = std::chrono::system_clock::now();
test3[batch] = 1e-6 * std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count();
}
for (size_t batch = 0; batch < num_batches; ++batch)
std::cout << "Time [ms] = " << test1[batch] << " ; " << test2[batch] << " ; " << test3[batch] << std::endl;
bool same1 = true;
for (size_t batch = 0; batch < num_batches; ++batch)
{
std::vector<uint64_t> part = read<uint64_t>(f1, batch * batch_size, batch_size);
for (size_t item = 0; item < batch_size; ++item)
same1 = same1 && part[item] == data[batch][item];
}
std::cout << "Same 1 = " << same1 << std::endl;
bool same2 = true;
for (size_t batch = 0; batch < num_batches; ++batch)
{
std::vector<uint64_t> part = read<uint64_t>(f2, batch * batch_size, batch_size);
for (size_t item = 0; item < batch_size; ++item)
same2 = same2 && part[item] == data[batch][item];
}
std::cout << "Same 2 = " << same2 << std::endl;
bool same3 = true;
for (size_t batch = 0; batch < num_batches; ++batch)
{
std::vector<uint64_t> part = read<uint64_t>(f3, batch * batch_size, batch_size);
for (size_t item = 0; item < batch_size; ++item)
same3 = same3 && part[item] == data[batch][item];
}
std::cout << "Same 3 = " << same3 << std::endl;
boost::filesystem::path p1(f1); boost::filesystem::remove(p1);
boost::filesystem::path p2(f2); boost::filesystem::remove(p2);
boost::filesystem::path p3(f3); boost::filesystem::remove(p3);
return 0;
}