C++对象应该如何序列化

How should C++ objects be serialized?

本文关键字:序列化 对象 C++      更新时间:2023-10-16

我们正在做一个关于高性能计算的项目,该项目使用MPI作为并行计算框架。只有少数算法已经在遗留平台上实现。我们所做的是将原来的串行算法重写为基于MPI的并行版本。

我遇到了这样一个性能问题:在运行基于MPI的并行算法时,多个进程之间存在大量的通信开销。进程间通信由三个步骤组成:

  1. 进程A将一些C++对象序列化为二进制格式
  2. 进程A通过MPI向进程B发送二进制格式的数据
  3. 进程B将二进制格式的数据反序列化为C++对象

我们发现这些通信步骤,特别是序列化/反序列化步骤,花费了大量的时间。我们如何处理这个性能问题?

顺便说一句,在我们的C++代码中,我们使用了很多STL,它比类C结构更复杂。

附言:我现在通过编写代码遍历对象的所有字段并将它们顺序复制到字节数组中来完成这项工作(序列化)。

为了演示我在做什么,这里有一个代码片段。请注意,这只是单个要素(feature)的构建过程:

// Build one OGR feature from one sic feature by passing its geometry through
// a temporary WKB byte buffer.
sic::GeometryFeature *ptFeature =
    (GeometryFeature *) outLayer->getFeature(iFeature);
sic::Geometry* geom = ptFeature->getGeometry();
std::string geomClassName = geom->getClassName();
sic::Geometry* ptGeom = geom;
unsigned char *wkbBuffer = NULL;
OGRGeometry * gtGeom = NULL;
if (geomClassName == "Point") {
    // A lone Point is inserted into a fresh MultiPoint, which is what gets
    // exported; the OGR side is created as the matching OGRMultiPoint.
    ptGeom = new sic::MultiPoint();
    ((sic::MultiPoint *) ptGeom)->insert(geom);
    gtGeom = new OGRMultiPoint();
    int wkbSize = ((sic::MultiPoint *) ptGeom)->WkbSize();
    wkbBuffer = (unsigned char *) malloc(wkbSize);
    ((sic::GeometryCollection *) ptGeom)->exportToWkb(sic::wkbNDR,
        wkbBuffer, wkbMultiPoint);
} else if (...) {
    ......
}
// Verify that some branch above created the target geometry *before* it is
// dereferenced; asserting afterwards would be too late to catch a NULL.
assert(gtGeom);
gtGeom->importFromWkb(wkbBuffer);
free(wkbBuffer);
OGRFeature * poFeature = OGRFeature::CreateFeature(
     poLayer->GetLayerDefn());
poFeature->SetGeometry(gtGeom);

关于我正在做的序列化对象的更多信息:

unsigned char *bytes = (unsigned char *) malloc(size);
    size_t offset = 0;
    size_t type_size = sizeof(OGRwkbGeometryType);
    OGRwkbGeometryType type = layer->GetGeomType();
    memcpy(bytes + offset, &type, type_size);
    offset += type_size;
    size_t count_size = sizeof(int);
    int count = layer->GetFeatureCount();
    memcpy(bytes + offset, &count, count_size);
    offset += count_size;
    layer->ResetReading();
    for (OGRFeature *feature = layer->GetNextFeature(); feature != NULL;
            feature = layer->GetNextFeature()) {
        OGRGeometry *geometry = feature->GetGeometryRef();
        if (geometry) {
            geometry->exportToWkb(wkbNDR, bytes + offset);
            offset += geometry->WkbSize();
        } else {
            (*(int *) (bytes + type_size))--;
        }
        OGRFeature::DestroyFeature(feature);
    }
    return bytes;

如有任何意见,我们将不胜感激。谢谢

(Brian的回答是帮助您使用库……他是一位经验丰富的程序员,听起来值得一试。)

另外,我看了你的代码——有很多临时缓冲区、新的malloc分配、sizeof的使用等等。所以我想我应该说明一种"快速、简单但很好"的清理方法——足以让你开始。。。

首先创建一个二进制流类型,它考虑并隐藏了许多底层工作:

#include <arpa/inet.h> // for htonl/s, ntoh/s
#include <endian.h> // for htonbe64, if you have it...
#include <iostream>
#include <string>
#include <map>
// Support routines: overloads that let C++ overload resolution pick the
// htonl/htons (or ntohl/ntohs) call matching the argument's width.
// A 64-bit variant needs a platform extension such as glibc's <endian.h>:
// uint64_t hton(uint64_t n) { return htobe64(n); }
uint32_t hton(uint32_t n) { return htonl(n); }
uint16_t hton(uint16_t n) { return htons(n); }
// htonl/htons have no signed versions, so the signed overloads reuse the
// unsigned calls (ugly but effective) and hand back the unsigned result.
uint32_t hton(int32_t n) { return htonl(n); }
uint16_t hton(int16_t n) { return htons(n); }
// uint64_t ntoh(uint64_t n) { return be64toh(n); }
uint32_t ntoh(uint32_t n) { return ntohl(n); }
// 16-bit values must use ntohs: ntohl would swap across 32 bits and the
// truncation back to uint16_t would discard the payload on little-endian hosts.
uint16_t ntoh(uint16_t n) { return ntohs(n); }
// Binary serialisation layer on top of a standard output stream class.
//
// Binary_OStream derives from OStream (e.g. std::ostringstream) and adds
// operator<< overloads that take a Binary_OStream& on the left-hand side and
// write a binary encoding instead of formatted text:
//   * 16/32-bit integers are converted with ::hton() and then written raw;
//   * 8-bit integers and doubles are written raw, without conversion;
//   * bool is written as the single character 'T' or 'F';
//   * std::string and std::map are written as a 32-bit element count followed
//     by their contents.
//
// Every value that goes through rawwrite() is preceded by a textual
// "[<size>]" marker. NOTE(review): this looks like a debugging aid for the
// demo output - confirm, and drop it for a real wire format.
template <typename OStream>
class Binary_OStream : public OStream
{
  public:
    typedef Binary_OStream This;

    // Write n bytes starting at s. Hides OStream::write so that the result is
    // a Binary_OStream& and further binary operator<< calls can be chained.
    This& write(const char* s, std::streamsize n)
    {
        OStream::write(s, n);
        return *this;
    }

    // Write the "[<size>]" marker as text via the base stream, then the
    // object representation of t, byte for byte.
    template <typename T>
    This& rawwrite(const T& t)
    {
        static_cast<OStream&>(*this) << '[' << sizeof t << ']';
        return write(reinterpret_cast<const char*>(&t), sizeof t);
    }

    // Convert h with the matching global ::hton() overload, then write it raw.
    template <typename T>
    This& hton(T h)
    {
        T n = ::hton(h);
        return rawwrite(n);
    }

    // conversions for inbuilt & Standard-library types...

    // put() is used because `bs << char` has no single best overload here
    // (every candidate needs a conversion on one argument or the other).
    friend This& operator<<(This& bs, bool x)
    {
        bs.put(x ? 'T' : 'F');
        return bs;
    }

    // Single bytes go straight to rawwrite(); forwarding to `bs << x` would
    // select this very overload again and recurse without end.
    friend This& operator<<(This& bs, int8_t x) { return bs.rawwrite(x); }
    friend This& operator<<(This& bs, uint8_t x) { return bs.rawwrite(x); }

    friend This& operator<<(This& bs, int16_t x) { return bs.hton(x); }
    friend This& operator<<(This& bs, uint16_t x) { return bs.hton(x); }
    friend This& operator<<(This& bs, int32_t x) { return bs.hton(x); }
    friend This& operator<<(This& bs, uint32_t x) { return bs.hton(x); }

    // Written in the host's own representation; no byte-order conversion.
    friend This& operator<<(This& bs, double d) { return bs.rawwrite(d); }

    // Length first, then the characters. The length is cast to uint32_t so
    // that the call resolves to the uint32_t overload whatever the width of
    // size_t is; lengths that do not fit in 32 bits are truncated.
    friend This& operator<<(This& bs, const std::string& x)
    {
        bs << static_cast<uint32_t>(x.size());
        return bs.write(x.data(), x.size());
    }

    // Element count (as uint32_t, see the std::string overload), then each
    // key followed by its value, in the map's iteration order.
    template <typename K, typename V, typename A>
    friend This& operator<<(This& bs, const std::map<K, V, A>& m)
    {
        typedef typename std::map<K, V, A>::const_iterator It;
        bs << static_cast<uint32_t>(m.size());
        for (It it = m.begin(); it != m.end(); ++it)
            bs << it->first << it->second;
        return bs;
    }
    // add any others you want...
};

创建用户自定义的、可进行二进制序列化的类型……

// for your own objects...    
// Example user-defined type that can be streamed into a Binary_OStream.
struct Object
{
    std::string s_;
    double x_;

    Object(const std::string& s, double x)
        : s_(s), x_(x)
    {
    }

    // Defines the binary layout of an Object: s_ is written first, then x_.
    template <typename Stream>
    friend Binary_OStream<Stream>& operator<<(Binary_OStream<Stream>& os,
                                              const Object& o)
    {
        os << o.s_;
        os << o.x_;
        return os;
    }
};

示例用法:

#include <iomanip>
#include <sstream>
// support routines just to help you observe/debug the serialisation...

// Render one character for display: printable characters are returned as-is,
// anything else becomes a backslash, 'x' and two lowercase hex digits
// (e.g. a newline becomes the four characters \x0a).
std::string printable(char c)
{
    // isprint() requires a value representable as unsigned char (or EOF);
    // passing a negative plain char is undefined behaviour.
    const unsigned char byte = static_cast<unsigned char>(c);
    std::ostringstream oss;
    if (isprint(byte))
        oss << c;
    else
        oss << "\\x" << std::hex << std::setw(2) << std::setfill('0')
            << static_cast<int>(byte) << std::dec;
    return oss.str();
}
std::string printable(const std::string& s)
{
    std::string result;
    for (std::string::const_iterator i = s.begin(); i != s.end(); ++i)
        result += printable(*i);
    return result;
}
// Demo: serialise an Object and then a std::map into in-memory binary
// streams, printing each result in printable form on its own line.
int main()
{
    {
        Binary_OStream<std::ostringstream> bs;
        Object o("pi", 3.14);
        bs << o;
        // The line must end with a real newline escape, not a literal 'n'.
        std::cout << "serialised to '" << printable(bs.str()) << "'\n";
    }
    {
        Binary_OStream<std::ostringstream> bs;
        std::map<int, std::string> m;
        m[0] = "zero";
        m[1] = "one";
        m[2] = "two";
        bs << m;
        std::cout << "serialised to '" << printable(bs.str()) << "'\n";
    }
}

下一步是创建一个Binary_IStream——它与上面的非常非常相似。(Boost通过使用"&"运算符代替传统的<<和>>来稍微减少工作量,这样同一个函数就可以同时指定序列化和反序列化所用的字段。)

实施说明/想法:

  • 如果愿意,可以从Binary_Stream中删除模板参数,并让构造函数将任意std::ostream&存储到private成员变量中,然后将所有流操作发送到该数据成员。
    • 这样做的优点是,可以最大限度地减少不同流类型的实例化带来的代码膨胀,允许实现从翻译单元隐藏起来,并在以后进行链接(有助于在大型项目中缩短编译时间),还可以随时将Binary_Stream附加到任何现有流(如果有人向您传递预先存在的流,那就太好了)
    • "缺点"是:对于希望Binary_Stream用户能够访问的其他ostream成员函数,您必须逐一显式转发(更可控,但也更繁琐);或者提供一个(不那么方便/优雅的?)std::ostream& stream() { return s_; }风格的访问器