使用 boost::variant 的静态多态性（单访问者 vs. 多访问者）与动态多态性的性能比较

Static Polymorphism with boost variant single visitor vs multi visitor vs dynamic polymorphism

本文关键字：多态性访问者动态 boost 静态更新时间：2023-10-16

我正在比较以下C++多态性方法的性能：

方法[1]：使用 boost::variant 的静态多态性，每个操作各有一个单独的访问者。
方法[2]：使用 boost::variant 的静态多态性，只有一个访问者，通过方法重载来调用不同的方法。
方法[3]：普通的动态多态性（虚函数）。

平台：Intel x86 64位，Red Hat，现代多核处理器，32 GB 内存；gcc 4.8.1，使用 -O2 优化；Boost 1.6.0

一些发现：

方法[1]似乎明显优于方法[2]和[3]
方法[3]在大多数情况下都优于方法[2]

我的问题是：方法[2]使用访问者，并通过方法重载来调用正确的方法，为什么它的性能比虚函数还差？我原本以为静态多态性会优于动态多态性。我知道在方法[2]中，为了确定调用类的哪一个 visit() 重载而传递额外参数是有代价的，而且方法重载可能会带来更多分支，但它难道不应该仍然优于虚函数吗？

代码如下：

// qcpptest.hpp
#ifndef INCLUDED_QCPPTEST_H
#define INCLUDED_QCPPTEST_H
#include <boost/variant.hpp>
// Abstract interface for the dynamic-polymorphism variant of the benchmark:
// both operations are dispatched through the vtable.
class IShape {
 public:
  // Virtual so that destroying a derived shape through an IShape* is
  // well-defined.
  virtual ~IShape() = default;
  virtual void rotate() = 0;
  virtual void spin() = 0;
};

// Concrete shape whose operations do nothing; the trace output is left
// commented out.
class Square : public IShape {
 public:
  void rotate() override {
    // std::cout << "Square:I am rotating" << std::endl;
  }
  void spin() override {
    // std::cout << "Square:I am spinning" << std::endl;
  }
};

// Second concrete shape, identical in structure to Square.
class Circle : public IShape {
 public:
  void rotate() override {
    // std::cout << "Circle:I am rotating" << std::endl;
  }
  void spin() override {
    // std::cout << "Circle:I am spinning" << std::endl;
  }
};
// Variant-based ("template") variation of the shapes.
// ADD and DEL are empty tag types: the shape classes below overload visit()
// on them, so the tag's static type selects the operation.
// A commented-out alternative used an enum instead:
// enum class M {ADD, DEL};
struct ADD {};
struct DEL {};
// Non-virtual square used as a boost::variant alternative. Every operation
// only bumps a private counter, giving the call a side effect.
class TSquare {
  // Operation counter. Initialised explicitly so that a default-initialised
  // TSquare never increments an indeterminate value.
  int i = 0;

 public:
  // Overload chosen when the operation tag is ADD.
  void visit(const ADD&) {
    ++i;
    // std::cout << "TSquare:visit(ADD)" << std::endl;
  }
  // Overload chosen when the operation tag is DEL.
  void visit(const DEL&) {
    ++i;
    // std::cout << "TSquare:visit(DEL)" << std::endl;
  }
  // Called by SpinVisitor.
  void spin() {
    ++i;
    // std::cout << "TSquare:I am spinning" << std::endl;
  }
  // Called by RotateVisitor.
  void rotate() {
    ++i;
    // std::cout << "TSquare:I am rotating" << std::endl;
  }
};
// Non-virtual circle used as a boost::variant alternative. Every operation
// only bumps a private counter, giving the call a side effect.
class TCircle {
  // Operation counter. Initialised explicitly so that a default-initialised
  // TCircle never increments an indeterminate value.
  int i = 0;

 public:
  // Overload chosen when the operation tag is ADD.
  void visit(const ADD&) {
    ++i;
    // std::cout << "TCircle:visit(ADD)" << std::endl;
  }
  // Overload chosen when the operation tag is DEL.
  void visit(const DEL&) {
    ++i;
    // std::cout << "TCircle:visit(DEL)" << std::endl;
  }
  // Called by SpinVisitor.
  void spin() {
    ++i;
    // std::cout << "TCircle:I am spinning" << std::endl;
  }
  // Called by RotateVisitor.
  void rotate() {
    ++i;
    // std::cout << "TCircle:I am rotating" << std::endl;
  }
};
// Binary visitor: receives the shape alternative and the operation-tag
// alternative, and lets overload resolution on the shape's visit() pick the
// operation.
struct MultiVisitor : boost::static_visitor<void> {
  template <typename Shape, typename Op>
  void operator()(Shape& shape, const Op& op) {
    shape.visit(op);
  }
};
// separate visitors, single dispatch
// Unary visitor dedicated to one operation: calls rotate() on whichever
// alternative the variant holds.
struct RotateVisitor : boost::static_visitor<void> {
  template <typename Shape>
  void operator()(Shape& shape) {
    shape.rotate();
  }
};
// Unary visitor dedicated to one operation: calls spin() on whichever
// alternative the variant holds.
struct SpinVisitor : boost::static_visitor<void> {
  template <typename Shape>
  void operator()(Shape& shape) {
    shape.spin();
  }
};
#endif
// qcpptest.cpp
#include "qcpptest.hpp"

#include <cstdlib>
#include <iostream>
#include <memory>
#include <vector>

#include <boost/chrono.hpp>
// Operation selector for the multi-visitor benchmark: a variant holding
// either an ADD tag or a DEL tag.
using MV = boost::variant<ADD, DEL>;
// One immutable, file-local instance per operation. A commented-out earlier
// form initialised these from enum values (M::ADD / M::DEL).
static const MV add = ADD{};
static const MV del = DEL{};
void make_virtual_shapes(int iters) {
  // std::cout << "make_virtual_shapes" << std::endl;
  std::vector<IShape*> shapes;
  shapes.push_back(new Square());
  shapes.push_back(new Circle());
  boost::chrono::high_resolution_clock::time_point start =
      boost::chrono::high_resolution_clock::now();
  for (int i = 0; i < iters; i++) {
    for (IShape* shape : shapes) {
      shape->rotate();
      shape->spin();
    }
  }
  boost::chrono::nanoseconds nanos =
      boost::chrono::high_resolution_clock::now() - start;
  std::cout << "make_virtual_shapes took " << nanos.count() * 1e-6
            << " millisn";
}
void make_template_shapes(int iters) {
  // std::cout << "make_template_shapes" << std::endl;
  using TShapes = boost::variant<TSquare, TCircle>;
  // using MV = boost::variant< M >;
  // xyz
  std::vector<TShapes> tshapes;
  tshapes.push_back(TSquare());
  tshapes.push_back(TCircle());
  MultiVisitor mv;
  boost::chrono::high_resolution_clock::time_point start =
      boost::chrono::high_resolution_clock::now();
  for (int i = 0; i < iters; i++) {
    for (TShapes& shape : tshapes) {
      boost::apply_visitor(mv, shape, add);
      boost::apply_visitor(mv, shape, del);
      // boost::apply_visitor(sv, shape);
    }
  }
  boost::chrono::nanoseconds nanos =
      boost::chrono::high_resolution_clock::now() - start;
  std::cout << "make_template_shapes took " << nanos.count() * 1e-6
            << " millisn";
}
void make_template_shapes_single(int iters) {
  // std::cout << "make_template_shapes_single" << std::endl;
  using TShapes = boost::variant<TSquare, TCircle>;
  // xyz
  std::vector<TShapes> tshapes;
  tshapes.push_back(TSquare());
  tshapes.push_back(TCircle());
  SpinVisitor sv;
  RotateVisitor rv;
  boost::chrono::high_resolution_clock::time_point start =
      boost::chrono::high_resolution_clock::now();
  for (int i = 0; i < iters; i++) {
    for (TShapes& shape : tshapes) {
      boost::apply_visitor(rv, shape);
      boost::apply_visitor(sv, shape);
    }
  }
  boost::chrono::nanoseconds nanos =
      boost::chrono::high_resolution_clock::now() - start;
  std::cout << "make_template_shapes_single took " << nanos.count() * 1e-6
            << " millisn";
}
// Entry point. Expects the iteration count as the first command-line
// argument and runs the three benchmarks with it.
// Returns 0 on success, EXIT_FAILURE when the argument is missing.
int main(int argc, const char* argv[]) {
  std::cout << "Hello, cmake" << std::endl;

  // argv[1] must not be read unless it was actually supplied.
  if (argc < 2) {
    std::cerr << "usage: " << (argc > 0 ? argv[0] : "qcpptest")
              << " <iterations>\n";
    return EXIT_FAILURE;
  }

  // atoi yields 0 for non-numeric input, in which case the benchmark loops
  // simply do not execute.
  const int iters = std::atoi(argv[1]);
  make_virtual_shapes(iters);
  make_template_shapes(iters);
  make_template_shapes_single(iters);
  return 0;
}

方法2基本上是低效地重新实现动态调度。当你有：

shape->rotate();
shape->spin();

这涉及到在vtable中查找正确的函数并调用它，其开销只来自这一次查找。但当你有：

boost::apply_visitor(mv, shape, add);

它大致会展开为如下代码（假设存在一个 as<> 成员函数模板，它只是一个不做检查的 reinterpret_cast）：

// Illustrative expansion only (as<T>() is a hypothetical unchecked cast).
// Outer level: branch on which alternative `shape` holds (0 -> TSquare).
if (shape.which() == 0) {
    // Inner level: branch on which alternative the operation variant holds
    // (0 -> ADD, 1 -> DEL).
    if (add.which() == 0) {
        mv(shape.as<TSquare&>(), add.as<ADD&>());
    }
    else if (add.which() == 1) {
        mv(shape.as<TSquare&>(), add.as<DEL&>());
    }
    else {
        // ??? (neither ADD nor DEL)
    }
}
// 1 -> TCircle; the same inner branching is repeated.
else if (shape.which() == 1) {
    if (add.which() == 0) {
        mv(shape.as<TCircle&>(), add.as<ADD&>());
    }
    else if (add.which() == 1) {
        mv(shape.as<TCircle&>(), add.as<DEL&>());
    }
    else {
        // ??? (neither ADD nor DEL)
    }
}
else {
   // ??? (neither TSquare nor TCircle)
}

在这里，我们遇到了分支的组合爆炸（方法1中不存在这个问题），而且实际上必须检查每个变体的每一种可能的静态类型，才能确定要做什么（方法3中不需要这样做）。此外，这些分支无法被预测，因为每次走的分支都不同，所以处理器无法对这段代码进行流水线化执行，只能一次次急停（流水线停顿）。

mv() 上的重载解析是免费的；有代价的是弄清楚我们要用什么参数来调用 mv。还要注意，沿两个轴中的任何一个进行扩展时所增加的时间开销：

+---------------+----------------+----------------+----------+
|               |    Method 1    |    Method 2    | Method 3 |
+---------------+----------------+----------------+----------+
|    New Type   | More Expensive | More Expensive |   Free   |
| New Operation |      Free      | More Expensive |   Free*  |
+---------------+----------------+----------------+----------+

方法1在添加新类型方面变得更加昂贵，因为我们必须显式地迭代所有类型。添加新操作是免费的，因为操作是什么并不重要。

方法3添加新类型是免费的，添加新操作也几乎免费（表中的 Free*）——唯一的变化是vtable变大。由于对象大小的原因，这会产生一些影响，但通常小于方法1和方法2中因类型增加而增加的迭代开销。