表达式模板和 C++11.

Expression templates and C++11

本文关键字：C++11 表达式更新时间：2023-10-16

让我们来看看表达式模板的一个特殊好处：ET 可用于避免内存中出现矢量大小的临时，这些临时出现在重载运算符中，例如：

template<typename T>
std::vector<T> operator+(const std::vector<T>& a, const std::vector<T>& b)
{
  std::vector<T> tmp;   // vector-sized temporary
  for_each(...);
  return tmp;
}

在 C++11 中，此函数的 return 语句应用移动语义。没有矢量的副本。这是一场胜利。

但是，如果我看一个简单的表达式，例如

d = a + b + c;

我看到上面的函数被调用了两次(对于两个operator+(，而最终的赋值可以用移动语义来完成。

总共执行 2 个循环。意味着我放了一个临时的，然后马上读了回来。对于大向量，这会从缓存中消失。这比表达式模板更糟糕。他们可以在 1 个循环中完成整个事情。ET 可以执行上述代码，相当于：

for(int i=0 ; i < vec_length ; ++i)
  d[i] = a[i] + b[i] + c[i];

我想知道 lambda 与移动语义或任何其他新功能是否可以像 ET 一样好。有什么想法吗？

编辑：

基本上，使用 ET 技术，编译器构建一个解析树类似于代数表达式，它的类型系统。此树由内部节点和叶节点组成。这内部节点表示运算(加法、乘法等(和叶节点表示对数据对象的引用。

我试图以堆栈计算机：从操作堆栈中获取操作并拉取参数堆栈中的下一个参数并计算操作。将结果放回堆栈上等待操作。

表示这两个不同的对象(操作堆栈和数据叶堆栈(我将一个用于操作的std::tuple和一个捆绑在一起 std::tuple数据会进入std::pair<>。最初我使用了std:vector，但这会导致运行时开销。

整个过程分为两个阶段：堆垛机初始化其中初始化操作和参数堆栈。而通过分配配对容器触发的评估阶段到向量。

我创建了一个包含私有array<int,5>的类Vec(有效负载(，并且具有重载赋值运算符采取"表达"。

全局operator*对于所有获取组合都过载 Vec和"表达式"，以便在情况下也能正确处理我们拥有的不仅仅是a*b.(注意，我为此切换了乘法的教育示例 - 基本上是为了快速发现汇编程序中的imull。

在评估开始之前首先要做的是"提取"的涉及的Vec对象中的值并初始化参数叠。这是必要的，不要有不同种类的物体躺着周围：可索引向量和不可索引结果。这就是 Extractor是为了。又是一件好事：使用了可变参数模板在这种情况下，不会产生运行时开销(所有这些都在编译时(。

整个事情都有效。表达式得到了很好的评估(我也添加了添加，但此处省略了以适合代码(。下面您可以看到汇编程序输出。只是原始的计算，和你一模一样希望它成为：与ET技术相媲美。

结果。C++11 的新语言功能提供了可变参数模板(以及模板元编程(打开了编译时计算的区域。我在这里展示了如何可变参数模板可用于生成与传统的ET技术。

#include<algorithm>
#include<iostream>
#include<vector>
#include<tuple>
#include<utility>
#include<array>

template<typename Target,typename Tuple, int N, bool end>
struct Extractor {
  template < typename ... Args >
  static Target index(int i,const Tuple& t, Args && ... args)
  {
    return Extractor<Target, Tuple,  N+1, 
             std::tuple_size<Tuple>::value == N+1>::
      index(i, t , std::forward<Args>(args)..., std::get<N>(t).vec[i] );
  }
};
template < typename Target, typename Tuple, int N >
struct Extractor<Target,Tuple,N,true>
{
    template < typename ... Args >
    static Target index(int i,Tuple const& t, 
            Args && ... args) { 
      return Target(std::forward<Args>(args)...); }
};
template < typename ... Vs > 
std::tuple<typename std::remove_reference<Vs>::type::type_t...>
extract(int i , const std::tuple<Vs...>& tpl)
{
  return Extractor<std::tuple<typename std::remove_reference<Vs>::type::type_t...>,
           std::tuple<Vs...>, 0,
           std::tuple_size<std::tuple<Vs...> >::value == 0>::index(i,tpl);
}

struct Vec {
  std::array<int,5> vec;
  typedef int type_t;
  template<typename... OPs,typename... VALs>
  Vec& operator=(const std::pair< std::tuple<VALs...> , std::tuple<OPs...> >& e) {
    for( int i = 0 ; i < vec.size() ; ++i ) {
      vec[i] = eval( extract(i,e.first) , e.second );
    }
  }
};


template<int OpPos,int ValPos, bool end>
struct StackMachine {
  template<typename... OPs,typename... VALs>
  static void eval_pos( std::tuple<VALs...>& vals , const std::tuple<OPs...> & ops )
  {
    std::get<ValPos+1>( vals ) =
      std::get<OpPos>(ops).apply( std::get<ValPos>( vals ) , 
                  std::get<ValPos+1>( vals ) );
    StackMachine<OpPos+1,ValPos+1,sizeof...(OPs) == OpPos+1>::eval_pos(vals,ops);
  }
};
template<int OpPos,int ValPos>
struct StackMachine<OpPos,ValPos,true> {
  template<typename... OPs,typename... VALs>
  static void eval_pos( std::tuple<VALs...>& vals , 
            const std::tuple<OPs...> & ops )
  {}
};

template<typename... OPs,typename... VALs>
int eval( const std::tuple<VALs...>& vals , const std::tuple<OPs...> & ops )
{
  StackMachine<0,0,false>::eval_pos(const_cast<std::tuple<VALs...>&>(vals),ops);
  return std::get<sizeof...(OPs)>(vals);
}


struct OpMul {
  static int apply(const int& lhs,const int& rhs)  {
    return lhs*rhs;
  }
};
std::pair< std::tuple< const Vec&, const Vec& > , std::tuple<OpMul> >
operator*(const Vec& lhs,const Vec& rhs)
{
  return std::make_pair( std::tuple< const Vec&, const Vec& >( lhs , rhs ) , 
             std::tuple<OpMul>( OpMul() ) );
}
template<typename... OPs,typename... VALs>
std::pair< std::tuple< const Vec&, VALs... > , std::tuple<OPs...,OpMul> >
operator*(const Vec& lhs,const std::pair< std::tuple< VALs... > , std::tuple<OPs...> >& rhs)
{
  return std::make_pair( std::tuple_cat( rhs.first , std::tuple< const Vec& >(lhs)  ) , 
             std::tuple_cat( rhs.second , std::tuple<OpMul>( OpMul() )  ) );
}
template<typename... OPs,typename... VALs>
std::pair< std::tuple< const Vec&, VALs... > , std::tuple<OPs...,OpMul> >
operator*(const std::pair< std::tuple< VALs... > , std::tuple<OPs...> >& lhs,
      const Vec& rhs)
{
  return std::make_pair( std::tuple_cat( lhs.first , std::tuple< const Vec& >(rhs)  ) , 
             std::tuple_cat( lhs.second , std::tuple<OpMul>( OpMul() ) ) );
}
int main()
{
  Vec d,c,b,a;

  for( int i = 0 ; i < d.vec.size() ; ++i ) {
    a.vec[i] = 10+i;
    b.vec[i] = 20+i;
    c.vec[i] = 30+i;
    d.vec[i] = 0;
  }
  d = a * b * c * a;
  for( int i = 0 ; i < d.vec.size() ; ++i ) 
    std::cout << d.vec[i] << std::endl;
}

用g++-4.6 -O3生成的汇编程序(我不得不在向量初始化中加入一些运行时依赖性，以便编译器不会在编译时计算整个事情，并且您实际上可以看到imull的引点。

imull   %esi, %edx
imull   32(%rsp), %edx
imull   %edx, %esi
movl    68(%rsp), %edx
imull   %ecx, %edx
movl    %esi, (%rsp)
imull   36(%rsp), %edx
imull   %ecx, %edx
movl    104(%rsp), %ecx
movl    %edx, 4(%rsp)
movl    72(%rsp), %edx
imull   %ecx, %edx
imull   40(%rsp), %edx
imull   %ecx, %edx
movl    108(%rsp), %ecx
movl    %edx, 8(%rsp)
movl    76(%rsp), %edx
imull   %ecx, %edx
imull   44(%rsp), %edx
imull   %ecx, %edx
movl    112(%rsp), %ecx
movl    %edx, 12(%rsp)
movl    80(%rsp), %edx
imull   %ecx, %edx
imull   %eax, %edx
imull   %ecx, %edx
movl    %edx, 16(%rsp)

我想知道 lambda 与移动语义或任何其他新功能是否可以像 ET 一样好。有什么想法吗？

快速回答

移动语义本身并不是万能的——C++11 中仍然需要表达式模板 (ET( 等技术来消除移动数据等开销！因此，在深入研究我的其余答案、移动语义等之前快速回答您的问题并不能完全取代 ET，正如我的答案如下所示。

详细答案

ET 通常会返回代理对象以将评估推迟到以后，因此在触发计算的代码之前，C++11 语言功能不会立即产生明显的好处。也就是说，人们不想编写 ET 代码，但是，在使用代理构建表达式树期间触发运行时代码生成。很好，C++11的移动语义和完美的转发可以帮助避免这种开销。(这在C++03中是不可能的。

从本质上讲，在编写ET时，人们希望在调用所涉及的代理对象的成员函数后，以某种方式利用语言功能来生成最佳代码。在 C++11 中，这将包括使用完美转发、移动语义而不是复制等，如果实际上仍然需要超出编译器已经可以做的事情。游戏的名称是最小化生成的运行时代码和/或最大化运行时速度和/或最小化运行时开销。

我想实际尝试一些具有 C++11 功能的 ET，看看我是否可以使用 a = b + c + d; 表达式消除所有中间临时实例类型。(因为这只是我正常活动的一个有趣的休息时间，所以我没有将其与纯粹使用 C++03 进行比较或编写 ET 代码。我也不担心下面显示的代码润色的所有方面。

首先，我没有使用 lambdas——

因为我更喜欢使用显式类型和函数——所以我不会就你的问题争论支持/反对 lambdas。我的猜测是，它们类似于使用函子，并且性能不会比下面的非 ET 代码更好(即需要移动(——至少在编译器可以使用自己的内部 ET 自动优化 lambda 之前。然而，我编写的代码利用了移动语义和完美转发。这是我从结果开始，然后最终呈现代码所做的。

我创建了一个math_vector<N>类，其中N==3，它定义了std::array<long double, N>的内部私有实例。成员是默认构造函数、复制和移动构造函数和赋值、初始值设定项列表构造函数、析构函数、swap(( 成员、用于访问向量元素的运算符 [] 和运算符 +=。在没有任何表达式模板的情况下使用，以下代码：

{
  cout << "CASE 1:n";
  math_vector<3> a{1.0, 1.1, 1.2};
  math_vector<3> b{2.0, 2.1, 2.2};
  math_vector<3> c{3.0, 3.1, 3.2};
  math_vector<3> d{4.0, 4.1, 4.2};
  math_vector<3> result = a + b + c + d;
  cout << '[' << &result << "]: " << result << "n";
}

输出(使用 clang++ 3.1 或 g++ 4.8 编译时为 - std=c++11 -O3(：

CASE 1:
0x7fff8d6edf50: math_vector(initlist)
0x7fff8d6edef0: math_vector(initlist)
0x7fff8d6ede90: math_vector(initlist)
0x7fff8d6ede30: math_vector(initlist)
0x7fff8d6edd70: math_vector(copy: 0x7fff8d6edf50)
0x7fff8d6edda0: math_vector(move: 0x7fff8d6edd70)
0x7fff8d6eddd0: math_vector(move: 0x7fff8d6edda0)
0x7fff8d6edda0: ~math_vector()
0x7fff8d6edd70: ~math_vector()
[0x7fff8d6eddd0]: (10,10.4,10.8)
0x7fff8d6eddd0: ~math_vector()
0x7fff8d6ede30: ~math_vector()
0x7fff8d6ede90: ~math_vector()
0x7fff8d6edef0: ~math_vector()
0x7fff8d6edf50: ~math_vector()

即，使用初始值设定项列表(即initlist项(、result变量(即0x7fff8d6eddd0(的四个显式构造实例，并且，还使另外三个对象复制和移动。

为了只关注临时和移动，我创建了第二个案例，它只创建result作为命名变量 - 所有其他都是右值：

{
  cout << "CASE 2:n";
  math_vector<3> result =
    math_vector<3>{1.0, 1.1, 1.2} +
    math_vector<3>{2.0, 2.1, 2.2} +
    math_vector<3>{3.0, 3.1, 3.2} +
    math_vector<3>{4.0, 4.1, 4.2}
  ;
  cout << '[' << &result << "]: " << result << "n";
}

输出以下内容(再次不使用 ET 时(：

CASE 2:
0x7fff8d6edcb0: math_vector(initlist)
0x7fff8d6edc50: math_vector(initlist)
0x7fff8d6edce0: math_vector(move: 0x7fff8d6edcb0)
0x7fff8d6edbf0: math_vector(initlist)
0x7fff8d6edd10: math_vector(move: 0x7fff8d6edce0)
0x7fff8d6edb90: math_vector(initlist)
0x7fff8d6edd40: math_vector(move: 0x7fff8d6edd10)
0x7fff8d6edb90: ~math_vector()
0x7fff8d6edd10: ~math_vector()
0x7fff8d6edbf0: ~math_vector()
0x7fff8d6edce0: ~math_vector()
0x7fff8d6edc50: ~math_vector()
0x7fff8d6edcb0: ~math_vector()
[0x7fff8d6edd40]: (10,10.4,10.8)
0x7fff8d6edd40: ~math_vector()

哪个更好：只创建额外的移动对象。

但我想要更好的：我

想要零额外的临时，并且让代码就像我用一个普通的编码警告硬编码它一样：所有显式实例化的类型仍将被创建(即，四个initlist构造函数和result(。为此，我随后添加了表达式模板代码，如下所示：

创建了一个代理math_vector_expr<LeftExpr,BinaryOp,RightExpr>类来保存尚未计算的表达式，
创建了一个代理plus_op类来保存加法操作，
将构造函数添加到math_vector以接受math_vector_expr对象，并且，
添加了"初学者"成员函数以触发表达式模板的创建。

使用 ET 的结果非常棒：无论哪种情况都没有额外的临时人员！上面的前两种情况现在输出：

CASE 1:
0x7fffe7180c60: math_vector(initlist)
0x7fffe7180c90: math_vector(initlist)
0x7fffe7180cc0: math_vector(initlist)
0x7fffe7180cf0: math_vector(initlist)
0x7fffe7180d20: math_vector(expr: 0x7fffe7180d90)
[0x7fffe7180d20]: (10,10.4,10.8)
0x7fffe7180d20: ~math_vector()
0x7fffe7180cf0: ~math_vector()
0x7fffe7180cc0: ~math_vector()
0x7fffe7180c90: ~math_vector()
0x7fffe7180c60: ~math_vector()
CASE 2:
0x7fffe7180dd0: math_vector(initlist)
0x7fffe7180e20: math_vector(initlist)
0x7fffe7180e70: math_vector(initlist)
0x7fffe7180eb0: math_vector(initlist)
0x7fffe7180d20: math_vector(expr: 0x7fffe7180dc0)
0x7fffe7180eb0: ~math_vector()
0x7fffe7180e70: ~math_vector()
0x7fffe7180e20: ~math_vector()
0x7fffe7180dd0: ~math_vector()
[0x7fffe7180d20]: (10,10.4,10.8)
0x7fffe7180d20: ~math_vector()

即，每种情况下正好有 5 个构造函数调用和 5 个析构函数调用。事实上，如果你要求编译器在 4 个initlist构造函数调用和输出之间生成汇编代码result则会得到这样一串漂亮的汇编代码：

fldt    128(%rsp)
leaq    128(%rsp), %rdi
leaq    80(%rsp), %rbp
fldt    176(%rsp)
faddp   %st, %st(1)
fldt    224(%rsp)
faddp   %st, %st(1)
fldt    272(%rsp)
faddp   %st, %st(1)
fstpt   80(%rsp)
fldt    144(%rsp)
fldt    192(%rsp)
faddp   %st, %st(1)
fldt    240(%rsp)
faddp   %st, %st(1)
fldt    288(%rsp)
faddp   %st, %st(1)
fstpt   96(%rsp)
fldt    160(%rsp)
fldt    208(%rsp)
faddp   %st, %st(1)
fldt    256(%rsp)
faddp   %st, %st(1)
fldt    304(%rsp)
faddp   %st, %st(1)
fstpt   112(%rsp)

使用 g++ 和 clang++ 输出相似(甚至更小(的代码。没有函数调用等 - 只是一堆添加，这正是人们想要的！

实现此目的的 C++11 代码如下。只需#define DONT_USE_EXPR_TEMPL不使用 ET，或者根本不定义它即可使用 ET。

#include <array>
#include <algorithm>
#include <initializer_list>
#include <type_traits>
#include <iostream>
//#define DONT_USE_EXPR_TEMPL
//===========================================================================
template <std::size_t N> class math_vector;
template <
  typename LeftExpr,
  typename BinaryOp,
  typename RightExpr
>
class math_vector_expr
{
  public:
    math_vector_expr() = delete;
    math_vector_expr(LeftExpr l, RightExpr r) : 
      l_(std::forward<LeftExpr>(l)), 
      r_(std::forward<RightExpr>(r))
    {
    }
    // Prohibit copying...
    math_vector_expr(math_vector_expr const&) = delete;
    math_vector_expr& operator =(math_vector_expr const&) = delete;
    // Allow moves...
    math_vector_expr(math_vector_expr&&) = default;
    math_vector_expr& operator =(math_vector_expr&&) = default;
    template <typename RE>
    auto operator +(RE&& re) const ->
      math_vector_expr<
        math_vector_expr<LeftExpr,BinaryOp,RightExpr> const&,
        BinaryOp,
        decltype(std::forward<RE>(re))
      >
    {
      return 
        math_vector_expr<
          math_vector_expr<LeftExpr,BinaryOp,RightExpr> const&,
          BinaryOp,
          decltype(std::forward<RE>(re))
        >(*this, std::forward<RE>(re))
      ;
    }
    auto le() -> 
      typename std::add_lvalue_reference<LeftExpr>::type
      { return l_; }
    auto le() const ->
      typename std::add_lvalue_reference<
        typename std::add_const<LeftExpr>::type
      >::type
      { return l_; }
    auto re() -> 
      typename std::add_lvalue_reference<RightExpr>::type
      { return r_; }
    auto re() const -> 
      typename std::add_lvalue_reference<
        typename std::add_const<RightExpr>::type
      >::type
      { return r_; }
    auto operator [](std::size_t index) const ->
      decltype(
        BinaryOp::apply(this->le()[index], this->re()[index])
      )
    {
      return BinaryOp::apply(le()[index], re()[index]);
    }
  private:
    LeftExpr l_;
    RightExpr r_;
};
//===========================================================================
template <typename T>
struct plus_op
{
  static T apply(T const& a, T const& b)
  {
    return a + b;
  }
  static T apply(T&& a, T const& b)
  {
    a += b;
    return std::move(a);
  }
  static T apply(T const& a, T&& b)
  {
    b += a;
    return std::move(b);
  }
  static T apply(T&& a, T&& b)
  {
    a += b;
    return std::move(a);
  }
};
//===========================================================================
template <std::size_t N>
class math_vector
{
  using impl_type = std::array<long double, N>;
  public:
    math_vector()
    {
      using namespace std;
      fill(begin(v_), end(v_), impl_type{});
      std::cout << this << ": math_vector()" << endl;
    }
    math_vector(math_vector const& mv) noexcept
    {
      using namespace std;
      copy(begin(mv.v_), end(mv.v_), begin(v_));
      std::cout << this << ": math_vector(copy: " << &mv << ")" << endl;
    }
    math_vector(math_vector&& mv) noexcept
    {
      using namespace std;
      move(begin(mv.v_), end(mv.v_), begin(v_));
      std::cout << this << ": math_vector(move: " << &mv << ")" << endl;
    }
    math_vector(std::initializer_list<typename impl_type::value_type> l)
    {
      using namespace std;
      copy(begin(l), end(l), begin(v_));
      std::cout << this << ": math_vector(initlist)" << endl;
    }
    math_vector& operator =(math_vector const& mv) noexcept
    {
      using namespace std;
      copy(begin(mv.v_), end(mv.v_), begin(v_));
      std::cout << this << ": math_vector op =(copy: " << &mv << ")" << endl;
      return *this;
    }
    math_vector& operator =(math_vector&& mv) noexcept
    {
      using namespace std;
      move(begin(mv.v_), end(mv.v_), begin(v_));
      std::cout << this << ": math_vector op =(move: " << &mv << ")" << endl;
      return *this;
    }
    ~math_vector()
    {
      using namespace std;
      std::cout << this << ": ~math_vector()" << endl;
    }
    void swap(math_vector& mv)
    {
      using namespace std;
      for (std::size_t i = 0; i<N; ++i)
        swap(v_[i], mv[i]);
    }
    auto operator [](std::size_t index) const
      -> typename impl_type::value_type const&
    {
      return v_[index];
    }
    auto operator [](std::size_t index)
      -> typename impl_type::value_type&
    {
      return v_[index];
    }
    math_vector& operator +=(math_vector const& b)
    {
      for (std::size_t i = 0; i<N; ++i)
        v_[i] += b[i];
      return *this;
    }
  #ifndef DONT_USE_EXPR_TEMPL
    template <typename LE, typename Op, typename RE>
    math_vector(math_vector_expr<LE,Op,RE>&& mve)
    {
      for (std::size_t i = 0; i < N; ++i)
        v_[i] = mve[i];
      std::cout << this << ": math_vector(expr: " << &mve << ")" << std::endl;
    }
    template <typename RightExpr>
    math_vector& operator =(RightExpr&& re)
    {
      for (std::size_t i = 0; i<N; ++i)
        v_[i] = re[i];
      return *this;
    }
    template <typename RightExpr>
    math_vector& operator +=(RightExpr&& re)
    {
      for (std::size_t i = 0; i<N; ++i)
        v_[i] += re[i];
      return *this;
    }
    template <typename RightExpr>
    auto operator +(RightExpr&& re) const ->
      math_vector_expr<
        math_vector const&, 
        plus_op<typename impl_type::value_type>,
        decltype(std::forward<RightExpr>(re))
      >
    {
      return 
        math_vector_expr<
          math_vector const&, 
          plus_op<typename impl_type::value_type>, 
          decltype(std::forward<RightExpr>(re))
        >(
          *this, 
          std::forward<RightExpr>(re)
        )
      ;
    }
  #endif // #ifndef DONT_USE_EXPR_TEMPL
  private:
    impl_type v_;
};
//===========================================================================
template <std::size_t N>
inline void swap(math_vector<N>& a, math_vector<N>& b)
{
  a.swap(b);
}
//===========================================================================
#ifdef DONT_USE_EXPR_TEMPL
template <std::size_t N>
inline math_vector<N> operator +(
  math_vector<N> const& a, 
  math_vector<N> const& b
)
{
  math_vector<N> retval(a);
  retval += b;
  return retval;
}
template <std::size_t N>
inline math_vector<N> operator +(
  math_vector<N>&& a, 
  math_vector<N> const& b
)
{
  a += b;
  return std::move(a);
}
template <std::size_t N>
inline math_vector<N> operator +(
  math_vector<N> const& a, 
  math_vector<N>&& b
)
{
  b += a;
  return std::move(b);
}
template <std::size_t N>
inline math_vector<N> operator +(
  math_vector<N>&& a, 
  math_vector<N>&& b
)
{
  a += std::move(b);
  return std::move(a);
}
#endif // #ifdef DONT_USE_EXPR_TEMPL
//===========================================================================
template <std::size_t N>
std::ostream& operator <<(std::ostream& os, math_vector<N> const& mv)
{
  os << '(';
  for (std::size_t i = 0; i < N; ++i)
    os << mv[i] << ((i+1 != N) ? ',' : ')');
  return os;
}
//===========================================================================
int main()
{
  using namespace std;
  try
  {
    {
      cout << "CASE 1:n";
      math_vector<3> a{1.0, 1.1, 1.2};
      math_vector<3> b{2.0, 2.1, 2.2};
      math_vector<3> c{3.0, 3.1, 3.2};
      math_vector<3> d{4.0, 4.1, 4.2};
      math_vector<3> result = a + b + c + d;
      cout << '[' << &result << "]: " << result << "n";
    }
    cout << endl;
    {
      cout << "CASE 2:n";
      math_vector<3> result =
        math_vector<3>{1.0, 1.1, 1.2} +
        math_vector<3>{2.0, 2.1, 2.2} +
        math_vector<3>{3.0, 3.1, 3.2} +
        math_vector<3>{4.0, 4.1, 4.2}
      ;
      cout << '[' << &result << "]: " << result << "n";
    }
  }
  catch (...)
  {
    return 1;
  }
}
//===========================================================================

这是 Paul Preney 代码的更正版本。我已通过电子邮件和评论通知作者;我写了一篇编辑，但被不合格的审稿人拒绝了。

原始代码中的错误是修复了 math_vector_expr：：operator+ 的二进制操作模板参数。

#include <array>
#include <algorithm>
#include <initializer_list>
#include <type_traits>
#include <iostream>
//#define DONT_USE_EXPR_TEMPL
//===========================================================================
template <std::size_t N> class math_vector;
template <typename T> struct plus_op;

template <
  typename LeftExpr,
  typename BinaryOp,
  typename RightExpr
>
class math_vector_expr
{
  public:
    typedef typename std::remove_reference<LeftExpr>::type::value_type value_type;
    math_vector_expr() = delete;
    math_vector_expr(LeftExpr l, RightExpr r) :
      l_(std::forward<LeftExpr>(l)),
      r_(std::forward<RightExpr>(r))
    {
    }
    // Prohibit copying...
    math_vector_expr(math_vector_expr const&) = delete;
    math_vector_expr& operator =(math_vector_expr const&) = delete;
    // Allow moves...
    math_vector_expr(math_vector_expr&&) = default;
    math_vector_expr& operator =(math_vector_expr&&) = default;
    template <typename RE>
    auto operator +(RE&& re) const ->
      math_vector_expr<
        math_vector_expr<LeftExpr,BinaryOp,RightExpr> const&,
        plus_op<value_type>,
        decltype(std::forward<RE>(re))
      >
    {
      return
        math_vector_expr<
          math_vector_expr<LeftExpr,BinaryOp,RightExpr> const&,
          plus_op<value_type>,
          decltype(std::forward<RE>(re))
        >(*this, std::forward<RE>(re))
      ;
    }
    auto le() ->
      typename std::add_lvalue_reference<LeftExpr>::type
      { return l_; }
    auto le() const ->
      typename std::add_lvalue_reference<
        typename std::add_const<LeftExpr>::type
      >::type
      { return l_; }
    auto re() ->
      typename std::add_lvalue_reference<RightExpr>::type
      { return r_; }
    auto re() const ->
      typename std::add_lvalue_reference<
        typename std::add_const<RightExpr>::type
      >::type
      { return r_; }
    auto operator [](std::size_t index) const ->
      value_type
    {
      return BinaryOp::apply(le()[index], re()[index]);
    }
  private:
    LeftExpr l_;
    RightExpr r_;
};
//===========================================================================
template <typename T>
struct plus_op
{
  static T apply(T const& a, T const& b)
  {
    return a + b;
  }
  static T apply(T&& a, T const& b)
  {
    a += b;
    return std::move(a);
  }
  static T apply(T const& a, T&& b)
  {
    b += a;
    return std::move(b);
  }
  static T apply(T&& a, T&& b)
  {
    a += b;
    return std::move(a);
  }
};
//===========================================================================
template <std::size_t N>
class math_vector
{
  using impl_type = std::array<long double, N>;
  public:
    typedef typename impl_type::value_type value_type;
    math_vector()
    {
      using namespace std;
      fill(begin(v_), end(v_), impl_type{});
      std::cout << this << ": math_vector()" << endl;
    }
    math_vector(math_vector const& mv) noexcept
    {
      using namespace std;
      copy(begin(mv.v_), end(mv.v_), begin(v_));
      std::cout << this << ": math_vector(copy: " << &mv << ")" << endl;
    }
    math_vector(math_vector&& mv) noexcept
    {
      using namespace std;
      move(begin(mv.v_), end(mv.v_), begin(v_));
      std::cout << this << ": math_vector(move: " << &mv << ")" << endl;
    }
    math_vector(std::initializer_list<value_type> l)
    {
      using namespace std;
      copy(begin(l), end(l), begin(v_));
      std::cout << this << ": math_vector(initlist)" << endl;
    }
    math_vector& operator =(math_vector const& mv) noexcept
    {
      using namespace std;
      copy(begin(mv.v_), end(mv.v_), begin(v_));
      std::cout << this << ": math_vector op =(copy: " << &mv << ")" << endl;
      return *this;
    }
    math_vector& operator =(math_vector&& mv) noexcept
    {
      using namespace std;
      move(begin(mv.v_), end(mv.v_), begin(v_));
      std::cout << this << ": math_vector op =(move: " << &mv << ")" << endl;
      return *this;
    }
    ~math_vector()
    {
      using namespace std;
      std::cout << this << ": ~math_vector()" << endl;
    }
    void swap(math_vector& mv)
    {
      using namespace std;
      for (std::size_t i = 0; i<N; ++i)
        swap(v_[i], mv[i]);
    }
    auto operator [](std::size_t index) const
      -> value_type const&
    {
      return v_[index];
    }
    auto operator [](std::size_t index)
      -> value_type&
    {
      return v_[index];
    }
    math_vector& operator +=(math_vector const& b)
    {
      for (std::size_t i = 0; i<N; ++i)
        v_[i] += b[i];
      return *this;
    }
  #ifndef DONT_USE_EXPR_TEMPL
    template <typename LE, typename Op, typename RE>
    math_vector(math_vector_expr<LE,Op,RE>&& mve)
    {
      for (std::size_t i = 0; i < N; ++i)
        v_[i] = mve[i];
      std::cout << this << ": math_vector(expr: " << &mve << ")" << std::endl;
    }
    template <typename RightExpr>
    math_vector& operator =(RightExpr&& re)
    {
      for (std::size_t i = 0; i<N; ++i)
        v_[i] = re[i];
      return *this;
    }
    template <typename RightExpr>
    math_vector& operator +=(RightExpr&& re)
    {
      for (std::size_t i = 0; i<N; ++i)
        v_[i] += re[i];
      return *this;
    }
    template <typename RightExpr>
    auto operator +(RightExpr&& re) const ->
      math_vector_expr<
        math_vector const&,
        plus_op<value_type>,
        decltype(std::forward<RightExpr>(re))
      >
    {
      return
        math_vector_expr<
          math_vector const&,
          plus_op<value_type>,
          decltype(std::forward<RightExpr>(re))
        >(
          *this,
          std::forward<RightExpr>(re)
        )
      ;
    }
  #endif // #ifndef DONT_USE_EXPR_TEMPL
  private:
    impl_type v_;
};
//===========================================================================
template <std::size_t N>
inline void swap(math_vector<N>& a, math_vector<N>& b)
{
  a.swap(b);
}
//===========================================================================
#ifdef DONT_USE_EXPR_TEMPL
template <std::size_t N>
inline math_vector<N> operator +(
  math_vector<N> const& a,
  math_vector<N> const& b
)
{
  math_vector<N> retval(a);
  retval += b;
  return retval;
}
template <std::size_t N>
inline math_vector<N> operator +(
  math_vector<N>&& a,
  math_vector<N> const& b
)
{
  a += b;
  return std::move(a);
}
template <std::size_t N>
inline math_vector<N> operator +(
  math_vector<N> const& a,
  math_vector<N>&& b
)
{
  b += a;
  return std::move(b);
}
template <std::size_t N>
inline math_vector<N> operator +(
  math_vector<N>&& a,
  math_vector<N>&& b
)
{
  a += std::move(b);
  return std::move(a);
}
#endif // #ifdef DONT_USE_EXPR_TEMPL
//===========================================================================
template <std::size_t N>
std::ostream& operator <<(std::ostream& os, math_vector<N> const& mv)
{
  os << '(';
  for (std::size_t i = 0; i < N; ++i)
    os << mv[i] << ((i+1 != N) ? ',' : ')');
  return os;
}
//===========================================================================
int main()
{
  using namespace std;
  try
  {
    {
      cout << "CASE 1:n";
      math_vector<3> a{1.0, 1.1, 1.2};
      math_vector<3> b{2.0, 2.1, 2.2};
      math_vector<3> c{3.0, 3.1, 3.2};
      math_vector<3> d{4.0, 4.1, 4.2};
      math_vector<3> result = a + b + c + d;
      cout << '[' << &result << "]: " << result << "n";
    }
    cout << endl;
    {
      cout << "CASE 2:n";
      math_vector<3> result =
        math_vector<3>{1.0, 1.1, 1.2} +
        math_vector<3>{2.0, 2.1, 2.2} +
        math_vector<3>{3.0, 3.1, 3.2} +
        math_vector<3>{4.0, 4.1, 4.2}
      ;
      cout << '[' << &result << "]: " << result << "n";
    }
  }
  catch (...)
  {
    return 1;
  }
}
//===========================================================================