std::bind vs lambda performance

using namespace std;
/// Runs `fun(args...)` N times back to back and prints the total wall-clock
/// time of the whole batch to standard output.
///
/// @tparam N    Number of consecutive invocations to time (defaults to 1).
/// @param name  Label printed in front of the measurement.
/// @param fun   Callable under test; invoked as `fun(args...)`.
/// @param args  Arguments passed unchanged to every invocation.
///
/// The output line has the form "<name>: <milliseconds> ms.".
template<int N = 1, class Fun, class... Args>
void timeExec(std::string name, Fun fun, Args... args) {
    // steady_clock is monotonic, so the difference cannot go negative if the
    // system time is adjusted while the loop runs.
    const auto start = std::chrono::steady_clock::now();
    for (int i = 0; i < N; ++i) {
        fun(args...);
    }
    const auto end = std::chrono::steady_clock::now();
    const auto diff = end - start;
    // Convert to fractional milliseconds for printing.
    std::cout << name << ": "
              << std::chrono::duration<double, std::milli>(diff).count()
              << " ms." << std::endl;
}


const int TIMES = 10000;
timeExec<TIMES>("Bind evaluation", bind(&decltype(result)::eval, &result));
timeExec<1>("Lambda evaluation", [&]() {
    for(int i = 0; i < TIMES; ++i) {


Bind evaluation: 0.355158 ms.
Lambda evaluation: 0.014414 ms.






不过,这里起作用的未必是什么编译器优化技巧。罪魁祸首很可能是传给 bind 的参数:bind(&decltype(result)::eval, &result)。您传入的是一个成员函数指针(PTMF)和一个对象。与 lambda 的类型不同,PTMF 的类型并不记录实际要调用的是哪个函数,它只包含函数签名(参数类型和返回类型)。由于编译器没能通过常量传播解析出这个函数指针,较慢的那个循环只能通过间接分支来调用函数。

如果将成员函数 eval() 重命名为 operator()() 并去掉 bind,那么这个显式对象的行为本质上就与 lambda 相同,性能差异应该会消失。



#include <iostream>
#include <functional>
#include <chrono>
using namespace std;
using namespace chrono;
using namespace placeholders;
/// Signature shared by the callbacks that SpeedTest hands out: a byte buffer
/// and the number of bytes to read from it.
using SumDataBlockEventHandler = void(uint8_t data[], uint16_t len);

/// Benchmark fixture that adds up the bytes of a buffer and exposes that
/// operation as a std::function built either with std::bind or with a lambda.
///
/// Both callbacks capture `this`, so they must not be invoked after the
/// SpeedTest object they came from has been destroyed.
class SpeedTest {
    uint32_t sum = 0;
    // Loop counter kept as a member. It is as wide as `len`: a uint8_t
    // counter would wrap at 255 and never terminate for len > 255.
    uint16_t i = 0;

    /// Adds the first `len` bytes of `data` to the running total.
    void SumDataBlock(uint8_t data[], uint16_t len) {
        for (i = 0; i < len; i++) {
            sum += data[i];
        }
    }

public:
    /// Returns the total accumulated by all callback invocations so far.
    uint32_t Sum() const { return sum; }

    /// Wraps SumDataBlock using std::bind with two placeholders.
    std::function<SumDataBlockEventHandler> Bind() {
        return std::bind(&SpeedTest::SumDataBlock, this,
                         std::placeholders::_1, std::placeholders::_2);
    }

    /// Wraps SumDataBlock using a generic lambda that forwards both arguments.
    std::function<SumDataBlockEventHandler> Lambda() {
        return [this](auto data, auto len)
        {
            SumDataBlock(data, len);
        };
    }
};
int main()
    SpeedTest test;
    function<SumDataBlockEventHandler> testF;
    uint8_t data[] = { 0,1,2,3,4,5,6,7 };
#if _DEBUG
    const uint32_t testFcallCount = 1000000;
    const uint32_t testFcallCount = 100000000;
    uint32_t callsCount, whileCount = 0;
    auto begin = high_resolution_clock::now();
    auto end = begin;
    while (whileCount++ < 10) {
        testF = test.Bind();
        begin = high_resolution_clock::now();
        callsCount = 0;
        while (callsCount++ < testFcallCount)
            testF(data, 8);
        end = high_resolution_clock::now();
        cout << testFcallCount << " calls of binded function: " << duration_cast<nanoseconds>(end - begin).count() << "ns" << endl;
        testF = test.Lambda();
        begin = high_resolution_clock::now();
        callsCount = 0;
        while (callsCount++ < testFcallCount)
            testF(data, 8);
        end = high_resolution_clock::now();
        cout << testFcallCount << " calls of lambda function: " << duration_cast<nanoseconds>(end - begin).count() << "ns" << endl << endl;


100000000 calls of binded function: 1846298524ns
100000000 calls of lambda function: 1048086461ns
100000000 calls of binded function: 1259759880ns
100000000 calls of lambda function: 1032256243ns
100000000 calls of binded function: 1264817832ns
100000000 calls of lambda function: 1039052353ns
100000000 calls of binded function: 1263404007ns
100000000 calls of lambda function: 1031216018ns
100000000 calls of binded function: 1275305794ns
100000000 calls of lambda function: 1041313446ns
100000000 calls of binded function: 1256565304ns
100000000 calls of lambda function: 1031961675ns
100000000 calls of binded function: 1248132135ns
100000000 calls of lambda function: 1033890224ns
100000000 calls of binded function: 1252277130ns
100000000 calls of lambda function: 1042336736ns
100000000 calls of binded function: 1250320869ns
100000000 calls of lambda function: 1046529458ns

我在 Visual Studio Enterprise 2015 下分别以完全优化(/Ox)的发布模式和禁用优化的调试模式编译了这段代码。结果证实,在我的笔记本电脑(戴尔 Inspiron 7537,英特尔酷睿 i7-4510U 2.00GHz,8GB 内存)上,lambda 比 bind 更快。
