
Optimised range checking and returning a value

>我有以下函数,它根据输入返回一个值。我需要让这段代码运行得尽可能快,并且不能使用除法、取模运算符或循环。相邻阈值之间的间隔大约都是 6553。

// Maps `input` to its scaling factor by testing the thresholds in ascending
// order; the first threshold that is greater than `input` decides the result.
// Every input below 13107 (negative values included) yields 72816, and every
// input of 58981 or more yields 0.
int GetScalingFactor(int input)
{
    if (input < 13107) return 72816;
    if (input < 19660) return 81918;
    if (input < 26214) return 93621;
    if (input < 32767) return 109225;
    if (input < 39321) return 131070;
    if (input < 45874) return 163837;
    if (input < 52428) return 218450;
    if (input < 58981) return 327675;
    return 0;  // at or beyond the last threshold
}

在 c++ 中使用 std::lower_bound

// Binary-searches the sorted thresholds for the first one that is strictly
// greater than `input` and returns the factor stored at the same position.
// `factors` has one extra trailing 0 that is selected when `input` is at or
// beyond the last threshold.
int GetScalingFactor(int input)
{
    // Thresholds are signed like `input`, so a negative input compares as
    // "below every threshold" instead of wrapping to a huge unsigned value.
    static const int inputs[] = {13107, 19660, 26214, 32767, 39321, 45874, 52428, 58981};
    static const int factors[] = {72816, 81918, 93621, 109225, 131070, 163837, 218450, 327675, 0};

    // The comparator makes lower_bound stop at the first threshold > input,
    // which avoids computing input + 1 (that would overflow for INT_MAX).
    const auto it = std::lower_bound(std::begin(inputs), std::end(inputs), input,
                                     [](int bound, int value) { return bound <= value; });
    return factors[std::distance(std::begin(inputs), it)];
}


您可以准备一张表:72816 重复 13107 次,81918 重复 19660-13107 次,依此类推,然后只需检查上限(58981)。如果输入在范围内,直接返回 table[input],否则返回 0,就像您当前的代码(应该)做的那样。


没有除法,没有取模,只需要一些已分配的内存(远低于 1 兆字节)和一张预先计算好的表。


#include <stdio.h>
#include <stdint.h>
// Number of table entries: one per input value below the last threshold.
enum { TABLE_SIZE = 58981 };

// Precomputed factor for every input in [0, TABLE_SIZE); filled by
// prepare_table(), which must run before the first GetScalingFactor() call.
int32_t table[TABLE_SIZE];

// Fills `table` once by evaluating the threshold chain for every index.
void prepare_table()
{
    for (int32_t input = 0; input < TABLE_SIZE; input++)
    {
        int32_t factor = 0;
        // same threshold chain as the reference function, used only to build the table
        if(input < 13107) factor = 72816;
        else if(input < 19660) factor = 81918;
        else if(input < 26214) factor = 93621;
        else if(input < 32767) factor = 109225;
        else if(input < 39321) factor = 131070;
        else if(input < 45874) factor = 163837;
        else if(input < 52428) factor = 218450;
        else if(input < 58981) factor = 327675;
        table[input] = factor;
    }
}

// Returns the scaling factor for `input` with a single table read.
// Negative inputs map to the first entry (the threshold chain treats them as
// "below 13107"); inputs at or beyond TABLE_SIZE yield 0.
int GetScalingFactor(int input)
{
    if (input < 0)
        return table[0];
    return input < TABLE_SIZE ? table[input] : 0;
}
int main() {
   printf("%d => %dn",19600,GetScalingFactor(19600));
   printf("%d => %dn",26200,GetScalingFactor(26200));
   printf("%d => %dn",58000,GetScalingFactor(58000));
   printf("%d => %dn",60000,GetScalingFactor(60000));





结果:在 clang 3.9.1 上,没有生成输入数组,编译器只是以最有效的顺序与每个边界进行比较。GCC 则决定创建一个数组,并自己高效地实现了 lower_bound(哇!)。


#include <array>
#include <cstddef>
#include <tuple>
#include <type_traits>
#include <utility>
// turn values into types
template<std::size_t I> using index = std::integral_constant<std::size_t, I>;

// Termination case: the remaining range is empty, so `first` is the position
// of the first element that is not less than the searched value.
template<class T, class Tuple, std::size_t first>
constexpr std::size_t iteration(T, Tuple&&, index<first>, index<0>)
{
    return first;
}

// One step of the lower_bound loop over the tuple elements
// [first, first + count). Both `first` and `count` are carried as types so
// that std::get can be used with a compile-time index; the recursion depth is
// logarithmic in the tuple size.
template<class T, class Tuple, std::size_t first, std::size_t count, std::enable_if_t<(count > 0)>* = nullptr>
constexpr std::size_t iteration(T value, Tuple&& tuple, index<first>, index<count>)
{
    constexpr auto step = count / 2;
    constexpr auto it = first + step;
    if (std::get<it>(tuple) < value)
    {
        // The answer lies strictly after `it`: drop the lower half and `it`.
        return iteration(value, std::forward<Tuple>(tuple), index<it + 1>(), index<count - (step + 1)>());
    }
    else
    {
        // The answer is `it` or earlier: keep only the lower half.
        return iteration(value, std::forward<Tuple>(tuple), index<first>(), index<step>());
    }
}

// Expands a lower-bound search over a tuple of ascending bounds.
// Returns the index of the first element that is not less than `value`, or
// the tuple size when every element is less than `value`.
template<class Tuple, class T>
constexpr std::size_t tuple_lower_bound(Tuple&& tuple, const T& value)
{
    constexpr auto count = index<std::tuple_size<std::decay_t<Tuple>>::value>();
    constexpr auto first = index<0>();
    return iteration(value, std::forward<Tuple>(tuple), first, count);
}

int GetScalingFactor(int input)
    static constexpr auto indexes = std::make_tuple(13107, 19660, 26214, 32767, 39321, 45874, 52428, 58981);
    static constexpr std::array<int, std::tuple_size<std::decay_t<decltype(indexes)>>::value + 1> factors = 
        72816, 81918, 93621, 109225, 131070, 163837, 218450, 327675, 0
    auto i = tuple_lower_bound(indexes, input + 1);
    return factors[i];

// Minimal driver for inspecting generated code: the input comes from an
// external function, so the lookup cannot be folded away at compile time,
// and the result is returned as the exit status so it is not discarded.
int main()
{
    extern int get_input();
    auto s1 = GetScalingFactor(get_input());
    return s1;
}



    // Returns the scaling factor for `input` by testing the thresholds in
    // ascending order and returning as soon as one exceeds `input`; inputs of
    // 58981 or more yield 0.
    int GetScalingFactor(int input) {
        if(input < 13107) return 72816; // most common case
        else if(input < 19660) return 81918; // second common case
        else if(input < 26214) return 93621; // ...
        else if(input < 32767) return 109225;
        else if(input < 39321) return 131070;
        else if(input < 45874) return 163837;
        else if(input < 52428) return 218450;
        else if(input < 58981) return 327675;
        else return 0;
    }


// Computes the factor index arithmetically instead of comparing against each
// threshold. The thresholds are floor(k * 13107 / 2) for k = 2..9, so
// (2 * input + 1) / 13107 gives k - 1 for the first threshold above `input`.
// Indices 0 and 1 both fall below the first threshold and share one factor.
int GetScalingFactor(int input)
{
    static const int factors[] = {
        72816, 72816, 81918, 93621, 109225, 131070, 163837, 218450, 327675
    };
    if (input < 0)
        input = 0;      // negative inputs behave like 0
    else if (input >= 58981)
        return 0;       // out of range; also keeps input * 2 + 1 from overflowing
    return factors[(input * 2 + 1) / 13107];
}


if(input < 32767ul)
  if(input < 19660ul)
else if(input < 45874ul)

依此类推(如果有帮助的话,可以在编码之前先在纸上把它画成一棵二叉搜索树)。这样可以把比较次数减少到 O(log n),这是在不创建以 input 为索引、包含 58981 个条目的巨大查找表的前提下所能达到的最佳效果——而那张查找表在执行速度方面会是性能最佳的解决方案。

此外,您的代码有错误,您不应该将无符号变量与int混合。将数据类型切换为 uint_fast32_t