循环和if的优化
Optimization of loops and if
我有一个过程如下:
void Process1(unsigned char* data)
{
}
void Process2(unsigned char* data)
{
}
void Process3(unsigned char* data)
{
}
#define FLAG1 (1 << 1)
#define FLAG2 (1 << 2)
#define FLAG3 (1 << 3)
void ProcessData(unsigned char* data, unsigned int bytes, unsigned int flags)
{
bool b1 = !!(flags & FLAG1);
bool b2 = !!(flags & FLAG2);
bool b3 = !!(flags & FLAG3);
for (unsigned int i = 0; i < bytes; i ++)
{
if (b1) Process1(data + i);
if (b2) Process2(data + i);
if (b3) Process3(data + i);
}
}
看起来,flags & FLAG1
A.K.A b1
在所有循环中都不会改变。但我们仍然需要在每个循环中进行分支。我只是想知道是否有一种方法可以动态地避免这个不必要的分支。
这是Lundin解决方案的演示。
#include <windows.h>
#include <stdio.h>
#include <time.h>
LARGE_INTEGER ls, le, ll;
#define START_CLOCK() QueryPerformanceCounter(&ls)
#define END_CLOCK() printf ("%.0lf nsn", (QueryPerformanceCounter(&le), ((double)le.QuadPart - ls.QuadPart) / ll.QuadPart * 1000000));
void Process1(unsigned char* data)
{
(*data)++;
}
void Process2(unsigned char* data)
{
(*data)--;
}
void Process3(unsigned char* data)
{
(*data) *= (*data);
}
#define FLAG1 (1 << 1)
#define FLAG2 (1 << 2)
#define FLAG3 (1 << 3)
void ProcessData(unsigned char* data, unsigned int bytes, unsigned int flags)
{
bool b1 = !!(flags & FLAG1);
bool b2 = !!(flags & FLAG2);
bool b3 = !!(flags & FLAG3);
for (unsigned int i = 0; i < bytes; i ++)
{
if (b1) Process1(data + i);
if (b2) Process2(data + i);
if (b3) Process3(data + i);
}
}
typedef void (*proc_t)(unsigned char*);
inline static void do_nothing (unsigned char* ptr)
{
(void)ptr;
}
void ProcessData_x(unsigned char* data, unsigned int bytes, unsigned int flags)
{
bool b1 = (flags & FLAG1) != 0; // de-obfuscate the boolean logic
bool b2 = (flags & FLAG2) != 0;
bool b3 = (flags & FLAG3) != 0;
proc_t p1 = b1 ? Process1 : do_nothing;
proc_t p2 = b2 ? Process2 : do_nothing;
proc_t p3 = b3 ? Process3 : do_nothing;
for (unsigned int i = 0; i<bytes; i++)
{
p1(data + i);
p2(data + i);
p3(data + i);
}
}
int main()
{
if (!QueryPerformanceFrequency(&ll)) return 1;
const unsigned int bytes = 0xffff;
srand((unsigned int)time(NULL));
unsigned int flags = rand() & 0x7;
unsigned char* data = new unsigned char[bytes];
for (unsigned int i = 0; i < bytes; i++)
{
data[i] = (unsigned char)(rand() & 0xff);
}
START_CLOCK();
ProcessData(data, bytes, flags);
END_CLOCK();
START_CLOCK();
ProcessData_x(data, bytes, flags);
END_CLOCK();
}
这是输出:
134 ns
272 ns
我已经运行了好几次,但出乎意料的是,它花费了更多的时间:(..它也是编译的"vs2010 Release x86"
首先,在没有考虑特定系统的情况下谈论优化是没有意义的。。。
话虽如此,我会以以下方式优化分支:
typedef void (*proc_t)(unsigned char*);
inline static void do_nothing (unsigned char* ptr)
{
(void)ptr;
}
...
void ProcessData(unsigned char* data, unsigned int bytes, unsigned int flags)
{
bool b1 = (flags & FLAG1) != 0; // de-obfuscate the boolean logic
bool b2 = (flags & FLAG2) != 0;
bool b3 = (flags & FLAG3) != 0;
proc_t p1 = b1 ? Process1 : do_nothing;
proc_t p2 = b2 ? Process2 : do_nothing;
proc_t p3 = b3 ? Process3 : do_nothing;
for (unsigned int i = 0; i<bytes; i++)
{
p1(data + i);
p2(data + i);
p3(data + i);
}
}
一个c++解决方案。类似于Lundin的回答,但没有调用空函数。我不确定这是否会对性能产生任何影响,主要优点是不需要手动列出循环中的所有进程调用。如果你想微优化或想要c,你可以在堆栈上使用一个数组,但你必须自己管理一些计数器。
typedef void (*proc_t)(unsigned char*);
std::vector<proc_t> processes;
if (b1) processes.push_back(Process1);
if (b2) processes.push_back(Process2);
if (b3) processes.push_back(Process3);
for(auto p : processes)
for (unsigned int i = 0; i<bytes; i++)
p(data + i);
bool b1 = !!(flags & FLAG1);
bool b2 = !!(flags & FLAG2);
bool b3 = !!(flags & FLAG3);
int caseNow=SelectCaseAtOnce(b1,b2,b3);
if(caseNow==0)
for (unsigned int i = 0; i < bytes; i ++)
{
Process1(data + i);
}
else if(caseNow==1)
for (unsigned int i = 0; i < bytes; i ++)
{
Process2(data + i);
}
else if(caseNow==2)
for (unsigned int i = 0; i < bytes; i ++)
{
Process3(data + i);
}
else if(caseNow==3)
for (unsigned int i = 0; i < bytes; i ++)
{
Process1(data + i);
Process2(data + i);
}
if(caseNow==4)
for (unsigned int i = 0; i < bytes; i ++)
{
Process1(data + i);
Process3(data + i);
}
else if(caseNow==5)
for (unsigned int i = 0; i < bytes; i ++)
{
Process2(data + i);
Process3(data + i);
}
else if(caseNow==6)
for (unsigned int i = 0; i < bytes; i ++)
{
Process1(data + i);
Process2(data + i);
Process3(data + i);
}
else {}
这里有另一个使用模板的解决方案-通过这种方式,您将获得每个变体的内部循环的优化版本。如果ProcessN函数足够短/简单,值得内联,那么这可能是一个有价值的优化。
#include <tuple>
#include <map>
#include <utility>
using namespace std;
inline void Process1(unsigned char* data) {}
inline void Process2(unsigned char* data) {}
inline void Process3(unsigned char* data) {}
#define FLAG1 (1 << 1)
#define FLAG2 (1 << 2)
#define FLAG3 (1 << 3)
template <bool b1, bool b2, bool b3>
void ProcessData(unsigned char* data, unsigned int bytes) {
for (unsigned int i = 0; i < bytes; i++) {
if (b1) Process1(data + i);
if (b2) Process2(data + i);
if (b3) Process3(data + i);
}
}
void ProcessData(unsigned char* data, unsigned int bytes, unsigned int flags) {
typedef void (*ProcessFunc)(unsigned char*, unsigned int bytes);
static map<tuple<bool, bool, bool>, ProcessFunc> funcs{
{make_tuple(false, false, false), ProcessData<false, false, false>},
{make_tuple(false, false, true), ProcessData<false, false, true>},
{make_tuple(false, true, false), ProcessData<false, true, false>},
{make_tuple(false, true, true), ProcessData<false, true, true>},
{make_tuple(true, false, false), ProcessData<true, false, false>},
{make_tuple(true, false, true), ProcessData<true, false, true>},
{make_tuple(true, true, false), ProcessData<true, true, false>},
{make_tuple(true, true, true), ProcessData<true, true, true>}};
bool b1 = !!(flags & FLAG1);
bool b2 = !!(flags & FLAG2);
bool b3 = !!(flags & FLAG3);
funcs[make_tuple(b1, b2, b3)](data, bytes);
}
相关文章:
- 优化/减少 if 语句中的条件表达式
- 而不是那么多的 if 语句,我想要一个逻辑,我可以用一个语句或优化的方式来完成
- 需要消除这些"else if"并优化代码
- 从编译器优化和代码性能的角度来看,"if constexpr"与"if"
- "constexpr if" vs "if" 与优化 - 为什么需要"constexpr"?
- C++ 优化 if/else 条件
- 如何优化循环/if语句的C
- 试图优化我的 if 语句
- 优化代码/实现"for"循环而不是长"if - else if - else"
- if条件下两个字符串比较的优化代码
- if else-if梯形图和编译器优化
- if(0) 和 if(1) 语句是否优化
- 使用简写IF的C++编译器优化
- if/else优化测试结果令人震惊
- 循环和if的优化
- 为什么C++编译器无法优化"if(test) --foo"以"foo -= test"?
- 优化if/else语句
- 如何优化If/then条件表达式的长序列- SIMD
- C++优化 if 语句
- 优化if-else函数的代码