试图理解 C++11 中宏、模板和枚举的复杂用法

Trying to understand complicated use of macros, templates and enums in C++11

本文关键字：枚举、用法复杂、C++ 中的宏；更新时间：2023-10-16

我刚刚看了这个视频http://www.youtube.com/watch?v=y71lli8MS8s，觉得它太棒了。问题是关于他的操作码解码矩阵函数:

template<u16 op> // Execute a single CPU instruction, defined by opcode "op".
void Ins()       // With template magic, the compiler will literally synthesize >256 different functions.
{
    // Note: op 0x100 means "NMI", 0x101 means "Reset", 0x102 means "IRQ". They are implemented in terms of "BRK".
    // User is responsible for ensuring that WB() will not store into memory while Reset is being processed.
    unsigned addr=0, d=0, t=0xFF, c=0, sb=0, pbits = op<0x100 ? 0x30 : 0x20;
    // Define the opcode decoding matrix, which decides which micro-operations constitute
    // any particular opcode. (Note: The PLA of 6502 works on a slightly different principle.)
    enum { o8 = op/8, o8m = 1 << (op%8) };
    // Fetch op'th item from a bitstring encoded in a data-specific variant of base64,
    // where each character transmits 8 bits of information rather than 6.
    // This peculiar encoding was chosen to reduce the source code size.
    // Enum temporaries are used in order to ensure compile-time evaluation.
    #define t(s,code) { enum { 
        i=o8m & (s[o8]>90 ? (130+" (),-089<>?BCFGHJLSVWZ[^hlmnxy|}"[s[o8]-94]) 
                          : (s[o8]-" (("[s[o8]/39])) }; if(i) { code; } }
    /* Decode address operand */
    t("                                !", addr = 0xFFFA) // NMI vector location
    t("                                *", addr = 0xFFFC) // Reset vector location
    t("!                               ,", addr = 0xFFFE) // Interrupt vector location
    t("zy}z{y}zzy}zzy}zzy}zzy}zzy}zzy}z ", addr = RB(PC++))
    t("2 yy2 yy2 yy2 yy2 XX2 XX2 yy2 yy ", d = X) // register index
    t("  62  62  62  62  om  om  62  62 ", d = Y)
    t("2 y 2 y 2 y 2 y 2 y 2 y 2 y 2 y  ", addr=u8(addr+d); d=0; tick())              // add zeropage-index
    t(" y z!y z y z y z y z y z y z y z ", addr=u8(addr);   addr+=256*RB(PC++))       // absolute address
    t("3 6 2 6 2 6 286 2 6 2 6 2 6 2 6 /", addr=RB(c=addr); addr+=256*RB(wrap(c,c+1)))// indirect w/ page wrap
    t("  *Z  *Z  *Z  *Z      6z  *Z  *Z ", Misfire(addr, addr+d)) // abs. load: extra misread when cross-page
    t("  4k  4k  4k  4k  6z      4k  4k ", RB(wrap(addr, addr+d)))// abs. store: always issue a misread
    /* Load source operand */
    t("aa__ff__ab__,4  ____ -  ____     ", t &= A) // Many operations take A or X as operand. Some try in
    t("                knnn     4  99   ", t &= X) // error to take both; the outcome is an AND operation.
    t("                9989    99       ", t &= Y) // sty,dey,iny,tya,cpy
    t("                       4         ", t &= S) // tsx, las
    t("!!!!  !!  !!  !!  !   !!  !!  !!/", t &= P.raw|pbits; c = t)// php, flag test/set/clear, interrupts
    t("_^__dc___^__            ed__98   ", c = t; t = 0xFF)        // save as second operand
    t("vuwvzywvvuwvvuwv    zy|zzywvzywv ", t &= RB(addr+d)) // memory operand
    t(",2  ,2  ,2  ,2  -2  -2  -2  -2   ", t &= RB(PC++))   // immediate operand
    /* Operations that mogrify memory operands directly */
    t("    88                           ", P.V = t & 0x40; P.N = t & 0x80) // bit
    t("    nink    nnnk                 ", sb = P.C)       // rol,rla, ror,rra,arr
    t("nnnknnnk     0                   ", P.C = t & 0x80) // rol,rla, asl,slo,[arr,anc]
    t("        nnnknink                 ", P.C = t & 0x01) // lsr,sre, ror,rra,asr
    t("ninknink                         ", t = (t << 1) | (sb * 0x01))
    t("        nnnknnnk                 ", t = (t >> 1) | (sb * 0x80))
    t("                 !      kink     ", t = u8(t - 1))  // dec,dex,dey,dcp
    t("                         !  khnk ", t = u8(t + 1))  // inc,inx,iny,isb
    /* Store modified value (memory) */
    t("kgnkkgnkkgnkkgnkzy|J    kgnkkgnk ", WB(addr+d, t))
    t("                   q             ", WB(wrap(addr, addr+d), t &= ((addr+d) >> 8))) // [shx,shy,shs,sha?]
    /* Some operations used up one clock cycle that we did not account for yet */
    t("rpstljstqjstrjst - - - -kjstkjst/", tick()) // nop,flag ops,inc,dec,shifts,stack,transregister,interrupts
    /* Stack operations and unconditional jumps */
    t("     !  !    !                   ", tick(); t = Pop())                        // pla,plp,rti
    t("        !   !                    ", RB(PC++); PC = Pop(); PC |= (Pop() << 8)) // rti,rts
    t("            !                    ", RB(PC++))  // rts
    t("!   !                           /", d=PC+(op?-1:1); Push(d>>8); Push(d))      // jsr, interrupts
    t("!   !    8   8                  /", PC = addr) // jmp, jsr, interrupts
    t("!!       !                      /", Push(t))   // pha, php, interrupts
    /* Bitmasks */
    t("! !!  !!  !!  !!  !   !!  !!  !!/", t = 1)
    t("  !   !                   !!  !! ", t <<= 1)
    t("! !   !   !!  !!       !   !   !/", t <<= 2)
    t("  !   !   !   !        !         ", t <<= 4)
    t("   !       !           !   !____ ", t = u8(~t)) // sbc, isb,      clear flag
    t("`^__   !       !               !/", t = c | t)  // ora, slo,      set flag
    t("  !!dc`_  !!  !   !   !!  !!  !  ", t = c & t)  // and, bit, rla, clear/test flag
    t("        _^__                     ", t = c ^ t)  // eor, sre
    /* Conditional branches */
    t("      !       !       !       !  ", if(t)  { tick(); Misfire(PC, addr = s8(addr) + PC); PC=addr; })
    t("  !       !       !       !      ", if(!t) { tick(); Misfire(PC, addr = s8(addr) + PC); PC=addr; })
    /* Addition and subtraction */
    t("            _^__            ____ ", c = t; t += A + P.C; P.V = (c^t) & (A^t) & 0x80; P.C = t & 0x100)
    t("                        ed__98   ", t = c - t; P.C = ~t & 0x100) // cmp,cpx,cpy, dcp, sbx
    /* Store modified value (register) */
    t("aa__aa__aa__ab__ 4 !____    ____ ", A = t)
    t("                    nnnn 4   !   ", X = t) // ldx, dex, tax, inx, tsx,lax,las,sbx
    t("                 !  9988 !       ", Y = t) // ldy, dey, tay, iny
    t("                   4   0         ", S = t) // txs, las, shs
    t("!  ! ! !!  !   !       !   !   !/", P.raw = t & ~0x30) // plp, rti, flag set/clear
    /* Generic status flag updates */
    t("wwwvwwwvwwwvwxwv 5 !}}||{}wv{{wv ", P.N = t & 0x80)
    t("wwwv||wvwwwvwxwv 5 !}}||{}wv{{wv ", P.Z = u8(t) == 0)
    t("             0                   ", P.V = (((t >> 5)+1)&2))         // [arr]
    /* All implemented opcodes are cycle-accurate and memory-access-accurate.
     * [] means that this particular separate rule exists only to provide the indicated unofficial opcode(s).
     */
}

我发现非常令人困惑的是define语句。当宏展开时，它看起来像这样http://codepad.org/bUxdX8MQ。但是'130+" (),-089<>?BCFGHJLSVWZ[^hlmnxy|}"'怎么可能是合法的c++代码呢?据我所知，以这种方式组合字符串和整数是非法的。此外，这种("zy}z{y}zzy}zzy}zzy}zzy}zzy}zzy}z "[o8]>90)根本没有意义，除非c++ 11增加了以数组方式访问字符串字符的支持。希望能学到新的东西!

编辑:感谢每一位回答的人。我之前不知道 C/C++ 中允许 "blabla"[idx] 这种写法，现在这段代码对我来说就说得通了。

其实应该是

130+" (),-089<>?BCFGHJLSVWZ[^hlmnxy|}"["                                !"[o8]-94]

字符串字面量的类型确实是 char 数组(array of char，严格来说是 const char 数组)，从最早的 C++ 标准起就是如此。

所以上面的代码可以重写为:

char s1[]=" (),-089<>?BCFGHJLSVWZ[^hlmnxy|}";
char s2[]="                                !";
130+s1[s2[o8]-94];

你可以看到这是合法的c++

字符串字面量只是 char 数组，而字符只是 8 位整数，所以对它们做算术运算是完全合法的。它们还会被自动提升为 int。

那么，把一行拆下来:

{ enum { i=                                                    //1

= 右边的值必须是一个整型常量(integral constant)

           o8m &                                               //2

o8m = 1 << (op%8)，在上面已定义;所以我们知道 o8m 只有一位被置位(取值为 1、2、4、…、128 之一)。我们要对它做按位与(&)运算，所以右操作数也必须是一个整型常量

                 ("                                !"[o8]>90 ? //3

如果此字符串字面值的第o8个字符为整数值> 90，则…

(130+                                                          //4

…那么结果(之后还要与上面的 o8m 做按位与)将是 130 加上某个值…

" (),-089<>?BCFGHJLSVWZ[^hlmnxy|}"[                            //5

…同样，我们从这个字符串字面量中取一个字符，计算索引为:

"                                !"[o8]-94])                   //6

…另一个字符串字面值的第 o8 个字符，减去 94。我不确定这样做是否正确，因为这个字符串字面量中似乎没有任何字符的值 >= 94，而负数结果对表达式 5 来说是无效的索引。(不过，这个分支只有在表达式 3 的条件成立、即该字符 > 90 时才会被求值;对这个只包含空格和 '!' 的字符串来说，根本不会走到这个分支。)

总之，完成了表达式4，它是三元表达式3的第一个分支。现在是else…

: ("                                !"[o8]-                    //7
   " (("[                                                      //8
     "                                !"[o8]/39                //9
   ]
  )

表达式 9 是一个整型值，用作表达式 8 中字符串的索引;从表达式 7 的字符中减去该索引处的字符，就得到 else 分支的最终值。

) };
if(i) { addr = 0xFFFA; } }

所以我们终于完成了i的常量计算。如果非零，则设置addr。

唉，在浏览器里写的时候没有括号匹配功能，很不方便，但希望你们能明白大意。

字符串是数组，所以 "hello"[ 1 ] == 'e'。此外，数组会衰变为指向其第一个元素的指针，而下标操作的定义是 array[ index ] == * ( array + index )。其中的加法运算得到一个指向第一个元素之后第 index 个元素的指针。

#define中的表达式令人困惑，因为它依赖于优先级规则而不是括号，并且以一种有趣的顺序完成。这真的不是一个好代码的例子。

没有必要强制编译器在编译时求值。启用优化时，所有编译期常量表达式都会被优化掉;而不启用优化时，您可能正好希望在调试器中逐步执行求值过程!这是一个重新格式化的版本:

if ( ( op % 8 ) & // low 3 bits select conditions for execution
     ( s[ op / 8 ] > 'Z'? // lowercase letters go through complicated mapping
       ( " (),-089<>?BCFGHJLSVWZ[^hlmnxy|}"[ s[ op / 8 ] - 'Z' - 4 ] ) + 130
     : s[ op / 8 ] - " (("[ s [ op / 8 ] / 39 ] ) ) { // uppercase is simpler
    code;
}

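这仍然很混乱，而且可能效率低下。我不知道为什么作者这样做(我没有观看 YouTube 视频)，但由于 op8 = op % 8 生成低三位，即 0…7 范围内的数字，在再次提取低三位的子集之前(这次使用 & 运算符)加上 130 似乎毫无意义。(注:原始代码中参与按位与的掩码实际上是 o8m = 1 << (op % 8)，即 8 位中的某一位，而不是 op % 8 本身;因此加上 130 是为了把查表得到的字符还原成完整的 8 位掩码值，并非多余。)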

无论如何……

计算宏参数字符串的索引:

[ s[ op / 8 ] - 'Z' - 4 ]

这将宏参数字符串中大于 'Z' 的字符(主要是小写字母)映射为其他字符…不要问我为什么。看起来像是设计缺陷的证据;在这个级别，字符串应该是不需要转换的格式。

( " (),-089<>?BCFGHJLSVWZ[^hlmnxy|}"[ s[ op / 8 ] - 'Z' - 4 ] )

最后加上数字 130。由于 130 % 8 == 2，我想加 2 也同样可行。我也搞不懂为什么。此外，在这里执行加法有一个副作用:表达式的类型会变为 int，因此 130 是否在 char 的取值范围内并不重要。

( " (),-089<>?BCFGHJLSVWZ[^hlmnxy|}"[ s[ op / 8 ] - 'Z' - 4 ] ) + 130

将二进制数据转换为字符串字面值是代码混淆和"代码高尔夫"(使程序尽可能短的运动)最常用的策略。这是一种使程序不可读、同时在内存中相当紧凑的方法，但它既不是一种使程序更快的方法，也不是优化内存消耗的最佳方法。如果您想要一个数字表，请将它们写成十六进制或十进制的数字。