正在尝试理解ASM代码

Trying to understand ASM code

本文关键字:ASM 代码      更新时间:2023-10-16

编辑

我把memcmp换成了自己编写的13字节比较函数,自己编写的版本没有产生那些额外的指令。所以我只能猜测,那些额外的汇编指令只是优化器的一个缺陷。

if (!EQ13(&ti, &m_ti)) {  // in 2014, memcmp was not being optimzied here
000007FEF91B2CFE  mov         rdx,qword ptr [rsp]  
000007FEF91B2D02  movzx       eax,byte ptr [rsp+0Ch]  
000007FEF91B2D07  mov         ecx,dword ptr [rsp+8]  
000007FEF91B2D0B  cmp         rdx,qword ptr [r10+28h]  
000007FEF91B2D0F  jne         TSccIter::SetTi+9Dh (7FEF91B2D1Dh)  
000007FEF91B2D11  cmp         ecx,dword ptr [r10+30h]  
000007FEF91B2D15  jne         TSccIter::SetTi+9Dh (7FEF91B2D1Dh)  
000007FEF91B2D17  cmp         al,byte ptr [r10+34h]  
000007FEF91B2D1B  je          TSccIter::SetTi+0B1h (7FEF91B2D31h)  

在这种情况下,我自己编写的版本也并不完美,因为它一开始就执行了3个mov,尽管比较很可能在第一个mov对应的数据处就已经得出结果。这部分我还需要改进。

原始问题

以下是msvc2010生成的asm代码,展示了它如何优化小型、固定大小的memcmp(在本例中为13字节)。我在我们的代码中见过很多这种类型的优化,但从未见过最后这6行。有人能告诉我为什么会有最后这6条汇编指令吗?TransferItem是13个字节,这可以解释先比较QWORD、再比较DWORD、最后比较BYTE的cmp序列。

// Fixed-size record of five char fields: 3 + 3 + 1 + 3 + 3 = 13 bytes.
// SetTi() compares and copies it as a raw block of sizeof(TransferItem) bytes.
// NOTE(review): "Cxr", "Mkt" and "Op" presumably abbreviate carrier,
// marketing and operating — confirm with the owning team.
struct TransferItem {
  char m_szCxrMkt1[3];  // SetTi() writes 2 bytes here from szCxrMkt1
  char m_szCxrOp1[3];   // SetTi() writes 2 bytes here from szCxrOp1 when it is non-null and non-empty
  char m_chDelimiter;   // SetTi() stores '*' (wild card) or ':' here
  char m_szCxrMkt2[3];  // SetTi() writes 2 bytes here from szCxrMkt2
  char m_szCxrOp2[3];   // SetTi() writes 2 bytes here from szCxrOp2 when it is non-null and non-empty
};
...
if (memcmp(&ti, &m_ti, sizeof(TransferItem))) {
2B8E lea         rax,[rsp]  
2B92 mov         rdx,qword ptr [rax]  
2B95 cmp         rdx,qword ptr [r10+28h]  
2B99 jne         TSccIter::SetTi+0A2h (7FEF9302BB2h)  
2B9B mov         edx,dword ptr [rax+8]  
2B9E cmp         edx,dword ptr [r10+30h]  
2BA2 jne         TSccIter::SetTi+0A2h (7FEF9302BB2h)  
2BA4 movzx       edx,byte ptr [rax+0Ch]  
2BA8 cmp         dl,byte ptr [r10+34h]  
2BAC jne         TSccIter::SetTi+0A2h (7FEF9302BB2h)  
2BAE xor         eax,eax  
2BB0 jmp         TSccIter::SetTi+0A7h (7FEF9302BB7h)  
2BB2 sbb         eax,eax  
2BB4 sbb         eax,0FFFFFFFFh  
2BB7 test        eax,eax  
2BB9 je          TSccIter::SetTi+0CCh (7FEF9302BDCh) 

还有,xor eax,eax的意义是什么?既然我们已经知道eax将为零,为什么还要在2bb7行再去测试这个已知为零的值?

这是完整的函数

// Builds a candidate TransferItem from the arguments and compares it with the
// stored m_ti; when they differ, copies the candidate into m_ti and sets
// m_fQryChanged. When m_fSkipSet is set the call changes nothing.
//
// fWildCard means match certain fields to '**' in the db
// szCxrMkt1,2 are required and cannot be null, '  ', or ''.
// szCxrOp1,2 can be null, '  ', or ''.
//
// Returns *this in every case.
TSccIter& SetTi(bool fWildCard, LPCSTR szCxrMkt1, LPCSTR szCxrOp1, LPCSTR szCxrMkt2, LPCSTR szCxrOp2) {
    // Early out: leave m_iSid, m_ti and m_fQryChanged untouched.
    if (m_fSkipSet) 
        return *this;
    m_iSid = -1; // resets the iterator to search from the start
    // Pad the struct to 16 bytes so we can clear it with 2 QWORDS
    // We use a temp, ti, to detect if the new transferitem has changed
    class TransferItemPadded : public TransferItem {
        char padding[16 - sizeof(TransferItem)]; // get us to 16 bytes
    } ti;
    // Fill all 16 bytes of ti with the space character (0x20), so any field
    // not overwritten below stays blank.
    // NOTE(review): U8 and BUMP are not defined in this function; presumably
    // U8(p) accesses 8 bytes at p and BUMP(p, n) advances p by n bytes — confirm.
    U8(&ti) = U8(BUMP(&ti, 8)) = 0x2020202020202020;  // 8 spaces
    // copy in the params
    // CPY2 copies one WORD; szCxrMkt1 is dereferenced without a null check.
    CPY2(ti.m_szCxrMkt1, szCxrMkt1);
    // Optional field: skipped (left as spaces) when null or empty.
    if (szCxrOp1 && *szCxrOp1)
        CPY2(ti.m_szCxrOp1, szCxrOp1);
    ti.m_chDelimiter = (fWildCard) ? '*' : ':'; // this controls wild card matching  
    // szCxrMkt2 is likewise dereferenced without a null check.
    CPY2(ti.m_szCxrMkt2, szCxrMkt2);
    // Optional field: skipped (left as spaces) when null or empty.
    if (szCxrOp2 && *szCxrOp2)
        CPY2(ti.m_szCxrOp2, szCxrOp2);
    // see if different
    // Only sizeof(TransferItem) bytes are compared and copied; the padding
    // added by TransferItemPadded is excluded.
    if (memcmp(&ti, &m_ti, sizeof(TransferItem))) {
        memcpy(&m_ti, &ti, sizeof(TransferItem));
        m_fQryChanged  = true;
    }
    return *this;
}
// Unsigned integer type spelled with the MSVC-specific `__int64` keyword.
// NOTE(review): SetTi() writes `U8(&ti) = ...`, using U8 as an assignable
// expression, which a plain typedef does not allow; the real code presumably
// also has a U8(p) macro that dereferences p — confirm.
typedef unsigned __int64 U8;
#define CPY2(a,b) ((*(WORD*)a) = (*(WORD*)b))

这是完整的asm代码

TSccIter& SetTi(bool fWildCard, LPCSTR szCxrMkt1, LPCSTR szCxrOp1, LPCSTR szCxrMkt2, LPCSTR szCxrOp2) {
2B10  sub         rsp,18h  
    if (m_fSkipSet) 
2B14  cmp         byte ptr [rcx+0EAh],0  
2B1B  mov         r10,rcx  
        return *this;
2B1E  jne         TSccIter::SetTi+0CCh (7FEF9302BDCh)  
    m_iSid = -1; 
    class TransferItemPadded : public TransferItem {
        char padding[16 - sizeof(TransferItem)]; 
    } ti;
    U8(&ti) = U8(BUMP(&ti, 8)) = 0x2020202020202020; 
2B24  mov         rax,2020202020202020h  
2B2E  mov         byte ptr [rcx+36h],0FFh  
2B32  mov         qword ptr [rsp],rax  
2B36  mov         qword ptr [rsp+8],rax  
    CPY2(ti.m_szCxrMkt1, szCxrMkt1);
2B3B  movzx       eax,word ptr [r8]  
2B3F  mov         word ptr [rsp],ax  
    if (szCxrOp1 && *szCxrOp1)
2B43  test        r9,r9  
2B46  je          TSccIter::SetTi+47h (7FEF9302B57h)  
2B48  cmp         byte ptr [r9],0  
2B4C  je          TSccIter::SetTi+47h (7FEF9302B57h)  
        CPY2(ti.m_szCxrOp1, szCxrOp1);
2B4E  movzx       eax,word ptr [r9]  
2B52  mov         word ptr [rsp+3],ax  
    ti.m_chDelimiter = (fWildCard) ? '*' : ':'; 
2B57  mov         eax,3Ah  
2B5C  mov         ecx,2Ah  
2B61  test        dl,dl  
2B63  cmovne      eax,ecx  
2B66  mov         byte ptr [rsp+6],al  
    CPY2(ti.m_szCxrMkt2, szCxrMkt2);
2B6A  mov         rax,qword ptr [szCxrMkt2]  
2B6F  movzx       ecx,word ptr [rax]  
    if (szCxrOp2 && *szCxrOp2)
2B72  mov         rax,qword ptr [szCxrOp2]  
2B77  mov         word ptr [rsp+7],cx  
2B7C  test        rax,rax  
2B7F  je          TSccIter::SetTi+7Eh (7FEF9302B8Eh)  
2B81  cmp         byte ptr [rax],0  
2B84  je          TSccIter::SetTi+7Eh (7FEF9302B8Eh)  
        CPY2(ti.m_szCxrOp2, szCxrOp2);
2B86  movzx       eax,word ptr [rax]  
2B89  mov         word ptr [rsp+0Ah],ax  
    if (memcmp(&ti, &m_ti, sizeof(TransferItem))) {
2B8E  lea         rax,[rsp]  
2B92  mov         rdx,qword ptr [rax]  
2B95  cmp         rdx,qword ptr [r10+28h]  
2B99  jne         TSccIter::SetTi+0A2h (7FEF9302BB2h)  
2B9B  mov         edx,dword ptr [rax+8]  
2B9E  cmp         edx,dword ptr [r10+30h]  
2BA2  jne         TSccIter::SetTi+0A2h (7FEF9302BB2h)  
2BA4  movzx       edx,byte ptr [rax+0Ch]  
2BA8  cmp         dl,byte ptr [r10+34h]  
2BAC  jne         TSccIter::SetTi+0A2h (7FEF9302BB2h)  
2BAE  xor         eax,eax  
2BB0  jmp         TSccIter::SetTi+0A7h (7FEF9302BB7h)  
2BB2  sbb         eax,eax  
2BB4  sbb         eax,0FFFFFFFFh  
2BB7  test        eax,eax  
2BB9  je          TSccIter::SetTi+0CCh (7FEF9302BDCh)  
        memcpy(&m_ti, &ti, sizeof(TransferItem));
2BBB  mov         rax,qword ptr [rsp]  
        m_fQryChanged  = true;
2BBF  mov         byte ptr [r10+0E9h],1  
2BC7  mov         qword ptr [r10+28h],rax  
2BCB  mov         eax,dword ptr [rsp+8]  
2BCF  mov         dword ptr [r10+30h],eax  
2BD3  movzx       eax,byte ptr [rsp+0Ch]  
2BD8  mov         byte ptr [r10+34h],al  
    }
    return *this;
2BDC  mov         rax,r10  
}

2bb7可以通过不同的代码路径到达:一是在2b99、2ba2或2bac处发生了跳转(经由2bb2处的两条sbb指令),二是三个条件跳转都没有发生时经过xor和jmp到达。xor eax,eax只在后一条路径上执行,它确保eax为0——否则eax显然不一定为0。

最后6行在eax中生成比较结果(eax==0表示两者匹配),并设置SF和ZF条件标志。

test eax,eax会计算eax AND eax并据此设置标志位,相当于检查eax是否为0。如果为零,后面的je就会跳转。

而xor eax,eax是实现"eax=0"的一种高效写法,它比mov eax,0更高效。

编辑:我最初误解了这个问题。看起来在"TSccIter::SetTi+0A7h"处还会发生一些事情,那里会改变这个值吗?

此外,这里解释了用SBB复制进位标志(2BB2-2BB4)的技巧:

http://compgroups.net/comp.lang.asm.x86/trick-with-sbb-instruction/20164