如何使用程序集获取返回float的__stdcall函数的结果

how to use assembly to get the result of a __stdcall function that returns float

本文关键字：stdcall 函数结果 float 何使用程序集获取返回更新时间：2023-10-16

我有一个汇编例程，它以通用的方式调用一个已知使用stdcall约定并返回float的函数。此函数正被编组框架用于向脚本语言公开stdcall函数。

背景

以下是使用GNU内联汇编的函数，它在MinGW 4.3、Win32上编译：

inline uint64_t stdcall_invoke_return_float(int args_size_bytes,
                                            const char * args_ptr,
                                            void * func_ptr)
{
    uint64_t result;
    assert(
        0 == args_size_bytes % 4
            || !"argument size must be a multiple of 4 bytes");
#if defined(__GNUC__)
    asm
    (
        /* INPUT PARAMS:  %0 is the address where top of FP stack to be stored
         *                %1 is the number of BYTES to push onto the stack, */
        /*                   and during the copy loop it is the address of */
        /*                   the next word to push */
        /*                %2 is the base address of the array */
        /*                %3 is the address of the function to call */
            "testl %1, %1    # If zero argument bytes given, skip nt"
            "je    2f        # right to the function call.        nt"
            "addl  %2, %1n"
        "1:nt"
            "subl  $4, %1    # Push arguments onto the stack in   nt"
            "pushl (%1)      # reverse order. Keep looping while  nt"
            "cmp   %2, %1    # addr to push (%1) > base addr (%2) nt"
            "jg    1b        # Callee cleans up b/c __stdcall.    n"
        "2:nt"
            "call  * %3      # Callee will leave result in ST0    nt"
            "fsts  %0        # Copy 32-bit float from ST0->result"
        : "=m" (result)
        : "r" (args_size_bytes), "r" (args_ptr), "mr" (func_ptr)
        : "%eax", "%edx", "%ecx" /* eax, ecx, edx are caller-save */, "cc"
    );
#else
#pragma error "Replacement for inline assembler required"
#endif
    return result;
}

这只是一点胶水，使编写测试用例更容易：

template<typename FuncPtr, typename ArgType>
float float_invoke(FuncPtr f, int nargs, ArgType * args)
{
    uint64_t result = stdcall_invoke_return_float(
        nargs * sizeof(ArgType),
        reinterpret_cast<const char *>(args),
        reinterpret_cast<void *>(f)
    );
    return *reinterpret_cast<float *>(&result);
}

现在我有一些调用这个函数的测试用例：

__stdcall float TestReturn1_0Float()
{ return 1.0f; }
__stdcall float TestFloat(float a)
{ return a; }
__stdcall float TestSum2Floats(float a, float b)
{ return a + b; }
static const float args[2] = { 10.0f, -1.0f };
assert_equals(1.0f, float_invoke(TestReturn1_0Float, 0, args)); // test 1
assert_equals(10.0f, float_invoke(TestFloat, 1, args));         // test 2
assert_equals(-1.0f, float_invoke(TestFloat, 1, args + 1));     // test 3
assert_equals(9.0f, float_invoke(TestSumTwoFloats, 2, args));   // test 4

问题

随机地，测试3给我垃圾输出，而不是返回-1.0。

我想知道我是不是

未能在call指令之前保留某些状态
用fsts指令扰乱某些状态
从根本上误解了如何从返回float的stdcall函数中获得float值

非常感谢所有的帮助。

由于缺少windows机器，我无法完全测试它；在Linux上，下面给我一个float函数的返回代码：

extern float something(int);
#include 
#include 
int main(int argc, char **argv)
{
    int val = atoi(argv[1]);
    float ret;
    asm("pushl %1nt"
        "call * %2nt"
        "addl $4, %%esp"
       : "=t"(ret)
       : "r"(val), "r"(something)
       : "%eax", "%ecx", "%edx", "memory", "cc");
    printf("something(%d) == %fn", val, ret);
    return 0;
}

关键是"=t"(ret)约束的使用-它获得浮点堆栈的顶部，请参阅机器约束（来自gcc手册）。如果Windows stdcall在ST(0)中也返回float结果，那么应该可以，不需要fld/fst，因为编译器可以在必要时为您执行这些操作。

当从内联程序集中调用函数时，还需要指定memory和cc clobber。

如果允许对函数指针进行内存引用，GCC可能会在内联程序集不会更改它的错误假设下构造相对于堆栈指针的引用。