DirectXMath矢量运算精度

DirectXMath vector operations precision

本文关键字：精度运算 DirectXMath 更新时间：2023-10-16

XMVector3AngleBetweenVectors函数的结果很奇怪。考虑这个代码：

float angle = XMConvertToDegrees(XMVectorGetX(
        XMVector3AngleBetweenVectors(GMathFV(XMFLOAT3(0.0f, 100.0f, 0.0f)), 
        GMathFV(XMFLOAT3(0.0f, 200.0f, 0.0f)))));

它在寻找两个3D矢量之间的角度，由XMFLOAT3结构描述。GMathFV是用户定义的函数，它将XMFLOAT3转换为XMVECTOR，如下所示：

// Converts an XMFLOAT3 into an XMVECTOR by forwarding to XMLoadFloat3.
//
// The argument is taken by const reference: the function only reads it, and
// a const reference can bind to temporaries such as
// GMathFV(XMFLOAT3(0.0f, 100.0f, 0.0f)) in standard C++. A non-const
// lvalue reference accepts temporaries only through a compiler extension.
inline XMVECTOR GMathFV(const XMFLOAT3& val)
{
    return XMLoadFloat3(&val);
}

其余用到的函数都来自DirectXMath.h库。这里一切正常，得到的角度是0.00000，符合预期。

但对于其他y轴值为负的矢量，例如：

float angle = XMConvertToDegrees(XMVectorGetX(
        XMVector3AngleBetweenVectors(GMathFV(XMFLOAT3(0.0f, -100.0f, 0.0f)), 
        GMathFV(XMFLOAT3(0.0f, -99.0f, 0.0f)))));

结果是0.0197823402，我很难称之为零角。

请有人帮我解决这个问题。是负数精度、矢量坐标太近还是其他原因？

UPD：很奇怪，a(0.0f, 100.0f, 0.0f)与b(0.0f, 99.0f, 0.0f)之间的角度算出来是0.0197823402，而a(0.0f, 101.0f, 0.0f)与b(0.0f, 100.0f, 0.0f)之间的角度却是0.000000

DirectXMath是为32位浮点运算而设计的。您看到的是浮点误差被逐步放大的结果。以下是XMVector3AngleBetweenVectors的定义。

// Returns acos of the clamped product dot(V1, V2) * (rlen(V1) * rlen(V2)),
// where rlen is the value produced by XMVector3ReciprocalLength.
//
// The product is clamped to [-1, 1] before XMVectorACos is applied. Because
// every step is done in 32-bit floats, the product for parallel vectors can
// land one ulp below 1 (e.g. 0.999999940), and acos changes very quickly
// near 1, so that tiny rounding error becomes a visible non-zero angle.
inline XMVECTOR XM_CALLCONV XMVector3AngleBetweenVectors(FXMVECTOR V1, FXMVECTOR V2)
{
    const XMVECTOR invLen1 = XMVector3ReciprocalLength(V1);
    const XMVECTOR invLen2 = XMVector3ReciprocalLength(V2);
    const XMVECTOR dotProduct = XMVector3Dot(V1, V2);

    // Combine the two reciprocal lengths first, then scale the dot product,
    // keeping the same multiplication order as the library.
    const XMVECTOR invLenProduct = XMVectorMultiply(invLen1, invLen2);
    const XMVECTOR CosAngle = XMVectorClamp(
        XMVectorMultiply(dotProduct, invLenProduct),
        g_XMNegativeOne.v,
        g_XMOne.v);

    return XMVectorACos(CosAngle);
}

在第一个例子中，CosAngle等于1.00000000

在第二个例子中，CosAngle等于0.999999940

XMVectorACos(0.999999940) = 0.000345266977（单位为弧度，换算成角度约为0.0197823402度，正是问题中看到的结果）

这个较大的误差来自于ACos的多项式近似。一般来说，你应该尽可能避免使用反三角函数。它们既慢又不精确。下面是它的定义，你可以借此了解它的规模。

// Per-lane arccosine approximation (SSE intrinsics code path).
//
// For each lane v of V the code below computes:
//   x      = |v|
//   p      = degree-7 polynomial in x, evaluated in Horner form; the eight
//            coefficients are lanes 3..0 of g_XMArcCoefficients1 (highest
//            powers first) followed by lanes 3..0 of g_XMArcCoefficients0
//   r      = p * sqrt(max(0, 1 - x))
//   result = r            if v >= 0
//          = g_XMPi - r   otherwise
//
// NOTE(review): the coefficient tables and g_XMPi are defined elsewhere; their
// values are not visible here. XM_PERMUTE_PS with _MM_SHUFFLE(i, i, i, i) is
// presumably a single-source shuffle that broadcasts lane i to all four lanes
// - confirm against the macro definition.
inline XMVECTOR XM_CALLCONV XMVectorACos (FXMVECTOR V)
{
    // All-ones mask in lanes where V >= 0; used at the end to pick the result.
    __m128 nonnegative = _mm_cmpge_ps(V, g_XMZero);
    // -V, so that max(V, -V) yields the absolute value.
    __m128 mvalue = _mm_sub_ps(g_XMZero, V);
    __m128 x = _mm_max_ps(V, mvalue);  // |V|
    // Compute (1-|V|), clamp to zero to avoid sqrt of negative number.
    __m128 oneMValue = _mm_sub_ps(g_XMOne, x);
    __m128 clampOneMValue = _mm_max_ps(g_XMZero, oneMValue);
    __m128 root = _mm_sqrt_ps(clampOneMValue);  // sqrt(1-|V|)
    // Compute polynomial approximation (Horner form: multiply by x, add the
    // next coefficient, repeat). Starts with lane 3 of g_XMArcCoefficients1.
    const XMVECTOR AC1 = g_XMArcCoefficients1;
    XMVECTOR vConstants = XM_PERMUTE_PS( AC1, _MM_SHUFFLE(3, 3, 3, 3) );
    __m128 t0 = _mm_mul_ps(vConstants, x);
    vConstants = XM_PERMUTE_PS( AC1, _MM_SHUFFLE(2, 2, 2, 2) );
    t0 = _mm_add_ps(t0, vConstants);
    t0 = _mm_mul_ps(t0, x);
    vConstants = XM_PERMUTE_PS( AC1, _MM_SHUFFLE(1, 1, 1, 1) );
    t0 = _mm_add_ps(t0, vConstants);
    t0 = _mm_mul_ps(t0, x);
    vConstants = XM_PERMUTE_PS( AC1, _MM_SHUFFLE(0, 0, 0, 0) );
    t0 = _mm_add_ps(t0, vConstants);
    t0 = _mm_mul_ps(t0, x);
    // Continue the same Horner chain with the second coefficient table.
    const XMVECTOR AC0 = g_XMArcCoefficients0;
    vConstants = XM_PERMUTE_PS( AC0, _MM_SHUFFLE(3, 3, 3, 3) );
    t0 = _mm_add_ps(t0, vConstants);
    t0 = _mm_mul_ps(t0, x);
    vConstants = XM_PERMUTE_PS( AC0, _MM_SHUFFLE(2, 2, 2, 2) );
    t0 = _mm_add_ps(t0, vConstants);
    t0 = _mm_mul_ps(t0, x);
    vConstants = XM_PERMUTE_PS( AC0, _MM_SHUFFLE(1, 1, 1, 1) );
    t0 = _mm_add_ps(t0, vConstants);
    t0 = _mm_mul_ps(t0, x);
    // Last coefficient is the constant term, so no further multiply by x.
    vConstants = XM_PERMUTE_PS( AC0, _MM_SHUFFLE(0, 0, 0, 0) );
    t0 = _mm_add_ps(t0, vConstants);
    // Scale the polynomial by sqrt(1-|V|); this is the result for V >= 0.
    t0 = _mm_mul_ps(t0, root);
    // Candidate result for negative inputs: g_XMPi minus the value above.
    __m128 t1 = _mm_sub_ps(g_XMPi, t0);
    // Branch-free select: keep t0 in lanes where V >= 0, t1 in the others.
    t0 = _mm_and_ps(nonnegative, t0);
    t1 = _mm_andnot_ps(nonnegative, t1);
    t0 = _mm_or_ps(t0, t1);
    return t0;
}