std::max_element skip over NANs

本文关键字：over NANs skip max std element 更新时间：2023-10-16

我有一个可能包含多个NAN值的std::vector<double>。我想找到向量中最大的元素。如何有效地跳过比较中的NAN？我想避免在每个元素上调用isnan。有什么想法吗？

// std::max_element([NAN,NAN,NAN,-31,-89]) = NAN 
// because NAN > -31 returns NAN.
// how can I skip all NANs in the comparison?
// test 2 below is my use case.
#include <vector>
#include <iostream>
#include <cmath>
void vector_max(std::vector<double> v, double &max, int &imax){
    std::vector<double>::iterator v_iter;
    v_iter = std::max_element(v.begin(),v.end());
    imax = std::distance(v.begin(), v_iter);
    max  = *v_iter;
}
int main(){
    std::vector<double> v_vec;
    std::vector<double>::iterator v_vec_iter;
    int imax;
    double val;
    std::cout << "test 1. " << std::endl;
    v_vec.push_back( -33.0 );
    v_vec.push_back( -124.0 );
    v_vec.push_back( -31.0 );
    v_vec.push_back( 18.4 );
    vector_max(v_vec,val,imax);
    std::cout << "max(v_vec) = " << val << std::endl;
    std::cout << "indmax(v_vec) = " << imax << std::endl;
    std::cout << "test 2: my case. " << std::endl;
    v_vec.clear();
    v_vec.push_back( NAN );
    v_vec.push_back( NAN );
    v_vec.push_back( NAN );
    v_vec.push_back( -33.0 );
    v_vec.push_back( -124.0 );
    v_vec.push_back( -31.0 );
    v_vec.push_back( 31.0 );
    vector_max(v_vec,val,imax);
    std::cout << "max(v_vec) = " << val << std::endl;
    std::cout << "indmax(v_vec) = " << imax << std::endl;
};

这将返回：

test 1. 
max(v_vec) = 18.4
indmax(v_vec) = 3
test 2. 
max(v_vec) = nan
indmax(v_vec) = 0

您可以为max_element提供自定义比较：

void vector_max(std::vector<double> v, double &max, int &imax){
    std::vector<double>::iterator v_iter;
    v_iter = std::max_element(v.begin(),v.end(),
    [] (auto x, auto y)
    {
        return x < y ? true : isnan(x);
    });
    imax = std::distance(v.begin(), v_iter);
    max  = *v_iter;
}

我会尝试这样的事情：

void vector_max(std::vector<double> v, double &max, int &imax){
    std::vector<double>::size_type p=0;
    imax = -1;
    max = std::numeric_limits<double>::lowest();
    for (auto &val : v)
    {
        if (!std::isnan(val) && val>max)
        {
            imax = p;
            max = val;
        }
        p++;
    }
}

问题是std::max_element默认使用 std::less 作为其比较器。根据它处理向量元素的顺序，NAN可能会出现在比较的右侧。由于所有与NAN的比较都返回false，这意味着NAN可以大于所有其他元素。

换句话说，当您在具有 NAN s 的向量上使用带有默认比较器的 std::max_element 时，结果实际上是未定义的，因为它取决于实现和元素的顺序。例如，在 GCC 上，如果我将所有NAN放在向量的末尾，我（随机）得到所需的结果。

因此，除了提供自己的比较运算符外，您别无选择：

#include <vector>
#include <iostream>
#include <cmath>
#include <algorithm>
template <typename T>
struct NaNAwareLess
{
  bool operator () (T a, T b) const
  {
    if (std::isnan(b))
    {
      return false; // Assume NaN is less than *any* non-NaN value.
    }
    if (std::isnan(a))
    {
      return true; // Assume *any* non-NaN value is greater than NaN.
    }
    return (a < b);
  }
};
void vector_max(std::vector<double> v, double &max, int &imax){
    std::vector<double>::iterator v_iter;
    v_iter = std::max_element<std::vector<double>::iterator, NaNAwareLess<double> >(v.begin(),v.end(),NaNAwareLess<double>());
    imax = std::distance(v.begin(), v_iter);
    max  = *v_iter;
}
int main(){
    std::vector<double> v_vec;
    std::vector<double>::iterator v_vec_iter;
    int imax;
    double val;
    std::cout << "test 1. " << std::endl;
    v_vec.push_back( -33.0 );
    v_vec.push_back( -124.0 );
    v_vec.push_back( -31.0 );
    v_vec.push_back( 18.4 );
    vector_max(v_vec,val,imax);
    std::cout << "max(v_vec) = " << val << std::endl;
    std::cout << "indmax(v_vec) = " << imax << std::endl;
    std::cout << "test 2: my case. " << std::endl;
    v_vec.clear();
    v_vec.push_back( NAN );
    v_vec.push_back( NAN );
    v_vec.push_back( NAN );
    v_vec.push_back( -33.0 );
    v_vec.push_back( -124.0 );
    v_vec.push_back( -31.0 );
    v_vec.push_back( 31.0 );
    vector_max(v_vec,val,imax);
    std::cout << "max(v_vec) = " << val << std::endl;
    std::cout << "indmax(v_vec) = " << imax << std::endl;
    std::cout << std::boolalpha << std::less<double>()(NAN, -33.0) << std::endl;
    std::cout << std::boolalpha << std::less<double>()(-33.0, NAN) << std::endl;
};

我认为你不能避免打电话给isnan. 还有另一个重要方面：根据个人经验，我发现对NAN值执行操作比对任何其他值执行操作要慢得多（可能是因为 FPU 异常处理）。因此，虽然使用isnan可能会很烦人，但它也可能对性能产生相当大的积极影响。