通过在迭代以指针为键的映射时报错来捕获非确定性

Catch nondeterminism by error'ing on iterating pointer-keyed maps

本文关键字：报错、确定性、非确定性、映射、迭代、指针　更新时间：2023-10-16

我们已经好几次在我正在处理的代码库中发现非确定性问题。到目前为止，这些问题几乎都是由使用 std::[unordered_]map/set<T*,U> 引起的：其键是一个指针，再加上对映射的迭代，通常以基于范围的 for 循环的形式出现(因为指针值可能在两次执行之间发生变化，所以迭代顺序是不确定的)。

我想知道，当在这样的容器上调用 begin() 时，是否有某种模板黑魔法可以用来注入 static_assert。我认为 begin() 是执行此检查的最佳位置，或者也可以是 iterator::operator++，因为除此之外，以其他方式构造迭代器(例如 find() 的结果)是没有问题的。

我原以为可以重载 std::begin，但是基于范围的 for 循环的规则规定，如果成员函数 .begin() 存在，则优先使用它。所以，我没主意了。有没有一个聪明的技巧来做到这一点？

进一步澄清：不涉及自定义比较器，指针的直接值(即目标对象的地址)是关键。这对于插入和查找很好，并且仅在迭代容器时成为问题，因为顺序基于不可预测的指针值。我正在尝试在大型现有代码库中找到这样的现有案例。

您几乎可以通过偏特化(partial specialization)实现所需的行为：

20.5.4.2.1 除非另有说明，否则如果 C++ 程序向命名空间 std 或命名空间 std 内的命名空间添加声明或定义，则其行为未定义。仅当声明依赖于用户定义的类型，并且该特化满足原始模板的标准库要求且未被明确禁止时，程序才可以将任何标准库模板的模板特化添加到命名空间 std。

因此，可以使用 std::map 的一个简单特化来检测以指针作为键类型实例化该模板的尝试：

#include <map>
namespace internal
{
// Program-defined trait naming the allocator that std::map<Key, T> uses by
// default. Spelling the allocator through this trait is what lets the partial
// specialization below depend on a program-defined type.
template<class Key, class T>
class DefaultAllocator
{
public:
    using type = std::allocator<std::pair<const Key, T>>;
};

// Behaves like std::allocator<T> but is a distinct type, so a map using it
// selects the primary std::map template rather than the specialization below.
template<class T>
class Allocator2 : public std::allocator<T>
{
public:
    Allocator2() = default;

    // Converting constructor expected of every allocator; there is no state
    // to carry over.
    template<class U>
    Allocator2(const Allocator2<U>&) noexcept {}

    // Rebind to Allocator2<U>. Without this member, the rebind inherited from
    // std::allocator (present before C++20) would produce std::allocator<U>.
    template<class U>
    struct rebind
    {
        using other = Allocator2<U>;
    };
};

// Always false, but dependent on T: a static_assert on it is only evaluated
// when the enclosing member function is actually instantiated.
template<class T>
struct AlwaysFalse
{
    static constexpr bool value = false;
};
}
namespace std
{
// Partial specialization of std::map for a pointer key type and the default
// allocator. Everything except begin()/cbegin() is inherited from
// std::map<Key*, T, Compare, ::internal::Allocator2<std::pair<Key* const, T>>>
// so construction, insertion and lookup keep working; only attempts to start
// an iteration fail to compile.
template<class Key, class T, class Compare>
class map<Key*, T, Compare, typename ::internal::DefaultAllocator<Key*, T>::type> :
    public map<Key*, T, Compare, ::internal::Allocator2<std::pair<Key* const, T>>>
{
    // The allocator's value_type must equal the map's value_type, which is
    // pair<Key* const, T> (const key), not pair<Key*, T>.
    using base = map<Key*, T, Compare, ::internal::Allocator2<std::pair<Key* const, T>>>;

public:
    // Dependent member types need 'typename'; kept public so that
    // std::map<K*, V>::iterator stays usable by callers.
    using typename base::iterator;
    using typename base::const_iterator;

    // Keep the base class constructors (initializer list, range, ...).
    using base::base;

    // Hide begin() and cbegin(): instantiating any of them is a compile error.
    iterator begin() noexcept
    {
        static_assert(::internal::AlwaysFalse<Key>::value, "OH NOES, A POINTER");
        return base::begin();
    }
    const_iterator begin() const noexcept
    {
        static_assert(::internal::AlwaysFalse<Key>::value, "OH NOES, A POINTER");
        return base::begin();
    }
    const_iterator cbegin() const noexcept
    {
        static_assert(::internal::AlwaysFalse<Key>::value, "OH NOES, A POINTER");
        return base::cbegin();
    }
};
}
// Demo driver: a value-keyed map can be iterated, whereas starting an
// iteration over the pointer-keyed map is expected to fail to compile.
int main()
{
    std::map<int, int> by_value;
    by_value[0] = 42;
    // OK, not a specialization
    for (auto& entry : by_value)
    {
        static_cast<void>(entry);
    }

    std::map<int*, int> by_pointer;
    by_pointer[nullptr] = 42;          // Insertion is OK
    for (auto& entry : by_pointer)     // static_assert failure
    {
        static_cast<void>(entry);
    }
}

然而

我还没有找到将这种做法扩展到自定义分配器的方法：特化的声明必须依赖于某个用户定义的类型。
这是一个很糟糕的 hack，所以我只会用它来查找现有案例(而不是把它作为静态检查器长期保留)。

让指定的指针类型在编译时失败的一种方法是：针对那些容易受到非确定性行为影响的特定指针类型(例如由接口返回的指针)，删除 std::less、std::greater、std::hash 等的特化。要为指针集合提供"安全"的功能，则有许多种选择。

下面是一个综合示例：

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <limits>
#include <map>
#include <memory>
#include <set>
#include <unordered_map>
#include <unordered_set>
// Specializes std::hash, std::less, std::greater, std::less_equal and
// std::greater_equal for both T* and const T* with a deleted call operator,
// so that any container falling back on those functors for T pointers fails
// to compile.
//
// Invoke at global namespace scope: the expansion opens namespace std itself.
// Every line of the body ends in a backslash so that the whole namespace block
// belongs to the macro.
#define DISABLE_NUMERIC_POINTER_SPECIALIZATIONS(T) \
namespace std { \
template <> struct hash<const T*> { std::size_t operator()(const T* obj) const = delete; }; \
template <> struct hash<T*> { std::size_t operator()(T* obj) const = delete; }; \
template <> struct less<const T*> { bool operator()(const T* lhs, const T* rhs) const = delete; }; \
template <> struct less<T*> { bool operator()(T* lhs, T* rhs) const = delete; }; \
template <> struct greater<const T*> { bool operator()(const T* lhs, const T* rhs) const = delete; }; \
template <> struct greater<T*> { bool operator()(T* lhs, T* rhs) const = delete; }; \
template <> struct less_equal<const T*> { bool operator()(const T* lhs, const T* rhs) const = delete; }; \
template <> struct less_equal<T*> { bool operator()(T* lhs, T* rhs) const = delete; }; \
template <> struct greater_equal<const T*> { bool operator()(const T* lhs, const T* rhs) const = delete; }; \
template <> struct greater_equal<T*> { bool operator()(T* lhs, T* rhs) const = delete; }; \
}
namespace NS {
// Small value type that carries nothing but an integer id.
class C {
    int m_id;

public:
    explicit C(int id) : m_id(id) {}

    // Returns the id supplied at construction.
    auto id() const -> int { return m_id; }
};

// Two C objects are equal when their ids are equal.
inline bool operator ==(const C& lhs, const C& rhs) {
    const int left = lhs.id();
    const int right = rhs.id();
    return left == right;
}

// C objects are ordered by ascending id.
inline bool operator <(const C& lhs, const C& rhs) {
    const int left = lhs.id();
    const int right = rhs.id();
    return left < right;
}
}   // namespace NS
namespace std {
// Hash an NS::C by its id, converted to std::size_t.
template <>
struct hash<NS::C> {
    std::size_t operator()(const NS::C& obj) const {
        return static_cast<std::size_t>(obj.id());
    }
};
}
// Opt NS::C into DISABLE_NUMERIC_POINTER_SPECIALIZATIONS; the containers of
// NS::C* used further down spell out their hash/ordering functors explicitly.
DISABLE_NUMERIC_POINTER_SPECIALIZATIONS(NS::C)
// Equality through pointers: compares the pointed-to objects when both
// pointers are non-null, otherwise compares the pointers themselves.
struct IndirectEqual {
    template <typename T>
    bool operator()(const T* lhs, const T* rhs) const {
        if (lhs && rhs) {
            return *lhs == *rhs;
        }
        return lhs == rhs;
    }
};
// Strict weak ordering through pointers: orders by the pointed-to objects
// (using their operator<) when both pointers are non-null.
//
// If at least one pointer is null, a null pointer sorts before every non-null
// pointer and two null pointers are equivalent. This is decided explicitly
// instead of applying '<' to the raw pointers, because the result of a
// relational comparison between unrelated pointers is unspecified.
struct IndirectLess {
    template <typename T>
    bool operator()(const T* lhs, const T* rhs) const {
        if (lhs && rhs) {
            return *lhs < *rhs;
        }
        return lhs == nullptr && rhs != nullptr;
    }
};
// Descending counterpart of an indirect less-than: orders by the pointed-to
// objects when both pointers are non-null.
//
// The comparison is written as '*rhs < *lhs' so that the pointee type only
// needs operator< (a type that defines no operator> still works).
//
// If at least one pointer is null, every non-null pointer is greater than a
// null pointer and two null pointers are equivalent. This is decided
// explicitly because a relational comparison between unrelated raw pointers
// has an unspecified result.
struct IndirectGreater {
    template <typename T>
    bool operator()(const T* lhs, const T* rhs) const {
        if (lhs && rhs) {
            return *rhs < *lhs;
        }
        return lhs != nullptr && rhs == nullptr;
    }
};
// Hash through a pointer: a non-null pointer hashes as std::hash<T> of the
// pointed-to object; a null pointer maps to the largest std::size_t value.
struct IndirectHash {
    template <typename T>
    std::size_t operator()(const T* ptr) const {
        if (!ptr) {
            return std::numeric_limits<std::size_t>::max();
        }
        return std::hash<T>{}(*ptr);
    }
};
// Less-than functor that applies operator< directly to its two arguments.
struct BuiltinLess {
    template <typename T>
    auto operator()(const T& lhs, const T& rhs) const -> bool {
        return lhs < rhs;
    }
};
// Orders two std::shared_ptr<T> by applying '<' to the raw pointers returned
// by get(); the pointed-to objects are not inspected.
struct SPLess {
    template <typename T>
    bool operator()(const std::shared_ptr<T>& lhs, const std::shared_ptr<T>& rhs) const {
        const auto left = lhs.get();
        const auto right = rhs.get();
        return left < right;
    }
};
// Greater-than functor that applies operator> directly to its two arguments.
// (The comparison used to be 'lhs < rhs', which made this functor an exact
// duplicate of a less-than comparison.)
struct BuiltinGreater {
    template <typename T>
    bool operator()(const T& lhs, const T& rhs) const { return lhs > rhs; }
};
// Hashes a pointer by its address value; the pointed-to object is not read.
//
// A pointer cannot be static_cast to an integer, so the address is obtained
// with reinterpret_cast to std::uintptr_t and then narrowed to std::size_t.
struct PtrHash {
    template <typename T>
    std::size_t operator()(const T* ptr) const {
        return static_cast<std::size_t>(reinterpret_cast<std::uintptr_t>(ptr));
    }
};
template <typename T>
class BasicSet : private std::set<T, BuiltinLess> {
public:
using std::set<T, BuiltinLess>::set;
using std::set<T, BuiltinLess>::find;
using std::set<T, BuiltinLess>::insert;
using std::set<T, BuiltinLess>::emplace;
using std::set<T, BuiltinLess>::end;
};
template <typename T>
class BasicSet<std::shared_ptr<T>> : private std::set<std::shared_ptr<T>, SPLess> {
public:
using std::set<std::shared_ptr<T>, SPLess>::set;
using std::set<std::shared_ptr<T>, SPLess>::find;
using std::set<std::shared_ptr<T>, SPLess>::insert;
using std::set<std::shared_ptr<T>, SPLess>::emplace;
using std::set<std::shared_ptr<T>, SPLess>::end;
};
int main()
{
// All of these decls result in a compiler error
// std::set<NS::C*> unsafe_s{new NS::C{1}, new NS::C{2}}; 
// std::map<NS::C*, int> unsafe_m{ {new NS::C{1}, 100} };
// std::unordered_set<NS::C*> unsafe_us{new NS::C{1}, new NS::C{2}}; 
// std::unordered_map<NS::C*, int> unsafe_um{ {new NS::C{1}, 123} };
std::set<NS::C*, IndirectLess> s{ new NS::C{1} };
std::unordered_set<NS::C*, IndirectHash> us1{ new NS::C{1} };
std::unordered_set<NS::C*, IndirectHash, IndirectEqual> us2{ new NS::C{1} };
auto c = new NS::C{1};
assert (s.find(c) != s.end());
assert (us1.find(c) == us1.end());  // pointers aren't equal
assert (us2.find(c) != us2.end());  // objects are equal
BasicSet<NS::C*> bs{ new NS::C{1} };
assert (bs.find(c) == bs.end());    // pointers aren't equal 
auto sp1 = std::make_shared<NS::C>(10);
auto sp2 = std::make_shared<NS::C>(20);
BasicSet<std::shared_ptr<NS::C>> spset{sp1, sp2};
assert(spset.find(sp1) != spset.end());
return 0;
}

注意：这并不完美。例如，还需要禁用 "volatile T*" 和 "const volatile T*" 这两种变体。我敢肯定还有其他问题。