矢量迭代器在尝试对图形进行深度优先搜索时导致分割错误 11

Vector Iterator causes Segmentation Fault 11 while attempting Depth First Search on a graph

本文关键字：深度优先搜索分割错误迭代器图形更新时间：2023-10-16

我正在尝试使用基于矢量迭代器的DFS来计算生成的图形的最大连接集群。然而，当图是一定的大小（种子概率高于0.4）时，DFS函数处会发生分割错误11。

#include <iostream>
#include <vector>
#include <random>
float seedingProbability = 0.6;
int latticeSize = 1024;
int graphSize;
bool *visited;
long largestClusterSize = 0;
std::vector<int> *graph;
std::vector<int> currentCluster;
std::uniform_real_distribution<float> unif(0, 1);
std::random_device rd;
std::mt19937 rand_engine(rd());
//Is a random number between 0 and 1 higher than (1-seeding probability)
bool isCriticalThreshold() {
    float r = unif(rand_engine); //a random float between 0 and 1
    bool isCritical = r > (1-seedingProbability);
    return isCritical;
}
//This helper function is used to determine the site index
int siteFor(int x, int y) {
    return (latticeSize*x)+y;
}
//Add an undirected edge to the graph between two sites
void addEdge(int site1, int site2) {
    graph[site1].push_back(site2);
    graph[site2].push_back(site1);
}
void DFS(int v) {
    visited[v] = true;
    currentCluster.push_back(v);
    for (std::vector<int>::iterator it = graph[v].begin(); it != graph[v].end(); it++) {
        if(!visited[*it]) {
            DFS(*it);
        }
    }
}
void connectedComponents() {
    for (int i=0; i < (graphSize); i++) {
        visited[i] = false;
    }
    for (int i=0; i<(graphSize); i++) {
        if (visited[i] == false) {
            currentCluster.clear();
            DFS(i);
            long clusterSize = currentCluster.size();
            if (clusterSize > largestClusterSize) {
                largestClusterSize = clusterSize;
            }
        }
    }
}
void createGraph() {
    for (int x = 0; x < latticeSize; x++) {
        for (int y = 0; y < latticeSize; y++) {
            //Down bond
            if (isCriticalThreshold()) {
                if (y == latticeSize-1) { addEdge(siteFor(x, y), siteFor(x, 0)); }
                else { addEdge(siteFor(x, y), siteFor(x, y+1)); }
            }
            //Right bond
            if (isCriticalThreshold()) {
                if (x == latticeSize-1) { addEdge(siteFor(x, y), siteFor(0, y)); }
                else { addEdge(siteFor(x, y), siteFor(x+1, y)); }
            }
        }
    }
}
int main(int argc, const char * argv[]) {
    std::cout << "Running..." << std::endl;
    graphSize = latticeSize*latticeSize;
    graph = new std::vector<int>[graphSize];
    visited = new bool[graphSize];
    createGraph();
    connectedComponents();
    std::cout << "Largest cluster: " << largestClusterSize << std::endl;
    return 0;
}

我基于DFS的依据： http://www.geeksforgeeks.org/connected-components-in-an-undirected-graph/

正如jszpilewski所指出的，主要问题是你可能耗尽了堆栈内存。像DFS这样的递归算法可以非常快速地填满堆栈。您可以将非递归 DFS 算法与显式托管堆栈一起使用。

我根据您的代码整理了一个示例程序来说明这一点。我冒昧地用对象替换原始指针，用 std::vector s 替换数组。我认为这在内存管理方面要安全得多。每个节点都是一个简单的struct，x和y坐标作为晶格上的位置。主要变化是 DFS() ，它显式管理每个未访问节点的堆栈。请随时编辑此内容以满足您的需求。我已经用高达 4096 的 latticeSize 测试了代码，它可以工作。请注意，Node对象只需要存储到所有目标节点的边缘，而不需要存储到源节点的边缘。

#include <iostream>
#include <vector>
#include <random>
#include <functional>
#include <stack>
class Node;
typedef std::reference_wrapper<Node> NodeRef;
float seedingProbability = 0.8;
size_t latticeSize = 4096;
size_t largestClusterSize = 0;
std::vector<size_t> clusters;
std::vector<std::vector<Node>> graph;
std::uniform_real_distribution<float> unif(0.0, 1.0);
std::random_device rd;
std::mt19937 rand_engine(rd());
struct Node
{
    size_t x;
    size_t y;
    bool visited;
    std::vector<NodeRef> targets;
    // Add edges
    void addEdge(Node& _target)
    {
        targets.emplace_back(_target);
    }
    explicit Node(size_t _x, size_t _y)
        :
          x(_x),
          y(_y),
          visited(false)
    {}
};
// Is a random number between 0 and 1 lower than the seeding probability
bool isCriticalThreshold()
{
    return unif(rand_engine) < seedingProbability;
}
void DFS()
{
    for (auto& row : graph)
    {
        for (auto& node : row)
        {
            node.visited = false;
        }
    }
    for (auto& row : graph)
    {
        for (auto& node : row)
        {
            if (!node.visited)
            {
                size_t clusterSize(0);
                std::stack<NodeRef> stack;
                stack.push(node);
                while (!stack.empty())
                {
                    Node& cur_node(stack.top());
                    stack.pop();
                    if (!cur_node.visited)
                    {
                        cur_node.visited = true;
                        ++clusterSize;
                        for (auto& tgt : cur_node.targets)
                        {
                            stack.push(tgt);
                        }
                    }
                }
                clusters.push_back(clusterSize);
            }
        }
    }
}
void connectedComponents() 
{
    largestClusterSize = 0;
    for (const auto& clusterSize : clusters)
    {
        if (clusterSize > largestClusterSize)
        {
            largestClusterSize = clusterSize;
        }
    }
}
void createGraph()
{
    // Generate the lattice
    for (size_t x = 0; x < latticeSize; ++x)
    {
        graph.emplace_back(std::vector<Node>());
        for (size_t y = 0; y < latticeSize; ++y)
        {
            graph.back().emplace_back(x, y);
        }
    }
    // Add edges
    for (size_t  x = 0; x < latticeSize; ++x)
    {
        for (size_t y = 0; y < latticeSize; ++y)
        {
            // Down bond
            if (isCriticalThreshold())
            {
                graph.at(x).at(y).addEdge(graph.at(x).at((y + 1) % latticeSize));
            }
            // Right bond
            if (isCriticalThreshold())
            {
                graph.at(x).at(y).addEdge(graph.at((x + 1) % latticeSize).at(y));
            }
        }
    }
}
int main(int argc, const char * argv[]) {
    std::cout << "Running..." << std::endl;
    createGraph();
    DFS();
    connectedComponents();
    //  std::cout << "Cluster sizes:n";
    //  for (size_t c = 0; c < clusters.size(); ++c)
    //  {
    //      std::cout << "tCluster " << c << ": " << clusters.at(c) << "n";
    //  }
    std::cout << "Largest cluster: " << largestClusterSize << std::endl;
    return 0;
}

堆栈溢出是在大型图上运行递归 DPS 时最典型的问题。在您的情况下，您还需要构建一个辅助数据收集currentCluster因此请检查您的程序是否没有用完 32 位内存空间（大约 2-4 GB，具体取决于操作系统）。因此，如果您确实在 32 位模式下编译，请尝试生成和测试程序的 64 位版本。