以有效的方式查找一组边中的现有圆

Finding existing circles in a set of edges in an efficient manner

本文关键字：一组方式有效查找更新时间：2023-10-16

摘要

给定一组边(从3到1000000)，高效地组装闭环(为了方便起见，我称之为圆)。我正在将其作为一个更大的Python项目的一部分运行，因此我希望最好的解决方案将使用带有Python绑定的C++/CUDA编写(这是我尝试过的)。

问题

组装环，给定一组满足以下条件的边(由两个顶点索引定义)：

没有杂散边(所有边都用于创建圆)
所有环闭合(例如，如果存在边1-2，则存在将闭合它的边n-1)
边具有无序索引(例如，边可以是1-2或2-1)

我的一般方法

拾取列表后面的边，并将一个顶点设置为新环的起点(pStart)，将另一个顶点设定为链中的下一点(pEnd)
在边缘列表中搜索CCD_ 3
一旦找到边就将pEnd更新为不等于pEnd的顶点并将其添加到环列表中
重复上述两个步骤，直到CCD_
如果没有更多的边停止，否则重复上面的步骤

我的实现

在C++中，我实现了一个串行和并行版本。我使用一组45000条边对其进行了测试，并获得了以下结果：

串行(105秒)
平行-CUDA推力(28秒)

序列号：

#include <algorithm>
#include <vector>
#include <string>
#include <boost/algorithm/string.hpp>
#include <fstream>
#include <iostream>
#include <chrono>
std::vector< std::vector<int> > rings_from_edges(std::vector<std::vector<int>> edges)
{
int pStart, pEnd;
std::vector<int> temp;
std::vector<std::vector<int>> rings;
temp = edges.back();
edges.pop_back();
pStart = temp[0];
pEnd = temp[1];
int p1, p2;
while(not edges.empty())
// Scan edges list until pEnd is found.
for(auto const& pts: edges)
{
p1 = pts[0];
p2 = pts[1];
// Check if the start of the edge corresponds with the end of the ring.
if(p1 == pEnd)
{
temp.push_back(p2);
pEnd = p2;
edges.erase(std::remove(edges.begin(), edges.end(), pts), edges.end());
// Check if the beginning of the ring is the same as the end of the newly appended edge.
if (p2 == pStart)
{
// Add the newly created ring to the rings list.
rings.push_back(temp);
temp.clear();
// If the edges list contains more edges, reset the starting and end points to search for a new ring.
if(not edges.empty())
{
temp = edges.back();
edges.pop_back();
pStart = temp[0];
pEnd = temp[1];
}
}
break;
}
// Check if the end of the edge corresponds with the end of the ring.
else if(p2 == pEnd)
{
temp.push_back(p1);
pEnd = p1;
edges.erase(std::remove(edges.begin(), edges.end(), pts), edges.end());
// Check if the beginning of the ring is the same as the end of the newly appended edge.
if (p1 == pStart)
{
// Add the newly created ring to the rings list.
rings.push_back(temp);
temp.clear();
// If the edges list contains more edges, reset the starting and end points to search for a new ring.
if(not edges.empty())
{
temp = edges.back();
edges.pop_back();
pStart = temp[0];
pEnd = temp[1];
}
}
break;
}
}
return rings;
}
int main() {
std::chrono::steady_clock::time_point t1 = std::chrono::steady_clock::now();
std::vector< std::vector<int> > vectIN, vectOUT;

std::string fileName = "PATH TO CSV FILE";
std::string delimeter = ",";
std::ifstream file(fileName);
std::string line = "";
while (getline(file, line))
{
std::vector<std::string> vec;
boost::algorithm::split(vec, line, boost::is_any_of(delimeter));
std::vector<int> vec2;
vec2.emplace_back(std::stoi(vec.data()[0]));
vec2.emplace_back(std::stoi(vec.data()[1]));
vectIN.push_back(vec2);
}
file.close();
std::chrono::steady_clock::time_point t2 = std::chrono::steady_clock::now();
vectOUT = rings_from_edges(vectIN);
std::chrono::steady_clock::time_point t3 = std::chrono::steady_clock::now();
for (auto const& ring:vectOUT)
{
for(auto const& pt:ring)
{
if(pt>=0)
std::cout << pt << " ";
}
std::cout << std::endl;
}
std::chrono::steady_clock::time_point t4 = std::chrono::steady_clock::now();
long t1_t2 = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count();
long t2_t3 = std::chrono::duration_cast<std::chrono::milliseconds>(t3 - t2).count();
long t3_t4 = std::chrono::duration_cast<std::chrono::milliseconds>(t4 - t3).count();
std::cout << "Load csv:      " << t1_t2 << std::endl;
std::cout << "Ring assembly: " << t2_t3 << std::endl;
std::cout << "Output:        " << t3_t4 << std::endl;
std::cout << "----------------- THAT'S ALL FOLKS!!! -----------------" << std::endl;
return 0;
}

CUDA推力版本：

#include <thrust/remove.h>
#include <thrust/device_vector.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/tuple.h>
#include <thrust/copy.h>
#include <thrust/sort.h>
#include <thrust/find.h>
#include <thrust/functional.h>
#include <iostream>
#include <string>
#include <vector>
#include <fstream>
#include <iterator>
#include <algorithm>
#include <boost/algorithm/string.hpp>
#include <chrono>
#include <thrust/binary_search.h>
#include <thrust/uninitialized_copy.h>
#include <thrust/device_malloc.h>
#include <thrust/device_vector.h>

int main(){
std::chrono::steady_clock::time_point t1 = std::chrono::steady_clock::now();
std::string fileName = "PATH TO CSV HERE";
std::string delimeter = ",";
std::ifstream file(fileName);
std::vector<std::vector<int>> vectIN;
std::string line = "";
while (getline(file, line))
{
std::vector<std::string> vec;
boost::algorithm::split(vec, line, boost::is_any_of(delimeter));
std::vector<int> vec2;
vec2.emplace_back(std::stoi(vec.data()[0]));
vec2.emplace_back(std::stoi(vec.data()[1]));
vectIN.push_back(vec2);
}
file.close();
std::chrono::steady_clock::time_point t2 = std::chrono::steady_clock::now();

std::vector<int> h_edge1, h_edge2;
h_edge1.reserve(vectIN.size());
h_edge2.reserve(vectIN.size());
for(auto const& pts: vectIN)
{
h_edge1.emplace_back(pts[0]);
h_edge2.emplace_back(pts[1]);
}
std::chrono::steady_clock::time_point t3 = std::chrono::steady_clock::now();
thrust::device_vector<int> d_pStart(1);
thrust::device_vector<int> d_pEnd(1);
thrust::host_vector<int> h_rings;
thrust::device_vector<int> d_rings;
// Initialize edge vectors / pStart / pEnd /  while minimizing copying with CudaMalloc
thrust::device_vector<int> d_edge1(vectIN.size());
thrust::device_vector<int> d_edge2(vectIN.size());
thrust::copy(thrust::make_zip_iterator(thrust::make_tuple(h_edge1.begin(), h_edge2.begin())),
thrust::make_zip_iterator(thrust::make_tuple(h_edge1.end(),   h_edge2.end())),
thrust::make_zip_iterator(thrust::make_tuple(d_edge1.begin(), d_edge2.begin())));
// Arrange edges with edge1 as key and edge2 as value
thrust::sort_by_key(d_edge1.begin(), d_edge1.end(), d_edge2.begin());
d_rings.push_back(d_edge1.back());
d_rings.push_back(d_edge2.back());
d_edge1.pop_back();
d_edge2.pop_back();
d_pStart[0] = d_rings[0];
d_pEnd[0] = d_rings[1];
thrust::device_vector<int> element(1), p1(1), p2(1);
while(not d_edge1.empty())
{
element.clear();
int temp = d_pEnd[0];
auto iter1 = thrust::equal_range(thrust::device, d_edge1.begin(), d_edge1.end(), temp);

if(iter1.first != iter1.second)
{
element[0] = thrust::distance(d_edge1.begin(), iter1.first);
}
else
{
auto iter2 = thrust::find(thrust::device, d_edge2.begin(), d_edge2.end(), d_pEnd[0]);
element[0] = thrust::distance(d_edge2.begin(), iter2);
}
// EDGE START INDEX (P1) AND END INDEX (P2)
p1[0] = d_edge1[element[0]];
p2[0] = d_edge2[element[0]];
// ERASE THE EDGE FROM DEVICE LIST
d_edge1.erase(d_edge1.begin()+element[0]);
d_edge2.erase(d_edge2.begin()+element[0]);
if(p1[0] == d_pEnd[0])
{
d_pEnd[0] = p2[0];
if( d_pStart[0] == d_pEnd[0])
{
d_rings.push_back(-p2[0]);
if(not d_edge1.empty())
{
d_pStart[0] = d_edge1.back();
d_pEnd[0]   = d_edge2.back();
d_rings.push_back(d_pStart[0]);
d_rings.push_back(d_pEnd[0]);
d_edge1.pop_back();
d_edge2.pop_back();
}
}
else
{
d_rings.push_back(p2[0]);
}
}
else if(p2[0] == d_pEnd[0])
{
d_pEnd[0] = p1[0];
if(d_pStart[0] == d_pEnd[0])
{
d_rings.push_back(-p1[0]);
if(not d_edge1.empty())
{
d_pStart[0] = d_edge1.back();
d_pEnd[0]   = d_edge2.back();
d_rings.push_back(d_pStart[0]);
d_rings.push_back(d_pEnd[0]);
d_edge1.pop_back();
d_edge2.pop_back();
}
}
else
{
d_rings.push_back(p1[0]);
}
}
}
std::chrono::steady_clock::time_point t4 = std::chrono::steady_clock::now();
// Copy rings to host and print them.
h_rings = d_rings;
for(auto const& pt:h_rings)
{
if(pt>=0)
std::cout << pt << " ";
else
std::cout << -pt << std::endl;
}
std::cout << std::endl;
long t1_t2 = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count();
long t2_t3 = std::chrono::duration_cast<std::chrono::milliseconds>(t3 - t2).count();
long t3_t4 = std::chrono::duration_cast<std::chrono::milliseconds>(t4 - t3).count();
std::cout << "Load csv:      " << t1_t2 << std::endl;
std::cout << "Create vector: " << t2_t3 << std::endl;
std::cout << "Ring assembly: " << t3_t4 << std::endl;
std::cout << "----------------- THAT'S ALL FOLKS!!! -----------------" << std::endl;
return 0;
}

其他

我已经实现了类似于上面CUDA代码的东西，但将数据组织到桶中，这样只需要对有限数量的数据进行搜索。不幸的是，我还没有让它完全发挥作用。

最近，我一直在研究绘制库的图形，看看我是否可以这样做，但我也没有成功地将这种方式应用到工作中。我知道CUDA工具包有一个和助推。

最后备注

我想至少在10秒内完成这项工作，但理想情况下，我希望在一秒钟内完成一百万次边缘。我不知道这是否现实，但我希望用Cuda加速它可以实现这一点，或者一起找到一种不同的算法。我想看看是否有人能帮助我实现这一目标。

我打赌，Hierholzer算法的串行C++实现可以在不到一秒钟的时间内在10^6边上运行Euler循环，这没有问题，因为渐近运行时间是O(|E|)。考虑到这个教程，我们仍然需要将其分解为简单的循环，我们可以使用这样的Python代码来完成(警告：未经测试)。

def simple_cycles(tour_vertices):
stack = []
index = {}
for v in tour_vertices:
stack.append(v)
i = index.get(v)
if i is None:
index[v] = len(stack) - 1
continue
yield stack[i:]
for w in stack[i+1:]:
del index[w]
del stack[i+1:]

以下是我所想的完整的C++代码。编译，但在其他方面完全未经测试。绝对没有任何保证。

#include <list>
#include <stack>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
struct HalfEdge {
int head;
std::list<HalfEdge>::iterator opposite;
};
std::vector<std::vector<int>> SimpleCyclesFromEdges(
const std::vector<std::pair<int, int>>& edges) {
std::unordered_map<int, std::list<HalfEdge>> graph;
for (const auto& edge : edges) {
auto& first_neighbors = graph[edge.first];
auto& second_neighbors = graph[edge.second];
auto forward = first_neighbors.emplace(first_neighbors.begin());
auto backward = second_neighbors.emplace(second_neighbors.begin());
*forward = {edge.second, backward};
*backward = {edge.first, forward};
}
std::vector<std::vector<int>> simple_cycles;
for (auto& item : graph) {
int v = item.first;
std::unordered_set<int> on_stack;
std::stack<int> stack;
while (true) {
if (on_stack.insert(v).second) {
stack.push(v);
} else {
std::vector<int> cycle = {v};
while (stack.top() != v) {
cycle.push_back(stack.top());
on_stack.erase(stack.top());
stack.pop();
}
simple_cycles.push_back(std::move(cycle));
}
auto& neighbors = graph[v];
if (neighbors.empty()) {
break;
}
auto forward = neighbors.begin();
v = forward->head;
graph[v].erase(forward->opposite);
neighbors.erase(forward);
}
}
return simple_cycles;
}