自定义类向量的类函数的分割错误
Segmentation fault for class function of vector of custom class
我使用以下代码在鸢尾花数据集上运行kmeans算法 - https://github.com/marcoscastro/kmeans/blob/master/kmeans.cpp
我已经修改了上面的代码以从文件中读取输入。下面是我的代码 -
#include <iostream>
#include <vector>
#include <math.h>
#include <stdlib.h>
#include <time.h>
#include <algorithm>
#include <fstream>
using namespace std;
class Point
{
private:
int id_point, id_cluster;
vector<double> values;
int total_values;
string name;
public:
Point(int id_point, vector<double>& values, string name = "")
{
this->id_point = id_point;
total_values = values.size();
for(int i = 0; i < total_values; i++)
this->values.push_back(values[i]);
this->name = name;
this->id_cluster = -1;
}
int getID()
{
return id_point;
}
void setCluster(int id_cluster)
{
this->id_cluster = id_cluster;
}
int getCluster()
{
return id_cluster;
}
double getValue(int index)
{
return values[index];
}
int getTotalValues()
{
return total_values;
}
void addValue(double value)
{
values.push_back(value);
}
string getName()
{
return name;
}
};
class Cluster
{
private:
int id_cluster;
vector<double> central_values;
vector<Point> points;
public:
Cluster(int id_cluster, Point point)
{
this->id_cluster = id_cluster;
int total_values = point.getTotalValues();
for(int i = 0; i < total_values; i++)
central_values.push_back(point.getValue(i));
points.push_back(point);
}
void addPoint(Point point)
{
points.push_back(point);
}
bool removePoint(int id_point)
{
int total_points = points.size();
for(int i = 0; i < total_points; i++)
{
if(points[i].getID() == id_point)
{
points.erase(points.begin() + i);
return true;
}
}
return false;
}
double getCentralValue(int index)
{
return central_values[index];
}
void setCentralValue(int index, double value)
{
central_values[index] = value;
}
Point getPoint(int index)
{
return points[index];
}
int getTotalPoints()
{
return points.size();
}
int getID()
{
return id_cluster;
}
};
class KMeans
{
private:
int K; // number of clusters
int total_values, total_points, max_iterations;
vector<Cluster> clusters;
// return ID of nearest center (uses euclidean distance)
int getIDNearestCenter(Point point)
{
double sum = 0.0, min_dist;
int id_cluster_center = 0;
for(int i = 0; i < total_values; i++)
{
sum += pow(clusters[0].getCentralValue(i) -
point.getValue(i), 2.0);
}
min_dist = sqrt(sum);
for(int i = 1; i < K; i++)
{
double dist;
sum = 0.0;
for(int j = 0; j < total_values; j++)
{
sum += pow(clusters[i].getCentralValue(j) -
point.getValue(j), 2.0);
}
dist = sqrt(sum);
if(dist < min_dist)
{
min_dist = dist;
id_cluster_center = i;
}
}
return id_cluster_center;
}
public:
KMeans(int K, int total_points, int total_values, int max_iterations)
{
this->K = K;
this->total_points = total_points;
this->total_values = total_values;
this->max_iterations = max_iterations;
}
void run(vector<Point> & points)
{
if(K > total_points)
return;
vector<int> prohibited_indexes;
printf("Inside run n");
// choose K distinct values for the centers of the clusters
printf(" K distinct clustern");
for(int i = 0; i < K; i++)
{
while(true)
{
int index_point = rand() % total_points;
if(find(prohibited_indexes.begin(), prohibited_indexes.end(),
index_point) == prohibited_indexes.end())
{
printf("i= %dn",i);
prohibited_indexes.push_back(index_point);
points[index_point].setCluster(i);
Cluster cluster(i, points[index_point]);
clusters.push_back(cluster);
break;
}
}
}
int iter = 1;
printf(" Each point to nearest clustern");
while(true)
{
bool done = true;
// associates each point to the nearest center
for(int i = 0; i < total_points; i++)
{
int id_old_cluster = points[i].getCluster();
int id_nearest_center = getIDNearestCenter(points[i]);
if(id_old_cluster != id_nearest_center)
{
if(id_old_cluster != -1)
clusters[id_old_cluster].removePoint(points[i].getID());
points[i].setCluster(id_nearest_center);
clusters[id_nearest_center].addPoint(points[i]);
done = false;
}
}
// recalculating the center of each cluster
for(int i = 0; i < K; i++)
{
for(int j = 0; j < total_values; j++)
{
int total_points_cluster = clusters[i].getTotalPoints();
double sum = 0.0;
if(total_points_cluster > 0)
{
for(int p = 0; p < total_points_cluster; p++)
sum += clusters[i].getPoint(p).getValue(j);
clusters[i].setCentralValue(j, sum / total_points_cluster);
}
}
}
if(done == true || iter >= max_iterations)
{
cout << "Break in iteration " << iter << "nn";
break;
}
iter++;
}
// shows elements of clusters
for(int i = 0; i < K; i++)
{
int total_points_cluster = clusters[i].getTotalPoints();
cout << "Cluster " << clusters[i].getID() + 1 << endl;
for(int j = 0; j < total_points_cluster; j++)
{
cout << "Point " << clusters[i].getPoint(j).getID() + 1 << ": ";
for(int p = 0; p < total_values; p++)
cout << clusters[i].getPoint(j).getValue(p) << " ";
string point_name = clusters[i].getPoint(j).getName();
if(point_name != "")
cout << "- " << point_name;
cout << endl;
}
cout << "Cluster values: ";
for(int j = 0; j < total_values; j++)
cout << clusters[i].getCentralValue(j) << " ";
cout << "nn";
}
}
};
int main(int argc, char *argv[])
{
srand(time(NULL));
int total_points, total_values, K, max_iterations, has_name;
ifstream inFile("datafile.txt");
if (!inFile) {
cerr << "Unable to open file datafile.txt";
exit(1); // call system to stop
}
inFile >> total_points >> total_values >> K >> max_iterations >> has_name;
cout << "Details- n";
vector<Point> points;
string point_name,str;
int i=0;
while(inFile.eof())
{
string temp;
vector<double> values;
for(int j = 0; j < total_values; j++)
{
double value;
inFile >> value;
values.push_back(value);
}
if(has_name)
{
inFile >> point_name;
Point p(i, values, point_name);
points.push_back(p);
i++;
}
else
{
inFile >> temp;
Point p(i, values);
points.push_back(p);
i++;
}
}
inFile.close();
KMeans kmeans(K, total_points, total_values, max_iterations);
kmeans.run(points);
return 0;
}
代码的输出是 -
Details-
15043100000Inside run
K distinct cluster i= 0
Segmentation fault
当我在 gdb 中运行时,显示的错误是 -
Program received signal SIGSEGV, Segmentation fault.
0x0000000000401db6 in Point::setCluster (this=0x540, id_cluster=0)
at kmeans.cpp:41
41 this->id_cluster = id_cluster;
我被困在这一点上,因为我找不到这个分段错误的原因。
我的数据集文件如下所示 -
150 4 3 10000 1
5.1,3.5,1.4,0.2,Iris-setosa
4.9,3.0,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa
. . .
7.0,3.2,4.7,1.4,Iris-versicolor
6.4,3.2,4.5,1.5,Iris-versicolor
6.9,3.1,4.9,1.5,Iris-versicolor
5.5,2.3,4.0,1.3,Iris-versicolor
6.5,2.8,4.6,1.5,Iris-versicolor
. . .
KMeans::run(vector<Point>&)
你称呼points[index_point].setCluster(i);
,而不保证index_point
在范围内。
index_point
由int index_point = rand() % total_points;
确定,total_points
从输入文件"datafile.txt"中检索,可以是任何内容。它当然不必匹配points.size()
,但它应该。确保它这样做,或者只是使用points.size()
。
有点题外话,但使用 rand()
并且只使用模几乎总是错误的。如果您使用 C++11 或更高版本,请考虑使用 std::uniform_int_distribution。
points[index_point].setCluster(i);
可能会越界访问向量。您引用的代码实际上总是在调用run
之前在向量points
中设置total_points
数,而您修改后的代码只是读取直到文件末尾,并且不能保证传递给KMeans
构造函数的总点数与points
中的条目值匹配。修复文件 I/O 或修复边界检查逻辑。
相关文章:
- C++映射分割错误(核心转储)
- 由cin中的字符串中未捕获空白引起的分割错误
- 删除映射和分割错误中的一个过去结束元素
- 在指向函数中读取变量时出现分割错误
- 在链表中的第 n 位插入显示分割错误
- 较高值 n 的分割错误(例如 n=999997)
- 尝试通过memcpy复制大尺寸浮点向量时的分割错误
- 分割错误:向量中的擦除功能
- 向量向量的分割错误
- 我在C++中编写了一个方法来打印树类的预序,但它显示了分割错误
- C ++分割错误,为什么使用"long long"我没有得到答案?
- 在尝试使用递归查找集合子集的总数时,我遇到了分割错误
- 分割错误 11:尝试使用 cin 输入 B[1] 时
- 集合布局上的 Qt 分割错误
- 高达20亿的筛子会产生分割错误
- 对向量使用推回函数时的分割错误
- 绘制精灵会导致分割错误
- 将矢量的整数内容打印为字符串会导致分割错误
- 分割错误:使用向量时为 11 c++
- 结构的分割错误错误