对自定义类的 vector 元素调用成员函数时出现段错误

Segmentation fault for class function of vector of custom class

本文关键字:分割 错误 类函数 自定义 向量      更新时间:2023-10-16

我使用以下代码在鸢尾花数据集上运行kmeans算法 - https://github.com/marcoscastro/kmeans/blob/master/kmeans.cpp

我已经修改了上面的代码以从文件中读取输入。下面是我的代码 -

#include <iostream>
#include <vector>
#include <math.h>
#include <stdlib.h>
#include <time.h>
#include <algorithm>
#include <fstream>
using namespace std;
/**
 * A single data point: a numeric feature vector plus bookkeeping.
 *
 * Every point carries a caller-supplied id, the id of the cluster it is
 * currently assigned to (-1 while unassigned) and an optional name.
 */
class Point
{
private:
    int id_point, id_cluster;
    std::vector<double> values;
    std::string name;
public:
    /**
     * @param id_point  identifier of the point
     * @param values    feature values; copied into the point
     * @param name      optional label (empty by default)
     *
     * The point starts out unassigned (cluster id -1).
     */
    Point(int id_point, const std::vector<double>& values, std::string name = "")
        : id_point(id_point), id_cluster(-1), values(values), name(name)
    {
    }
    /** @return the identifier given at construction. */
    int getID() const
    {
        return id_point;
    }
    /** Records the id of the cluster this point now belongs to. */
    void setCluster(int id_cluster)
    {
        this->id_cluster = id_cluster;
    }
    /** @return the current cluster id, or -1 if none was ever set. */
    int getCluster() const
    {
        return id_cluster;
    }
    /**
     * @return the feature value at `index`.
     * No bounds check is performed; `index` must be in
     * [0, getTotalValues()).
     */
    double getValue(int index) const
    {
        return values[index];
    }
    /**
     * @return the number of feature values currently stored.
     * Derived from the vector itself so that it stays correct after
     * addValue() (a separately cached count would go stale).
     */
    int getTotalValues() const
    {
        return static_cast<int>(values.size());
    }
    /** Appends one more feature value to the point. */
    void addValue(double value)
    {
        values.push_back(value);
    }
    /** @return the label given at construction (possibly empty). */
    std::string getName() const
    {
        return name;
    }
};
class Cluster
{
private:
    int id_cluster;
    vector<double> central_values;
    vector<Point> points;
public:
    Cluster(int id_cluster, Point point)
    {
        this->id_cluster = id_cluster;
        int total_values = point.getTotalValues();
        for(int i = 0; i < total_values; i++)
            central_values.push_back(point.getValue(i));
        points.push_back(point);
    }
    void addPoint(Point point)
    {
        points.push_back(point);
    }
    bool removePoint(int id_point)
    {
        int total_points = points.size();
        for(int i = 0; i < total_points; i++)
        {
            if(points[i].getID() == id_point)
            {
                points.erase(points.begin() + i);
                return true;
            }
        }
        return false;
    }
    double getCentralValue(int index)
    {
        return central_values[index];
    }
    void setCentralValue(int index, double value)
    {
        central_values[index] = value;
    }
    Point getPoint(int index)
    {
        return points[index];
    }
    int getTotalPoints()
    {
        return points.size();
    }
    int getID()
    {
        return id_cluster;
    }
};
class KMeans
{
private:
    int K; // number of clusters
    int total_values, total_points, max_iterations;
    vector<Cluster> clusters;
    // return ID of nearest center (uses euclidean distance)
    int getIDNearestCenter(Point point)
    {
        double sum = 0.0, min_dist;
        int id_cluster_center = 0;
        for(int i = 0; i < total_values; i++)
        {
            sum += pow(clusters[0].getCentralValue(i) -
                       point.getValue(i), 2.0);
        }
        min_dist = sqrt(sum);
        for(int i = 1; i < K; i++)
        {
            double dist;
            sum = 0.0;
            for(int j = 0; j < total_values; j++)
            {
                sum += pow(clusters[i].getCentralValue(j) -
                           point.getValue(j), 2.0);
            }
            dist = sqrt(sum);
            if(dist < min_dist)
            {
                min_dist = dist;
                id_cluster_center = i;
            }
        }
        return id_cluster_center;
    }
public:
    KMeans(int K, int total_points, int total_values, int max_iterations)
    {
        this->K = K;
        this->total_points = total_points;
        this->total_values = total_values;
        this->max_iterations = max_iterations;
    }
    void run(vector<Point> & points)
    {
        if(K > total_points)
            return;
        vector<int> prohibited_indexes;
                printf("Inside run n"); 
        // choose K distinct values for the centers of the clusters
        printf(" K distinct clustern");
        for(int i = 0; i < K; i++)
        {
            while(true)
            {
                int index_point = rand() % total_points;
                if(find(prohibited_indexes.begin(), prohibited_indexes.end(),
                        index_point) == prohibited_indexes.end())
                {
                    printf("i= %dn",i);
                    prohibited_indexes.push_back(index_point);
                    points[index_point].setCluster(i);
                    Cluster cluster(i, points[index_point]);
                    clusters.push_back(cluster);
                    break;
                }
            }
        }
        int iter = 1;
        printf(" Each point to nearest  clustern");
        while(true)
        {
            bool done = true;
            // associates each point to the nearest center
            for(int i = 0; i < total_points; i++)
            {
                int id_old_cluster = points[i].getCluster();
                int id_nearest_center = getIDNearestCenter(points[i]);
                if(id_old_cluster != id_nearest_center)
                {
                    if(id_old_cluster != -1)
                        clusters[id_old_cluster].removePoint(points[i].getID());
                    points[i].setCluster(id_nearest_center);
                    clusters[id_nearest_center].addPoint(points[i]);
                    done = false;
                }
            }
            // recalculating the center of each cluster
            for(int i = 0; i < K; i++)
            {
                for(int j = 0; j < total_values; j++)
                {
                    int total_points_cluster = clusters[i].getTotalPoints();
                    double sum = 0.0;
                    if(total_points_cluster > 0)
                    {
                        for(int p = 0; p < total_points_cluster; p++)
                            sum += clusters[i].getPoint(p).getValue(j);
                        clusters[i].setCentralValue(j, sum / total_points_cluster);
                    }
                }
            }
            if(done == true || iter >= max_iterations)
            {
                cout << "Break in iteration " << iter << "nn";
                break;
            }
            iter++;
        }
        // shows elements of clusters
        for(int i = 0; i < K; i++)
        {
            int total_points_cluster =  clusters[i].getTotalPoints();
            cout << "Cluster " << clusters[i].getID() + 1 << endl;
            for(int j = 0; j < total_points_cluster; j++)
            {
                cout << "Point " << clusters[i].getPoint(j).getID() + 1 << ": ";
                for(int p = 0; p < total_values; p++)
                    cout << clusters[i].getPoint(j).getValue(p) << " ";
                string point_name = clusters[i].getPoint(j).getName();
                if(point_name != "")
                    cout << "- " << point_name;
                cout << endl;
            }
            cout << "Cluster values: ";
            for(int j = 0; j < total_values; j++)
                cout << clusters[i].getCentralValue(j) << " ";
            cout << "nn";
        }
    }
};
int main(int argc, char *argv[])
{
    srand(time(NULL));
    int total_points, total_values, K, max_iterations, has_name;
    ifstream inFile("datafile.txt");
    if (!inFile) {
        cerr << "Unable to open file datafile.txt";
        exit(1);   // call system to stop
        }
        inFile >> total_points >> total_values >> K >> max_iterations >> has_name;
        cout << "Details- n";
        vector<Point> points;
        string point_name,str;
        int i=0;
        while(inFile.eof())
        {
            string temp;
            vector<double> values;
            for(int j = 0; j < total_values; j++)
                {
                double value;
                inFile >> value;
                values.push_back(value);
                } 
            if(has_name)
            {
                inFile >> point_name;
                Point p(i, values, point_name);
                points.push_back(p);
                i++;  
            }
            else
            {   
                inFile >> temp;
                Point p(i, values);
                points.push_back(p);
                i++; 
            }
        }
    inFile.close();
    KMeans kmeans(K, total_points, total_values, max_iterations);
    kmeans.run(points);
    return 0;
}

代码的输出是 -

 Details- 
 15043100000Inside run
 K distinct cluster i= 0  
 Segmentation fault

当我在 gdb 中运行时,显示的错误是 -

Program received signal SIGSEGV, Segmentation fault.
0x0000000000401db6 in Point::setCluster (this=0x540, id_cluster=0)
    at kmeans.cpp:41
41                      this->id_cluster = id_cluster;

我卡在了这里,因为找不到这个段错误的原因。

我的数据集文件如下所示 -

150 4 3 10000 1
5.1,3.5,1.4,0.2,Iris-setosa
4.9,3.0,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa 
. . .
7.0,3.2,4.7,1.4,Iris-versicolor
6.4,3.2,4.5,1.5,Iris-versicolor
6.9,3.1,4.9,1.5,Iris-versicolor
5.5,2.3,4.0,1.3,Iris-versicolor
6.5,2.8,4.6,1.5,Iris-versicolor 
. . .

在 KMeans::run(vector<Point>&) 中,你调用了 points[index_point].setCluster(i);,却没有保证 index_point 在有效范围内。

index_point 由 int index_point = rand() % total_points; 确定,而 total_points 是从输入文件 "datafile.txt" 中读取的,可以是任意值。它当然不一定等于 points.size(),但它应该相等。请确保两者一致,或者直接使用 points.size()。

有点题外话,但使用 rand() 并且只使用模几乎总是错误的。如果您使用 C++11 或更高版本,请考虑使用 std::uniform_int_distribution。

points[index_point].setCluster(i); 可能会越界访问向量。你引用的原始代码在调用 run 之前总是向向量 points 中放入恰好 total_points 个点,而你修改后的代码只是一直读取到文件末尾,并不能保证传递给 KMeans 构造函数的总点数与 points 中的实际条目数一致。请修复文件 I/O,或者修复边界检查逻辑。