(C++)K-Means聚类问题

(C++) K-Means Clustering trouble

本文关键字:聚类 问题 K-Means C++      更新时间:2023-10-16

我在使用K-means聚类算法时遇到了一些问题。输入文件看起来像:

4

60 60

23 45

25 11

30 11

在一个60x60的图像网格内,总共有4个簇。该算法似乎有效,但当它再次开始重新计算质心并更改标签时,网格中的标签慢慢开始仅变为1。在大约5次迭代之后,所有标签都变为1。据我所知,我已经一遍又一遍地查看了代码,我不明白为什么所有标签都只变成1。感谢您的帮助!

#include <iostream>
#include <fstream>
#include <cmath>
using namespace std;
class Node {
public:
    int x;
    int y;
    int label;
    double distace;
    Node* next;
    void printNode() {
        cout << "X:t" << x << endl;
        cout << "Y:t" << y << endl;
        cout << "Label:t" << label << endl;
    }
    Node(int i, int j) {
        x = i;
        y = j;
        next = nullptr;
    }
};
class LinkedList {
public:
    Node* head;
    int length;
    Node* scanner;
    LinkedList() {
        Node* n = new Node(-999, -999);
        head = n;
        scanner = head;
        length = 0;
    }
    void insert(Node* n) {
        if (!head->next) {
            head->next = n;
            length++;
            return;
        }
        n->next = head->next;
        head->next = n;
        length++;
    }
    void deleteNode(Node* n) {
        Node* prev = head;
        Node* current = head->next;
        while (current) {
            if (n->x == current->x && 
                n->y == current->y &&
                n->label == current->label) {
                    prev->next = current->next;
                    return;
                }
                prev = prev->next;
                current = current->next;
        }
        length--;
    }
    void printList() {
        Node* current = head->next;
        while (current) {
            current->printNode();
            cout << endl;
            current = current->next;
        }
    }
    Node* scan() {
        scanner = scanner->next;
        if (!scanner) return 0;
        return scanner;
    }
    void resetScanner() {
        scanner = head;
    }
    void changeLabelTo(int x, int y, int newLabel) {
        Node* current = head->next;
        while (current) {
            if (current->x == x &&
                current->y == y) {
                    current->label = newLabel;
                }
            current = current->next;
        }
    }
};
class KMeans {
public:
    struct xycoord {
      int x;
      int y;
    };
    int k;
    xycoord* kcentroids;
    LinkedList ll;
    int row;
    int col;
    int** image;
    int tracker;
    int getLabel() {
        int x = tracker++;
        if (tracker > k) { tracker = 1; }
        return x;
    }
    KMeans(int clusters, int r, int c) { 
        k = clusters;
        tracker = 1;
        kcentroids = new xycoord[k+1];
        row = r;
        col = c;
        image = new int*[row];
        for (int i = 0; i < row; i++)
            image[i] = new int[col];
    }
    ~KMeans() {
        delete [] kcentroids;
        for (int i = 0; i < row; i++)
            delete [] image[i];
        delete [] image;
        cout << "Called!" << endl;
    }
    void displayImage() {
        for (int i = 0; i < row; i++) {
            for (int j = 0; j < col; j++) {
                if (image[i][j] == 0) cout << " ";
                else { cout << image[i][j]; }
            }
            cout << endl;
        }
    }
    void imageOutput() {
        for (int i = 0; i < ll.length; i++) {
            Node* n = ll.scan();
            if (n) {
                image[n->x][n->y] = n->label;
            }
        }
        displayImage();
    }
    void computeCentroids() {
        for (int i = 1; i <= k; i++) {
            kcentroids[i].x = 0;
            kcentroids[i].y = 0;
        }
        int* count = new int[k+1];
        for (int i  = 0; i < ll.length; i++) {
            Node* n = ll.scan();
            if (n) {
                kcentroids[n->label].x += n->x;
                kcentroids[n->label].y += n->y;
                count[n->label]++;
            }
        }
        cout << endl;
        for (int i = 1; i <= k; i++) {
            kcentroids[i].x = kcentroids[i].x / count[i];
            kcentroids[i].y = kcentroids[i].y / count[i];
            /*
             * i - label
             * 6 - 4
             * 7 - 3
             * 8 - 2
             * 9 - 1
             */
            //image[kcentroids[i].x][kcentroids[i].y] = 10-i;
        }

        delete [] count;
    }
    void computeDistanceAndSetLabels() {
        for (int i = 0; i < ll.length; i++) {
            int minLabel = 0;
            double min = 99999.0;
            Node* n = ll.scan();
            for (int j = 1; j <= k; j++) {
                double m = 0.0;
                // distance formula
                m = sqrt(pow(n->x-kcentroids[j].x, 2) + pow(n->y-kcentroids[j].y,2));
                if (m < min) {
                    min = m;
                    minLabel = j;
                }
            }
            cout << i << " " << n->x << " " << n->y << " " << n->label << " ";
            n->label = minLabel;
            cout << minLabel << " " << n->label <<endl;
        }
    }
    void startClustering() {
        // more than 2 starts showing 1 take over all labels, 
        // this is to be changed to something better however
        for (int i = 0; i < 4; i++) {
            ll.resetScanner();
            imageOutput();
            ll.resetScanner();
            computeCentroids();
            ll.resetScanner();
            computeDistanceAndSetLabels();
            ll.resetScanner();
            imageOutput();
       }
    }
};
Node* createNode(int x, int y, int k) {
    Node* n = new Node(x, y);
    n->label = k;
    return n;
}
int main(int argc, char* argv[]) {
    if (argc < 4) {
        cout << "Please start program with: program in.txt out1.txt out2.txt" << endl;
        return -1;
    }
    ifstream in(argv[1]);
    if (!in) {
        cout << "File: " << argv[1] << " could not be read" << endl;
        return -2;
    }
    int k, rows, cols;
    in >> k;
    in >> rows;
    in >> cols;
    cout << k << " " << rows << " " << cols << endl;
    KMeans km(k, rows, cols);
    km.displayImage();
    LinkedList ll;
    int num;
    while (in >> num) {
        int num2;
        in >> num2;
        ll.insert(createNode(num, num2, km.getLabel()));
    }
    km.ll = ll;
    km.ll.printList();
    km.startClustering();
    km.displayImage();
    return 0;
}

您没有在computeCentroids()中将计数初始化为零,这会导致未定义的行为。将它们与质心一起归零:

int* count = new int[k+1];
for (int i = 1; i <= k; i++) {
    kcentroids[i].x = 0;
    kcentroids[i].y = 0;
    count[i] = 0;
}

当你像这样新建int数组时,你也可以将它们清零:

int* count = new int[k+1]();