K_Means算法的简单实现C++
1.K_Means算法核心思想:(计算,递归) (1) 从 n个数据对象任意选择 k 个对象作为初始聚类中心; (2) 根据每个聚类对象的均值(中心对象),计算每个对象与这些中心对象的距离;并根据最小距离重新对相应对象进行划分; (3) 重新计算每个(有变化)聚类的均值(中心对象) (4) 循环(2)到(3)直到每个聚类不再发生变化为止 2.简单实列: 问题:假设数据挖掘的任务是将8个点聚类成3个类别,A1(2,10),A2(2,5),A3(8,4),B1(5,8),B2(7,B3(6,C1(1,2),C2(3,7),C3(4,9),距离函数是Euclidean距离。 假设初始选择A1,B1,C1分别作为每个聚类的中心用 K-means来给出, 代码(C++实现): 第一次循环执行后的三个聚类中心是多少?最后的三个中心是多少? (题目来自某位我喜欢的AI领域老师的课后习题。) #include #include #include #include using namespace std; enum center_flag { set_1, set_2, set_3 };//集合标志 struct Point { float x{0}; float y{0}; center_flag flag{ set_1 }; }Point_A1,Point_A2,Point_A3, Point_B1,Point_B2,Point_B3, Point_C1,Point_C2,Point_C3; Point cur_cluster_1{ 2,10,set_1 }, cur_cluster_2{ 5,8,set_2 }, cur_cluster_3{ 1,2,set_3 },//三个中心点 new_cluster_1{0,set_1}, new_cluster_2{ 0, new_cluster_3{ 0,set_3 }; vector vector vector void Init_Point(){ Point_A1.x = 2; Point_A1.y = 10; Point_A2.x = 2; Point_A2.y = 5; Point_A3.x = 8; Point_A3.y = 4; Point_B1.x = 5; Point_B1.y = 8; Point_B2.x = 7; Point_B2.y = 5; Point_B3.x = 6; Point_B3.y = 4; Point_C1.x = 1; Point_C1.y = 2; Point_C2.x = 3; Point_C2.y = 7; Point_C3.x = 4; Point_C3.y = 9; } float Get_Euclidean_Distance(Point A,Point B){ float d = sqrt(pow((A.x - B.x),2) + pow((A.y - B.y),2)); return d; } //当前点获得归属集 center_flag Get_flag(Point curPoint,Point cluster1,Point cluster2,Point cluster3) { float a=0,b=0,c=0; a = Get_Euclidean_Distance(curPoint,cluster1); b = Get_Euclidean_Distance(curPoint,cluster2); c = Get_Euclidean_Distance(curPoint,cluster3); if (a <= b && a <= c) return set_1; else if (b <= c && b <= a) return set_2; else return set_3; } //求新的中心点 Point Get_new_cluaster(vector float new_cluaster_x = 0; float new_cluaster_y = 0; int n = 0; center_flag new_cluaster_flag = set_1; Point point_temp; Point cur_cluaster; for (auto i = point_set.begin(); i != point_set.end(); i++) { point_temp =*i; new_cluaster_x += point_temp.x; new_cluaster_y += point_temp.y; } n = point_set.size(); new_cluaster_x = new_cluaster_x/n ; new_cluaster_y = new_cluaster_y/ n; new_cluaster_flag = point_temp.flag; cur_cluaster.x = new_cluaster_x; cur_cluaster.y = new_cluaster_y; cur_cluaster.flag = new_cluaster_flag; return cur_cluaster; } bool center_NotChange() { if(cur_cluster_1.x==new_cluster_1.x&& cur_cluster_1.y == new_cluster_1.y&& cur_cluster_2.x == new_cluster_2.x&& cur_cluster_2.y == new_cluster_2.y&& cur_cluster_3.x == new_cluster_3.x&& cur_cluster_3.y == new_cluster_3.y) return true; else { cur_cluster_1.x = new_cluster_1.x; cur_cluster_1.y = new_cluster_1.y; cur_cluster_2.x = new_cluster_2.x; cur_cluster_2.y = new_cluster_2.y; cur_cluster_3.x = new_cluster_3.x; cur_cluster_3.y = new_cluster_3.y; return false; } } int main() { int m = 0; Init_Point(); const vector Point_A1,Point_C3 }; while (1) { for (auto point : Allpoint) { point.flag = Get_flag(point,cur_cluster_1,cur_cluster_2,cur_cluster_3); switch (point.flag) { case set_1: {point_set_1.push_back(point); break; } case set_2: {point_set_2.push_back(point); break; } case set_3: {point_set_3.push_back(point); break; } default: string errStr = "there are something wrong with the point"; //cout << errStr << endl; break; } } point_set_1.push_back(cur_cluster_1); point_set_2.push_back(cur_cluster_2); point_set_3.push_back(cur_cluster_3); //重新计算中心/ m++; new_cluster_1 = Get_new_cluaster(point_set_1); new_cluster_2 = Get_new_cluaster(point_set_2); new_cluster_3 = Get_new_cluaster(point_set_3); //清空集合容器 point_set_1.clear(); point_set_2.clear(); point_set_3.clear(); if (center_NotChange()==true) { cout << "最终中心点"< cout << "中心点1: " << new_cluster_1.x << "," << new_cluster_1.y << endl; cout << "中心点2: " << new_cluster_2.x << "," << new_cluster_2.y << endl; cout << "中心点3: " << new_cluster_3.x << "," << new_cluster_3.y << endl; break; } else { cout << "第" << m << "次计算后的中心点" << endl; cout << "中心点1: " << new_cluster_1.x << "," << new_cluster_3.y << endl; //数组清零 } //输出中心点 cin >> m; return 0; } 输出效果: (编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容! |