在线时间:8:00-16:00
迪恩网络APP
随时随地掌握行业动态
扫描二维码
关注迪恩网络微信公众号
以下代码以最简单的二维向量演示了k-means的处理流程
有一堆二维向量<x1,y1>,<x2,y2>..<xn,yn>,聚成两个类
聚类中心就是类中所有向量的均值<xm,ym>, xm = (x1+x2+..+xn)/2, ym = (y1+y2+..+yn)/2
每次聚类都重新计算各个向量和聚类中心的距离(用欧氏距离),将各个向量分配到离各自最近的类
然后比较本次的聚类中心是否与上次相等,如果相等,即表示所有类已经平衡,聚类结束.
代码
using System;
using System.Collections.Generic; using System.Text; using System.Drawing; namespace Clustering.Demo { /// <summary> /// 利用点坐标(二维向量)来演示k-means算法 /// </summary> class Kmeans { /// <summary> /// 点的坐标数组(coordinates) /// </summary> private static readonly int[,] coords = new int[,] { { 0, 0 }, { 1, 0 }, { 0, 1 }, { 1, 1 }, { 2, 1 }, { 1, 2 }, { 2, 2 }, { 3, 2 }, { 6, 6 }, { 7, 6 }, { 8, 6 }, { 6, 7 }, { 7, 7 }, { 8, 7 }, { 9, 7 }, { 7, 8 }, { 8, 8 }, { 9, 8 }, { 8, 9 }, { 9, 9 }, //{ 12, 12 }, { 12, 13 }, { 13, 14 }, { 14, 14 }, { 14, 15 }, { 15, 16 }, { 16, 14 }, { 17, 15 }, { 16, 18 }, { 16, 17 }, }; /// <summary> /// 聚类的数量 /// </summary> private static readonly int k = 2; /// <summary> /// /// </summary> public static void Cluster() { Point[] points = GetPoints(); //获取原始数据点 PointF[] means = new PointF[k]; //当前的聚类中心 PointF[] prevMeans = new PointF[k]; //之前的聚类中心 int[] pointAssigns = new int[points.Length]; //每个点所属的类 InitMeans(points, k, means); //随机选k个点作为聚类中心 PrintInit(points, k, means); //打印初始数据 int iter = 0; //迭代次数(iteration times) while (true) { Classify(points, k, means, pointAssigns); //将每个点与各个聚类中心对比进行归类划分 int conv = 0; //收敛类数(convergent clusters) for (int j = 0; j < k; j++) { means[j] = CalcMeans(points, pointAssigns, j); //重新计算聚类中心 if (Compare(means[j], prevMeans[j])) conv++; else prevMeans[j] = means[j]; } if (conv == k) //新旧聚类中心一样表示该聚类已收敛至平衡 { break; } iter++; PrintIter(points, k, means, pointAssigns, iter); //打印迭代数据 } } /// <summary> /// 初始化点数组(根据自定义坐标) /// </summary> private static Point[] GetPoints() { int len = coords.GetLength(0); Point[] points = new Point[len]; for (int i = 0; i < len; i++) { points[i] = new Point(coords[i, 0], coords[i, 1]); } return points; } /// <summary> /// 随机选k个点作为聚类中心 /// </summary> private static void InitMeans(Point[] points, int k, PointF[] means) { Random random = new Random(); for (int i = 0; i < k; i++) { means[i] = points[random.Next(points.Length)]; } } /// <summary> /// 将每个点与各个聚类中心对比进行归类划分 /// </summary> private static void Classify(Point[] points, int k, PointF[] means, int[] pointAssigns) { for (int i = 0; i < points.Length; i++) { double minDist = double.MaxValue; //最短距离 for (int j = 0; j < k; j++) { double dist = Distance(points[i], means[j]); if (dist < minDist) { minDist = dist; pointAssigns[i] = j; } } //Console.WriteLine("{0}归入类{1}", points[i], pointAssigns[i]); } } #region 欧氏距离(Euclidean distance) /// <summary> /// 计算欧氏距离(勾股定理的多维空间扩展) /// </summary> /// <remarks> /// 勾股定理: x^2 + y^2 = z^2 /// 欧氏距离: distance = sqrt(x1^2 + x2^2 + ... + xn^2) /// 欧氏距离的变换式: x1^2 + x2^2 + ... + xn^2 = distance^2 /// /// 二维空间的两点p1(x1,y1)和p(x2,y2),两点间距离dist=sqrt((x1-x2)^2+(y1-y2)^2) /// </remarks> private static double Distance(PointF p1, PointF p2) { double pow2X = Math.Pow(p1.X - p2.X, 2); double pow2Y = Math.Pow(p1.Y - p2.Y, 2); return Math.Sqrt(pow2X + pow2Y); } #endregion /// <summary> /// 计算新的聚类中心(均值) /// </summary> /// <remarks> /// meanX = (x1 + x2 + ... + xn) / n /// meanY = (y1 + y2 + ... + yn) / n /// </remarks> private static PointF CalcMeans(Point[] points, int[] pointAssigns, int j) { PointF mean = new PointF(); int n = 0; for (int i = 0; i < points.Length; i++) { if (pointAssigns[i] == j) { mean.X += points[i].X; mean.Y += points[i].Y; n++; } } mean.X /= (float)n; mean.Y /= (float)n; return mean; } /// <summary> /// 比较两个聚类中心是否相等 /// </summary> private static bool Compare(PointF a, PointF b) { if (((int)(a.X * 10) == (int)(b.X * 10)) && ((int)(a.Y * 10) == (int)(b.Y * 10))) { return true; } else { return false; } } #region 打印数据(print datas) /// <summary> /// 打印初始数据 /// </summary> private static void PrintInit(Point[] points, int k, PointF[] means) { Console.WriteLine("总共{0}个样本:", points.Length); for (int i = 0; i < points.Length; i++) { Console.WriteLine("{0}, {1}", points[i].X, points[i].Y); } Console.WriteLine("\n初始化时随机选取k个样本作为聚类中心:"); for (int i = 0; i < k; i++) { Console.WriteLine("聚类{0}的中心: {1}, {2}", i, means[i].X, means[i].Y); } } /// <summary> /// 打印迭代数据 /// </summary> private static void PrintIter(Point[] points, int k, PointF[] means, int[] pointAssigns, int iter) { Console.WriteLine("\n\n--------第{0}次迭代的结果--------", iter); for (int j = 0; j < k; j++) { Console.WriteLine("\n第{0}个类的成员:", j); for (int i = 0; i < points.Length; i++) { if (pointAssigns[i] == j) { Console.WriteLine("{0}, {1}", points[i].X, points[i].Y); } } Console.WriteLine("均值(聚类中心): {0}, {1}", means[j].X, means[j].Y); } } #endregion } }
|
请发表评论