Python: Finding the Sample Closest to Each K-Means Cluster Center

Vanessa · Updated: 2024-11-13

The script below implements K-Means from scratch with NumPy, records each sample's cluster index and its distance to the assigned center, and then, for each cluster, selects the sample closest to that cluster's center and marks it with a star on a scatter plot.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def euclideanDist(A, B):
    # Euclidean distance between two sample vectors
    return np.sqrt(np.sum((A - B) ** 2))


def RandomCenters(dataSet, k):
    # Randomly pick k distinct samples as the initial cluster centers
    n = dataSet.shape[0]
    centerIndex = np.random.choice(range(n), size=k, replace=False)
    centers = dataSet[centerIndex]
    return centers


def KMeans(dataSet, k):
    Centers = RandomCenters(dataSet, k)
    n, m = dataSet.shape
    DistMatrix = np.zeros((n, 2))  # n*2 matrix: column 0 holds the cluster index, column 1 the distance
    centerChanged = True
    while centerChanged:
        centerChanged = False
        for i in range(n):
            minDist = np.inf
            minIndex = -1
            for j in range(k):
                dist = euclideanDist(dataSet[i, :], Centers[j, :])
                if dist < minDist:  # find the closest cluster center for this sample
                    minDist = dist
                    minIndex = j
            if DistMatrix[i, 0] != minIndex:
                centerChanged = True
            DistMatrix[i, 0] = minIndex  # store the cluster index
            DistMatrix[i, 1] = minDist   # store the distance to that center
        if centerChanged:
            # If any assignment changed, recompute each cluster center as the mean of its samples
            for i in range(k):
                dataMean = dataSet[DistMatrix[:, 0] == i]  # samples belonging to cluster i
                if len(dataMean) > 0:  # skip empty clusters to avoid taking the mean of an empty slice
                    Centers[i] = np.mean(dataMean, axis=0)
    return Centers, DistMatrix


def PointSelection(DistMatrix, k, n):
    # For every cluster, pick the sample with the smallest distance to its own center
    points = []
    for i in range(k):
        minDist = np.inf
        closeIndex = -1
        for j in range(n):
            if DistMatrix[j, 0] == i and DistMatrix[j, 1] < minDist:
                minDist = DistMatrix[j, 1]
                closeIndex = j
        points.append(closeIndex)
    return points


if __name__ == "__main__":
    path = r"D:\dataset\clusterData\bolbs_1.csv"
    Data = np.array(pd.read_csv(path, header=None))
    X = Data[:, :2].astype(float)  # ensure floats so the center update does not truncate
    n = len(X)
    k = 2
    Center, DistMat = KMeans(X, k)
    Points = PointSelection(DistMat, k, n)
    plt.scatter(X[:, 0], X[:, 1], c=DistMat[:, 0])
    CP = X[Points]  # the sample closest to each cluster center
    plt.scatter(CP[:, 0], CP[:, 1], marker="*", s=200)
    plt.show()
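If scikit-learn is available, the same result can also be reached without the hand-written loops. The sketch below is a hedged alternative rather than part of the original post: it assumes blob data generated with make_blobs (instead of the bolbs_1.csv file above, whose contents are not shown here), clusters it with sklearn.cluster.KMeans, and uses pairwise_distances_argmin_min to find, for each cluster center, the index of the nearest sample. All parameter values are illustrative.

import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import pairwise_distances_argmin_min

# Illustrative data in place of the CSV; these parameters are assumptions, not values from the post
X, _ = make_blobs(n_samples=300, centers=2, n_features=2, random_state=0)

km = KMeans(n_clusters=2, n_init=10, random_state=0).fit(X)

# For each cluster center, the index of the nearest sample in X and its distance
closest, dists = pairwise_distances_argmin_min(km.cluster_centers_, X)

plt.scatter(X[:, 0], X[:, 1], c=km.labels_)
plt.scatter(X[closest, 0], X[closest, 1], marker="*", s=200)
plt.show()

pairwise_distances_argmin_min does in one vectorized call what PointSelection does with two nested loops: for each row of its first argument it returns the index of the nearest row in its second argument, together with that minimum distance.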
Author: DeniuHe



Tags: samples · k-means · Python
