如图所示,假设平面上有8个点,坐标分别为A1=(2,10)、A2=(2,5)、A3=(8,4)、A4=(5,8)、A5=(7,5)、A6=(6,4)、A7=(1,2)、A8=(4,9)。假设初始以A1和A4作为两个簇的中心,请使用K-均值聚类方法,计算各点最终的聚类归属以及各簇新的中心坐标,并将聚类结果和新的簇中心在图中画出。
1条回答 默认 最新
# Answer posted by kaili_ya on 2021-05-11 14:24.
"""K-means clustering of eight 2-D points, seeded with A1 and A4.

Running this file as a script clusters the exercise data set, prints the
final centroids and shows a scatter plot of the clusters and centroids.
"""

import numpy as np

try:
    import matplotlib.pyplot as plt
except ImportError:  # plotting is optional; the clustering only needs NumPy
    plt = None

# Starting value for the running minimum distance. A true infinity is used
# so that arbitrarily distant points are still assigned to a centroid.
INF = float("inf")

# Seeds used when the caller supplies none: A1 = (2, 10) and A4 = (5, 8).
_DEFAULT_CENTROIDS = ((2.0, 10.0), (5.0, 8.0))


def distEclud(vecA, vecB):
    """Return the Euclidean distance between two coordinate vectors."""
    return np.sqrt(np.sum(np.power(vecA - vecB, 2)))


def kMeans(dataSet, k, distMeans=distEclud, initCentroids=None):
    """Cluster the rows of *dataSet* into *k* groups with k-means.

    Args:
        dataSet: 2-D array-like (list of rows, ndarray or np.matrix) with one
            point per row.
        k: Number of clusters.
        distMeans: Callable ``(centroid_row, point_row) -> distance``. Both
            arguments are passed as single-row ``np.matrix`` objects.
        initCentroids: Optional ``k x n`` array-like of starting centroids.
            It is copied, never modified. When omitted, the first ``k`` of
            the built-in seeds A1 = (2, 10) and A4 = (5, 8) are used.

    Returns:
        A tuple ``(centroids, clusterAssment)`` of ``np.matrix`` objects:
        ``centroids`` is ``k x n`` (float); row ``i`` of ``clusterAssment``
        holds the cluster index of point ``i`` and its squared distance to
        that cluster's centroid.

    Raises:
        ValueError: If ``k < 1``, if no seeds are given and ``k`` exceeds the
            number of built-in seeds, or if the seed shape does not match
            ``(k, number of columns in dataSet)``.
    """
    if k < 1:
        raise ValueError("k must be at least 1")

    data = np.asmatrix(dataSet, dtype=float)
    m = data.shape[0]

    if initCentroids is None:
        if k > len(_DEFAULT_CENTROIDS):
            raise ValueError(
                "only %d built-in seeds are available; pass initCentroids "
                "for k=%d" % (len(_DEFAULT_CENTROIDS), k)
            )
        initCentroids = _DEFAULT_CENTROIDS[:k]

    # A float copy is essential: writing the cluster means into an integer
    # matrix would silently truncate every updated centroid.
    centroids = np.matrix(initCentroids, dtype=float)
    if centroids.shape != (k, data.shape[1]):
        raise ValueError(
            "initial centroids have shape %s, expected %s"
            % (centroids.shape, (k, data.shape[1]))
        )

    clusterAssment = np.asmatrix(np.zeros((m, 2)))
    # Start every point as "unassigned" so the first pass always counts as a
    # change, even for points whose nearest centroid is cluster 0.
    clusterAssment[:, 0] = -1

    clusterChanged = True
    while clusterChanged:
        clusterChanged = False
        for i in range(m):
            # Find the nearest centroid; on ties the lowest index wins.
            minDist = INF
            minIndex = -1
            for j in range(k):
                distJI = distMeans(centroids[j, :], data[i, :])
                if distJI < minDist:
                    minDist = distJI
                    minIndex = j
            if clusterAssment[i, 0] != minIndex:
                clusterChanged = True
            clusterAssment[i, :] = minIndex, minDist ** 2
        for cent in range(k):
            # Move each centroid to the mean of its current members.
            rows = np.nonzero(clusterAssment[:, 0].A == cent)[0]
            if rows.size:
                centroids[cent, :] = np.mean(data[rows], axis=0)
            # An empty cluster keeps its previous centroid; averaging zero
            # rows would otherwise turn it into NaN.
    return centroids, clusterAssment


def plotFeature(dataSet, centroids, clusterAssment):
    """Scatter-plot every cluster and mark the centroids with a red '+'.

    Args:
        dataSet: 2-D array-like of points, one per row.
        centroids: 2-D array-like of centroid coordinates, one per row.
        clusterAssment: 2-D array-like whose first column holds the cluster
            index of each point (as returned by ``kMeans``).

    Returns:
        The matplotlib Axes that was drawn on.

    Raises:
        ImportError: If matplotlib is not installed.
    """
    if plt is None:
        raise ImportError("matplotlib is required for plotFeature")

    points = np.asarray(dataSet, dtype=float)
    centers = np.asarray(centroids, dtype=float)
    labels = np.asarray(clusterAssment)[:, 0]

    scatterMarkers = ('s', 'o')
    scatterColors = ('black', 'red')

    fig = plt.figure()
    ax = fig.add_subplot(111)
    for i in range(centers.shape[0]):
        members = points[labels == i]
        # Marker and colour cycle when there are more clusters than styles.
        ax.scatter(
            members[:, 0],
            members[:, 1],
            marker=scatterMarkers[i % len(scatterMarkers)],
            c=scatterColors[i % len(scatterColors)],
            s=90,
        )
    ax.scatter(centers[:, 0], centers[:, 1], marker='+', c='red', s=300)
    return ax


def main():
    """Cluster the exercise data set, print the centroids and plot them."""
    dataSet = np.asmatrix(
        [[2, 10], [2, 5], [8, 4], [5, 8], [7, 5], [6, 4], [1, 2], [4, 9]]
    )
    resultCentroids, clustAssing = kMeans(dataSet, 2)
    print('*******************')
    print(resultCentroids)
    print('*******************')
    plotFeature(dataSet, resultCentroids, clustAssing)
    plt.show()


if __name__ == '__main__':
    main()