因为代码能力不是很强,想实现西瓜书上的简单例子,但是实现最终的效果和最后的书上的不太一样,哪位大神能帮忙改一下,能让效果达到和书上的差不多;
这个是我自己写的代码(代码能力很差,写的有点不太好)
import copy
import random
from operator import itemgetter
import numpy as np
import matplotlib.pyplot as plt
def loadDataSet(fileName):
dataMat = []
fr = open(fileName)
for line in fr.readlines():
curLine = line.strip().split('\t')
fltLine = map(float,curLine)
dataMat.append(list(fltLine))
return dataMat
def centroid(dataMat,k,cent):
First=[]
Second=[]
Third=[]
Fourth=[]
Fiveth=[]
T = dict()
T[0]=[]
T[1]=[]
T[2]=[]
T[3]=[]
T[4]=[]
Tm=[]
for i in dataMat:
V = []
for j in cent:
V.append(compute(i,j))
min = V[0]
m = 0
for n in range(1,k):
if V[n]<min:
min=V[n]
m = n #值变得大了
if m==0:
First.append(i)
T[m]=First
elif m==1:
Second.append(i)
T[m]=Second
elif m==2:
Third.append(i)
T[m]=Third
elif m==3:
Fourth.append(i)
T[3]=Fourth
elif m==4:
Fiveth.append(i)
T[3]=Fiveth
Tm.append(First)
Tm.append(Second)
Tm.append(Third)
Tm.append(Fourth)
Tm.append(Fiveth)
l=[]
max=0
for h in range(1,3):
if len(T[h])==0:
l.append(h)
else:
if len(T[h])>len(T[max]):
max = h
if l!=[]:
gh = T[max]
n = len(T[max])
for op in range(0,int(n/2)):
Hu=[]
Hu.append(gh[op])
T[max]= Hu
for opl in range(int(n/2),n):
Hu=[]
Hu.append(gh[opl])
T[l[0]]= Hu
return T,Tm
def updataCent(Tm,k):
cent=[]
first=0
second=0
for i in range(0,k):
for j in range(0,len(T[i])):
first += T[i][j][0]
second+=T[i][j][1]
if T[i]!=[]:
cent.append([first/len(T[i]),second/len(T[i])])
else:
cent.append([0,0])
cent.append(random.sample(T[i-1],1))
return cent
def show(T,cent,n):
for i in range(0,n):
for j in range(0,len(T[i])):
plt.plot(T[i][j][0],T[i][j][1],mark[i])
mark = ['Dr', 'Db', 'Dg', 'Dk', '^b', '+b', 'sb', 'db', '<b', 'pb']
for i in range(0,n):
plt.plot(cent[i][0], cent[i][1], mark[i], markersize = 12)
plt.show()
def compute(A,B):
return (A[0]-B[0])**2+(A[1]-B[1])**2
fileName ='F:/Yan/Particle-Swarm-Optimization-with-Python-master/Test/SA/K-means Data.txt'
dataMat=loadDataSet(fileName)
# print(centroid(dataMat,3,cent))
cent = random.sample(dataMat, 3)
M =dict()
M[(1,(2,3))]=2
for i in range(0,5):
T,Tm = centroid(dataMat,3,cent)
show(T, cent, 3)
cent = updataCent(T,3)
书上的伪代码:
图上的效果图:
所需要的数据集:
https://pan.baidu.com/s/1VbjHfno1kewCFg9EdhJD5Q 提取码:7stx