槑醇谷
2019-06-21 15:23
采纳率: 0%
浏览 691

使用K均值算法对聚类的簇进行划分

因为代码能力不是很强,想实现西瓜书上的简单例子,但是实现最终的效果和最后的书上的不太一样,哪位大神能帮忙改一下,能让效果达到和书上的差不多;
这个是我自己写的代码(代码能力很差,写的有点不太好)

import copy
import random
from operator import itemgetter

import numpy as np
import matplotlib.pyplot as plt


def loadDataSet(fileName):
    dataMat = []
    fr = open(fileName)
    for line in fr.readlines():
        curLine = line.strip().split('\t')
        fltLine = map(float,curLine)
        dataMat.append(list(fltLine))
    return dataMat
def centroid(dataMat,k,cent):
    First=[]
    Second=[]
    Third=[]
    Fourth=[]
    Fiveth=[]
    T = dict()
    T[0]=[]
    T[1]=[]
    T[2]=[]
    T[3]=[]
    T[4]=[]
    Tm=[]
    for i in dataMat:
        V = []
        for j in cent:
            V.append(compute(i,j))
        min = V[0]
        m = 0
        for n in range(1,k):
            if V[n]<min:
                min=V[n]
                m = n #值变得大了
        if m==0:
            First.append(i)
            T[m]=First
        elif m==1:
            Second.append(i)
            T[m]=Second
        elif m==2:
            Third.append(i)
            T[m]=Third
        elif m==3:
            Fourth.append(i)
            T[3]=Fourth
        elif m==4:
            Fiveth.append(i)
            T[3]=Fiveth
        Tm.append(First)
        Tm.append(Second)
        Tm.append(Third)
        Tm.append(Fourth)
        Tm.append(Fiveth)
    l=[]
    max=0
    for h in range(1,3):
        if len(T[h])==0:
            l.append(h)
        else:
            if len(T[h])>len(T[max]):
                max = h
    if l!=[]:
        gh = T[max]
        n = len(T[max])
        for op in range(0,int(n/2)):
            Hu=[]
            Hu.append(gh[op])
        T[max]= Hu
        for opl in range(int(n/2),n):
            Hu=[]
            Hu.append(gh[opl])
        T[l[0]]= Hu
    return T,Tm
def updataCent(Tm,k):
    cent=[]
    first=0
    second=0
    for i in range(0,k):
        for j in range(0,len(T[i])):
            first += T[i][j][0]
            second+=T[i][j][1]
        if T[i]!=[]:
            cent.append([first/len(T[i]),second/len(T[i])])
        else:
            cent.append([0,0])
            cent.append(random.sample(T[i-1],1))
    return cent
def show(T,cent,n):
    for i in range(0,n):
        for j in range(0,len(T[i])):
            plt.plot(T[i][j][0],T[i][j][1],mark[i])
    mark = ['Dr', 'Db', 'Dg', 'Dk', '^b', '+b', 'sb', 'db', '<b', 'pb']
    for i in range(0,n):
        plt.plot(cent[i][0], cent[i][1], mark[i], markersize = 12)
    plt.show()
def compute(A,B):
    return (A[0]-B[0])**2+(A[1]-B[1])**2 
fileName ='F:/Yan/Particle-Swarm-Optimization-with-Python-master/Test/SA/K-means Data.txt'
dataMat=loadDataSet(fileName)
# print(centroid(dataMat,3,cent))
cent = random.sample(dataMat, 3)
M =dict()
M[(1,(2,3))]=2
for i in range(0,5):
    T,Tm = centroid(dataMat,3,cent)
    show(T, cent, 3)
    cent = updataCent(T,3)

书上的伪代码:
图片说明
图上的效果图:
图片说明
所需要的数据集:
https://pan.baidu.com/s/1VbjHfno1kewCFg9EdhJD5Q 提取码:7stx

  • 写回答
  • 关注问题
  • 收藏
  • 邀请回答

1条回答 默认 最新

相关推荐 更多相似问题