结果显示迭代没问题,但是簇间距离是0那应该就是把所有的数据都算作一类了
My_kmeans<-function(data,k,max.iter=10){
rows<-nrow(data)
cols<-ncol(data)
within<-matrix(0,nrow=k,ncol=1)
between<-0
iter=0
indexMatrix<-matrix(0,nrow=rows,ncol=2)
centers<-matrix(0,nrow=k,ncol=cols)
randSeveralInteger<-as.vector(sample(1:rows,size=k))
for(i in 1:k){
indexMatrix[randSeveralInteger[i],1]<-i
centers[i,]<-data[randSeveralInteger[i],]
centers<-matrix(centers,k,cols)
}
changed=TRUE #changed
while(changed){
if(iter>=max.iter)
break
changed=FALSE
for(i in 1:rows){
initialDistance<-10000
previousCluster<-indexMatrix[i,1]
for(j in 1:k){
currentDistance<-(sum((data[i,]-centers[j,])^2))^0.5
if(currentDistance<=initialDistance)
initialDistance<-currentDistance
indexMatrix[i,1]<-j
indexMatrix[i,2]<-currentDistance
}
}
if(previousCluster!=indexMatrix[i,1])
changed=TRUE
}
for(m in 1:k){
clusterMatrix<-data[indexMatrix[,1]==m,]
clusterMatrix<-as.matrix(clusterMatrix)
if(nrow(clusterMatrix)>0){
centers[m,]<-colMeans(clusterMatrix)
}
else{
centers[m,]<-centers[m,]
}
iter=(iter+1)
}
ss<-function(x) sum(scale(x,scale=FALSE)^2)
between<-ss(centers[indexMatrix[,1],])
within<-sapply(split(as.data.frame(data),indexMatrix[,1]),ss)
twithin<-sum(within) #total within-clustersum of squares
result<-list(cluster=indexMatrix[,1],tot.withinss=twithin,betweenss=between,iteration=iter)
return(result)
}