机器学习k近邻算法算出结果不对

输入代码

from numpy import *
import operator

def createDataSet():
    """Build the four-point toy data set used to try out the classifier.

    Returns:
        A ``(group, labels)`` tuple: ``group`` is a 4x2 NumPy array of
        sample coordinates and ``labels`` is the list of class names,
        one per row of ``group``.
    """
    samples = [
        [1.0, 1.1],
        [1.0, 1.0],
        [0, 0],
        [0, 0.1],
    ]
    return array(samples), ['A', 'A', 'B', 'B']

def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its k nearest neighbours.

    Args:
        inX: The point to classify; a sequence or array with one value
            per column of ``dataSet``.
        dataSet: 2-D NumPy array of training samples, one sample per row.
        labels: Sequence of class labels, ``labels[i]`` belonging to
            ``dataSet[i]``.
        k: Number of nearest neighbours that vote. Values larger than the
            number of samples are clamped to the number of samples.

    Returns:
        The label with the most votes among the k nearest samples. On a
        tie, the label that was first seen (i.e. the one owning the
        closest neighbour among the tied labels) wins.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")

    # Euclidean distance from inX to every row; broadcasting subtracts
    # inX from each sample without building a tiled copy.
    diffMat = dataSet - inX
    sqDistances = (diffMat ** 2).sum(axis=1)
    distances = sqDistances ** 0.5
    sortedDistIndicies = distances.argsort()

    # Tally the labels of the k closest samples. Slicing (instead of
    # indexing inside range(k)) keeps k > len(dataSet) from overrunning.
    classCount = {}
    for neighbourIndex in sortedDistIndicies[:k]:
        voteIlabel = labels[neighbourIndex]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # Rank only AFTER all k votes are counted. Doing this inside the loop
    # returns after the first vote and silently degrades to k = 1.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]

def file2matrix(filename):
    """Parse a tab-separated data file into a feature matrix and labels.

    Each non-blank line must hold at least three numeric feature columns
    followed by an integer class label in the last column.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``(returnMat, classLabelVector)`` tuple: ``returnMat`` is an
        N x 3 NumPy float array holding the first three columns of every
        record, and ``classLabelVector`` is the list of N integer labels
        taken from the last column.

    Raises:
        ValueError: If a feature or label field cannot be converted to a
            number.
    """
    # The context manager closes the file even if reading fails. Blank
    # lines are dropped here so the matrix is sized by real records only.
    with open(filename) as fr:
        records = [text for text in (line.strip() for line in fr) if text]

    returnMat = zeros((len(records), 3))
    classLabelVector = []
    for index, record in enumerate(records):
        listFromLine = record.split('\t')
        # Convert explicitly rather than relying on NumPy's implicit
        # string-to-float casting during assignment.
        returnMat[index, :] = [float(value) for value in listFromLine[0:3]]
        classLabelVector.append(int(listFromLine[-1]))

    # This return must stay outside the loop: returning from inside it
    # yields a matrix with only the first row filled and a single label.
    return returnMat, classLabelVector

错误执行结果

>>> datingDataMat
array([[4.092000e+04, 8.326976e+00, 9.539520e-01],
       [0.000000e+00, 0.000000e+00, 0.000000e+00],
       [0.000000e+00, 0.000000e+00, 0.000000e+00],
       ...,
       [0.000000e+00, 0.000000e+00, 0.000000e+00],
       [0.000000e+00, 0.000000e+00, 0.000000e+00],
       [0.000000e+00, 0.000000e+00, 0.000000e+00]])
>>> datingLabels[0:20]
[3]

正确的应该是

>>> datingDataMat
array([[7.29170000e+04, 7.10627300e+00, 2.23600000e-01],
       [1.42830000e+04, 2.44186700e+00, 1.90838000e-01],
       [7.34750000e+04, 8.31018900e+00, 8.52795000e-01],
       ...,
       [1.24290000e+04, 4.43233100e+00, 9.24649000e-01],
       [2.52880000e+04, 1.31899030e+01, 1.05013800e+00],
       [4.91800000e+03, 3.01112400e+00, 1.90663000e-01]])
>>> datingLabels[0:20]
[3, 2, 1, 1, 1, 1, 3, 3, 1, 3, 1, 1, 2, 1, 1, 1, 1, 1, 2, 3]

请大佬看下哪里出错了

1个回答

Csdn user default icon
上传中...
上传图片
插入图片
抄袭、复制答案,以达到刷声望分或其他目的的行为,在CSDN问答是严格禁止的,一经发现立刻封号。是时候展现真正的技术了!
立即提问