输入代码
from numpy import *
import operator
def createDataSet():
    """Return a tiny hard-coded training set for trying out the classifier.

    Returns:
        A ``(group, labels)`` tuple. ``group`` is a 4x2 NumPy array of
        sample points and ``labels`` is a list of four class labels, where
        ``labels[i]`` is the label of ``group[i]``.
    """
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    Args:
        inX: The point to classify; one value per column of ``dataSet``.
        dataSet: 2-D NumPy array of training samples, one sample per row.
        labels: Sequence of class labels; ``labels[i]`` belongs to
            ``dataSet[i]``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes. When several labels tie, the one
        that was met first while walking the neighbours from nearest to
        farthest wins (the sort below is stable).

    Raises:
        IndexError: If ``k`` is smaller than 1 (nothing to vote on) or
            larger than the number of training samples.
    """
    # Euclidean distance from inX to every training sample. Broadcasting
    # subtracts inX from each row, so no explicit tiling is needed.
    diffMat = asarray(inX) - dataSet
    sqDistances = (diffMat ** 2).sum(axis=1)
    distances = sqDistances ** 0.5

    # Indices of the training samples ordered from nearest to farthest.
    sortedDistIndicies = distances.argsort()

    # Tally the labels of the k nearest samples.
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # Order the labels by vote count, highest first.
    sortedClassCount = sorted(
        classCount.items(), key=operator.itemgetter(1), reverse=True
    )
    return sortedClassCount[0][0]
def file2matrix(filename):
    """Parse a tab-separated data file into a feature matrix and labels.

    Each non-blank line must hold at least three numeric feature columns
    followed by an integer class label in the last column.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``(returnMat, classLabelVector)`` tuple. ``returnMat`` is an
        ``(n, 3)`` NumPy float array holding the first three columns of
        each line and ``classLabelVector`` is a list of ``n`` ints taken
        from the last column, where ``n`` is the number of non-blank lines.

    Raises:
        ValueError: If a feature is not a valid float or the last column
            is not a valid int.
    """
    # Read everything up front and close the file straight away.
    with open(filename) as fr:
        stripped = [line.strip() for line in fr]

    # Skip blank lines (e.g. a trailing newline) so they neither fail the
    # float conversion nor leave all-zero rows in the matrix.
    arrayOlines = [line for line in stripped if line]

    numberOfLines = len(arrayOlines)
    returnMat = zeros((numberOfLines, 3))
    classLabelVector = []

    for index, line in enumerate(arrayOlines):
        listFromLine = line.split('\t')
        returnMat[index, :] = [float(value) for value in listFromLine[0:3]]
        classLabelVector.append(int(listFromLine[-1]))

    # This return must stay OUTSIDE the loop. Indented into the loop body,
    # the function returns after the first line: only row 0 is filled, the
    # remaining rows stay zero and the label list has a single element.
    return returnMat, classLabelVector
错误执行结果
>>> datingDataMat
array([[4.092000e+04, 8.326976e+00, 9.539520e-01],
[0.000000e+00, 0.000000e+00, 0.000000e+00],
[0.000000e+00, 0.000000e+00, 0.000000e+00],
...,
[0.000000e+00, 0.000000e+00, 0.000000e+00],
[0.000000e+00, 0.000000e+00, 0.000000e+00],
[0.000000e+00, 0.000000e+00, 0.000000e+00]])
>>> datingLabels[0:20]
[3]
正确的应该是
>>> datingDataMat
array([[7.29170000e+04, 7.10627300e+00, 2.23600000e-01],
[1.42830000e+04, 2.44186700e+00, 1.90838000e-01],
[7.34750000e+04, 8.31018900e+00, 8.52795000e-01],
...,
[1.24290000e+04, 4.43233100e+00, 9.24649000e-01],
[2.52880000e+04, 1.31899030e+01, 1.05013800e+00],
[4.91800000e+03, 3.01112400e+00, 1.90663000e-01]])
>>> datingLabels[0:20]
[3, 2, 1, 1, 1, 1, 3, 3, 1, 3, 1, 1, 2, 1, 1, 1, 1, 1, 2, 3]
请大佬看下哪里出错了