本人是 Python 小白,下面的代码是在网上东拼西凑弄出来的:
def toInt(array):
    """Convert an array-like of numbers or numeric strings to a 2-D integer array.

    The conversion is done in a single vectorized NumPy call instead of a
    Python-level loop over every element, which matters for inputs with
    tens of millions of cells.

    Args:
        array: Array-like of values accepted by ``int()`` (e.g. the string
            cells produced by ``csv.reader``). A 1-D input is treated as a
            single row, so a list of ``n`` labels yields shape ``(1, n)``.

    Returns:
        A ``numpy.ndarray`` with an integer dtype and at least two
        dimensions. Float inputs are truncated toward zero, as ``int()``
        does.

    Raises:
        ValueError: If a cell cannot be parsed as an integer.
    """
    # Local import keeps the function self-contained; ``mat`` is deliberately
    # avoided because it was removed from the NumPy namespace in NumPy 2.0.
    import numpy as np

    values = np.atleast_2d(np.asarray(array))
    return values.astype(int)
def nomalizing(array):
    """Binarize ``array`` in place: every non-zero entry becomes 1.

    Args:
        array: A NumPy array. It is modified in place.

    Returns:
        The same array object that was passed in, now containing only
        0 and 1.
    """
    # One boolean-mask assignment replaces the per-element Python loop.
    array[array != 0] = 1
    return array
>>> import csv
def loadTrainData(path='fashion-mnist_train.csv'):
    """Load the training CSV and return binarized features plus labels.

    The first row of the file is treated as a header and discarded. In
    every remaining row the first column is the label and the other
    columns are the feature values.

    Args:
        path: Location of the training CSV. Defaults to the file name the
            original code hard-coded.

    Returns:
        A tuple ``(data, labels)`` where ``data`` is
        ``nomalizing(toInt(features))`` and ``labels`` is
        ``toInt(label_column)``.

    Raises:
        ValueError: If the file contains no data rows after the header.
    """
    # newline='' is what the csv module documents for files handed to
    # csv.reader, so embedded line breaks are parsed correctly.
    with open(path, newline='') as file:
        reader = csv.reader(file)
        next(reader, None)  # drop the header row
        rows = [row for row in reader if row]  # ignore blank lines

    if not rows:
        raise ValueError("no data rows found in %r" % (path,))

    labels = [row[0] for row in rows]
    features = [row[1:] for row in rows]
    return nomalizing(toInt(features)), toInt(labels)
def loadTestData(path='test_data.csv'):
    """Load the test CSV and return its binarized feature matrix.

    The first row of the file is treated as a header and discarded. The
    first column of every remaining row is skipped; the other columns are
    the feature values.

    Args:
        path: Location of the test CSV. Defaults to the file name the
            original code hard-coded.

    Returns:
        ``nomalizing(toInt(features))`` for all data rows.

    Raises:
        ValueError: If the file contains no data rows after the header.
    """
    # newline='' is what the csv module documents for files handed to
    # csv.reader, so embedded line breaks are parsed correctly.
    with open(path, newline='') as file:
        reader = csv.reader(file)
        next(reader, None)  # drop the header row
        rows = [row for row in reader if row]  # ignore blank lines

    if not rows:
        raise ValueError("no data rows found in %r" % (path,))

    features = [row[1:] for row in rows]
    return nomalizing(toInt(features))
def loadTestResult(path='sample_submission.csv'):
    """Load the expected test labels from the second column of a CSV.

    The first row of the file is treated as a header and discarded; the
    value at column index 1 of every remaining row is used as the label.

    NOTE(review): the default file is named like a competition sample
    submission, which may contain placeholder labels rather than ground
    truth -- confirm before trusting the reported error rate.

    Args:
        path: Location of the CSV. Defaults to the file name the original
            code hard-coded.

    Returns:
        ``toInt(labels)`` for all data rows.

    Raises:
        ValueError: If the file contains no data rows after the header.
    """
    # newline='' is what the csv module documents for files handed to
    # csv.reader, so embedded line breaks are parsed correctly.
    with open(path, newline='') as file:
        reader = csv.reader(file)
        next(reader, None)  # drop the header row
        rows = [row for row in reader if row]  # ignore blank lines

    if not rows:
        raise ValueError("no data rows found in %r" % (path,))

    labels = [row[1] for row in rows]
    return toInt(labels)
def classify0(inX, dataSet, labels, k):
    """Classify one sample by majority vote among its k nearest neighbours.

    Distance is Euclidean. Squared distances are ranked directly because
    the square root does not change the ordering.

    Args:
        inX: The sample to classify; any array-like holding one feature
            vector.
        dataSet: Training samples, one per row. A 1-D input is treated as
            a single sample.
        labels: One label per training sample. It is flattened, so both a
            flat sequence and a ``(1, n)`` row are accepted.
        k: Number of nearest neighbours that vote.

    Returns:
        The label with the most votes. On a tie, the tied label whose
        first vote came from the closest neighbour wins.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number
            of training samples.
    """
    # Local imports keep the function self-contained; ``mat`` is avoided
    # because it was removed from the NumPy namespace in NumPy 2.0.
    from collections import Counter

    import numpy as np

    # Casting the query to float makes the subtraction below produce floats,
    # so unsigned integer inputs cannot wrap around.
    query = np.asarray(inX, dtype=float).reshape(1, -1)
    samples = np.atleast_2d(np.asarray(dataSet))
    flat_labels = np.asarray(labels).ravel()

    sample_count = samples.shape[0]
    if k < 1 or k > sample_count:
        raise ValueError(
            "k must be between 1 and the number of samples (%d), got %r"
            % (sample_count, k)
        )

    # Broadcasting subtracts the query from every row without first building
    # a tiled copy of it.
    diff = samples - query
    squared_distances = (diff ** 2).sum(axis=1)

    # A stable sort keeps equally distant samples in their original order.
    nearest = squared_distances.argsort(kind="stable")[:k]

    votes = Counter(flat_labels[nearest])
    return votes.most_common(1)[0][0]
>>> import KNN
>>> from numpy import *
>>> import operator
def handwritingClassTest(k=5):
    """Classify every test sample with kNN, report errors, and save results.

    Loads the training data, test data and expected labels through
    ``loadTrainData``, ``loadTestData`` and ``loadTestResult``, classifies
    each test row with ``classify0``, prints each prediction next to the
    expected label, prints the error count and error rate, and finally
    passes the list of predictions to ``saveResult``.

    NOTE(review): ``saveResult`` is not defined in the visible part of this
    file -- confirm it is in scope before a long run.

    Args:
        k: Number of neighbours used for voting. Defaults to 5, the value
            the original code hard-coded.
    """
    trainData, trainLabel = loadTrainData()
    testData = loadTestData()
    testLabel = loadTestResult()

    sampleCount = testData.shape[0]
    errorCount = 0
    resultList = []
    for i in range(sampleCount):
        classifierResult = classify0(testData[i], trainData, trainLabel, k)
        resultList.append(classifierResult)
        expected = testLabel[0, i]
        # The % formatting must happen inside the print() call. On Python 3,
        # ``print(...) % (...)`` applies % to the None that print returns and
        # raises TypeError, aborting the run before the results are saved.
        print("the classifier came back with: %d, the real answer is: %d"
              % (classifierResult, expected))
        if classifierResult != expected:
            errorCount += 1

    print("\nthe total number of errors is: %d" % errorCount)
    # Guard against an empty test set so the rate does not divide by zero.
    errorRate = errorCount / float(sampleCount) if sampleCount else 0.0
    print("\nthe total error rate is: %f" % errorRate)
    saveResult(resultList)
# Start the (long-running) evaluation only when this file is executed
# directly, so that importing it does not trigger the full run.
if __name__ == "__main__":
    handwritingClassTest()
运行了3个多小时,运行过程中的图片如下
结果如下
result.csv也在桌面上显示了,但是为0字节
求问各位大神,该如何修改代码呢?是不是再次运行还得 3 个小时呀?