lucyzhang9410 2017-10-11 07:00
浏览 1145

kaggle fashion-mnist.csv python的问题

本人是 Python 小白,下面是在网上东拼西凑弄出来的代码:

 def toInt(array):
     """Convert every element of ``array`` to an integer value.

     Args:
         array: A 1-D or 2-D array-like of numbers or numeric strings
             (for example the string cells produced by ``csv.reader``).
             A 1-D input is treated as a single row.

     Returns:
         A 2-D ``float64`` ndarray with the same layout as the input whose
         entries are the integer values of the corresponding elements.

     Raises:
         ValueError: If an element cannot be parsed as an integer.
     """
     # atleast_2d gives the same row-vector promotion the old mat() call did,
     # without relying on ``mat`` (removed in NumPy 2.0).
     values = atleast_2d(asarray(array))
     # One vectorized cast replaces the per-element Python loop; the final
     # cast keeps the float result type of the original zeros()-based buffer.
     return values.astype(int).astype(float)

def nomalizing(array):
    """Binarize ``array`` in place: every non-zero entry becomes 1.

    Args:
        array: A NumPy array (or matrix) of numbers. It is modified in place.

    Returns:
        The same object that was passed in, now containing only 0s and 1s.
    """
    # A boolean mask does the whole update in one vectorized assignment
    # instead of visiting each cell from Python.
    array[asarray(array) != 0] = 1
    return array

>>> import csv
def loadTrainData():
    """Load the training set from ``fashion-mnist_train.csv``.

    The first CSV row is treated as a header and skipped. In every remaining
    row the first column is taken as the label and the other columns as the
    feature values.

    Returns:
        A ``(data, label)`` tuple: ``data`` is ``nomalizing(toInt(...))`` of
        the feature columns and ``label`` is ``toInt(...)`` of the first
        column.
    """
    with open('fashion-mnist_train.csv') as csvFile:
        reader = csv.reader(csvFile)
        # Skip the header directly on the iterator instead of buffering it
        # and removing it from the list by value afterwards.
        next(reader, None)
        rows = array(list(reader))
    label = rows[:, 0]
    data = rows[:, 1:]
    return nomalizing(toInt(data)), toInt(label)

def loadTestData():
    """Load the test features from ``test_data.csv``.

    The first CSV row is treated as a header and skipped, and the first
    column of every remaining row is dropped.

    Returns:
        ``nomalizing(toInt(...))`` of the remaining columns.
    """
    with open('test_data.csv') as csvFile:
        reader = csv.reader(csvFile)
        # Skip the header directly on the iterator instead of buffering it
        # and removing it from the list by value afterwards.
        next(reader, None)
        rows = array(list(reader))
    # NOTE(review): column 0 is discarded here; confirm it is an id/label
    # column in test_data.csv and not a pixel value.
    data = rows[:, 1:]
    return nomalizing(toInt(data))

def loadTestResult():
    """Load the reference labels from ``sample_submission.csv``.

    The first CSV row is treated as a header and skipped; the second column
    of every remaining row is used as the label.

    Returns:
        ``toInt(...)`` of the second column.
    """
    with open('sample_submission.csv') as csvFile:
        reader = csv.reader(csvFile)
        # Skip the header directly on the iterator instead of buffering it
        # and removing it from the list by value afterwards.
        next(reader, None)
        label = array(list(reader))
    return toInt(label[:, 1])
    def classify0(inX, dataSet, labels, k):
        """Classify ``inX`` by majority vote among its ``k`` nearest samples.

        Distances are Euclidean. When several labels share the highest vote
        count, the one belonging to the nearest sample wins.

        Args:
            inX: The sample to classify, a 1-D vector (a single-row 2-D input
                is flattened).
            dataSet: The training samples, one per row.
            labels: The training labels, one per row of ``dataSet``; a 1-D
                sequence or a single-row/single-column 2-D array.
            k: Number of neighbours that vote. Values larger than the number
                of training samples use every sample.

        Returns:
            The winning label, as an element of ``labels``.

        Raises:
            ValueError: If ``k`` is smaller than 1.
            IndexError: If ``dataSet`` contains no samples.
        """
        from collections import Counter

        if k < 1:
            raise ValueError("k must be at least 1")

        # Work in float so unsigned pixel data cannot wrap around on
        # subtraction; asarray also replaces ``mat`` (removed in NumPy 2.0).
        query = asarray(inX, dtype=float).ravel()
        samples = atleast_2d(asarray(dataSet, dtype=float))
        classes = asarray(labels).ravel()

        # Broadcasting subtracts the query from every row without building a
        # tiled copy of it as large as the whole training set.
        diffMat = samples - query
        # The square root is skipped: it does not change the neighbour order.
        sqDistances = (diffMat ** 2).sum(axis=1)
        nearest = sqDistances.argsort()[:k]

        # Counter keeps first-seen order, so ties go to the nearest sample.
        votes = Counter(classes[idx] for idx in nearest)
        return votes.most_common(1)[0][0]
    >>> import KNN
>>> from numpy import *
>>> import operator
def handwritingClassTest():
    """Classify every test sample with 5-nearest-neighbours and report errors.

    Loads the training data, test data and reference labels through
    ``loadTrainData``, ``loadTestData`` and ``loadTestResult``, classifies
    each test row with ``classify0``, prints each prediction next to its
    reference label, prints the error count and error rate, and finally
    passes the list of predictions to ``saveResult``.
    """
    trainData, trainLabel = loadTrainData()
    testData = loadTestData()
    # NOTE(review): the reference labels come from sample_submission.csv;
    # confirm that file really holds the true labels, otherwise the error
    # statistics printed below are meaningless.
    testLabel = loadTestResult()
    m, _ = shape(testData)
    errorCount = 0
    resultList = []
    for i in range(m):
        classifierResult = classify0(testData[i], trainData, trainLabel, 5)
        resultList.append(classifierResult)
        expected = testLabel[0, i]
        # Format inside the call: on Python 3 print() returns None, so
        # ``print(...) % (...)`` raises TypeError after the first sample.
        print("the classifier came back with: %d, the real answer is: %d" % (classifierResult, expected))
        if classifierResult != expected:
            errorCount += 1
    print("\nthe total number of errors is: %d" % errorCount)
    print("\nthe total error rate is: %f" % (errorCount / float(m)))
    # saveResult is not defined in this snippet; it must be provided elsewhere.
    saveResult(resultList)
>>> handwritingClassTest()

运行了3个多小时,运行过程中的截图如下:(图片说明)

结果如下
图片说明

result.csv也在桌面上显示了,但是为0字节

求问各位大神,如何修改code呢?是不是再次运行还得3个小时呀?

  • 写回答

0条回答 默认 最新

    报告相同问题?

    悬赏问题

    • ¥15 安卓adb backup备份应用数据失败
    • ¥15 eclipse运行项目时遇到的问题
    • ¥15 关于#c##的问题:最近需要用CAT工具Trados进行一些开发
    • ¥15 南大pa1 小游戏没有界面,并且报了如下错误,尝试过换显卡驱动,但是好像不行
    • ¥15 没有证书,nginx怎么反向代理到只能接受https的公网网站
    • ¥50 成都蓉城足球俱乐部小程序抢票
    • ¥15 yolov7训练自己的数据集
    • ¥15 esp8266与51单片机连接问题(标签-单片机|关键词-串口)(相关搜索:51单片机|单片机|测试代码)
    • ¥15 电力市场出清matlab yalmip kkt 双层优化问题
    • ¥30 ros小车路径规划实现不了,如何解决?(操作系统-ubuntu)