各位大神,小弟入门数据分析现在,python也就会个基础语法,以下是照着机器学习的代码敲的,数据集是自己编的,但是运行结果报错:unhashable type: 'list',希望大神可以给我讲解一下,谢谢啦
from math import log
import operator
def CreateDataset():
dataset=[[0,1,1,'high'],
[0,1,1,'high'],
[0,1,1,'high'],
[0,0,1,'high'],
[0,1,1,'high'],
[0,0,1,'high'],
[0,1,0,'high'],
[1,1,1,'high'],
[1,1,0,'high'],
[1,1,1,'high'],
[1,1,1,'high'],
[1,1,1,'high'],
[1,1,1,'high'],
[0,1,1,'high'],
[1,0,1,'high'],
[1,0,1,'high'],
[1,0,1,'high'],
[1,0,1,'high'],
[1,0,0,'high'],
[0,0,0,'high'],
[0,0,1,'low'],
[0,0,1,'low'],
[0,0,1,'low'],
[0,0,0,'low'],
[0,1,0,'low'],
[1,0,1,'low'],
[1,0,1,'low'],
[0,0,0,'low'],
[0,0,0,'low'],
[1,0,0,'low'],
[0,1,0,'low'],
[1,0,1,'low'],
[1,0,0,'low'],
[1,0,0,'low']]
labels=['weather','weekend','sales','volumes']
return dataset,labels
def calcShannonEnt(dataset):
numEntries=len(dataset)
labelCounts={}
for featVec in dataset:
currentLabel=featVec[-1]
if currentLabel not in labelCounts.keys():
labelCounts[currentLabel]=0
labelCounts[currentLabel]+=1
shannonEnt=0.0
for key in labelCounts:
prob=float(labelCounts[key])/numEntries
shannonEnt-=prob *log(prob,2)
return shannonEnt
def splitDataSet(dataset,axis,value):
retDataSet=[]
for featVec in dataset:
if featVec[axis]==value:
reduceFeatVec=featVec[:axis]
reduceFeatVec.extend(featVec[axis+1:])
retDataSet.append(reduceFeatVec)
return retDataSet
def chooseBestFeatureToSplit(dataset):
numFeatures=len(dataset[0])-1
baseEntropy=calcShannonEnt(dataset)
bestInfoGain=0.0
bestFeature=-1
for i in range(numFeatures):
featList=[example[i] for example in dataset ]
uniqueVals=set(featList)
newEntropy=0.0
for value in uniqueVals:
subDataSet=splitDataSet(dataset,i,value)
prob=len(subDataSet)/float(len(dataset))
newEntropy +=prob * calcShannonEnt(subDataSet)
infoGain=baseEntropy-newEntropy
if(infoGain>bestInfoGain):
bestInfoGain=infoGain
bestFeature=i
return bestFeature
def majorityCnt(classList):
classCount={}
for vote in classList:
if vote not in classCount.keys():
classCount[vote]=0
classCount[vote]+=1
sortedClassCount=sorted(classCount.iteritems(),key=operator.itemgetter(1),reverse=True)
return sortedClassCount[0][0]
def createTree(dataset,labels):
classList=[example[-1] for example in dataset]
if classList.count(classList[0])==len(classList):
return classList
if len(dataset[0])==1:
return majorityCnt(dataset)
bestFeat=chooseBestFeatureToSplit(dataset)
bestFeatLabel=labels[bestFeat]
myTree={bestFeatLabel:{}}
del(labels[bestFeat])
featValues=[example[bestFeat] for example in dataset]
uniqueVals=set(featValues)
for value in uniqueVals:
subLabels=labels[:]
myTree[bestFeatLabel][value]=createTree(splitDataSet(dataset,bestFeat,value),subLabels)
return myTree
myDat,labels = CreateDataset()
print(calcShannonEnt(myDat))
print(splitDataSet(myDat, 1, 1))
print(chooseBestFeatureToSplit(myDat))
print(createTree(myDat, labels))