哈哈贞贞 2016-04-05 13:29 采纳率: 50%
浏览 1897

决策树 ID3 的 Python 编程遇到的问题

各位大神,小弟现在刚入门数据分析,Python 也只会基础语法。以下代码是照着机器学习的示例代码敲的,数据集是自己编的,但运行时报错:unhashable type: 'list'。希望大神可以给我讲解一下,谢谢啦!

from math import log
import operator
def CreateDataset():
    """Build the hand-made sample data set and its column names.

    Every row holds three 0/1 feature values followed by a class label
    (``'high'`` or ``'low'``). The columns appear to line up with the
    entries of ``labels`` in order.

    Returns:
        tuple: ``(dataset, labels)``. ``dataset`` is a list of 34 rows
        (20 labelled ``'high'``, then 14 labelled ``'low'``); ``labels``
        is the list of four column names. Both lists are created anew on
        each call, so callers may modify them freely.
    """
    dataset = [
        [0, 1, 1, 'high'],
        [0, 1, 1, 'high'],
        [0, 1, 1, 'high'],
        [0, 0, 1, 'high'],
        [0, 1, 1, 'high'],
        [0, 0, 1, 'high'],
        [0, 1, 0, 'high'],
        [1, 1, 1, 'high'],
        [1, 1, 0, 'high'],
        [1, 1, 1, 'high'],
        [1, 1, 1, 'high'],
        [1, 1, 1, 'high'],
        [1, 1, 1, 'high'],
        [0, 1, 1, 'high'],
        [1, 0, 1, 'high'],
        [1, 0, 1, 'high'],
        [1, 0, 1, 'high'],
        [1, 0, 1, 'high'],
        [1, 0, 0, 'high'],
        [0, 0, 0, 'high'],
        [0, 0, 1, 'low'],
        [0, 0, 1, 'low'],
        [0, 0, 1, 'low'],
        [0, 0, 0, 'low'],
        [0, 1, 0, 'low'],
        [1, 0, 1, 'low'],
        [1, 0, 1, 'low'],
        [0, 0, 0, 'low'],
        [0, 0, 0, 'low'],
        [1, 0, 0, 'low'],
        [0, 1, 0, 'low'],
        [1, 0, 1, 'low'],
        [1, 0, 0, 'low'],
        [1, 0, 0, 'low'],
    ]
    # NOTE: the last name refers to the class column, not to a feature, so
    # only the first three entries are ever valid feature labels.
    labels = ['weather', 'weekend', 'sales', 'volumes']
    return dataset, labels
def calcShannonEnt(dataset):
    """Return the base-2 Shannon entropy of the class labels in *dataset*.

    Args:
        dataset: sequence of rows; the last element of each row is taken
            as that row's class label.

    Returns:
        float: the entropy in bits. ``0.0`` when the data set is empty or
        every row carries the same label.
    """
    numEntries = len(dataset)

    # Tally how many rows carry each class label.
    labelCounts = {}
    for featVec in dataset:
        currentLabel = featVec[-1]
        labelCounts[currentLabel] = labelCounts.get(currentLabel, 0) + 1

    shannonEnt = 0.0
    for count in labelCounts.values():
        prob = float(count) / numEntries
        shannonEnt -= prob * log(prob, 2)
    return shannonEnt

def splitDataSet(dataset, axis, value):
    """Select the rows whose column *axis* equals *value*, minus that column.

    Args:
        dataset: sequence of rows (indexable sequences).
        axis: index of the column to test and then drop.
        value: value the column must equal for a row to be kept.

    Returns:
        list: new lists, one per matching row, in their original order and
        with column *axis* removed. The input rows are not modified.
    """
    return [
        list(featVec[:axis]) + list(featVec[axis + 1:])
        for featVec in dataset
        if featVec[axis] == value
    ]

def chooseBestFeatureToSplit(dataset):
    """Return the index of the feature with the largest information gain.

    For each feature column the data set is partitioned by that column's
    distinct values; the gain is the entropy of the whole set minus the
    size-weighted entropy of the partitions.

    Args:
        dataset: non-empty sequence of rows; every element but the last
            is a feature value, the last is the class label.

    Returns:
        int: index of the best feature, or ``-1`` when no feature yields a
        strictly positive gain (including when there are no features).

    Raises:
        IndexError: if *dataset* is empty.
    """
    numFeatures = len(dataset[0]) - 1
    baseEntropy = calcShannonEnt(dataset)
    totalRows = float(len(dataset))

    bestInfoGain = 0.0
    bestFeature = -1
    for i in range(numFeatures):
        uniqueVals = set(example[i] for example in dataset)

        # Entropy remaining after splitting on feature i, weighted by the
        # share of rows that falls into each partition.
        newEntropy = 0.0
        for value in uniqueVals:
            subDataSet = splitDataSet(dataset, i, value)
            prob = len(subDataSet) / totalRows
            newEntropy += prob * calcShannonEnt(subDataSet)

        infoGain = baseEntropy - newEntropy
        # Strict comparison: on ties the earliest feature wins.
        if infoGain > bestInfoGain:
            bestInfoGain = infoGain
            bestFeature = i
    return bestFeature

def majorityCnt(classList):
    """Return the most frequent element of *classList*.

    Args:
        classList: non-empty sequence of hashable class labels.

    Returns:
        The label with the highest count. Because the sort is stable, a
        tie goes to the label that was counted first.

    Raises:
        IndexError: if *classList* is empty.
    """
    classCount = {}
    for vote in classList:
        classCount[vote] = classCount.get(vote, 0) + 1

    # items() rather than iteritems(): the latter was removed in Python 3.
    sortedClassCount = sorted(
        classCount.items(), key=operator.itemgetter(1), reverse=True
    )
    return sortedClassCount[0][0]

def createTree(dataset, labels):
    """Recursively build an ID3 decision tree as nested dictionaries.

    Args:
        dataset: non-empty sequence of rows; every element but the last
            is a feature value, the last is the class label.
        labels: names of the feature columns, indexed like the columns of
            *dataset*. The list is not modified.

    Returns:
        Either a single class label (leaf) or a dict of the form
        ``{feature_name: {feature_value: subtree, ...}}``.

    Raises:
        IndexError: if *dataset* is empty.
    """
    classList = [example[-1] for example in dataset]

    # Pure node: every row has the same class, so return that one label
    # (not the whole list of labels).
    if classList.count(classList[0]) == len(classList):
        return classList[0]

    # No feature columns left: decide by majority vote over the class
    # labels. Passing the rows themselves would make majorityCnt use
    # lists as dict keys and fail with "unhashable type: 'list'".
    if len(dataset[0]) == 1:
        return majorityCnt(classList)

    bestFeat = chooseBestFeatureToSplit(dataset)
    # -1 means no feature separates the classes any further; indexing with
    # it would silently split on the class column, so vote instead.
    if bestFeat < 0:
        return majorityCnt(classList)

    bestFeatLabel = labels[bestFeat]
    # Build the reduced label list without touching the caller's list.
    subLabels = list(labels[:bestFeat]) + list(labels[bestFeat + 1:])

    myTree = {bestFeatLabel: {}}
    uniqueVals = set(example[bestFeat] for example in dataset)
    for value in uniqueVals:
        myTree[bestFeatLabel][value] = createTree(
            splitDataSet(dataset, bestFeat, value), subLabels
        )
    return myTree

# Demo run: build the sample data and exercise each function in turn.
myDat, labels = CreateDataset()

# Entropy of the class column over the full data set.
print(calcShannonEnt(myDat))

# Rows whose column 1 equals 1, with that column removed.
print(splitDataSet(myDat, 1, 1))

# Index of the feature with the highest information gain.
print(chooseBestFeatureToSplit(myDat))

# Hand createTree a copy so the module-level labels list stays intact even
# if the tree builder deletes entries from the list it receives.
print(createTree(myDat, labels[:]))

  • 写回答

1条回答

  • threenewbee 2016-04-05 19:49
    关注
    评论

报告相同问题?

悬赏问题

  • ¥15 执行 virtuoso 命令后,界面没有,cadence 启动不起来
  • ¥50 comfyui下连接animatediff节点生成视频质量非常差的原因
  • ¥20 有关区间dp的问题求解
  • ¥15 多电路系统共用电源的串扰问题
  • ¥15 slam rangenet++配置
  • ¥15 有没有研究水声通信方面的帮我改俩matlab代码
  • ¥15 ubuntu子系统密码忘记
  • ¥15 保护模式-系统加载-段寄存器
  • ¥15 电脑桌面设定一个区域禁止鼠标操作
  • ¥15 求NPF226060磁芯的详细资料