rururur 2015-11-19 06:26 采纳率: 0%
浏览 3996

python 回归树问题,报错求解决

错误提示:
Traceback (most recent call last):

File "", line 1, in
runfile('F:/desktop/新建文件夹 (2)/书/machinelearninginaction/Ch09/regTrees.py', wdir='F:/desktop/新建文件夹 (2)/书/machinelearninginaction/Ch09')

File "C:\Users\shiying\Anaconda2\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 685, in runfile
execfile(filename, namespace)

File "C:\Users\shiying\Anaconda2\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 71, in execfile
exec(compile(scripttext, filename, 'exec'), glob, loc)

File "F:/desktop/新建文件夹 (2)/书/machinelearninginaction/Ch09/regTrees.py", line 140, in

File "F:/desktop/新建文件夹 (2)/书/machinelearninginaction/Ch09/regTrees.py", line 76, in createTree

File "F:/desktop/新建文件夹 (2)/书/machinelearninginaction/Ch09/regTrees.py", line 58, in chooseBestSplit

TypeError: unhashable type: 'matrix'

  • 写回答

2条回答 默认 最新

  • rururur 2015-11-19 06:30
    关注

    源码如下:

    from numpy import *

    def loadDataSet(fileName): #general function to parse tab -delimited floats
    dataMat = [] #assume last column is target value
    fr = open(fileName)
    for line in fr.readlines():
    curLine = line.strip().split('\t')
    fltLine = map(float,curLine) #map all elements to float()
    dataMat.append(fltLine)
    return dataMat

    def binSplitDataSet(dataSet, feature, value):
    mat0 = dataSet[nonzero(dataSet[:,feature] > value)[0],:][0]
    mat1 = dataSet[nonzero(dataSet[:,feature] <= value)[0],:][0]
    return mat0,mat1

    def regLeaf(dataSet):#returns the value used for each leaf
    return mean(dataSet[:,-1])

    def regErr(dataSet):
    return var(dataSet[:,-1]) * shape(dataSet)[0]

    def linearSolve(dataSet): #helper function used in two places
    m,n = shape(dataSet)
    X = mat(ones((m,n))); Y = mat(ones((m,1)))#create a copy of data with 1 in 0th postion
    X[:,1:n] = dataSet[:,0:n-1]; Y = dataSet[:,-1]#and strip out Y
    xTx = X.T*X
    if linalg.det(xTx) == 0.0:
    raise NameError('This matrix is singular, cannot do inverse,\n\
    try increasing the second value of ops')
    ws = xTx.I * (X.T * Y)
    return ws,X,Y

    def modelLeaf(dataSet):#create linear model and return coeficients
    ws,X,Y = linearSolve(dataSet)
    return ws

    def modelErr(dataSet):
    ws,X,Y = linearSolve(dataSet)
    yHat = X * ws
    return sum(power(Y - yHat,2))

    def chooseBestSplit(dataSet, leafType=regLeaf, errType=regErr, ops=(1,4)):
    tolS = ops[0]; tolN = ops[1]
    #if all the target variables are the same value: quit and return value
    if len(set(dataSet[:,-1].T.tolist()[0])) == 1: #exit cond 1
    return None, leafType(dataSet)
    m,n = shape(dataSet)
    #the choice of the best feature is driven by Reduction in RSS error from mean
    S = errType(dataSet)
    bestS = inf; bestIndex = 0; bestValue = 0
    for featIndex in range(n-1):
    for splitVal in set(dataSet[:,featIndex]):
    mat0, mat1 = binSplitDataSet(dataSet, featIndex, splitVal)
    if (shape(mat0)[0] < tolN) or (shape(mat1)[0] < tolN): continue
    newS = errType(mat0) + errType(mat1)
    if newS < bestS:
    bestIndex = featIndex
    bestValue = splitVal
    bestS = newS
    #if the decrease (S-bestS) is less than a threshold don't do the split
    if (S - bestS) < tolS:
    return None, leafType(dataSet) #exit cond 2
    mat0, mat1 = binSplitDataSet(dataSet, bestIndex, bestValue)
    if (shape(mat0)[0] < tolN) or (shape(mat1)[0] < tolN): #exit cond 3
    return None, leafType(dataSet)
    return bestIndex,bestValue#returns the best feature to split on
    #and the value used for that split

    def createTree(dataSet, leafType=regLeaf, errType=regErr, ops=(1,4)):#assume dataSet is NumPy Mat so we can array filtering
    feat, val = chooseBestSplit(dataSet, leafType, errType, ops)#choose the best split
    if feat == None: return val #if the splitting hit a stop condition return val
    retTree = {}
    retTree['spInd'] = feat
    retTree['spVal'] = val
    lSet, rSet = binSplitDataSet(dataSet, feat, val)
    retTree['left'] = createTree(lSet, leafType, errType, ops)
    retTree['right'] = createTree(rSet, leafType, errType, ops)
    return retTree

    def isTree(obj):
    return (type(obj).__name__=='dict')

    def getMean(tree):
    if isTree(tree['right']): tree['right'] = getMean(tree['right'])
    if isTree(tree['left']): tree['left'] = getMean(tree['left'])
    return (tree['left']+tree['right'])/2.0

    def prune(tree, testData):
    if shape(testData)[0] == 0: return getMean(tree) #if we have no test data collapse the tree
    if (isTree(tree['right']) or isTree(tree['left'])):#if the branches are not trees try to prune them
    lSet, rSet = binSplitDataSet(testData, tree['spInd'], tree['spVal'])
    if isTree(tree['left']): tree['left'] = prune(tree['left'], lSet)
    if isTree(tree['right']): tree['right'] = prune(tree['right'], rSet)
    #if they are now both leafs, see if we can merge them
    if not isTree(tree['left']) and not isTree(tree['right']):
    lSet, rSet = binSplitDataSet(testData, tree['spInd'], tree['spVal'])
    errorNoMerge = sum(power(lSet[:,-1] - tree['left'],2)) +\
    sum(power(rSet[:,-1] - tree['right'],2))
    treeMean = (tree['left']+tree['right'])/2.0
    errorMerge = sum(power(testData[:,-1] - treeMean,2))
    if errorMerge < errorNoMerge:
    print "merging"
    return treeMean
    else: return tree
    else: return tree

    def regTreeEval(model, inDat):
    return float(model)

    def modelTreeEval(model, inDat):
    n = shape(inDat)[1]
    X = mat(ones((1,n+1)))
    X[:,1:n+1]=inDat
    return float(X*model)

    def treeForeCast(tree, inData, modelEval=regTreeEval):
    if not isTree(tree): return modelEval(tree, inData)
    if inData[tree['spInd']] > tree['spVal']:
    if isTree(tree['left']): return treeForeCast(tree['left'], inData, modelEval)
    else: return modelEval(tree['left'], inData)
    else:
    if isTree(tree['right']): return treeForeCast(tree['right'], inData, modelEval)
    else: return modelEval(tree['right'], inData)

    def createForeCast(tree, testData, modelEval=regTreeEval):
    m=len(testData)
    yHat = mat(zeros((m,1)))
    for i in range(m):
    yHat[i,0] = treeForeCast(tree, mat(testData[i]), modelEval)
    return yHat

    trainMat = mat(loadDataSet('bikeSpeedVsIq_train.txt'))
    testMat = mat(loadDataSet('bikeSpeedVsIq_test.txt'))
    myregTree=createTree(trainMat, ops=(1,20))
    mymodTree=createTree(trainMat, modelLeaf, modelErr, (1,20))
    yregHat=createForeCast(myregTree, testMat[:,0])
    ymodHat=createForeCast(mymodTree, testMat[:,0], modelTreeEval)
    regCo = corrcoef(yregHat, testMat[:,1], rowvar=0)[0,1]
    modCo = corrcoef(ymodHat, testMat[:,1], rowvar=0)[0,1]
    print "reg", regCo
    1. print "model", modCo

    评论

报告相同问题?

悬赏问题

  • ¥15 seatunnel-web使用SQL组件时候后台报错,无法找到表格
  • ¥15 fpga自动售货机数码管(相关搜索:数字时钟)
  • ¥15 用前端向数据库插入数据,通过debug发现数据能走到后端,但是放行之后就会提示错误
  • ¥30 3天&7天&&15天&销量如何统计同一行
  • ¥30 帮我写一段可以读取LD2450数据并计算距离的Arduino代码
  • ¥15 飞机曲面部件如机翼,壁板等具体的孔位模型
  • ¥15 vs2019中数据导出问题
  • ¥20 云服务Linux系统TCP-MSS值修改?
  • ¥20 关于#单片机#的问题:项目:使用模拟iic与ov2640通讯环境:F407问题:读取的ID号总是0xff,自己调了调发现在读从机数据时,SDA线上并未有信号变化(语言-c语言)
  • ¥20 怎么在stm32门禁成品上增加查询记录功能