对着《Python机器学习经典实例》敲代码时,运行会报错 bad input shape ()。感觉问题出在最后一行,想请教一下该怎么解决?非常感谢。
数据如下:
med,low,5more,more,med,med,good
med,low,5more,more,med,high,vgood
med,low,5more,more,big,low,unacc
med,low,5more,more,big,med,good
med,low,5more,more,big,high,vgood
low,vhigh,2,2,small,low,unacc
low,vhigh,2,2,small,med,unacc
low,vhigh,2,2,small,high,unacc
...
import numpy as np
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
# Load the data set: one comma-separated record per line.
input_path=u'/Users/zhangbei//Desktop/数据挖掘/机器学习/Python-Machine-Learning-Cookbook-master/Chapter02/car.data.txt'
# The context manager closes the file even if reading raises.
with open(input_path) as fo:
    lines = fo.readlines()
x = []
for line in lines:
    line = line.strip()
    # Skip blank lines (e.g. a trailing newline at the end of the file) so
    # that every row has the same number of fields and np.array() builds a
    # proper 2-D array instead of a ragged one.
    if not line:
        continue
    x.append(line.split(','))
x = np.array(x)
# Convert every string-valued column to integer codes, using one
# LabelEncoder per column. The fitted encoders are kept in `encoder` (indexed
# by column) so that new samples can be encoded the same way later.
encoder = []
x_encoded = np.empty(x.shape)
for i in range(x.shape[1]):
    encoder.append(preprocessing.LabelEncoder())
    x_encoded[:, i] = encoder[-1].fit_transform(x[:, i])
# ndarray.astype() returns a new array instead of converting in place, so the
# result must be rebound; otherwise the codes silently stay floats.
x_encoded = x_encoded.astype(int)
# The last column is the class label; all preceding columns are features.
x = x_encoded[:, :-1]
y = x_encoded[:, -1]
# Encode a single test sample with the encoders fitted on the training data.
# Every value must be a label the matching column's encoder saw during
# fitting ('vhight' was a typo for 'vhigh' and would be rejected as unseen).
input_data = np.array(['vhigh', 'vhigh', '2', '2', 'small', 'low'])
data_encoded = [-1] * len(input_data)
print(data_encoded)
for i, item in enumerate(input_data):
    # LabelEncoder.transform() expects a 1-D array-like. Passing a bare string
    # gives it a 0-d input and raises "bad input shape ()", so wrap the value
    # in a list and take the single encoded result back out.
    data_encoded[i] = int(encoder[i].transform([item])[0])