《Python数据挖掘从入门到实践》(第二版)第一章:OneR 算法。这一章定义了两个函数,问题是这两个函数,包括后面一章的测试函数,都没有定义 ytrue 这个变量。请问有先行者遇到过这个问题吗?
from collections import defaultdict as dd
from operator import itemgetter as it

import numpy as np
from sklearn.datasets import load_iris
# Load the Iris data set that ships with scikit-learn.
dataset = load_iris()
x = dataset.data    # feature matrix: one row per sample, one column per feature
y = dataset.target  # class label of each sample

# Mean of every feature, computed down the rows (axis=0).
attribute_means = x.mean(axis=0)

# Discretise the features: 1 where a value is at or above its feature's
# mean, 0 otherwise.  `np` is numpy, imported at the top of the file.
x_d = np.array(x >= attribute_means, dtype='int')
def train_feature_vaule(x, ytrue, feature, value):
    """Find the best class prediction for one value of one feature.

    Looks at every sample whose ``feature`` column equals ``value``, counts
    how often each class occurs among them, and predicts the most frequent
    class.

    Args:
        x: Iterable of samples; each sample must support ``sample[feature]``
            (e.g. a 2-D numpy array or a list of lists).
        ytrue: Iterable of the true class labels, paired with ``x`` row by
            row.  It is an ordinary parameter: the caller supplies it (for
            instance the training labels), it is not a global variable.
        feature: Index of the feature (column) to inspect.
        value: The feature value the rule is being built for.

    Returns:
        A tuple ``(most_frequent_class, error)`` where ``error`` is the
        number of matching samples that do *not* belong to
        ``most_frequent_class``.

    Raises:
        IndexError: If no sample has ``sample[feature] == value``, so there
            is no class to predict.
    """
    class_count = dd(int)
    for sample, label in zip(x, ytrue):
        if sample[feature] == value:
            class_count[label] += 1

    if not class_count:
        raise IndexError(
            f"no sample has value {value!r} for feature {feature!r}"
        )

    # On a tie, max() keeps the first maximal key it meets, i.e. the class
    # that appeared first in the data.
    most_frequent_class = max(class_count, key=class_count.get)

    # Every matching sample outside the winning class is a misprediction.
    error = sum(class_count.values()) - class_count[most_frequent_class]
    return most_frequent_class, error
def train(x, ytrue, feature):
    """Build the OneR rule for a single feature and measure its error.

    For every distinct value found in column ``feature`` of ``x``, asks
    ``train_feature_vaule`` for the most frequent class among the samples
    having that value, and records it as the prediction for that value.

    Args:
        x: 2-D array of samples; must expose ``.shape`` with two dimensions
            and support ``x[:, feature]`` column indexing.
        ytrue: True class labels, paired with the rows of ``x``.  It is an
            ordinary parameter supplied by the caller.
        feature: Index of the feature (column) to build the rule on; must
            satisfy ``0 <= feature < x.shape[1]``.

    Returns:
        A tuple ``(predictors, total_error)``: ``predictors`` maps each
        distinct feature value to its predicted class, and ``total_error``
        is the sum of the per-value error counts.

    Raises:
        AssertionError: If ``feature`` is not a valid column index.
    """
    _, nfeatures = x.shape
    # Valid column indices are 0 .. nfeatures - 1, so the upper bound is
    # exclusive.
    assert 0 <= feature < nfeatures, (
        f"feature must be in [0, {nfeatures}), got {feature}"
    )

    predictors = {}
    total_error = 0
    for current_value in set(x[:, feature]):
        most_frequent_class, error = train_feature_vaule(
            x, ytrue, feature, current_value
        )
        predictors[current_value] = most_frequent_class
        total_error += error
    return predictors, total_error