数据集
编号,色泽,根蒂,敲声,纹理,脐部,触感,密度,含糖率,好瓜
1,青绿,蜷缩,浊响,清晰,凹陷,硬滑,0.697,0.460,1
2,乌黑,蜷缩,沉闷,清晰,凹陷,硬滑,0.774,0.376,1
3,乌黑,蜷缩,浊响,清晰,凹陷,硬滑,0.634,0.264,1
4,青绿,蜷缩,沉闷,清晰,凹陷,硬滑,0.608,0.318,1
5,浅白,蜷缩,浊响,清晰,凹陷,硬滑,0.556,0.215,1
6,青绿,稍蜷,浊响,清晰,稍凹,软粘,0.403,0.237,1
7,乌黑,稍蜷,浊响,稍糊,稍凹,软粘,0.481,0.149,1
8,乌黑,稍蜷,浊响,清晰,稍凹,硬滑,0.437,0.211,1
9,乌黑,稍蜷,沉闷,稍糊,稍凹,硬滑,0.666,0.091,否
10,青绿,硬挺,清脆,清晰,平坦,软粘,0.243,0.267,否
11,浅白,硬挺,清脆,模糊,平坦,硬滑,0.245,0.057,否
12,浅白,蜷缩,浊响,模糊,平坦,软粘,0.343,0.099,否
13,青绿,稍蜷,浊响,稍糊,凹陷,硬滑,0.639,0.161,否
14,浅白,稍蜷,沉闷,稍糊,凹陷,硬滑,0.657,0.198,否
15,乌黑,稍蜷,浊响,清晰,稍凹,软粘,0.36,0.370,否
16,浅白,蜷缩,浊响,模糊,平坦,硬滑,0.593,0.042,否
17,青绿,蜷缩,沉闷,稍糊,稍凹,硬滑,0.719,0.103,否
import pandas as pd
def readfile():
datasets = pd.read_csv(r'test.csv', encoding='utf-8')
del datasets['编号']
del datasets['好瓜']
return datasets
def NaiveBayes(test):
data = readfile()
data = data.values.tolist()
goodMelon = [];badMelon = []
for i in range(len(data)):
if data[i][9] == 1:
goodMelon.append(data[i])
else:
badMelon.append(data[i])
# 计算p(x|C1)=p1与p(x|C2)=p2
p1 = 1.0;p2 = 1.0
for j in range(len(test)):
x=0.0
for k in range(len(goodMelon)):
if goodMelon[k][j] == test[j]:
x = x + 1.0
p1 = p1 * ((x + 1.0) / (len(goodMelon) + 2.0)) # 拉普拉斯平滑
for j in range(len(test)):
x=0.0
for k in range(len(badMelon)):
if badMelon[k][j] == test[j]:
x = x + 1.0
p2 = p2 * ((x + 1.0) / (len(badMelon) + 2.0)) # 拉普拉斯平滑
pc1 = len(goodMelon) / len(data)
pc2 = 1 - pc1
#贝叶斯公式
p_good = p1*pc1;p_bad=p2*pc2
if p_good > p_bad:
print('好瓜')
else:
print('坏瓜')
if __name__ == '__main__':
test=['青绿','蜷缩','浊响','清晰','凹陷','硬滑',0.697,0.460]
NaiveBayes(test)
老是出现这样的报错,