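# Question: this code raises an error in Jupyter Notebook after changing even a single character - what is the cause? The code is probably also incomplete, so any pointers from experienced readers are welcome.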
#%%
# Import dependencies
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import datasets, model_selection, naive_bayes,metrics
import sklearn.metrics as metrics
import matplotlib.pyplot as plt
import numpy as np
# Load the data.
data = pd.read_csv('./cs-training.csv')
label_col = 'SeriousDlqin2yrs'
y = data[label_col]
# Features: skip the first column (as before) and also drop the label itself.
# Slicing with columns[1:] alone keeps the label among the features whenever
# the label is not column 0, which leaks the target into the model.
# NOTE(review): assumes column 0 is a row-index column - confirm against the CSV.
x = data[data.columns[1:]].drop(columns=[label_col], errors='ignore')
# Split into training and validation sets. The separate test file has no
# labels, so it cannot be used to draw a ROC curve; a part of the training
# data is held out instead to validate the model and plot the ROC.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=0)
# Fill missing values with medians learned from the training split only;
# GaussianNB does not accept NaN inputs.
train_medians = X_train.median(numeric_only=True)
X_train = X_train.fillna(train_medians)
X_test = X_test.fillna(train_medians)
# Standardize: fit the scaler on the training split only, then apply the same
# transform to the validation split so no validation statistics leak in.
sc_p = StandardScaler()
X_train = sc_p.fit_transform(X_train)
X_test = sc_p.transform(X_test)
print("训练集大小:",len(X_train))
print("验证集大小:",len(X_test))
# GaussianNB training section.
# (This heading was previously a bare text line, which is a SyntaxError and
# prevented the whole cell from running.)
cls = naive_bayes.GaussianNB()
cls.fit(X_train, y_train)
print('GaussianNB Training Score: %.2f' % cls.score(X_train, y_train))
print('GaussianNB Testing Score: %.2f' % cls.score(X_test, y_test))
# Predicted labels for the validation set.
y_predict = cls.predict(X_test)
# Predicted probability of the positive class. A ROC curve needs continuous
# scores; hard 0/1 labels collapse it to a single operating point.
# Column 1 is the second entry of cls.classes_.
# NOTE(review): assumes binary 0/1 labels, so that column is class 1 - confirm.
y_score = cls.predict_proba(X_test)[:, 1]
# Compute the ROC points and the AUC value, then draw the ROC curve.
fpr, tpr, threshold = metrics.roc_curve(y_test, y_score)
rocauc = metrics.auc(fpr, tpr)  # area under the ROC curve
plt.plot(fpr, tpr, 'b', label='AUC = %0.2f' % rocauc)  # ROC curve
plt.legend(loc='lower right')
plt.plot([0, 1], [0, 1], 'r--')  # chance-level diagonal for reference
plt.xlim([0, 1])
plt.ylim([0, 1])
# NOTE(review): the axis labels are Chinese; matplotlib's default font may not
# contain these glyphs - configure a CJK-capable font if they render as boxes.
plt.ylabel('真正率')
plt.xlabel('假正率')
plt.show()
#%%
# GaussianNB training section (this cell repeats the training/ROC steps of
# the preceding cell).
# (This heading was previously a bare text line, which is a SyntaxError and
# prevented the whole cell from running.)
cls = naive_bayes.GaussianNB()
cls.fit(X_train, y_train)
print('GaussianNB Training Score: %.2f' % cls.score(X_train, y_train))
print('GaussianNB Testing Score: %.2f' % cls.score(X_test, y_test))
# Predicted labels for the validation set.
y_predict = cls.predict(X_test)
# Predicted probability of the positive class. A ROC curve needs continuous
# scores; hard 0/1 labels collapse it to a single operating point.
# Column 1 is the second entry of cls.classes_.
# NOTE(review): assumes binary 0/1 labels, so that column is class 1 - confirm.
y_score = cls.predict_proba(X_test)[:, 1]
# Compute the ROC points and the AUC value, then draw the ROC curve.
fpr, tpr, threshold = metrics.roc_curve(y_test, y_score)
rocauc = metrics.auc(fpr, tpr)  # area under the ROC curve
plt.plot(fpr, tpr, 'b', label='AUC = %0.2f' % rocauc)  # ROC curve
plt.legend(loc='lower right')
plt.plot([0, 1], [0, 1], 'r--')  # chance-level diagonal for reference
plt.xlim([0, 1])
plt.ylim([0, 1])
# NOTE(review): the axis labels are Chinese; matplotlib's default font may not
# contain these glyphs - configure a CJK-capable font if they render as boxes.
plt.ylabel('真正率')
plt.xlabel('假正率')
plt.show()