python进行XGBoost分析多分类问题时,采用precision、recall、f1-macro、f1-weighted和test score来评价模型并绘制ROC曲线。
结果是以上评价指标都是1,但是ROC曲线并不符合评价指标的结果,并出现了以下错误:
请问我要如何解决这个问题?
完整的代码如下:
```python
from xgboost import XGBClassifier
from xgboost import plot_importance
import pandas as pd
from sklearn.model_selection import KFold, cross_val_score as CVS, train_test_split
from sklearn import metrics
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GridSearchCV
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score,roc_curve, auc
from sklearn.preprocessing import label_binarize
#读取数据和标签
X = pd.read_excel("C:/Users/linyifeng/Desktop/data/SVM/freq_data.xlsx",sheet_name='train_x')
y = pd.read_excel("C:/Users/linyifeng/Desktop/data/SVM/freq_data.xlsx",sheet_name='train_y')
y = y.values.ravel()
#将数据分为训练集、验证集和一部分测试集
X_trainval,X_test,y_trainval,y_test = train_test_split(X,y,test_size=0.2,random_state=777)
le = LabelEncoder()
y_trainval = le.fit_transform(y_trainval)
y_test = le.fit_transform(y_test)
XGB = XGBClassifier(
learning_rate = 0.17,
n_estimators = 90,
max_depth=3,
min_child_weight=2,
gamma=0.07,
subsample=0.8,
colsample_bytree=0.8,
objective= 'multi:softmax',
num_class=7,
seed=27)
XGB.fit(X_trainval,y_trainval)
#评估模型
XGB.fit(X_test,y_test)
y_pred = XGB.predict(X_test)#返回训练后的预测结果
print(y_test)
print(y_pred)
p = precision_score(y_test, y_pred, average='weighted')#计算精确率
print(p)
r = recall_score(y_test, y_pred, average='weighted')#计算召回率
print(r)
f1sore = f1_score(y_test, y_pred, average='macro')#计算f1_macro 分数
print(f1sore)
f1sore_weight = f1_score(y_test, y_pred, average='weighted')#计算f1_weight 分数
print(f1sore_weight)
test_score = XGB.score(X_test,y_test)#计算test score
print("Score on testing set:{:.2f}".format(test_score))
#绘制ROC曲线
import matplotlib.pyplot as plt
y_ROC = y_test
y_ROC = label_binarize(y_ROC,classes=[1,2,3,4,5])#将label二值化
y_pred = label_binarize(y_pred,classes=[1,2,3,4,5])#将label二值化
n_classes = y_ROC.shape[1]#读取二维矩阵的类数,这里为5
y_score = XGB.predict_proba(X_test)#获取每个分类的概率值
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(n_classes):
fpr[i], tpr[i], _ = roc_curve(y_ROC[:, i], y_score[:, i])#遍历五种分类的结果来计算五种分类的fpr和tpr
roc_auc[i] = auc(fpr[i], tpr[i])#计算roc曲线面积
fpr["micro"], tpr["micro"], _ = roc_curve(y_ROC .ravel(), y_score.ravel())
roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
plt.figure()
lw = 2
plt.plot(fpr["micro"], tpr["micro"], color='darkorange',
lw=lw, label='ROC curve (area = %0.2f)' % roc_auc["micro"])
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlabel('FPR')
plt.ylabel('TPR')
plt.ylim([0.0, 1.0])
plt.xlim([0.0, 1.0])
plt.legend(loc="lower right")
plt.title("Precision-Recall")
plt.show()
```