如题,其它类没有roc,auc为nan,而且由predict_proba()预测的概率没有归一化
```python
import numpy as np
import seaborn as sns
import pandas as pd
import gc
import os
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score,roc_auc_score
from sklearn.preprocessing import label_binarize
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.ensemble import AdaBoostClassifier
import warnings
import time
from sklearn.metrics import roc_curve, auc, precision_score, recall_score
#jet表
jet = pd.read_csv('complex_train_R04_jet.csv')
print('jet.shape:',jet.shape)
train_num = jet.shape[0]
gc.collect()
feature = [i for i in jet.columns if i not in ['event_id', 'jet_id', 'label']]
x= jet[feature].copy().reset_index(drop=True)
label_map = {21:0,1:1,4:2,5:3}
y = jet['label'].copy()
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3)
#随机森林
seed = 9999
# 构建随机森林模型
rf_model = RandomForestClassifier(n_estimators=100, random_state=seed, verbose=2)
# 在训练集上训练模型
rf_model.fit(x_train,y_train)
#在测试集上预测
pred_test_rf = rf_model.predict_proba(x_test)
true_labels_binary_rf = label_binarize(y_test, classes=[0, 1, 2, 3])
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(4): # 遍历每个类别
fpr[i], tpr[i], _ = roc_curve(true_labels_binary_rf[:, i], pred_test_rf[:, i])
roc_auc[i] = auc(fpr[i], tpr[i])
print(roc_auc[i])
# 绘制每个类别的 ROC 曲线
for i in range(4):
plt.plot(fpr[i], tpr[i], lw=2,
label='ROC curve of class {0} (area = {1:0.2f})'
''.format(i, roc_auc[i]))
plt.legend()
上图为特征集
图为roc曲线