请问对两个模型(逻辑回归和随机森林)在多个不平衡率的数据集中各个性能度量指标(各折测试集上的acc、auc、precision、recall、f1已经得出具体数值)是否有显著差异性进行Nemenyi检验的Python代码应该怎么写?(注:Nemenyi是Friedman检验的事后检验,通常用于三个及以上模型;仅比较两个模型时一般使用Wilcoxon符号秩检验。)
以下是现在已完成的在其中一个数据集中得出两个模型的性能指标的代码:
# Models under comparison: logistic regression vs. random forest.
lg = LogisticRegression(random_state=42)
rf = RandomForestClassifier(n_estimators=100, random_state=42)

# Stratified 5-fold CV preserves the class ratio in every fold,
# which matters on imbalanced data.
skfolds = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold metric accumulators: suffix 1 = logistic regression, suffix 2 = random forest.
(accuracy_score_list1, precision_score_list1, recall_score_list1,
 f1_score_list1, auc_score_list1) = ([] for _ in range(5))
(accuracy_score_list2, precision_score_list2, recall_score_list2,
 f1_score_list2, auc_score_list2) = ([] for _ in range(5))
# Per-fold training and evaluation.
# NOTE(review): `ros` (presumably an imblearn RandomOverSampler) and the metric
# functions (accuracy_score, precision_score, recall_score, f1_score) are
# assumed to be imported earlier in the file — confirm.
from sklearn import metrics  # hoisted: the original re-ran this import every fold

for train_index, test_index in skfolds.split(X, y):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Oversample ONLY the training split so no test information leaks
    # into the resampling step.
    X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

    # --- model 1: logistic regression ---
    lg.fit(X_resampled, y_resampled)
    y_pred1 = lg.predict(X_test)
    y_score1 = lg.predict_proba(X_test)[:, 1]  # positive-class probability for ROC
    fpr1, tpr1, _ = metrics.roc_curve(y_test, y_score1)
    accuracy_score_list1.append(accuracy_score(y_test, y_pred1))
    precision_score_list1.append(precision_score(y_test, y_pred1))
    recall_score_list1.append(recall_score(y_test, y_pred1))
    f1_score_list1.append(f1_score(y_test, y_pred1))
    auc_score_list1.append(metrics.auc(fpr1, tpr1))

    # --- model 2: random forest ---
    rf.fit(X_resampled, y_resampled)
    y_pred2 = rf.predict(X_test)
    y_score2 = rf.predict_proba(X_test)[:, 1]
    fpr2, tpr2, _ = metrics.roc_curve(y_test, y_score2)
    accuracy_score_list2.append(accuracy_score(y_test, y_pred2))
    precision_score_list2.append(precision_score(y_test, y_pred2))
    recall_score_list2.append(recall_score(y_test, y_pred2))
    f1_score_list2.append(f1_score(y_test, y_pred2))
    auc_score_list2.append(metrics.auc(fpr2, tpr2))
# Summary: mean +/- 2*std of each metric over the CV folds.
# NOTE(review): the two classification_report calls use y_test/y_pred* left
# over from the LAST fold only — they are not cross-validated summaries.
print(f'Accuracy1:{np.average(accuracy_score_list1):.3f}(+/-{np.std(accuracy_score_list1) * 2:.3f})')
print(f'Precision1:{np.average(precision_score_list1):.3f}(+/-{np.std(precision_score_list1) * 2:.3f})')
print(f'Recall1:{np.average(recall_score_list1):.3f}(+/-{np.std(recall_score_list1) * 2:.3f})')
print(f'F1_score1:{np.average(f1_score_list1):.3f}(+/-{np.std(f1_score_list1) * 2:.3f})')
print(f'AUC1:{np.average(auc_score_list1):.3f}(+/-{np.std(auc_score_list1) * 2:.3f})')
print('Classification Report:\n', classification_report(y_test, y_pred1))
print(f'Accuracy2:{np.average(accuracy_score_list2):.3f}(+/-{np.std(accuracy_score_list2) * 2:.3f})')
print(f'Precision2:{np.average(precision_score_list2):.3f}(+/-{np.std(precision_score_list2) * 2:.3f})')
print(f'Recall2:{np.average(recall_score_list2):.3f}(+/-{np.std(recall_score_list2) * 2:.3f})')
print(f'F1_score2:{np.average(f1_score_list2):.3f}(+/-{np.std(f1_score_list2) * 2:.3f})')
print(f'AUC2:{np.average(auc_score_list2):.3f}(+/-{np.std(auc_score_list2) * 2:.3f})')
print('Classification Report:\n', classification_report(y_test, y_pred2))