问题遇到的现象和发生背景
非平衡数据的类别比例为86002:1825,已经处于严重不平衡状态,但是计算出的ROC(AUC)值却为1,不知道是什么原因,恳请指教,感激不尽。
问题相关代码,请勿粘贴截图
#导入一些相关库
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import scale
# Load the data set; missing cells are replaced with 0.
df = pd.read_excel(r"D:/Datasource 02.xlsx").fillna(0)

# Features: every column except the first and the last. The label column is
# also dropped by name, so it cannot end up among the features when "Fatal"
# is not the last column of the sheet. A label left inside the features makes
# the classes trivially separable and is a likely cause of a ROC AUC of 1.
x = df.iloc[:, 1:-1].drop(columns=["Fatal"], errors="ignore")
y = df["Fatal"]

# Show the class balance, first as absolute counts and then as proportions.
print(y.value_counts())
print("-------------------------")
print(y.value_counts(normalize=True))
def get_result_data(x, y):
    """Fit a logistic regression on a hold-out split and print its metrics.

    The features are standardised and split 60/40 into a training and a test
    part. A ``LogisticRegression`` with default settings is fitted on the
    training part; the test accuracy, the classification report and the ROC
    AUC (computed from the decision-function scores) are printed.

    Args:
        x: Feature matrix accepted by ``sklearn.preprocessing.scale``.
        y: Target labels aligned with the rows of ``x``; the report below
            assumes exactly two classes.

    Returns:
        None. All results are written to standard output.
    """
    # Standardise every feature to zero mean and unit variance. The scaled
    # matrix must be the one that is split and fitted; previously it was
    # computed and then discarded in favour of the raw features.
    # NOTE(review): scaling before the split lets test-set statistics
    # influence the training features; fitting the scaler on the training
    # part only would avoid that.
    x_scaled = scale(x, with_mean=True, with_std=True)

    # Stratify on the label so that both parts keep the original class ratio,
    # which matters when one class is rare.
    x_train, x_test, y_train, y_test = train_test_split(
        x_scaled, y, test_size=0.4, random_state=0, stratify=y
    )

    clf = LogisticRegression().fit(x_train, y_train)

    # Mean accuracy on the held-out part.
    print("LR模型测试成绩:{:.2f}".format(clf.score(x_test, y_test)))

    # Per-class precision / recall / F1 from the hard predictions.
    y_pred = clf.predict(x_test)
    target_names = ['class 0', 'class 1']
    print(classification_report(y_test, y_pred, target_names=target_names))

    # The ROC curve needs continuous scores, so use the signed distance to
    # the decision boundary instead of the predicted labels.
    scores = clf.decision_function(x_test)
    fpr, tpr, _ = roc_curve(y_test, scores)
    rocauc = auc(fpr, tpr)  # area under the ROC curve
    print("ROC分数:{:.2f}".format(rocauc))
# Run the evaluation only when the file is executed as a script.
if __name__ == "__main__":
    get_result_data(x, y)