使用 PSO(粒子群优化)调参的 LightGBM 分类模型对数据集进行预测时,所有模型评估指标都等于 1;而使用其他模型进行预测时结果正常。
代码如下:
import numpy as np
import lightgbm as lgb
import pandas as pd
from sklearn.metrics import roc_curve, auc
from pyswarm import pso
from sklearn.metrics import precision_score,recall_score, f1_score, accuracy_score, auc
# Load the pre-split training and test sheets from the Excel workbook.
# The path is a raw string: "\L" is not a valid escape sequence, so the
# non-raw literal triggers a SyntaxWarning on recent Python versions.
DATA_PATH = r"D:\LCIS_train_test.xlsx"
LABEL_COLUMN = '标当前状态'

train = pd.read_excel(DATA_PATH, sheet_name="train")
test = pd.read_excel(DATA_PATH, sheet_name="test")

# Separate the label column from the feature columns.
# NOTE(review): if every evaluation metric comes out as exactly 1.0, check
# whether one of the remaining feature columns directly encodes the label
# (target leakage) -- TODO confirm against the actual data.
y_train = train[LABEL_COLUMN]
y_test = test[LABEL_COLUMN]
X_train = train.drop(LABEL_COLUMN, axis=1)
X_test = test.drop(LABEL_COLUMN, axis=1)
# Number of stratified folds used to score each PSO candidate.
CV_FOLDS = 3


def _make_classifier(params):
    """Build a LightGBM binary classifier from a PSO position vector.

    params is indexable as
    [num_leaves, learning_rate, subsample, colsample_bytree].
    """
    return lgb.LGBMClassifier(
        num_leaves=int(params[0]),  # PSO positions are floats; truncate to int
        learning_rate=params[1],
        subsample=params[2],
        # LightGBM only applies `subsample` when bagging is enabled with a
        # positive frequency; without this the searched value has no effect.
        subsample_freq=1,
        colsample_bytree=params[3],
        objective='binary',
        # Fixed seed so that the same position always yields the same score.
        random_state=0,
        n_jobs=-1,
    )


def pso_optimization(params):
    """PSO objective: negative mean cross-validated ROC AUC.

    Candidates are scored with stratified k-fold cross-validation on the
    training data only. The test set is deliberately not used here: tuning
    hyperparameters against it would bias the final test metrics upwards.

    Args:
        params: position vector
            [num_leaves, learning_rate, subsample, colsample_bytree].

    Returns:
        The negated mean ROC AUC over the folds (PSO minimises its objective).
    """
    from sklearn.model_selection import StratifiedKFold, cross_val_score

    folds = StratifiedKFold(n_splits=CV_FOLDS, shuffle=True, random_state=0)
    fold_auc = cross_val_score(
        _make_classifier(params), X_train, y_train, scoring='roc_auc', cv=folds
    )
    return -fold_auc.mean()


# Bounds of the PSO search space, in the same order as the position vector.
lb = [2, 0.01, 0.5, 0.5]  # lower bounds
ub = [50, 0.5, 1.0, 1.0]  # upper bounds

# Run the particle swarm search over the hyperparameters.
best_params, _ = pso(pso_optimization, lb, ub, swarmsize=10, maxiter=20)

# Report the best position found.
print("Best Parameters:")
print("num_leaves:", int(best_params[0]))
print("learning_rate:", best_params[1])
print("subsample:", best_params[2])
print("colsample_bytree:", best_params[3])

# Fit the final model on the full training set, built exactly the same way
# as the candidates that were scored during the search.
best_clf = _make_classifier(best_params)
best_clf.fit(X_train, y_train)
# Rank the features by the fitted model's importance scores.
# best_clf was fitted on X_train, so the importances line up with its columns.
feature_importance = best_clf.feature_importances_

# Print an actual ranking: most important first, labelled by column name
# rather than by positional index.
print("Feature Importance Ranking:")
ranked_features = sorted(
    zip(X_train.columns, feature_importance),
    key=lambda pair: pair[1],
    reverse=True,
)
for rank, (name, importance) in enumerate(ranked_features, start=1):
    print(f"{rank}. {name}: {importance}")
# Score the test set with the final model.
test_proba = best_clf.predict_proba(X_test)
y_pred = test_proba[:, 1]  # column 1 of the predicted probabilities

# ROC curve points and the area under it.
fpr, tpr, thresholds = roc_curve(y_test, y_pred)
roc_auc = auc(fpr, tpr)

# Round the probabilities to the nearest integer to obtain hard labels.
y_pred_binary = np.rint(y_pred)

# Compute the label-based metrics in the same order as they are reported.
accuracy, precision, recall, f1 = (
    metric(y_test, y_pred_binary)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

# Print the evaluation metrics.
print("\nModel Evaluation Metrics:")
print(f"ROC AUC: {roc_auc:.4f}")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")
# Draw the ROC curve together with the chance-level diagonal.
import matplotlib.pyplot as plt

roc_figure = plt.figure()
roc_axes = roc_figure.gca()

roc_axes.plot(
    fpr,
    tpr,
    color='darkorange',
    lw=2,
    label='ROC curve (area = %0.2f)' % roc_auc,
)
# Diagonal reference line from (0, 0) to (1, 1).
roc_axes.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')

# Small headroom above 1.0 on the y-axis so the curve's top edge stays visible.
roc_axes.set_xlim([0.0, 1.0])
roc_axes.set_ylim([0.0, 1.05])

roc_axes.set_xlabel('False Positive Rate')
roc_axes.set_ylabel('True Positive Rate')
roc_axes.set_title('Receiver Operating Characteristic')
roc_axes.legend(loc="lower right")

plt.show()
有没有人能帮忙看看究竟是哪里出了问题?请帮我修改代码,使其能够输出正确的模型评估指标和 ROC 曲线。