# 分离特征和目标变量
y = '23h结果'
X= [x for x in data.columns if x not in [y, '23h结果']]
# 初始化测试集
X_test = data[X].sample(frac=0.2, random_state=42)
y_test = data[y].loc[X_test.index]
# 初始化评价指标列表
mae_list = []
mse_list = []
rmse_list = []
r2_list = []
# 初始化选中特征列表
selected_features = []
for i in range(len(X)):
mae_min = float('inf')
mse_min = float('inf')
rmse_min = float('inf')
r2_max = float('-inf')
best_feature = None
for feature in X:
# 若特征已经被选中则跳过
if feature in selected_features:
continue
# 加入待选特征
cur_features = selected_features + [feature]
# 划分训练集和验证集
X_train = data[cur_features].dropna()
y_train = data[y].loc[X_train.index]
# 构建模型
model = lgb.LGBMRegressor(random_state=42)
model.fit(X_train, y_train)
# 评估模型
y_pred = model.predict(X_test[cur_features])
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = mean_squared_error(y_test, y_pred, squared=False)
r2 = r2_score(y_test, y_pred)
# 记录最佳特征
if mae < mae_min:
mae_min = mae
best_feature = feature
if mse < mse_min:
mse_min = mse
if rmse < rmse_min:
rmse_min = rmse
if r2 > r2_max:
r2_max = r2
# 记录评价指标
mae_list.append(mae_min)
mse_list.append(mse_min)
rmse_list.append(rmse_min)
r2_list.append(r2_max)
# 记录选中特征
selected_features.append(best_feature)
# 输出评价指标
print(f'Round {i + 1}:')
print(f'Selected Feature: {best_feature}')
print(f'MAE: {mae_min:.4f}')
print(f'MSE: {mse_min:.4f}')
print(f'RMSE: {rmse_min:.4f}')
print(f'R2: {r2_max:.4f}\n')
我利用上述代码进行顺序向前选择时,为什么我无论用什么数据,显示出的评价指标都很完美,是哪里出现问题了呢?