# 分离特征和目标变量
y = '23h结果'
X= [x for x in data.columns if x not in [y, '23h结果']]
X=pd.DataFrame(X)
# 定义LightGBM模型
params = {'boosting_type': 'gbdt',
'objective': 'regression',
'metric': ('mae','mse','r2'),# 默认使用MAE作为评价指标
'num_leaves': 31,
'learning_rate': 0.05,
'feature_fraction': 0.9}
# 顺序向前选择特征
selected_features = []
score_list = []
for i in range(len(X.columns)):
scores = []
for feature in X.columns:
if feature not in selected_features:
features = selected_features + [feature]
X_selected = X[features]
# 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42)
lgb_train = lgb.Dataset(X_train, y_train)
lgb_test = lgb.Dataset(X_test, y_test, reference=lgb_train)
# 训练模型并预测
gbm = lgb.train(params, lgb_train, num_boost_round=1000, valid_sets=lgb_test, early_stopping_rounds=10,
verbose_eval=False)
y_pred = gbm.predict(X_test)
运行上述代码时,出现了如下错误
Traceback (most recent call last):
File "C:/Users/86187/PycharmProjects/pythonProject10/23h预测.py", line 49, in <module>
X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42)
File "D:\py\anaconda3\lib\site-packages\sklearn\model_selection\_split.py", line 2559, in train_test_split
arrays = indexable(*arrays)
File "D:\py\anaconda3\lib\site-packages\sklearn\utils\validation.py", line 443, in indexable
check_consistent_length(*result)
File "D:\py\anaconda3\lib\site-packages\sklearn\utils\validation.py", line 397, in check_consistent_length
raise ValueError(
ValueError: Found input variables with inconsistent numbers of samples: [18, 5]
具体解决方是什么?