请教各位:我在病例对照研究中用随机森林筛选特征时,运行以下代码后,得出的特征重要性为0,这是为什么?
import pandas as pd
import numpy as np
from sklearn.model_selection import GroupShuffleSplit
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
# Screen candidate features in a case-control dataset: fit a random forest on
# the training groups, then rank the features by permutation importance
# computed on the held-out groups.

# Load the data.
df = pd.read_csv("nested_case_control_data.csv")

# Split into training and validation sets by pair_id, so that all rows of one
# matched group end up on the same side of the split.
splitter = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, val_idx = next(splitter.split(df, groups=df["pair_id"]))
train_df, val_df = df.iloc[train_idx], df.iloc[val_idx]

# Candidate features: every column except the group id, the outcome and the
# columns listed in matched_vars.
matched_vars = ["age", "sex"]
candidate_features = df.columns.difference(["pair_id", "status"] + matched_vars)
X_train = train_df[candidate_features]
y_train = train_df["status"]

# Train the model.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate feature importance on the validation set.
# NOTE(review): with `scoring` left unset, permutation_importance uses the
# estimator's own `score` method, which is mean accuracy for a
# RandomForestClassifier. An importance of 0 means that shuffling the column
# did not change that score on average. If every feature reports 0, first
# check the model's baseline validation score and whether its predictions
# vary at all before concluding that the features are uninformative.
X_val = val_df[candidate_features]
y_val = val_df["status"]
result = permutation_importance(model, X_val, y_val, n_repeats=10, random_state=42)

# Print the features from most to least important.
mean_importance = result.importances_mean
sorted_idx = np.argsort(mean_importance)[::-1]
print("特征重要性排序(验证集):")
for idx in sorted_idx:
    # The loop body must be indented; without it the script fails with an
    # IndentationError before any line executes.
    print(f"{X_val.columns[idx]}: {mean_importance[idx]:.3f}")