import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
# 读取含有热误差数据的CSV文件
file1 = open('data1.csv',encoding='utf-8')
df1 = pd.read_csv(file1)
file2 = open('data.csv',encoding='utf-8')
df2 = pd.read_csv(file2)
# 对数据进行预处理
x = df1.iloc[:, :-1] # 输入变量
y = df1.iloc[:, -1] # 输出的热误差值
z = df2.iloc[:, :-1] # 输入变量
t = df2.iloc[:, -1] # 输出的热误差值
X=x+z
Y=y+t
X_scaled = preprocessing.scale(X) # 对输入变量进行标准化,避免某些维度权值过大或过小
# 划分数据集
X_train, X_test, Y_train, Y_test =x,z,y,t
# 创建决策树模型
model = DecisionTreeRegressor(max_depth=7)
#训练模型a
model.fit(X_train, Y_train)
param_grid = {
'max_depth': [6,7,8],
'min_samples_split': [2, 4, 6],
'min_samples_leaf': [1, 2, 4],
}
grid_search = GridSearchCV(DecisionTreeRegressor(), param_grid, cv=5)
grid_search.fit(X_train, Y_train)
best_model = grid_search.best_estimator_
# 预测结果
Y_pred = model.predict(X_test)
print(Y_pred)
# 评估模型
mse = mean_squared_error(Y_test, Y_pred)
print('MSE:', mse)
有没有兄弟帮忙看看下面的错误是什么原因该怎么改
ValueError: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- 5.31
- 5.5
- 5.69
- 5.81
- 5.81.1
- ...
Feature names seen at fit time, yet now missing:
- 5.75
- 6.13
- 6.19
- 6.25
- 6.38
- ...