用随机森林做共享单车需求预测时,如何寻找最优的超参数?我写了一个循环,对不同的决策树数量分别训练模型,并把得到的 r2 可视化,但每次运行画出来的折线都不一样。
```python
import csv
import numpy as np
import sklearn.utils as su
import sklearn.ensemble as se
import sklearn.metrics as sm
import matplotlib.pyplot as plt
# Print NumPy arrays in full instead of summarising long ones.
np.set_printoptions(threshold=np.inf)
# Use a font that contains Chinese glyphs, and keep the minus sign renderable.
plt.rcParams.update({
    'font.sans-serif': 'SimHei',
    'axes.unicode_minus': False,
})
# Train and predict on the per-day data set.
# Read every row of the CSV file (the header row included).
x, y = [], []
# newline="" is what the csv module asks for, so that line endings inside
# quoted fields are handled by the reader rather than by the file object.
with open("bike_day.csv", "r", newline="") as f:
    for row in csv.reader(f):
        features = row[2:13]
        # Strip dashes from the first selected column.
        # NOTE(review): presumably meant to turn a date like 2011-01-01 into
        # plain digits; confirm that column 2 really is the date column.
        features[0] = features[0].replace('-', '')
        x.append(features)
        y.append(row[-3:])
# Names of the selected feature columns (taken from the header row).
feature_names = np.array(x[0])
# Drop the header row and convert the remaining values to float.
x = np.array(x[1:], dtype=float)
y = np.array(y[1:], dtype=float)
# Shuffle the samples so that row order does not influence the split;
# the fixed seed makes the shuffle repeatable.
# NOTE(review): y holds the last three CSV columns, so y[:, -3] selects the
# first of them; confirm this is the intended prediction target.
x, y = su.shuffle(x, y[:, -3], random_state=7)
# Hold out the last 20% of the (already shuffled) samples for testing.
# The split does not depend on the number of trees, so it is done once
# instead of being recomputed on every iteration.
train_size = int(len(x) * 0.8)
train_x, test_x = x[:train_size], x[train_size:]  # model inputs
train_y, test_y = y[:train_size], y[train_size:]  # model targets

data = []  # R2 on the test set, one entry per value in `a`
a = [50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 1000]  # candidate n_estimators
for i in a:
    # A random forest draws bootstrap samples and feature subsets at random.
    # Without a fixed random_state every run builds different trees, so the
    # R2 curve changes from run to run. Fixing the seed makes it reproducible.
    model = se.RandomForestRegressor(
        max_depth=10,  # cap the tree depth to limit over-fitting
        n_estimators=i,
        min_samples_split=2,
        random_state=7,
    )
    model.fit(train_x, train_y)
    pred_y = model.predict(test_x)  # predict on the held-out test set
    # Feature importances of the current model.
    fi = model.feature_importances_
    # Compute, print and record the R2 score on the test set.
    r2 = sm.r2_score(test_y, pred_y)
    print('r2:', r2)
    data.append(r2)
# Plot the R2 score obtained for each candidate number of trees.
fig, ax = plt.subplots()
ax.set_title('决策树数量对r2的影响')
ax.set_ylabel('r2')
# Evenly spaced tick positions, labelled with the tree counts from `a`.
tick_positions = list(range(len(a)))
ax.set_xticks(tick_positions)
ax.set_xticklabels(a)
ax.plot(data, 'b-')
fig.tight_layout()
plt.show()
```