问题遇到的现象和发生背景
用60天的最高价、最低价、开盘价、收盘价作为特征,预测第61天的收盘价
训练集维度(1637, 60, 4) 标签维度(1637, 1)
验证集维度(239, 60, 4) 验证集标签维度(239, 1)
预测输入维度(241, 60, 4) 输出维度(241,60,1)
输出维度应该是(241,1),不知道为什么多了一维?
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dropout, Dense, SimpleRNN
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.python.keras.layers import LSTM
csv_data_path csv文件路径 str
count 总天数据减去count1作为训练集 int
predict_days 预测多少天数据 int
inputcount =60 count=300
def predict(predictDays, column_start, column_end, input_count, count ,csv_data_path, checkpoints_path, weights_path):
csv_data = pd.read_csv(csv_data_path) # 读取股票文件
csv_data = csv_data.iloc[::-1]
# 前闭后开 步长 前面的是之前的 现在的放到最后
if column_start == 3: #open high low close
slice_end = 0
elif column_start ==4:
slice_end = 1
elif column_start ==5:
slice_end = 2
elif column_start ==6:
slice_end = 3
training_set = csv_data.iloc[0:len(csv_data) - count, 3:7].values
#训练集
test_set = csv_data.iloc[-count:, 3:7].values
#测 试 集 最后300个作为测 试 集
# ---------------归一化-----------
sc = MinMaxScaler(feature_range=(0, 1))
# 定义归一化:归一化到(0,1)之间
TrainingSetScaled = sc.fit_transform(training_set)
# 求得训练集的最大值,最小值这些训练集固有的属性,并在训练集上进行归一化
test_set = sc.transform(test_set)
# 利用训练集的属性对测试集进行归一化
x_train = []
y_train = []
x_test = []
y_test = []
x_test_predicted = []
for i in range(input_count, len(TrainingSetScaled) - predictDays):
x_train.append(TrainingSetScaled[i - input_count:i, 0:4]) #数组切片
y_train.append(TrainingSetScaled[i:i + predictDays,slice_end])
# 对训练集进行打乱
np.random.seed(7)
np.random.shuffle(x_train)
np.random.seed(7)
np.random.shuffle(y_train)
tf.random.set_seed(7)
# 将训练集由list格式变为array格式
x_train, y_train = np.array(x_train), np.array(y_train)
print('111111')
print(x_train.shape)
print(y_train.shape)
for i in range(input_count, (len(test_set) - predictDays)):
x_test.append(test_set[i - input_count:i, 0:4])
y_test.append(test_set[i:(i + predictDays), slice_end])
for i in range(input_count, len(test_set) + 1):
x_test_predicted.append(test_set[i - input_count:i, 0:4])
x_test, y_test = np.array(x_test), np.array(y_test)
x_test_predicted = np.array(x_test_predicted)
print('22222')
print(x_test.shape)
print(y_test.shape)
print( x_test_predicted.shape)
print('22222222')
model = tf.keras.Sequential([
Dense(64,activation = 'relu',),
#Dropout(0.2),
Dense(64,activation = 'relu'),
Dense(5),
#Dropout(0.2),
Dense(predictDays)
])
model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=5e-5),#optimizers.RMSprop(learning_rate=2e-4),
loss='mean_squared_error', metrics=['mae','mse']) # 损失函数用均方误差
history = model.fit(x_train, y_train, batch_size=64, epochs=2, validation_data=(x_test, y_test), #验证集
validation_freq=1)
model.summary()
predicted_stock_price = model.predict(x_test_predicted)
print('333333333')#241,1
print(predicted_stock_price.shape)
sc1 = MinMaxScaler(feature_range=(0, 1))
TestSetIndexScaled = sc1.fit_transform(test_set_index)#RNN和LSTM
#TestSetIndexScaled = sc1.fit_transform(test_set_index)#BP
predicted_stock_price = sc1.inverse_transform(predicted_stock_price)
# 对真实数据还原---从(0,1)反归一化到原始范围
realY = sc1.inverse_transform(test_set[input_count:,slice_end:slice_end+1])
predicted_stock_price_predict = np.hstack((predicted_stock_price[:-1, 0], predicted_stock_price[-1, :]))
predicted_stock_price_predict_return = predicted_stock_price[-1, :]
x_days = csv_data.iloc[input_count - count:len(csv_data) + 1, 2].values # -240到最后 第2列 也就是把日期抽出来
return (predicted_stock_price_predict_return, csv_data_path, x_days, predictDays, realY,
predicted_stock_price_predict,history)