问题遇到的现象和发生背景
我用LSTM对一个很“平滑有规律”的时间序列做预测,得到的结果却非常差。我怀疑是输出的顺序被打乱了,但是我在划分数据集(train_test_split)时已经把shuffle参数设置为False。
问题相关代码,请勿粘贴截图
#数据划分设计
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.preprocessing import MinMaxScaler
import time
from sklearn.model_selection import train_test_split
from keras.layers import Input, Dense, LSTM
from keras.models import Model
from keras.layers import *
from keras.models import *
from keras.optimizers import *
# Echo the raw input so its contents and shape can be checked by eye.
# `df` is expected to be loaded earlier in the notebook (not shown here).
print('df', df, df.shape, '\n')

# Seed TensorFlow so repeated runs are comparable; the same seed is also
# passed to train_test_split further down.
random_seed = 42
tf.random.set_seed(random_seed)

# Windowing / split configuration.
TIME_STEPS = 16             # number of consecutive rows in one input window
train_data_rate = 0.8       # leading fraction of rows assigned to training
valid_data_rate = 0.2       # share of training windows held out for validation
predict_day = 0             # extra forecast offset applied when building windows
shuffle_train_data = False  # whether to shuffle the training data when splitting

# Chronological split: the first train_len rows train, the remainder tests.
data_len = df.shape[0]
train_len = int(data_len * train_data_rate)
test_len = data_len - train_len
print(data_len, train_len, test_len, '\n')
# --- Training / validation windows -----------------------------------------
# Input features: columns 14 onward of the first train_len rows.
# NOTE(review): the tuple index assumes `df` is a NumPy array; a pandas
# DataFrame would need df.iloc[:train_len, 14:] -- confirm against the loader.
All_Train = df[:train_len, 14:]
print('All_Train', '\n', All_Train, All_Train.shape, '\n')

# The scaler is fitted on the training rows only.
scaler = MinMaxScaler(feature_range=(0, 1))
All_Train_Scaler = scaler.fit_transform(All_Train)
print('All_Train_Scaler', '\n', All_Train_Scaler, All_Train_Scaler.shape, '\n')
print(scaler.data_max_)

# Targets come from SF_RE_T, which must be defined earlier (not shown here).
# NOTE(review): the targets are used exactly as stored in SF_RE_T while the
# inputs are min-max scaled -- confirm SF_RE_T is already on a suitable scale.
SF_train = SF_RE_T[:train_len]
print('SF_train', '\n', SF_train, SF_train.shape, '\n')
MF_train = All_Train_Scaler
print('MF_train', '\n', MF_train, MF_train.shape, '\n')

# Sliding windows: rows [i, i + TIME_STEPS) are the input and the row
# `predict_day` steps after the window end is the label. The label index
# includes predict_day so it stays aligned with the loop bound and with how
# the test labels are sliced; with predict_day == 0 this equals i + TIME_STEPS.
n_train_windows = train_len - TIME_STEPS - predict_day
MF_train_3d = [MF_train[i:i + TIME_STEPS, :] for i in range(n_train_windows)]
SF_train_3d = [
    SF_train[i + TIME_STEPS + predict_day, :] for i in range(n_train_windows)
]
train_x = np.array(MF_train_3d)
train_y = np.array(SF_train_3d)
print('train_x', '\n', '\n', train_x.shape, '\n')
print('train_y', '\n', train_y, train_y.shape)

# Split into training and validation sets. shuffle_train_data controls
# shuffling; when it is False the split keeps the original order and the
# trailing valid_data_rate share of the windows becomes the validation set.
train_x, valid_x, train_y, valid_y = train_test_split(
    train_x,
    train_y,
    test_size=valid_data_rate,
    random_state=random_seed,
    shuffle=shuffle_train_data,
)

# Plot the validation targets to confirm by eye that their order is intact.
plt.figure(figsize=(20, 6))
plt.plot(valid_y)
print('valid_x', valid_x, valid_x.shape, valid_x[-1])
# --- Test windows -----------------------------------------------------------
# Input features: columns 14 onward of the rows after the training span.
# NOTE(review): the tuple index assumes `df` is a NumPy array -- confirm.
All_Test = df[train_len:, 14:]
print('All_Test', '\n', All_Test, '\n', All_Test.shape, '\n')

# Reuse the already-fitted scaler (transform only, no re-fit).
All_Test_Scaler = scaler.transform(All_Test)
print('All_Test_Scaler', '\n', All_Test_Scaler, '\n', All_Test_Scaler.shape, '\n')
MF_test = All_Test_Scaler
print('MF_test', '\n', MF_test, MF_test.shape, '\n')

# Sliding windows over the test rows: window i covers rows [i, i + TIME_STEPS).
n_test_windows = test_len - TIME_STEPS - predict_day
MF_test_3d = [MF_test[i:i + TIME_STEPS, :] for i in range(n_test_windows)]
test_x = np.array(MF_test_3d)

# Labels start TIME_STEPS + predict_day rows into the test span, so label k
# belongs to window k. Only column 0 of SF_RE_T is taken, then reshaped into
# a single-column 2-D array.
# NOTE(review): the training labels keep every column of SF_RE_T; confirm
# SF_RE_T has a single column so both label sets have the same width.
test_y = SF_RE_T[train_len + TIME_STEPS + predict_day:, 0]
test_y = test_y.reshape(-1, 1)
print('test_x', '\n', test_x.shape, '\n')
print('test_y', '\n', test_y, test_y.shape)

# Conventional aliases for the arrays built so far.
X_train = train_x
y_train = train_y
X_test = test_x
y_test = test_y
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
print(X_train.shape, y_train.shape, valid_x.shape, valid_y.shape, X_test.shape, y_test.shape)
print(X_train.shape[0])
运行结果及报错内容
整个时间序列
剧烈波动的预测值
我的解答思路和尝试过的方法
1. 划分时间序列数据时,我特地把 train_test_split 的 shuffle 参数设置为 False,但预测结果仍然很差。