python基于EEMD-LSTM的光伏功率预测结果误差很大的问题:
采用常见的数据分解算法+深度学习模型的方式进行光伏功率预测,先对光伏历史功率数据进行EEMD分解,分解结果如下图1所示:

图1
对每一个IMF分量分别建立LSTM模型进行预测,然后将各子序列的预测结果相叠加,得到最终的预测结果,预测值与实际值的曲线如下图2所示:

图2
可以看出,预测值的发展趋势和实际值基本吻合,但普遍比实际值高一些,也就是说,预测结果曲线在实际值曲线上方,这是怎么回事,该怎么解决?
代码如下:
# -- Data loading --------------------------------------------------------
# Load the PV (photovoltaic) power dataset and extract the actual-power
# series. (Removed a dead bare `df` expression left over from a notebook.)
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error

df = pd.read_excel(r"C:/Users/ASUS/Desktop/光伏功率数据/国能日新光伏功率数据/测试.xlsx")
# NOTE(review): filling NaNs with 0 is reasonable for night-time PV output,
# but verify the gaps are not daytime sensor dropouts before zero-filling.
df = df.fillna(0)
signal = df["实际功率"].values  # "actual power" column as a 1-D numpy array
# -- EEMD decomposition --------------------------------------------------
from PyEMD import EEMD
import numpy as np
import matplotlib.pyplot as plt

# Decompose the power signal into intrinsic mode functions (IMFs) plus a
# residue trend. The explicit eemd() call performs the decomposition; the
# components are then retrieved from the object.
eemd = EEMD()
eemd.eemd(signal)
imfs, res = eemd.get_imfs_and_residue()

# Visualize each IMF and the residue in one column of subplots.
n_imfs = imfs.shape[0]
plt.figure(figsize=(12, 20))
for i in range(n_imfs):
    plt.subplot(n_imfs + 1, 1, i + 1)
    plt.plot(imfs[i, :])
    plt.title(f'IMF {i+1}')
plt.subplot(n_imfs + 1, 1, n_imfs + 1)
plt.plot(res)
plt.title('Residue')
plt.tight_layout()
plt.show()
# -- Sub-series table + baseline sliding-window split --------------------
# Each column of sig_df is one sub-series that gets its own LSTM model.
# BUG FIX: the residue column was commented out, so the sum of the
# per-component predictions could NOT reconstruct the original signal.
# The residue carries the slow trend / DC level of the power series;
# omitting it (or double-counting it) produces exactly the systematic
# vertical offset between the predicted and actual curves described above.
sig_df = pd.DataFrame(imfs.T)
sig_df[imfs.shape[0]] = res  # append the residue as the last column

seq_length = 7  # look-back window length (past samples per prediction)
x = df[["实际功率"]].values
y = df[["实际功率"]].values  # label series: the next actual-power value

# Sliding window over the raw signal: seq_length past values -> next value.
# (Removed the per-window debug print that flooded stdout.)
dataX = []
dataY = []
for i in range(0, len(y) - seq_length):
    dataX.append(x[i:i + seq_length])
    dataY.append(y[i + seq_length])

# Chronological 80/20 split — no shuffling for time-series data.
train_size = int(len(dataY) * 0.8)
trainX, testX = np.array(dataX[0:train_size]), np.array(dataX[train_size:len(dataX)])
trainY, testY = np.array(dataY[0:train_size]), np.array(dataY[train_size:len(dataY)])
# Flatten each (seq_length, 1) window into a single feature row.
na, nb, nc = trainX.shape
trainX = trainX.reshape(na, nb * nc)
na, nb, nc = testX.shape
testX = testX.reshape(na, nb * nc)
def get_train_test(x, y, seq_length=7):
    """Build a chronological 80/20 train/test split of sliding windows.

    Parameters
    ----------
    x : ndarray of shape (n, n_features)
        Input series; each sample is the previous ``seq_length`` rows.
    y : ndarray of shape (n, n_features)
        Target series; each label is the row right after its window.
    seq_length : int, default 7
        Look-back window length. Previously read from a module-level
        global; now an explicit parameter with the same default, so
        existing ``get_train_test(x, y)`` calls behave identically.

    Returns
    -------
    trainX, trainY, testX, testY : ndarray
        First 80% of windows for training, remainder for testing.
        The X arrays are flattened to 2-D
        (n_samples, seq_length * n_features).
    """
    data_x, data_y = [], []
    for i in range(len(y) - seq_length):
        data_x.append(x[i:i + seq_length])   # window of past values
        data_y.append(y[i + seq_length])     # next value is the label
    # Chronological split — never shuffle time-series windows.
    train_size = int(len(data_y) * 0.8)
    trainX = np.array(data_x[:train_size])
    testX = np.array(data_x[train_size:])
    trainY = np.array(data_y[:train_size])
    testY = np.array(data_y[train_size:])
    # Flatten each (seq_length, n_features) window into one feature row.
    na, nb, nc = trainX.shape
    trainX = trainX.reshape(na, nb * nc)
    na, nb, nc = testX.shape
    testX = testX.reshape(na, nb * nc)
    return trainX, trainY, testX, testY
# -- One LSTM per sub-series, then recombine ------------------------------
from keras.models import Sequential
from keras.layers import LSTM, Dense

all_pred = []
for col in sig_df.columns:
    x = sig_df[[col]].values
    y = sig_df[[col]].values  # each sub-series predicts its own next value
    sig_trainX, sig_trainY, sig_testX, sig_testY = get_train_test(x, y)
    # Reshape to the (samples, timesteps, features) layout the LSTM expects.
    # (Was indexed off sig_trainY.shape[0]; use the X array's own length.)
    sig_trainX = np.reshape(sig_trainX, (sig_trainX.shape[0], seq_length, 1))
    # BUG FIX: the test set must be reshaped the same way — the original
    # passed 2-D (samples, seq_length) data to model.predict on a model
    # built with input_shape=(seq_length, 1).
    sig_testX = np.reshape(sig_testX, (sig_testX.shape[0], seq_length, 1))
    # Build and train a small single-layer LSTM for this component.
    model = Sequential()
    model.add(LSTM(units=63, input_shape=(seq_length, 1)))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(sig_trainX, sig_trainY, epochs=10, batch_size=36, verbose=2)
    all_pred.append(model.predict(sig_testX))

# The full-signal forecast is the sum of all component forecasts
# (IMFs + residue reconstruct the original signal).
all_pred_final = np.array(all_pred).sum(axis=0)

# Evaluate against the hold-out labels built from the raw signal.
mse = mean_squared_error(testY, all_pred_final)
rmse = mse ** 0.5
mae = mean_absolute_error(testY, all_pred_final)
mape = mean_absolute_percentage_error(testY, all_pred_final)
print('MSE: %.5f' % mse)
print('RMSE: %.5f' % rmse)
print('MAPE: %.5f' %mape)
print('MAE: %.5f' % mae)
# Configure matplotlib so Chinese legend labels and the minus sign render.
plt.rcParams['font.sans-serif'] = ['SimHei']  # SimHei: a CJK-capable font
plt.rcParams['axes.unicode_minus'] = False
# Overlay the actual and predicted series on one figure.
plt.figure(figsize=(10, 5))
for series, kwargs in ((testY, {'label': '实际值'}),
                       (all_pred_final, {'color': 'red', 'label': '预测值'})):
    plt.plot(series, **kwargs)
plt.legend()
plt.show()