# 因为是时间序列预测,所以不能随机划分训练集,只能按时间先后划分。数据归一化并完成预测后,应该如何对预测结果进行反归一化?
import pandas as pd
import numpy as np
from sklearn import model_selection
from sklearn.linear_model import Lasso, LassoCV
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
# Load the CSV and index the rows by their parsed 'date' column.
df = pd.read_csv(r'D:\PycharmProjects\projects2\fuel_data_3m.csv')
df['date'] = pd.to_datetime(df['date'])
df = df.set_index(['date'], drop=True)
# A chronological split is only meaningful on time-ordered rows.
df = df.sort_index()
# Split by time rather than at random: everything up to and including
# split_date is training data, everything strictly after it is test data.
# Label slices such as df.loc[:d] and df.loc[d:] are BOTH inclusive of d,
# which would place the split_date row in train and test at the same time,
# so a boolean mask is used to keep the two sets disjoint.
split_date = pd.Timestamp('2010-03-29 22:30:00')
in_train = df.index <= split_date
train = df.loc[in_train]
test = df.loc[~in_train]
# Column labels applied to the scaled arrays. 'fuel' (the first column) is the
# regression target; the remaining columns are the predictors.
SCALED_COLUMNS = ['fuel', 'level1median', 'level2median', 'portPitch', 'portRudder', 'speedKnots',
                  'starboardPitch', 'starboardRudder', 'trackDegreeMagnetic', 'trackDegreeTrue',
                  'trim', 'trueHeading', 'WaterSpeed', 'windAngle', 'windSpeed']
# Min-max normalisation. The scaler is fitted on the training rows only and
# then re-used on the test rows, so no test-set statistics leak into training.
scaler = MinMaxScaler()
# fit_transform/transform return plain arrays; wrap them back into DataFrames,
# keeping the original row index so scaled rows stay aligned with their dates.
train_sc = pd.DataFrame(data=scaler.fit_transform(train), columns=SCALED_COLUMNS, index=train.index)
test_sc = pd.DataFrame(data=scaler.transform(test), columns=SCALED_COLUMNS, index=test.index)
# Every column except the first ('fuel') is a feature.
feature_train = train_sc.columns[1:]
feature_test = test_sc.columns[1:]
X_train = train_sc[feature_train]
y_train = train_sc.fuel
X_test = test_sc[feature_test]
y_test = test_sc.fuel
# Candidate regularisation strengths: 200 values log-spaced in [1e-8, 1e-2].
Lambdas = np.logspace(-8, -2, 200)
# Pick the regularisation strength by 10-fold cross-validation.
# NOTE: the former `normalize=True` argument is intentionally not passed. It
# was deprecated in scikit-learn 1.0 and removed in 1.2 (passing it raises
# TypeError there), and the inputs are already min-max scaled.
lasso_cv = LassoCV(alphas=Lambdas, cv=10, max_iter=10000)
lasso_cv.fit(X_train, y_train)
# Refit a plain Lasso model using the selected regularisation strength.
lasso = Lasso(alpha=lasso_cv.alpha_, max_iter=10000)
lasso.fit(X_train, y_train)
# Report the chosen alpha, then the intercept and per-feature coefficients.
print('最优参数:', lasso_cv.alpha_)
print(pd.Series(index=['Intercept']+X_train.columns.tolist(),
                data=[lasso.intercept_]+lasso.coef_.tolist()))
# Model evaluation: predict the (scaled) target for the test rows.
lasso_pred = lasso.predict(X_test)
# Inverse normalisation. The scaler was fitted on all columns at once, so
# scaler.inverse_transform() expects the full column set and cannot be fed
# the 1-D prediction directly. MinMaxScaler applies, per column,
#     scaled = original * scale_ + min_
# so the target column alone is restored with
#     original = (scaled - min_[k]) / scale_[k]
# where k is the position of 'fuel' among the scaled columns.
fuel_col = train_sc.columns.get_loc('fuel')
lasso_pred_inv = (lasso_pred - scaler.min_[fuel_col]) / scaler.scale_[fuel_col]
y_test_inv = (y_test.to_numpy() - scaler.min_[fuel_col]) / scaler.scale_[fuel_col]
# Root-mean-squared error expressed on the original (un-scaled) target scale.
rmse = np.sqrt(mean_squared_error(y_test_inv, lasso_pred_inv))
print('RMSE (original scale):', rmse)