我正在建立一个 ARIMA 模型进行水文预报。手头有 1959-2015 年的水文数据,想用它来预测 2011-2015 年的水文情况,代码如下。前面的代码运行都没有问题,只有最后一步进行预测时报错,实在找不到原因,望各位道友指点迷津。
```python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import itertools
# Console display preferences: numpy prints with precision 5 and suppress=True;
# pandas formats floats with five decimals and shows up to 100 rows / columns.
np.set_printoptions(suppress=True, precision=5)
for option_name, option_value in (
        ('display.float_format', lambda value: '%.5f' % value),
        ('display.max_columns', 100),
        ('display.max_rows', 100),
):
    pd.set_option(option_name, option_value)
# Global seaborn look for the figures below: 'ticks' style with the 'poster' context.
sns.set(context='poster', style='ticks')
# Load the dataset; the 'date' column is parsed as dates and used as the index.
filename_ts = r'C:\Users\Administrator\Desktop\work\ddy1.xls'
hydrologic = pd.read_excel(filename_ts, parse_dates=['date'], index_col='date')
n_sample = len(hydrologic)

# Positional split into a training set (the first ~95% of rows, plus one)
# and a test set (the remaining rows).
n_train = int(n_sample * 0.95) + 1
n_forecast = n_sample - n_train
ts_train = hydrologic['in_flow'].iloc[:n_train]
ts_test = hydrologic['in_flow'].iloc[n_train:]
# Draw the training portion of the in_flow series on a 12x8 figure.
fig_train, ax_train = plt.subplots(figsize=(12, 8))
ax_train.plot(ts_train)
plt.show()
# arima200 = sm.tsa.SARIMAX(ts_train, order=(2, 0, 0))
# model_results = arima200.fit()
# Differencing: store the first- and second-order differences of in_flow as
# extra columns, then plot the raw series and both differences side by side.
hydrologic['in_flow_diff_1'] = hydrologic['in_flow'].diff()
hydrologic['in_flow_diff_2'] = hydrologic['in_flow_diff_1'].diff()
fig, diff_axes = plt.subplots(1, 3, figsize=(20, 6))
for axis, column in zip(diff_axes, ('in_flow', 'in_flow_diff_1', 'in_flow_diff_2')):
    axis.plot(hydrologic[column])
plt.show()
# Model identification / order selection: ACF (top panel) and PACF (bottom
# panel) of the training series, 20 lags each.
fig = plt.figure(figsize=(12, 8))
for position, draw_correlogram in (
        (211, sm.graphics.tsa.plot_acf),
        (212, sm.graphics.tsa.plot_pacf),
):
    axis = fig.add_subplot(position)
    fig = draw_correlogram(ts_train, lags=20, ax=axis)
    axis.xaxis.set_ticks_position('bottom')
    # Re-run the layout after each panel, as the panels are added one at a time.
    fig.tight_layout()
plt.show()
# Grid search: fit ARIMA(p, d, q) for every order in the ranges below and
# compare the candidates by BIC.
p_min = 0
d_min = 0
q_min = 0
p_max = 5
d_max = 0
q_max = 5

# One row per AR order, one column per MA order. Cells start out as NaN and
# stay NaN for orders that are skipped or fail to fit.
results_bic = pd.DataFrame(
    index=['AR{}'.format(i) for i in range(p_min, p_max + 1)],
    columns=['MA{}'.format(i) for i in range(q_min, q_max + 1)],
)

for p, d, q in itertools.product(range(p_min, p_max + 1),
                                 range(d_min, d_max + 1),
                                 range(q_min, q_max + 1)):
    if p == 0 and d == 0 and q == 0:
        # The (0, 0, 0) order is not fitted; mark it explicitly as missing.
        results_bic.loc['AR{}'.format(p), 'MA{}'.format(q)] = np.nan
        continue
    try:
        model = sm.tsa.ARIMA(ts_train, order=(p, d, q))
        results = model.fit()
    except Exception as exc:
        # Best effort: a failed fit leaves its cell as NaN. Catching Exception
        # (not a bare except) keeps Ctrl-C working, and the message records
        # which order failed and why.
        print('ARIMA({}, {}, {}) failed: {}'.format(p, d, q, exc))
        continue
    results_bic.loc['AR{}'.format(p), 'MA{}'.format(q)] = results.bic

# The frame was filled cell by cell, so convert it to float before plotting.
results_bic = results_bic.astype(float)

# Heatmap of the BIC table; missing cells are masked out.
fig, ax = plt.subplots(figsize=(10, 8))
ax = sns.heatmap(results_bic,
                 mask=results_bic.isnull(),
                 ax=ax,
                 annot=True,
                 fmt='.2f',
                 )
ax.set_title('BIC')
plt.show()
# Let statsmodels pick the ARMA order (up to AR 8 / MA 8) by AIC and by BIC,
# then print the best order under each criterion.
# NOTE(review): newer statsmodels releases appear to spell the no-constant
# trend as 'n' rather than 'nc' - confirm against the installed version.
train_results = sm.tsa.arma_order_select_ic(
    ts_train,
    max_ar=8,
    max_ma=8,
    ic=['aic', 'bic'],
    trend='nc',
)
for criterion, best_order in (
        ('AIC', train_results.aic_min_order),
        ('BIC', train_results.bic_min_order),
):
    print(criterion, best_order)
# Fit ARIMA(3, 1, 2) on the training series and plot the ACF of its residuals
# (40 lags).
model = sm.tsa.ARIMA(ts_train, order=(3, 1, 2))
results = model.fit()
resid = results.resid

# Pass the axes explicitly: without ax=..., plot_acf draws on a figure of its
# own, leaving the 12x8 figure created here blank and its size unused.
fig, ax = plt.subplots(figsize=(12, 8))
fig = sm.graphics.tsa.plot_acf(resid.values.squeeze(), lags=40, ax=ax)
plt.show()
# Refit ARIMA(3, 1, 2) on the full in_flow history and predict 2011-01 through
# 2015-12.
# ARIMA models a single series. By this point `hydrologic` also carries the
# in_flow_diff_1 / in_flow_diff_2 columns (which start with NaN), so the model
# must be given the in_flow column rather than the whole DataFrame.
in_flow = hydrologic['in_flow']
model = sm.tsa.ARIMA(in_flow, order=(3, 1, 2))
results = model.fit()

# NOTE(review): with the legacy statsmodels ARIMA class and d=1, predict() is
# understood to return differences unless typ='levels' is passed - confirm
# against the installed statsmodels version.
predict_sunspots = results.predict(start='2011-01', end='2015-12', dynamic=False)
print(predict_sunspots)

# Compare the observed in_flow series (not the differenced columns) with the
# predictions on one set of axes.
fig, ax = plt.subplots(figsize=(12, 8))
ax = in_flow.plot(ax=ax)
predict_sunspots.plot(ax=ax)
plt.show()

# Print the next out-of-sample forecast; as a bare expression its value was
# discarded when the file runs as a script.
print(results.forecast()[0])