import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.stattools import adfuller
# Load the training and test sets and report their structure.
train = pd.read_csv('./train.csv', encoding='utf-8')
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() only added a stray "None" line.
train.info()
print(train.shape)
test = pd.read_csv('./test.csv', encoding='utf-8')
test.info()
print(test.shape)

# Index the training rows by timestamp so they can be resampled by day.
train.index = pd.to_datetime(train['datetime'])
# Daily mean of 'count'. resample('D') yields one row per calendar day in the
# covered range, so a day with no rows in train.csv comes out as NaN.
# NOTE(review): check how many NaN days exist before modelling.
data = train['count'].resample('D').mean()
print(data)
# Plot the daily mean series.
data.plot(figsize=(12, 6))
plt.legend(bbox_to_anchor=(1.25, 0.5))
plt.title('count')
# despine has to be called; the bare attribute reference was a no-op.
sns.despine()
# First-order differencing to deal with non-stationarity, stored as a new
# series; dropna() removes the missing values that result.
data_diff = data.diff().dropna()
fig_diff, ax_diff = plt.subplots(figsize=(12, 5))
ax_diff.plot(data_diff)
ax_diff.set_title('diff1')
plt.show()
# ACF and PACF of the differenced series, used to judge candidate orders.
# Figure.show() does not run a GUI event loop, so in a plain script the window
# may flash or never appear; pyplot.show() is used to display each figure.
acf = plot_acf(data_diff, lags=20)
plt.title("ACF")
plt.show()
pacf = plot_pacf(data_diff, lags=20)
plt.title("PACF")
plt.show()
# ADF unit-root test on the differenced series.
x = np.array(data_diff)
adftest = adfuller(x, autolag='AIC')
print(adftest)
# The first two entries of the result are the test statistic and its p-value.
print('ADF statistic:', adftest[0])
print('ADF p-value:', adftest[1])
# Pure-randomness (white-noise) check: Ljung-Box test at lag 1 only.
# NOTE(review): despite its name, p_value holds the whole result returned by
# acorr_ljungbox, not just a p-value.
p_value = acorr_ljungbox(data_diff, lags=1)
print(p_value)
# If the p-value is below the 0.05 significance level, the null hypothesis can be rejected at the 95% confidence level.
# Information-criterion grid search over ARMA(p, q) orders.
# The search runs on the differenced series: the ARIMA model fitted below uses
# d=1, so p and q must be chosen for the series after differencing, and
# data_diff has had its missing values dropped.
train_results = sm.tsa.arma_order_select_ic(
    data_diff, ic=['aic', 'bic'], trend='n', max_ar=8, max_ma=8
)
print('AIC', train_results.aic_min_order)
print('BIC', train_results.bic_min_order)
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf #画图定阶
from statsmodels.tsa.arima.model import ARIMA#模型
from statsmodels.stats.stattools import durbin_watson #DW检验
from statsmodels.graphics.api import qqplot #qq图
# Fit ARIMA(p=4, d=1, q=8) on the undifferenced daily series; with d=1 the
# model applies the first difference itself.
# NOTE(review): confirm that (4, 8) matches the orders reported by the AIC/BIC
# search; orders this high should be checked for overfitting.
model = ARIMA(data, order=(4, 1, 8))
result = model.fit()
print(result.summary())
# statsmodels.tsa.arima.model.ARIMA predicts on the original (levels) scale,
# and its predict() has no `typ` parameter, so that argument is not passed.
pred = result.predict('20110105', '20121230', dynamic=False)
print(pred)
plt.figure(figsize=(12, 8))
plt.xticks(rotation=45)
# Label both lines so the fitted and observed series can be told apart.
plt.plot(pred, label='predicted')
plt.plot(data, label='observed')
plt.legend()
# Residual diagnostics
result.plot_diagnostics(figsize=(16, 12))
# Without show() these figures are not displayed when run as a script.
plt.show()
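# Question: could someone review this model and point out any problems? This is my first time doing order selection and these diagnostic tests.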