|
import pandas as pd
|
|
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
import statsmodels.api as sm
|
|
from statsmodels.tsa.stattools import adfuller
|
|
from statsmodels.tsa.arima_model import ARIMA
|
|
from pmdarima import auto_arima
|
|
|
|
# Load the data file.
# A raw string is used so the backslashes of the Windows path are taken
# literally; sequences such as "\D" and "\C" are invalid escapes in a normal
# string literal and trigger warnings on current Python versions.
dataset = pd.read_csv(r'E:\DaseIntro\COVID-19Analysis\COVID-19\covid-19-all.csv')
|
|
|
|
# Data preprocessing
|
|
def parse_ymd(s):
    """Normalize a ``year-month-day`` string to zero-padded ``YYYY-MM-DD``.

    Args:
        s: Text made of exactly three integer fields separated by ``-``,
            in year, month, day order (e.g. ``"2020-1-5"``).

    Returns:
        The same calendar date formatted with ``"%Y-%m-%d"``.

    Raises:
        ValueError: If ``s`` does not split into exactly three fields, a
            field is not an integer, or the fields do not form a valid date.
    """
    # Imported locally because the file's import section does not bring
    # ``datetime`` into scope.
    import datetime

    year_s, mon_s, day_s = s.split('-')
    return datetime.datetime(int(year_s), int(mon_s), int(day_s)).strftime("%Y-%m-%d")
|
|
# Replace missing values with 0, then parse the 'Date' column into datetimes.
dataset = dataset.fillna(0)
dataset['Date'] = pd.to_datetime(dataset['Date'])

# Sum the case counts for every (country, date) pair and turn the group keys
# back into ordinary columns.
case_columns = ['Country/Region', 'Confirmed', 'Recovered', 'Deaths', 'Date']
per_country_daily = dataset[case_columns].groupby(['Country/Region', 'Date']).sum()
dataset = per_country_daily.reset_index()

# Keep only the rows for China and label them with consecutive calendar days.
# The assignment requires the number of rows to equal the length of the range.
is_china = dataset['Country/Region'] == 'China'
CN = dataset[is_china]
CN.index = pd.Index(pd.date_range('2020-01-22', '2020-12-09', freq='1D'))
|
|
|
|
# Plot the series after first-order differencing.
fig1 = plt.figure()

# Top-left panel: day-over-day change of the confirmed count.
axcon = fig1.add_subplot(221)
axcon.set_title("Confirmed")
confirmedSeries = CN[['Confirmed']].fillna(0)
# Each value minus the value of the previous row; the first row becomes NaN.
confirmedSeries['Confirmed'] = confirmedSeries['Confirmed'].diff()
axcon.plot(confirmedSeries)
|
|
|
|
# Top-right panel: day-over-day change of the recovered count.
axrec = fig1.add_subplot(222)
axrec.set_title("Recovered")
recoveredSeries = CN[['Recovered']].fillna(0)
# Each value minus the value of the previous row; the first row becomes NaN.
recoveredSeries['Recovered'] = recoveredSeries['Recovered'].diff()
axrec.plot(recoveredSeries)
|
|
|
|
# Bottom-left panel: day-over-day change of the death count.
axdea = fig1.add_subplot(223)
axdea.set_title("Deaths")
deathsSeries = CN[['Deaths']].fillna(0)
# Each value minus the value of the previous row; the first row becomes NaN.
deathsSeries['Deaths'] = deathsSeries['Deaths'].diff()
axdea.plot(deathsSeries)

# Display the figure with the three differenced series.
plt.show()
|
|
|
|
# ADF (augmented Dickey-Fuller) test on each differenced series.
# The first row is skipped because differencing leaves it as NaN.
for differenced in (confirmedSeries, recoveredSeries, deathsSeries):
    print(adfuller(differenced.iloc[1:]))
|
|
|
|
# ARIMA models.
# Each model is fitted on an already-differenced series (first NaN row
# dropped) and then asked for predictions from 2020-01-23 through 2020-12-31.
# ``order`` is passed by keyword: in the current statsmodels ARIMA class the
# second positional parameter is ``exog``, so a positional tuple would be
# misread, while the keyword form is accepted by both old and new signatures.
modelConfirmed = sm.tsa.ARIMA(confirmedSeries.iloc[1:], order=(1, 0, 2)).fit()
confirmed_pre = modelConfirmed.predict(start="2020-01-23", end="2020-12-31", dynamic=False)

modelRecovered = sm.tsa.ARIMA(recoveredSeries.iloc[1:], order=(1, 0, 2)).fit()
recovered_pre = modelRecovered.predict(start="2020-01-23", end="2020-12-31", dynamic=False)

modelDeaths = sm.tsa.ARIMA(deathsSeries.iloc[1:], order=(1, 0, 1)).fit()
deaths_pre = modelDeaths.predict(start="2020-01-23", end="2020-12-31", dynamic=False)
|
|
|
|
# Undo the differencing to turn predicted daily changes back into totals.
def _undo_differencing(predicted_diff, observed):
    """Convert predicted changes into predicted cumulative totals.

    For every position that has an observed total, the result is that observed
    total plus the predicted change at the same position. Positions beyond the
    end of ``observed`` are built by adding each predicted change to the
    previous result. With no observed values the predictions are returned
    unchanged.

    Args:
        predicted_diff: Predicted changes, one per forecast date.
        observed: Observed totals; entry ``i`` is added to prediction ``i``.

    Returns:
        A float ``numpy.ndarray`` with the same length as ``predicted_diff``.
    """
    # Work on plain arrays: positional access is explicit and the prediction
    # object returned by the model is not modified in place.
    levels = np.array(predicted_diff, dtype=float)
    history = np.asarray(observed, dtype=float)
    n_known = min(len(history), len(levels))
    if n_known:
        levels[:n_known] += history[:n_known]
        # Starting from the last position anchored to an observation, a
        # running sum chains the remaining predicted changes onto it.
        levels[n_known - 1:] = np.cumsum(levels[n_known - 1:])
    return levels


# Dates covered by the predictions requested from the fitted models.
forecast_dates = pd.Index(pd.date_range('2020-01-23', '2020-12-31', freq='1D'))

confirmed_pre = pd.DataFrame(
    {'confirmed_pre': _undo_differencing(confirmed_pre, CN['Confirmed'])},
    index=forecast_dates,
)

# Each series is restored using its own length (the original loop bound for
# the recovered series referred to ``confirmed_pre``).
recovered_pre = pd.DataFrame(
    {'recovered_pre': _undo_differencing(recovered_pre, CN['Recovered'])},
    index=forecast_dates,
)

deaths_pre = pd.DataFrame(
    {'deaths_pre': _undo_differencing(deaths_pre, CN['Deaths'])},
    index=forecast_dates,
)
|
|
|
|
# Visualization: observed totals against the ARIMA-based reconstruction,
# one stacked panel per quantity.
fig2 = plt.figure()

axcon = fig2.add_subplot(311)
axcon.plot(CN['Confirmed'], label="confirmed")
axcon.plot(confirmed_pre, label="ARIMA")
axcon.legend()

axrec = fig2.add_subplot(312)
axrec.plot(CN['Recovered'], label="recovered")
axrec.plot(recovered_pre, label="ARIMA")
axrec.legend()

axdea = fig2.add_subplot(313)
axdea.plot(CN['Deaths'], label="deaths")
axdea.plot(deaths_pre, label="ARIMA")
# A legend is requested on every axes: a single ``plt.legend()`` call only
# affects the current (last created) axes, leaving the other labels unshown.
axdea.legend()

plt.show()
|