@ -1,125 +0,0 @@ | |||
import pandas as pd | |||
import datetime as DT | |||
import numpy as np | |||
import matplotlib.pyplot as plt | |||
import matplotlib.dates as mdates | |||
import math | |||
import statsmodels.api as sm | |||
from sklearn.metrics import mean_squared_error | |||
#打开数据文件 | |||
dataset = pd.read_csv('E:\DaseIntro\COVID-19Analysis\COVID-19\covid-19-all.csv') | |||
#数据预处理 | |||
def parse_ymd(s): | |||
year_s, mon_s, day_s = s.split('-') | |||
return datetime.datetime(int(year_s), int(mon_s), int(day_s)).strftime("%Y-%m-%d") | |||
dataset = dataset.fillna(0) | |||
dataset['Date'] = pd.to_datetime(dataset['Date']) | |||
dataset = dataset[['Country/Region','Confirmed','Recovered','Deaths','Date']].groupby(['Country/Region','Date']).sum().reset_index() | |||
#取出中、美的数据 | |||
CN = dataset[dataset['Country/Region'] == 'China'] | |||
CN.index = pd.Index(pd.date_range('2020-01-22','2020-12-09',freq = '1D')) | |||
US = dataset[dataset['Country/Region'] == 'US'] | |||
US.index = pd.Index(pd.date_range('2020-01-22','2020-12-09',freq = '1D')) | |||
print(CN) | |||
print(US) | |||
#划分训练集、测试集 | |||
trainCN = CN[CN['Date'] < '2020-11-01 '] | |||
testCN = CN[CN['Date'] >= '2020-11-01'] | |||
trainUS = US[US['Date'] < '2020-11-01 '] | |||
testUS = US[US['Date'] >= '2020-11-01'] | |||
#自回归移动平均模型(ARIMA) | |||
yCNARIMA = testCN.copy() | |||
yUSARIMA = testUS.copy() | |||
#训练模型 | |||
fitCNconfirmed = sm.tsa.statespace.SARIMAX(trainCN['Confirmed'],trend='c').fit() | |||
fitCNrecovered = sm.tsa.statespace.SARIMAX(trainCN['Recovered'],trend='c').fit() | |||
fitCNdeaths = sm.tsa.statespace.SARIMAX(trainCN['Deaths'],trend='ct').fit() | |||
fitUSconfirmed = sm.tsa.statespace.SARIMAX(trainUS['Confirmed'],trend='ct').fit() | |||
fitUSrecovered = sm.tsa.statespace.SARIMAX(trainUS['Recovered'],trend='ct').fit() | |||
fitUSdeaths = sm.tsa.statespace.SARIMAX(trainUS['Deaths'],trend='ct').fit() | |||
#测试 | |||
yCNARIMA['SARIMAconfirmed'] = fitCNconfirmed.predict(start="2020-11-01", end="2020-12-09", dynamic=True) | |||
yCNARIMA['SARIMArecovered'] = fitCNrecovered.predict(start="2020-11-01", end="2020-12-09", dynamic=True) | |||
yCNARIMA['SARIMAdeaths'] = fitCNdeaths.predict(start="2020-11-01", end="2020-12-09", dynamic=True) | |||
yUSARIMA['SARIMAconfirmed'] = fitUSconfirmed.predict(start="2020-11-01", end="2020-12-09") | |||
yUSARIMA['SARIMArecovered'] = fitUSrecovered.predict(start="2020-11-01", end="2020-12-09", dynamic=True) | |||
yUSARIMA['SARIMAdeaths'] = fitUSdeaths.predict(start="2020-11-01", end="2020-12-09", dynamic=True) | |||
#预测将来七天 | |||
forecastCNARIMA = pd.DataFrame({'Date':['2020-12-10','2020-12-11','2020-12-12','2020-12-13','2020-12-14','2020-12-15','2020-12-16']}) | |||
forecastUSARIMA = pd.DataFrame({'Date':['2020-12-10','2020-12-11','2020-12-12','2020-12-13','2020-12-14','2020-12-15','2020-12-16']}) | |||
forecastCNARIMA['Date'] = pd.to_datetime(forecastCNARIMA['Date'], format='%Y/%m/%d').values.astype('datetime64[h]') | |||
forecastCNARIMA['confirmedPred'] = fitCNconfirmed.predict(start="2020-12-10", end="2020-12-16", dynamic=True) | |||
forecastCNARIMA['recoveredPred'] = fitCNrecovered.predict(start="2020-12-10", end="2020-12-16", dynamic=True) | |||
forecastCNARIMA['deathsPred'] = fitCNdeaths.predict(start="2020-12-10", end="2020-12-16", dynamic=True) | |||
forecastUSARIMA['Date'] = pd.to_datetime(forecastUSARIMA['Date'], format='%Y/%m/%d').values.astype('datetime64[h]') | |||
forecastUSARIMA['confirmedPred'] = fitUSconfirmed.predict(start="2020-12-10", end="2020-12-16", dynamic=True) | |||
forecastUSARIMA['recoveredPred'] = fitUSrecovered.predict(start="2020-12-10", end="2020-12-16", dynamic=True) | |||
forecastUSARIMA['deathsPred'] = fitUSdeaths.predict(start="2020-12-10", end="2020-12-16", dynamic=False) | |||
#RMSE | |||
rmseCNARIMACon = pow(mean_squared_error(np.asarray(testCN['Confirmed']), np.asarray(yCNARIMA['SARIMAconfirmed'])),0.05) | |||
rmseCNARIMARec = pow(mean_squared_error(np.asarray(testCN['Recovered']), np.asarray(yCNARIMA['SARIMArecovered'])),0.05) | |||
rmseCNARIMADea = pow(mean_squared_error(np.asarray(testCN['Deaths']), np.asarray(yCNARIMA['SARIMAdeaths'])),0.5) | |||
rmseUSARIMACon = pow(mean_squared_error(np.asarray(testUS['Confirmed']), np.asarray(yUSARIMA['SARIMAconfirmed'])),0.05) | |||
rmseUSARIMARec = pow(mean_squared_error(np.asarray(testUS['Recovered']), np.asarray(yUSARIMA['SARIMArecovered'])),0.05) | |||
rmseUSARIMADea = pow(mean_squared_error(np.asarray(testUS['Deaths']), np.asarray(yUSARIMA['SARIMAdeaths'])),0.05) | |||
#可视化 | |||
fig = plt.figure() | |||
axCNARIMA = fig.add_subplot(211) | |||
axCNARIMA.set_title("ARIMA (CN)",verticalalignment="bottom",fontsize="13") | |||
CN.index = pd.Index(pd.date_range('2020-01-22','2020-12-09',freq = '1D')) | |||
yCNARIMA.index = pd.Index(pd.date_range('2020-11-01','2020-12-09',freq = '1D')) | |||
forecastCNARIMA.index = pd.Index(pd.date_range('2020-12-10','2020-12-16',freq = '1D')) | |||
axCNARIMA.plot(CN['Confirmed'],label="confirmed",linestyle=":") | |||
axCNARIMA.plot(CN['Recovered'],label="recovered",linestyle=":") | |||
axCNARIMA.plot(CN['Deaths'],label="deaths",linestyle=":") | |||
axCNARIMA.plot(yCNARIMA['SARIMAconfirmed'],label="confirmed test") | |||
axCNARIMA.plot(yCNARIMA['SARIMArecovered'],label="recovered test") | |||
axCNARIMA.plot(yCNARIMA['SARIMAdeaths'],label="deaths test") | |||
axCNARIMA.plot(forecastCNARIMA['confirmedPred'],label="confirmed prediction") | |||
axCNARIMA.plot(forecastCNARIMA['recoveredPred'],label="recovered prediction") | |||
axCNARIMA.plot(forecastCNARIMA['deathsPred'],label="deaths prediction") | |||
axUSARIMA = fig.add_subplot(212) | |||
axUSARIMA.set_title("ARIMA (US)",verticalalignment="bottom",fontsize="13") | |||
US.index = pd.Index(pd.date_range('2020-01-22','2020-12-09',freq = '1D')) | |||
yUSARIMA.index = pd.Index(pd.date_range('2020-11-01','2020-12-09',freq = '1D')) | |||
forecastUSARIMA.index = pd.Index(pd.date_range('2020-12-10','2020-12-16',freq = '1D')) | |||
axUSARIMA.plot(US['Confirmed'],label="confirmed",linestyle=":") | |||
axUSARIMA.plot(US['Recovered'],label="recovered",linestyle=":") | |||
axUSARIMA.plot(US['Deaths'],label="deaths",linestyle=":") | |||
axUSARIMA.plot(yUSARIMA['SARIMAconfirmed'],label="confirmed test") | |||
axUSARIMA.plot(yUSARIMA['SARIMArecovered'],label="recovered test") | |||
axUSARIMA.plot(yUSARIMA['SARIMAdeaths'],label="deaths test") | |||
axUSARIMA.plot(forecastUSARIMA['confirmedPred'],label="confirmed prediction") | |||
axUSARIMA.plot(forecastUSARIMA['recoveredPred'],label="recovered prediction") | |||
axUSARIMA.plot(forecastUSARIMA['deathsPred'],label="deaths prediction") | |||
plt.tight_layout() | |||
plt.gcf().autofmt_xdate() | |||
plt.legend(labelspacing=0.05) | |||
plt.show() |
@ -0,0 +1,96 @@ | |||
import pandas as pd | |||
import numpy as np | |||
import matplotlib.pyplot as plt | |||
import statsmodels.api as sm | |||
from statsmodels.tsa.stattools import adfuller | |||
from statsmodels.tsa.arima_model import ARIMA | |||
from pmdarima import auto_arima | |||
#打开数据文件 | |||
dataset = pd.read_csv('E:\DaseIntro\COVID-19Analysis\COVID-19\covid-19-all.csv') | |||
#数据预处理 | |||
def parse_ymd(s): | |||
year_s, mon_s, day_s = s.split('-') | |||
return datetime.datetime(int(year_s), int(mon_s), int(day_s)).strftime("%Y-%m-%d") | |||
dataset = dataset.fillna(0) | |||
dataset['Date'] = pd.to_datetime(dataset['Date']) | |||
dataset = dataset[['Country/Region','Confirmed','Recovered','Deaths','Date']].groupby(['Country/Region','Date']).sum().reset_index() | |||
CN = dataset[dataset['Country/Region'] == 'China'] | |||
CN.index = pd.Index(pd.date_range('2020-01-22','2020-12-09',freq = '1D')) | |||
#差分后可视化 | |||
fig1 = plt.figure() | |||
axcon = fig1.add_subplot(221) | |||
axcon.set_title("Confirmed") | |||
confirmedSeries = pd.DataFrame(CN['Confirmed']) | |||
confirmedSeries = confirmedSeries.fillna(0) | |||
confirmedSeries['Confirmed'] = confirmedSeries['Confirmed'] - confirmedSeries['Confirmed'].shift(1) | |||
axcon.plot(confirmedSeries) | |||
axrec = fig1.add_subplot(222) | |||
axrec.set_title("Recovered") | |||
recoveredSeries = pd.DataFrame(CN['Recovered']) | |||
recoveredSeries = recoveredSeries.fillna(0) | |||
recoveredSeries['Recovered'] = recoveredSeries['Recovered'] - recoveredSeries['Recovered'].shift(1) | |||
axrec.plot(recoveredSeries) | |||
axdea = fig1.add_subplot(223) | |||
axdea.set_title("Deaths") | |||
deathsSeries = pd.DataFrame(CN['Deaths']) | |||
deathsSeries = deathsSeries.fillna(0) | |||
deathsSeries['Deaths'] = deathsSeries['Deaths'] - deathsSeries['Deaths'].shift(1) | |||
axdea.plot(deathsSeries) | |||
plt.show() | |||
#ADF检验 | |||
print(sm.tsa.stattools.adfuller(confirmedSeries.iloc[1:])) | |||
print(sm.tsa.stattools.adfuller(recoveredSeries.iloc[1:])) | |||
print(sm.tsa.stattools.adfuller(deathsSeries.iloc[1:])) | |||
#ARIMA模型 | |||
modelConfirmed = sm.tsa.ARIMA(confirmedSeries.iloc[1:],(1,0,2)).fit() | |||
confirmed_pre=modelConfirmed.predict(start="2020-01-23", end="2020-12-31", dynamic = False) | |||
modelRecovered = sm.tsa.ARIMA(recoveredSeries.iloc[1:],(1,0,2)).fit() | |||
recovered_pre=modelRecovered.predict(start="2020-01-23", end="2020-12-31", dynamic = False) | |||
modelDeaths = sm.tsa.ARIMA(deathsSeries.iloc[1:],(1,0,1)).fit() | |||
deaths_pre=modelDeaths.predict(start="2020-01-23", end="2020-12-31", dynamic = False) | |||
#逆差分还原 | |||
temp=np.array(CN['Confirmed']) | |||
for i in range(len(temp)): | |||
confirmed_pre[i]+=temp[i] | |||
for i in range(len(temp),confirmed_pre.shape[0]): | |||
confirmed_pre[i]+=confirmed_pre[i-1] | |||
confirmed_pre=pd.DataFrame({'confirmed_pre':confirmed_pre}) | |||
confirmed_pre.index = pd.Index(pd.date_range('2020-01-23','2020-12-31',freq = '1D')) | |||
temp=np.array(CN['Recovered']) | |||
for i in range(len(temp)): | |||
recovered_pre[i]+=temp[i] | |||
for i in range(len(temp),confirmed_pre.shape[0]): | |||
recovered_pre[i]+=recovered_pre[i-1] | |||
recovered_pre=pd.DataFrame({'recovered_pre':recovered_pre}) | |||
recovered_pre.index = pd.Index(pd.date_range('2020-01-23','2020-12-31',freq = '1D')) | |||
temp=np.array(CN['Deaths']) | |||
for i in range(len(temp)): | |||
deaths_pre[i]+=temp[i] | |||
for i in range(len(temp),deaths_pre.shape[0]): | |||
deaths_pre[i]+=deaths_pre[i-1] | |||
deaths_pre=pd.DataFrame({'deaths_pre':deaths_pre}) | |||
deaths_pre.index = pd.Index(pd.date_range('2020-01-23','2020-12-31',freq = '1D')) | |||
#可视化 | |||
fig2 = plt.figure() | |||
axcon = fig2.add_subplot(311) | |||
axcon.plot(CN['Confirmed'],label="confirmed") | |||
axcon.plot(confirmed_pre,label="ARIMA") | |||
axrec = fig2.add_subplot(312) | |||
axrec.plot(CN['Recovered'],label="recovered") | |||
axrec.plot(recovered_pre,label="ARIMA") | |||
axdea = fig2.add_subplot(313) | |||
axdea.plot(CN['Deaths'],label="deaths") | |||
axdea.plot(deaths_pre,label="ARIMA") | |||
plt.legend() | |||
plt.show() |
@ -0,0 +1,18 @@ | |||
import matplotlib.pyplot as plt | |||
import numpy as np | |||
AIC = np.zeros(14) | |||
BIC = np.zeros(14) | |||
AIC = [5351.5918, 5335.5662, 5323.8356, 5311.2017, 5273.9674, 5272.4677, 5272.1961, 5300.3335, 5273.3238, 5272.9836, 5275.0035, 5283.7308, 5271.9971, 5276.3102] | |||
BIC = [5362.9155, 5350.6644, 5342.7083, 5322.5254, 5289.0656, 5291.3404, 5294.8434, 5315.4317, 5292.1965, 5295.6309, 5301.4254, 5302.6036, 5294.6445, 5302.7321] | |||
x = ['0,0,1','0,0,2','0,0,3','1,0,0','1,0,1','1,0,2','1,0,3','2,0,0','2,0,1','2,0,2','2,0,3','3,0,0','3,0,1','3,0,2'] | |||
plt.title("AIC/BIC of CN ARIMA Confirmed Model") | |||
plt.plot(x,AIC,label="AIC") | |||
plt.plot(x,BIC,label="BIC") | |||
for y in [AIC, BIC]: | |||
for x1, yy in zip(x, y): | |||
plt.text(x1, yy + 1, str(yy), ha='center', va='bottom', fontsize=10, rotation=0) | |||
plt.legend() | |||
plt.show() |
@ -0,0 +1,18 @@ | |||
import matplotlib.pyplot as plt | |||
import numpy as np | |||
AIC = np.zeros(13) | |||
BIC = np.zeros(13) | |||
AIC = [3720.6474, 3719.8167, 3720.4415, 3720.2469, 3709.0862, 3710.7921, 3712.7911, 3718.7009, 3710.7933, 3712.7608, 3714.7807, 3718.7941, 3712.7926] | |||
BIC = [3731.9711, 3734.9149, 3739.3143, 3731.5706, 3724.1844, 3729.6649, 3735.4384, 3733.7991, 3729.6661, 3735.4081, 3741.2026, 3737.6668, 3735.4399] | |||
x = ['0,0,1','0,0,2','0,0,3','1,0,0','1,0,1','1,0,2','1,0,3','2,0,0','2,0,1','2,0,2','2,0,3','3,0,0','3,0,1'] | |||
plt.title("AIC/BIC of CN ARIMA Deaths Model") | |||
plt.plot(x,AIC,label="AIC") | |||
plt.plot(x,BIC,label="BIC") | |||
for y in [AIC, BIC]: | |||
for x1, yy in zip(x, y): | |||
plt.text(x1, yy + 1, str(yy), ha='center', va='bottom', fontsize=10, rotation=0) | |||
plt.legend() | |||
plt.show() |
@ -0,0 +1,18 @@ | |||
import matplotlib.pyplot as plt | |||
import numpy as np | |||
AIC = np.zeros(14) | |||
BIC = np.zeros(14) | |||
AIC = [4883.7687, 4768.7267, 4665.6302, 4613.9799, 4476.1181, 4406.9768, 4408.8600, 4462.9301, 4434.4957, 4408.8190, 4410.7338, 4418.0734, 4419.9409, 4409.4140] | |||
BIC = [4895.0924, 4783.8249, 4684.5030, 4625.3036, 4491.2163, 4425.8495, 4431.5073, 4478.0283, 4453.3685, 4431.4663, 4437.1557, 4436.9461, 4442.5882, 4435.8358] | |||
x = ['0,0,1','0,0,2','0,0,3','1,0,0','1,0,1','1,0,2','1,0,3','2,0,0','2,0,1','2,0,2','2,0,3','3,0,0','3,0,1','3,0,2'] | |||
plt.title("AIC/BIC of CN ARIMA Recovered Model") | |||
plt.plot(x,AIC,label="AIC") | |||
plt.plot(x,BIC,label="BIC") | |||
for y in [AIC, BIC]: | |||
for x1, yy in zip(x, y): | |||
plt.text(x1, yy + 1, str(yy), ha='center', va='bottom', fontsize=10, rotation=0) | |||
plt.legend() | |||
plt.show() |
@ -1,38 +0,0 @@ | |||
import pandas as pd | |||
import numpy as np | |||
import matplotlib.pyplot as plt | |||
import statsmodels.api as sm | |||
from statsmodels.graphics.tsaplots import acf,pacf,plot_acf,plot_pacf | |||
from statsmodels.tsa.arima_model import ARMA | |||
from statsmodels.tsa.stattools import adfuller | |||
#打开数据文件 | |||
dataset = pd.read_csv('E:\DaseIntro\COVID-19Analysis\COVID-19\covid-19-all.csv') | |||
#数据预处理 | |||
def parse_ymd(s): | |||
year_s, mon_s, day_s = s.split('-') | |||
return datetime.datetime(int(year_s), int(mon_s), int(day_s)).strftime("%Y-%m-%d") | |||
dataset = dataset.fillna(0) | |||
dataset['Date'] = pd.to_datetime(dataset['Date']) | |||
dataset = dataset[['Country/Region','Confirmed','Recovered','Deaths','Date']].groupby(['Country/Region','Date']).sum().reset_index() | |||
#取出中、美的数据 | |||
CN = dataset[dataset['Country/Region'] == 'China'] | |||
CN.index = pd.Index(pd.date_range('2020-01-22','2020-12-09',freq = '1D')) | |||
US = dataset[dataset['Country/Region'] == 'US'] | |||
US.index = pd.Index(pd.date_range('2020-01-22','2020-12-09',freq = '1D')) | |||
#检验Confirmed | |||
CNconfirmedSeries = pd.DataFrame(CN['Confirmed']) | |||
CNconfirmedSeries['Confirmed'] = CNconfirmedSeries['Confirmed'] - CNconfirmedSeries['Confirmed'].shift(1) | |||
CNconfirmedSeries.plot(figsize=(8,6)) | |||
CNrecoveredSeries = pd.DataFrame(CN['Recovered']) | |||
CNrecoveredSeries['Recovered'] = CNrecoveredSeries['Recovered'] - CNrecoveredSeries['Recovered'].shift(1) | |||
CNrecoveredSeries.plot(figsize=(8,6)) | |||
CNdeathsSeries = pd.DataFrame(CN['Deaths']) | |||
CNdeathsSeries['Deaths'] = CNdeathsSeries['Deaths'] - CNdeathsSeries['Deaths'].shift(1) | |||
CNdeathsSeries.plot(figsize=(8,6)) | |||
plt.show() |