From 32a001e08e6c1171dc8d14b4a214d53715317c14 Mon Sep 17 00:00:00 2001 From: 10195501441 <10195501441@stu.ecnu.edu.cn> Date: Thu, 31 Dec 2020 13:09:06 +0800 Subject: [PATCH] added ARIMA method --- COVID-19/Prediction/.vs/Prediction/v16/.suo | Bin 34304 -> 24576 bytes COVID-19/Prediction/Prediction.sln | 2 +- COVID-19/Prediction/Prediction/ARIMA.py | 122 +++++++++++++++++++++ COVID-19/Prediction/Prediction/ARIMA.pyproj | 37 +++++++ .../Prediction/Prediction/Holt_Linear_Trend.py | 122 --------------------- .../Prediction/Prediction/Holt_Linear_Trend.pyproj | 37 ------- 6 files changed, 160 insertions(+), 160 deletions(-) create mode 100644 COVID-19/Prediction/Prediction/ARIMA.py create mode 100644 COVID-19/Prediction/Prediction/ARIMA.pyproj delete mode 100644 COVID-19/Prediction/Prediction/Holt_Linear_Trend.py delete mode 100644 COVID-19/Prediction/Prediction/Holt_Linear_Trend.pyproj diff --git a/COVID-19/Prediction/.vs/Prediction/v16/.suo b/COVID-19/Prediction/.vs/Prediction/v16/.suo index 3be7b6d49fffb7df4c32f24a6c55f72c3c24e555..5b02e307b985df8523fc621dc7ebc8ecac45be8e 100644 GIT binary patch delta 2593 zcmcgtZA?>F7`~^yt+xeA0WHuvr635%$5qNi;zwC)MS;>8r${g$ihK^4ULsIfZ!>U9 zB#YkH={|Iuo0*x(rmo9=_+x3zek}2aZZR&|50@Adlf@q=4B^4N-4NhKIQ-Zxkh;&3gA_ZYUlp-EQA`xS}#%7|J zk>g=!PQf}mwum-iEyUY}SQjD65ovL5Io2x>%Mcle3Pf4Ve=+L6+%Whr{`VyNuey?o z>pYBi$XkknyrZZVAZRE#s)fjrojyLM!~J6GULWdoR|R>8$e z@4Q;n7?tAhd6Kg_17?b&SF3v$$pRQHc{9HNTa+bqL%suDdwcgt2^TAdMqOMFq&t?d zX<*e8aIf14Ury=m`O3!2fsZ#FZWznlxb@EIyH$5u8>QVJow>bt*U+{Py(c-i#Am^D zpAo(ubwD`i1UV=xIrmakk*~+y;P;%h0uesytj%@v$Eh)y+mWl9jUzFEcpX8U3=c~@ zF^!fs8(0p~^s>x_SLzeWkf5h0K}su{4ct;>C{&frCKCb`n{Mn( zeJl!&>O<6q$PTa7^X=W*1=fR&3x`)^aoVg+7?5t<-sJ$4(PIF2p zz8WMy4M)PShVUMzV)_o;b`fSWuk$<(scDr$D!@J!$mrVy`rvtUFS{$7rX8w#zCq>c)n($iXZOItuD1I915*2r z-a)DBz`=wetyVtwQToCV94>*K{pUcIc4dW_yH>=XzsgfnSM^err`)ATGTfJz$Mbw% z#v29I3lEqb>kadI@dczdE}MCXp;U-zK)OkYiep*uF>B#2OD~9 zbK7jfZBRe{U*PJ99pnk8QW}@}bLLV-axVRU@5xt->lvR*VJ807)=Z~ZAJ64y(I-ej z3xwrTC3x{asq4^_)s<5;_*9~T`~F5qopgLEk27#ig~2)bnOzE>?^t}+Xz9p;{x*;1 o#|yQ-PnyKZ?d`vQ{XBRsaA1 delta 3429 zcmdT`ZETZO6u$T6?ReX5UAL94?Z(y(I`)CkjmhWex~`P18*4iUnMv7ZGK?4)3zW!* z&^MXl2m7(U2f;sT;s?Zt5!ZyD+5}BZFb<>ggP16S zdhb2=+;i_e=Q;Pja+1p1KUskc#&72kX6C~As{Wo zmWY6#YAe7cWbqSlT^Q!C!v1*&bC2MFYaae#nGaknl1i~vgouZ&5H*-$GwAaXhip06gAr{fANi7ZRDK zQ)r}u34gap$DR>HH-=Pd!ll|uq8&`wLh3YfjHbm8!(MxgAA;-lpgKs*(FP$}91_Xp znu(kccE*W(Sd_VUapBPyl{42dw&*#Ll!9}6YsT-PwO}bk^MZO#&LB!_quH-v=Mn-7 zOgXRR5KYP3s>g3sHcFi-0W>c0O~>J-CH=MoC_RXHF-<*&Z5eJBZIp;4^AXxA5VgwO z4cDzPz8?14jw;orgsFQq7l~E3l{N?UjPjFmQ&?DB1xFu%l@}bmoBLdQL8P zWQ>~d*hEjdEoSCSn1XQ5UzgQ`LOX{8`2mSoPg$<%=pxwp872z@a89TLxn6{Q=5}f; z*t}uBpv788zG8{V{PTR8S1B50t8z4gQl3b?jubXX%do%opd2!Z;JyE70H8oan3>IW8JN@=r*F5&_n+nW4;#Y zm6=7Gb}o^}GLyD2BvA%YI%uANiK-GXRjs&JP+QbssBxr7(Ces8*Qs%Y$?|}nQ{*X0 zwK0fRbUz5QN?PG70k4#d;H$ZDw|K)?gFoS4Y0>eMuytJwUUwRxBVq^d+IvOxvXrT= zG;V@Tks|oLuE6m0p~g$UPM+|8_8Z8J$LJnto&glu{uC{O}B58<-q z@K%Q*zjfD+XkW+n!EMj%?(5#YZEygBUVq^%>9Qe62C5-e`XR_=vf{M^cUQtkUV9#S zLO`QbwtF7d@R#1B{5bI1ICxsKpeOJggj$BcR#pvH1E(O_?3v?C|J(B@9WsozZ{-i^ zEQ@)L@a%sI@=M)9>#oOV+%ng)HC?ps95gz+$)8Uzp;qX%>xSIU&EP66v(GlwGfrQC zm_3fNio2aK*(savpzE@s{SUN-4UfEqD-PPGRb@SWz-onFML!~kC1MfIAsO_53sJDH z1P<3);8vs*MuT|{(!c-NUC!0u4%k8I8$>HL(BEgl6gdNDoFTZ&m}3u%uof0%TUZ+FB_^sV9H@BVuGDumq?xE&*SA|CMco^&icv8L= '2020-11-01'] + +trainUS = US[US['Date'] < '2020-11-01 '] +testUS = US[US['Date'] >= '2020-11-01'] + +#自回归移动平均模型(ARIMA) +yCNARIMA = testCN.copy() +yUSARIMA = testUS.copy() + +#训练模型 +fitCNconfirmed = sm.tsa.statespace.SARIMAX(trainCN['Confirmed'],trend='c').fit() +fitCNrecovered = sm.tsa.statespace.SARIMAX(trainCN['Recovered'],trend='c').fit() +fitCNdeaths = sm.tsa.statespace.SARIMAX(trainCN['Deaths'],trend='ct').fit() + +fitUSconfirmed = sm.tsa.statespace.SARIMAX(trainUS['Confirmed'],trend='ct').fit() +fitUSrecovered = sm.tsa.statespace.SARIMAX(trainUS['Recovered'],trend='ct').fit() +fitUSdeaths = sm.tsa.statespace.SARIMAX(trainUS['Deaths'],trend='ct').fit() + +#测试 +yCNARIMA['SARIMAconfirmed'] = fitCNconfirmed.predict(start="2020-11-01", end="2020-12-09", dynamic=True) +yCNARIMA['SARIMArecovered'] = fitCNrecovered.predict(start="2020-11-01", end="2020-12-09", dynamic=True) +yCNARIMA['SARIMAdeaths'] = fitCNdeaths.predict(start="2020-11-01", end="2020-12-09", dynamic=True) + +yUSARIMA['SARIMAconfirmed'] = fitUSconfirmed.predict(start="2020-11-01", end="2020-12-09") +yUSARIMA['SARIMArecovered'] = fitUSrecovered.predict(start="2020-11-01", end="2020-12-09", dynamic=True) +yUSARIMA['SARIMAdeaths'] = fitUSdeaths.predict(start="2020-11-01", end="2020-12-09", dynamic=True) + +#预测将来七天 +forecastCNARIMA = pd.DataFrame({'Date':['2020-12-10','2020-12-11','2020-12-12','2020-12-13','2020-12-14','2020-12-15','2020-12-16']}) +forecastUSARIMA = pd.DataFrame({'Date':['2020-12-10','2020-12-11','2020-12-12','2020-12-13','2020-12-14','2020-12-15','2020-12-16']}) + +forecastCNARIMA['Date'] = pd.to_datetime(forecastCNARIMA['Date'], format='%Y/%m/%d').values.astype('datetime64[h]') +forecastCNARIMA['confirmedPred'] = fitCNconfirmed.predict(start="2020-12-10", end="2020-12-16", dynamic=True) +forecastCNARIMA['recoveredPred'] = fitCNrecovered.predict(start="2020-12-10", end="2020-12-16", dynamic=True) +forecastCNARIMA['deathsPred'] = fitCNdeaths.predict(start="2020-12-10", end="2020-12-16", dynamic=True) + +forecastUSARIMA['Date'] = pd.to_datetime(forecastUSARIMA['Date'], format='%Y/%m/%d').values.astype('datetime64[h]') +forecastUSARIMA['confirmedPred'] = fitUSconfirmed.predict(start="2020-12-10", end="2020-12-16", dynamic=True) +forecastUSARIMA['recoveredPred'] = fitUSrecovered.predict(start="2020-12-10", end="2020-12-16", dynamic=True) +forecastUSARIMA['deathsPred'] = fitUSdeaths.predict(start="2020-12-10", end="2020-12-16", dynamic=False) + +#RMSE +rmseCNARIMACon = pow(mean_squared_error(np.asarray(testCN['Confirmed']), np.asarray(yCNARIMA['SARIMAconfirmed'])),0.05) +rmseCNARIMARec = pow(mean_squared_error(np.asarray(testCN['Recovered']), np.asarray(yCNARIMA['SARIMArecovered'])),0.05) +rmseCNARIMADea = pow(mean_squared_error(np.asarray(testCN['Deaths']), np.asarray(yCNARIMA['SARIMAdeaths'])),0.5) + +rmseUSARIMACon = pow(mean_squared_error(np.asarray(testUS['Confirmed']), np.asarray(yUSARIMA['SARIMAconfirmed'])),0.05) +rmseUSARIMARec = pow(mean_squared_error(np.asarray(testUS['Recovered']), np.asarray(yUSARIMA['SARIMArecovered'])),0.05) +rmseUSARIMADea = pow(mean_squared_error(np.asarray(testUS['Deaths']), np.asarray(yUSARIMA['SARIMAdeaths'])),0.05) + +#可视化 +fig = plt.figure() +axCNARIMA = fig.add_subplot(211) +axCNARIMA.set_title("ARIMA (CN)",verticalalignment="bottom",fontsize="13") + +CN.index = pd.Index(pd.date_range('2020-01-22','2020-12-09',freq = '1D')) +yCNARIMA.index = pd.Index(pd.date_range('2020-11-01','2020-12-09',freq = '1D')) +forecastCNARIMA.index = pd.Index(pd.date_range('2020-12-10','2020-12-16',freq = '1D')) + +axCNARIMA.plot(CN['Confirmed'],label="confirmed",linestyle=":") +axCNARIMA.plot(CN['Recovered'],label="recovered",linestyle=":") +axCNARIMA.plot(CN['Deaths'],label="deaths",linestyle=":") + +axCNARIMA.plot(yCNARIMA['SARIMAconfirmed'],label="confirmed test") +axCNARIMA.plot(yCNARIMA['SARIMArecovered'],label="recovered test") +axCNARIMA.plot(yCNARIMA['SARIMAdeaths'],label="deaths test") + +axCNARIMA.plot(forecastCNARIMA['confirmedPred'],label="confirmed prediction") +axCNARIMA.plot(forecastCNARIMA['recoveredPred'],label="recovered prediction") +axCNARIMA.plot(forecastCNARIMA['deathsPred'],label="deaths prediction") + +axUSARIMA = fig.add_subplot(212) +axUSARIMA.set_title("ARIMA (US)",verticalalignment="bottom",fontsize="13") + +US.index = pd.Index(pd.date_range('2020-01-22','2020-12-09',freq = '1D')) +yUSARIMA.index = pd.Index(pd.date_range('2020-11-01','2020-12-09',freq = '1D')) +forecastUSARIMA.index = pd.Index(pd.date_range('2020-12-10','2020-12-16',freq = '1D')) + +axUSARIMA.plot(US['Confirmed'],label="confirmed",linestyle=":") +axUSARIMA.plot(US['Recovered'],label="recovered",linestyle=":") +axUSARIMA.plot(US['Deaths'],label="deaths",linestyle=":") + +axUSARIMA.plot(yUSARIMA['SARIMAconfirmed'],label="confirmed test") +axUSARIMA.plot(yUSARIMA['SARIMArecovered'],label="recovered test") +axUSARIMA.plot(yUSARIMA['SARIMAdeaths'],label="deaths test") + +axUSARIMA.plot(forecastUSARIMA['confirmedPred'],label="confirmed prediction") +axUSARIMA.plot(forecastUSARIMA['recoveredPred'],label="recovered prediction") +axUSARIMA.plot(forecastUSARIMA['deathsPred'],label="deaths prediction") + +plt.tight_layout() +plt.gcf().autofmt_xdate() +plt.legend(labelspacing=0.05) +plt.show() \ No newline at end of file diff --git a/COVID-19/Prediction/Prediction/ARIMA.pyproj b/COVID-19/Prediction/Prediction/ARIMA.pyproj new file mode 100644 index 0000000..4c5722a --- /dev/null +++ b/COVID-19/Prediction/Prediction/ARIMA.pyproj @@ -0,0 +1,37 @@ + + + Debug + 2.0 + 3f8a3b3f-c79e-412c-8ee4-da8ff5db31a6 + + + + + + + . + . + ARIMA + ARIMA + + + true + false + + + true + false + + + + + + + + + + + + \ No newline at end of file diff --git a/COVID-19/Prediction/Prediction/Holt_Linear_Trend.py b/COVID-19/Prediction/Prediction/Holt_Linear_Trend.py deleted file mode 100644 index 85826ae..0000000 --- a/COVID-19/Prediction/Prediction/Holt_Linear_Trend.py +++ /dev/null @@ -1,122 +0,0 @@ -import pandas as pd -import datetime as DT -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.dates as mdates -import math -import statsmodels.api as sm -from sklearn.metrics import mean_squared_error - -#打开数据文件 -dataset = pd.read_csv('E:\dase intro\COVID-19Analysis\COVID-19\covid-19-all.csv') - -#数据预处理 -def parse_ymd(s): - year_s, mon_s, day_s = s.split('-') - return datetime.datetime(int(year_s), int(mon_s), int(day_s)).strftime("%Y-%m-%d") -dataset = dataset.fillna(0) -dataset['Date'] = pd.to_datetime(dataset['Date']) -dataset = dataset[['Country/Region','Confirmed','Recovered','Deaths','Date']].groupby(['Country/Region','Date']).sum().reset_index() - -#取出中、美的数据 -CN = dataset[dataset['Country/Region'] == 'China'] -CN.index = pd.Index(pd.date_range('2020-01-22','2020-12-09',freq = '1D')) -US = dataset[dataset['Country/Region'] == 'US'] -US.index = pd.Index(pd.date_range('2020-01-22','2020-12-09',freq = '1D')) - -#划分训练集、测试集 -trainCN = CN[CN['Date'] < '2020-11-01 '] -testCN = CN[CN['Date'] >= '2020-11-01'] - -trainUS = US[US['Date'] < '2020-11-01 '] -testUS = US[US['Date'] >= '2020-11-01'] - -#自回归移动平均模型(ARIMA) -yCNARIMA = testCN.copy() -yUSARIMA = testUS.copy() - -#训练模型 -fitCNconfirmed = sm.tsa.statespace.SARIMAX(trainCN.Confirmed).fit() -fitCNrecovered = sm.tsa.statespace.SARIMAX(trainCN['Recovered']).fit() -fitCNdeaths = sm.tsa.statespace.SARIMAX(trainCN['Deaths']).fit() - -fitUSconfirmed = sm.tsa.statespace.SARIMAX(trainUS.Confirmed,trend='ct').fit() -fitUSrecovered = sm.tsa.statespace.SARIMAX(trainUS['Recovered']).fit() -fitUSdeaths = sm.tsa.statespace.SARIMAX(trainUS['Deaths']).fit() - -#测试 -yCNARIMA['SARIMAconfirmed'] = fitCNconfirmed.predict(start="2020-11-01", end="2020-12-09", dynamic=True) -yCNARIMA['SARIMArecovered'] = fitCNrecovered.predict(start="2020-11-01", end="2020-12-09", dynamic=True) -yCNARIMA['SARIMAdeaths'] = fitCNdeaths.predict(start="2020-11-01", end="2020-12-09", dynamic=True) - -yUSARIMA['SARIMAconfirmed'] = fitUSconfirmed.predict(start="2020-11-01", end="2020-12-09") -yUSARIMA['SARIMArecovered'] = fitUSrecovered.predict(start="2020-11-01", end="2020-12-09", dynamic=True) -yUSARIMA['SARIMAdeaths'] = fitUSdeaths.predict(start="2020-11-01", end="2020-12-09", dynamic=True) - -#预测将来七天 -forecastCNARIMA = pd.DataFrame({'Date':['2020-12-10','2020-12-11','2020-12-12','2020-12-13','2020-12-14','2020-12-15','2020-12-16']}) -forecastUSARIMA = pd.DataFrame({'Date':['2020-12-10','2020-12-11','2020-12-12','2020-12-13','2020-12-14','2020-12-15','2020-12-16']}) - -forecastCNARIMA['Date'] = pd.to_datetime(forecastCNARIMA['Date'], format='%Y/%m/%d').values.astype('datetime64[h]') -forecastCNARIMA['confirmedPred'] = fitCNconfirmed.predict(start="2020-12-10", end="2020-12-16", dynamic=True) -forecastCNARIMA['recoveredPred'] = fitCNrecovered.predict(start="2020-12-10", end="2020-12-16", dynamic=True) -forecastCNARIMA['deathsPred'] = fitCNdeaths.predict(start="2020-12-10", end="2020-12-16", dynamic=True) - -forecastUSARIMA['Date'] = pd.to_datetime(forecastUSARIMA['Date'], format='%Y/%m/%d').values.astype('datetime64[h]') -forecastUSARIMA['confirmedPred'] = fitUSconfirmed.predict(start="2020-12-10", end="2020-12-16", dynamic=True) -forecastUSARIMA['recoveredPred'] = fitUSrecovered.predict(start="2020-12-10", end="2020-12-16", dynamic=True) -forecastUSARIMA['deathsPred'] = fitUSdeaths.predict(start="2020-12-10", end="2020-12-16", dynamic=True) - -#RMSE -rmseCNARIMACon = pow(mean_squared_error(np.asarray(testCN['Confirmed']), np.asarray(yCNARIMA['SARIMAconfirmed'])),0.05) -rmseCNARIMARec = pow(mean_squared_error(np.asarray(testCN['Recovered']), np.asarray(yCNARIMA['SARIMArecovered'])),0.05) -rmseCNARIMADea = pow(mean_squared_error(np.asarray(testCN['Deaths']), np.asarray(yCNARIMA['SARIMAdeaths'])),0.5) - -rmseUSARIMACon = pow(mean_squared_error(np.asarray(testUS['Confirmed']), np.asarray(yUSARIMA['SARIMAconfirmed'])),0.05) -rmseUSARIMARec = pow(mean_squared_error(np.asarray(testUS['Recovered']), np.asarray(yUSARIMA['SARIMArecovered'])),0.05) -rmseUSARIMADea = pow(mean_squared_error(np.asarray(testUS['Deaths']), np.asarray(yUSARIMA['SARIMAdeaths'])),0.05) - -#可视化 -fig = plt.figure() -axCNARIMA = fig.add_subplot(211) -axCNARIMA.set_title("ARIMA (CN)",verticalalignment="bottom",fontsize="13") - -CN.index = pd.Index(pd.date_range('2020-01-22','2020-12-09',freq = '1D')) -yCNARIMA.index = pd.Index(pd.date_range('2020-11-01','2020-12-09',freq = '1D')) -forecastCNARIMA.index = pd.Index(pd.date_range('2020-12-10','2020-12-16',freq = '1D')) - -axCNARIMA.plot(CN['Confirmed'],label="confirmed",linestyle=":") -axCNARIMA.plot(CN['Recovered'],label="recovered",linestyle=":") -axCNARIMA.plot(CN['Deaths'],label="deaths",linestyle=":") - -axCNARIMA.plot(yCNARIMA['SARIMAconfirmed'],label="confirmed test") -axCNARIMA.plot(yCNARIMA['SARIMArecovered'],label="recovered test") -axCNARIMA.plot(yCNARIMA['SARIMAdeaths'],label="deaths test") - -axCNARIMA.plot(forecastCNARIMA['confirmedPred'],label="confirmed prediction") -axCNARIMA.plot(forecastCNARIMA['recoveredPred'],label="recovered prediction") -axCNARIMA.plot(forecastCNARIMA['deathsPred'],label="deaths prediction") - -axUSARIMA = fig.add_subplot(212) -axUSARIMA.set_title("ARIMA (US)",verticalalignment="bottom",fontsize="13") - -US.index = pd.Index(pd.date_range('2020-01-22','2020-12-09',freq = '1D')) -yUSARIMA.index = pd.Index(pd.date_range('2020-11-01','2020-12-09',freq = '1D')) -forecastUSARIMA.index = pd.Index(pd.date_range('2020-12-10','2020-12-16',freq = '1D')) - -axUSARIMA.plot(US['Confirmed'],label="confirmed",linestyle=":") -axUSARIMA.plot(US['Recovered'],label="recovered",linestyle=":") -axUSARIMA.plot(US['Deaths'],label="deaths",linestyle=":") - -axUSARIMA.plot(yUSARIMA['SARIMAconfirmed'],label="confirmed test") -axUSARIMA.plot(yUSARIMA['SARIMArecovered'],label="recovered test") -axUSARIMA.plot(yUSARIMA['SARIMAdeaths'],label="deaths test") - -axUSARIMA.plot(forecastUSARIMA['confirmedPred'],label="confirmed prediction") -axUSARIMA.plot(forecastUSARIMA['recoveredPred'],label="recovered prediction") -axUSARIMA.plot(forecastUSARIMA['deathsPred'],label="deaths prediction") - -plt.tight_layout() -plt.gcf().autofmt_xdate() -plt.legend(labelspacing=0.05) -plt.show() \ No newline at end of file diff --git a/COVID-19/Prediction/Prediction/Holt_Linear_Trend.pyproj b/COVID-19/Prediction/Prediction/Holt_Linear_Trend.pyproj deleted file mode 100644 index c05dbd0..0000000 --- a/COVID-19/Prediction/Prediction/Holt_Linear_Trend.pyproj +++ /dev/null @@ -1,37 +0,0 @@ - - - Debug - 2.0 - 3f8a3b3f-c79e-412c-8ee4-da8ff5db31a6 - - - - - - - . - . - Holt_Linear_Trend - ARIMA - - - true - false - - - true - false - - - - - - - - - - - - \ No newline at end of file