From 8a472cdf8fd937079bee7c34eb226f2b72da28cc Mon Sep 17 00:00:00 2001 From: 10195501441 <10195501441@stu.ecnu.edu.cn> Date: Tue, 29 Dec 2020 14:34:41 +0800 Subject: [PATCH] ARIMA Prediction --- COVID-19/Prediction/.vs/Prediction/v16/.suo | Bin 26624 -> 34304 bytes COVID-19/Prediction/Prediction.sln | 6 +- .../Prediction/Prediction/Holt_Linear_Trend.py | 122 +++++++++++++++++++++ .../Prediction/Prediction/Holt_Linear_Trend.pyproj | 37 +++++++ COVID-19/Prediction/Prediction/Holt_Winters.py | 117 ++++++++++++++++++++ COVID-19/Prediction/Prediction/Holt_Winters.pyproj | 36 ++++++ COVID-19/Prediction/Prediction/Prediction.py | 117 -------------------- COVID-19/Prediction/Prediction/Prediction.pyproj | 35 ------ 8 files changed, 317 insertions(+), 153 deletions(-) create mode 100644 COVID-19/Prediction/Prediction/Holt_Linear_Trend.py create mode 100644 COVID-19/Prediction/Prediction/Holt_Linear_Trend.pyproj create mode 100644 COVID-19/Prediction/Prediction/Holt_Winters.py create mode 100644 COVID-19/Prediction/Prediction/Holt_Winters.pyproj delete mode 100644 COVID-19/Prediction/Prediction/Prediction.py delete mode 100644 COVID-19/Prediction/Prediction/Prediction.pyproj diff --git a/COVID-19/Prediction/.vs/Prediction/v16/.suo b/COVID-19/Prediction/.vs/Prediction/v16/.suo index 2a774873f43b8458176b52f196531c2c209b55b2..3be7b6d49fffb7df4c32f24a6c55f72c3c24e555 100644 GIT binary patch literal 34304 zcmeHQdvIJ;8NVql^Z^uTDNu@pQfgaz!@jcFZE2fLc9)hmv@~f-CDLxPn>Ouc6L-@` zOhV;Vu*yrIpra@b;y>sN=;)|3qN9#8%1m(_QHu;HjISBgamEM3*!cUMb5Hi}%|7mK zHYq9hWWGK3o_p^3&f`1Z>zs4*%E^o0eCCD4zt=2by>^avdUTF6ZcI-S}UXPd!onukIO>cQ9@+;;RwY0+s--1Y85S z5O5WMHpD0Hmo^fa?LP0d9cGqvCo?njiO900Mv@;6{MTyAIbO(>>eKXu4PV zJ}B{2&i~rOcdY09Uxfa4=07{(%+G&C=W|dO#RtrD`(mV@4`3R3%xu6y0N-1Hcs}3) z<9;sUO9AAqmm6t|5qkmG8utoczIYk(TnErg%IdzC$-*~7Kg4I*OWe-(7K?PW-ybBhou$h zzvK(5)Kd6M*#pa=|FRb-cNnX3i^b*O52IHOLF=S!$rn}|?WbI<1ph|p_;JzqDQNyl zj7AirkrXLud;MQiW8kg?e-p6vX$Q1kP(T{7UAss1#S)|cl}@12i^X^1;TuMVHsH+w z?-uk!1{Bw-?L&Rixaty?Q7Hkl^hGN#jh>I8-=nBU689O) zOlU@}XdfrUZ?;xzjP_rHvh3nF(_a|nvZcL7$vc5Rh0*WR4xMZytYV{yX!3qVu0NTBY;9nE$CAa?h_d z6qJ4B3M;^W=$ldY9n(sMyOH4)Bhzi*d`WP+7`Wg8{O$!G+zL-zF`j2GpZuS$Ecz7e z^51^s-2-4qf%g*UfV?gH_k4r@m(%}by|j-R?DRkVTZIDqpSV{bgWdLj5_wM<1(Fw) z0^0S=Lq7m*efsQZ_g@4c?Og&`0H6*)J&a>P-yz#byPQ4%`uphjqpy#?#j63e0Lsm! z0Md`L|LF@<{y%fw=>Id{^V)5sc@a}bV_*3J^f@a3AAJI}|LF%*?{C0$BOnB50yG1{ zfCwN8xCyWkunBN8U^8F~papOXK$UYVuD1hP0d0UCfEYkM--YXs0B#5D0elp22Vgfq z<>|zA7a$Hu0J;GO0Q&$vfc*gGO(O0EqyT+@G~i)El`m^lw4`q)EH@=?9uUQqP&y09zU~kgMah3ku4`XT5{|Q^UEy zxnURoI^>;U6x58?QvznRcC@JlNrj#|WpeNwDGe&D|v<&n4AZU0IWemnBVhx~{JQ`T$y4m&??B!^7zLXB4K1Qyr4CLS zXvR3R$NQjl+=O1|xc5W*i3x9oqB;z*V2^P<{gKfVg^^JQcBL3fmPX+g~` zXd(N1JJQJWDf@0k4LBpI8)U}3WMk-|q0FeEt}T1tUjLire-p6N@}mAD=XHE+rWe!u zR1K8>wH*8|yxd|GV4ZhL`(bU8HY`EtpJQ#eOMf>2x6(hX{%*zPNEg<+(Jtkvm;>am z&_8L{*u`ImyoUN|@iMky6y%&Ee(r}!W3EyAE~Ep~|3_Y0PXC+7pVy^eF3M*f&KdF= z>e`O|C;M%>+kb7KtbR}-c?^BcEf|-8UHYJYiJ@^Uq9w?ZBvbdnf4q zh~S86;O!B0$b&K`iS`%ib2-57{M(AWYX0304V#>wQZ5baycXO~>KWV-C!a8lUiy6V zkoaB8+Bmg-u9D@B1nu8)$A1>^Qn+Uk(2+>F#1w&@TO%`L9*S zmim8r{$J972Sce?axl}A8B6Hc zmQKWbx(0gsQYA0z`Ui)HTt?6tTYl8DkzvN%n!{Z}lj}h1XwyvQ9JXRQg7s+Mlwzbmu~%RmK#vyZQ{(l2TDi{p-!xkP zt67RsUSiCAL?wfH{jZ$$7iaxveAhbbKjquM)D7*}-{$ovrSUoIKlIIH1O1O`?V+@^ zm+zVNe`o!Nn%;hBe)7CSCF?)*y<7{h+yB*G|DivPdqC{sH;=zn{yYBPvi-j~8aIZa zWT>|Pm;OJs23!(3GSK%+V5q|SkMMtU4`3zq=dAw}KL4-wUr*)wkMu{E<^M06{vH2+ zRsH{E_rK%+7pzbGe9-;><2|Z&Jfc<~N0(S1RH+(#Ri*q6)xp~X|7-yM75a0y+r7B2 zqdGX{KClC5F?|H|0ZyJjSoNs#T9Gd&r?j<)pciv=)4h*bb=^4IFxH9fqC37+z25xW=;%aN7g!greK8O(gWN0Fft{;z$Lwyg zcY*r{=o#J(ou4~9xXY(*>>c@L_Sf$E9pA8<*KR(}Uyh5j{x|vSf7W^Ltp5~SZLUN& zdVjC~R3-Iodu2N5f6&S1YJYq@bozQ^CdO--x!G92u8|v0z5VW! z(eMB1B<`rg3hi~Kwvgv>th;Aw@+Z#?;iS|i?B5DOi#6R`p4EG$H&cG!CxcuCA1%$k z!!Hr#YekJ_BU2o$V*?3ic56}T2FWc&{pfHmfU(BU)|4_=CUY~C-5T=~3#{!%7fz;_ z|MaoD;#cN(crAi7h{_*7ey(x?Tf?|-wgr}sJlWU^9|UbQ>YzN8BZo%Q*5Pa13~R+V zE{3ohK7uQv_2J5oDg-bKg82G{j+7=`d2toQlYrQ}7QwF%X)fdlL(GjU;eJURjoB1$it$6%Uyf(HKA3NPwZ7y#I zzr9g+=0E#+t#j8^2K^r0*BJnT)yIRn#~*O{{P9@aa*|2DNtNB(2UAXPxmbmYIVK&FZO=bkKR>RO}%^(3@?Q%k5r{2G=L zb+&VFOdU)n`Hf`;uB8<_O)reB)lh*OV*XB-p78o$rFjAYSZVQYy*?4^_6B{4gx4Re zbGbae`i5ayZua+bVP^0<+cNy)CGHTVEDoPlM#sW@_Hb(j!-K=J zcoGTS9f$?-~gfB2+E=7Itq`*f_#!tc9PLk8$e+>izC0tVZ$8Q;Y@VWniDTCFB zcCnJ}hELuF|2$9Yb>DL55U&_M4_`jNa#M|OmW&^(X)^v1bcA{2obeaC1;&4orrO`E z$u868?yiw~AL(-;fI08TDTkZ_Bd2eOour`)qQ_H!nK= zmfrT>Wk0&oeLUt``P*ajp4FB0Gp(Ee_dCuBRBt{P^Z(}a)|H#ze5$}1 z|LH&e*>77FS~bW=HTm-@4#0W;^)pD*`D>RyRn?jNs+w}O`twSjl**2mbzs0$!+46GJxrSZX#?PNu^`$fZa!X+TUMiGYTo$P>=32iwXC432-<-?E zW9H|DcD|}{hulrR`2Dg7LzI*eLSBLKVRdoB6v;MQ`@kJNDb@E%8 nFMeae8`nRIhE*uZC+mBGYJD~YJwJP`MuJAqQiJ$QVywgR>&%C{7#6z~-V5sIKrKt3jFhg0h+C^D5`yMX9;M6*BC zETbn;W6)*G{@9jfk>L`vWl5K8$(Ah{Y|FCEeW3rjE!jU{f(vx#y!M46L@`^oKen6v zde1%Q_Pq0P&$(B*z$>HdrNWgw7am5A6Od$xnP4zTTpf%_=3(~`^Yis=38eF>hNhM8 z=9+)}hOI(zU35f zVj04OKvpmg=NJUZX&8`B49hg09@k6Bra`x0O2tJQA{~)|$b>;DbI>ip4JnbCLzbsS zSg!aNW%*Gz>e4I^nGy_&$eYH6W;{8f$!I1=XeOsHj$Ax~_L6ht2+5Mdvjt~zgm^Q9 z&E|(YlAt*Pf+a0p3lIehY?dxPeMCb(OqXcOq8zai zQGuvLI1#iLgkV$ejw4z*p5TMk8B-snpts8kRiIFi|xOrdISZ8~9TCSsqNL>=2S8zQS_B zSK1GO0s+4@m0laesCi{a>;R=$fS!!H7)EC-D^fC1{BD znlp1KMiMnZN)(KDCPfu=_3q-z@Y+ab7lAn{jg(>^T9`v&H zRR{AQaizZ|Tl`mFznycmyd=hZ`}q5JZufoK@H^k<96wh!bhB$82g5=FTsR`bmwj%u zGS5E{&o~%Ua+7{|=Y`8-*|T<1FJVwsY=&_@QGC$F*m<1AI&Fw6Ew+`cJGJG&vy1iK zpXC+pXigFIX7#ZGn5^h$Yrt1|e&Mi~)v%oUSHeO`I6tuGtp3&oll`>2?p~+x^UL>& zMz8+1YWTaGZ&y!pZ{Gd;lHlJ|vv4`gsj>-cliKg8{InXGTN~mb6n#E2wIQ1KJG~6s zhBG*~L4;~yq^Cw~x@HrePOIOpz;(NG(58pR z`l3+X6enWI&*HjW9{Hpp5DikndI;1HvCFV`dq1pKQlbk{BMu1e&VY?B6W;nwTsPN- z*JKCh8L50?oABsAp>+{|Qm<_)GSyrLFS^#~KMpNa`>(-=9hc&|@GHl;BgDZS%V4B+ z7j(Pxutu0GjVv*EIUm=hDyDrS%KkDQ>OJL2vpf-tH9^(<0estD3cq&7!pcKiph1p> zeqTPc9yYKAB}@k;9qxr?27Oa^+rbWxx8wLR><%#Iu|tKtTo*xurw3b!#|h_pQcd&* z2@Pw2-WV0|Ue8W+!`)sM=lX(PVH}65$e`F+)u8msn1%VR4BrE4L%@IW7(j$oRY5=2 zKsa0K|PYbOa`*pGXX F{sc^3FLeL_ diff --git a/COVID-19/Prediction/Prediction.sln b/COVID-19/Prediction/Prediction.sln index 67423c5..2298e0b 100644 --- a/COVID-19/Prediction/Prediction.sln +++ b/COVID-19/Prediction/Prediction.sln @@ -3,7 +3,9 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio Version 16 VisualStudioVersion = 16.0.30804.86 MinimumVisualStudioVersion = 10.0.40219.1 -Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "Prediction", "Prediction\Prediction.pyproj", "{FE4194E6-8A95-4B93-968D-ED4305D7F944}" +Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "Holt_Winters", "Prediction\Holt_Winters.pyproj", "{FE4194E6-8A95-4B93-968D-ED4305D7F944}" +EndProject +Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "Holt_Linear_Trend", "Prediction\Holt_Linear_Trend.pyproj", "{3F8A3B3F-C79E-412C-8EE4-DA8FF5DB31A6}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -13,6 +15,8 @@ Global GlobalSection(ProjectConfigurationPlatforms) = postSolution {FE4194E6-8A95-4B93-968D-ED4305D7F944}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {FE4194E6-8A95-4B93-968D-ED4305D7F944}.Release|Any CPU.ActiveCfg = Release|Any CPU + {3F8A3B3F-C79E-412C-8EE4-DA8FF5DB31A6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {3F8A3B3F-C79E-412C-8EE4-DA8FF5DB31A6}.Release|Any CPU.ActiveCfg = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/COVID-19/Prediction/Prediction/Holt_Linear_Trend.py b/COVID-19/Prediction/Prediction/Holt_Linear_Trend.py new file mode 100644 index 0000000..85826ae --- /dev/null +++ b/COVID-19/Prediction/Prediction/Holt_Linear_Trend.py @@ -0,0 +1,122 @@ +import pandas as pd +import datetime as DT +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.dates as mdates +import math +import statsmodels.api as sm +from sklearn.metrics import mean_squared_error + +#打开数据文件 +dataset = pd.read_csv('E:\dase intro\COVID-19Analysis\COVID-19\covid-19-all.csv') + +#数据预处理 +def parse_ymd(s): + year_s, mon_s, day_s = s.split('-') + return datetime.datetime(int(year_s), int(mon_s), int(day_s)).strftime("%Y-%m-%d") +dataset = dataset.fillna(0) +dataset['Date'] = pd.to_datetime(dataset['Date']) +dataset = dataset[['Country/Region','Confirmed','Recovered','Deaths','Date']].groupby(['Country/Region','Date']).sum().reset_index() + +#取出中、美的数据 +CN = dataset[dataset['Country/Region'] == 'China'] +CN.index = pd.Index(pd.date_range('2020-01-22','2020-12-09',freq = '1D')) +US = dataset[dataset['Country/Region'] == 'US'] +US.index = pd.Index(pd.date_range('2020-01-22','2020-12-09',freq = '1D')) + +#划分训练集、测试集 +trainCN = CN[CN['Date'] < '2020-11-01 '] +testCN = CN[CN['Date'] >= '2020-11-01'] + +trainUS = US[US['Date'] < '2020-11-01 '] +testUS = US[US['Date'] >= '2020-11-01'] + +#自回归移动平均模型(ARIMA) +yCNARIMA = testCN.copy() +yUSARIMA = testUS.copy() + +#训练模型 +fitCNconfirmed = sm.tsa.statespace.SARIMAX(trainCN.Confirmed).fit() +fitCNrecovered = sm.tsa.statespace.SARIMAX(trainCN['Recovered']).fit() +fitCNdeaths = sm.tsa.statespace.SARIMAX(trainCN['Deaths']).fit() + +fitUSconfirmed = sm.tsa.statespace.SARIMAX(trainUS.Confirmed,trend='ct').fit() +fitUSrecovered = sm.tsa.statespace.SARIMAX(trainUS['Recovered']).fit() +fitUSdeaths = sm.tsa.statespace.SARIMAX(trainUS['Deaths']).fit() + +#测试 +yCNARIMA['SARIMAconfirmed'] = fitCNconfirmed.predict(start="2020-11-01", end="2020-12-09", dynamic=True) +yCNARIMA['SARIMArecovered'] = fitCNrecovered.predict(start="2020-11-01", end="2020-12-09", dynamic=True) +yCNARIMA['SARIMAdeaths'] = fitCNdeaths.predict(start="2020-11-01", end="2020-12-09", dynamic=True) + +yUSARIMA['SARIMAconfirmed'] = fitUSconfirmed.predict(start="2020-11-01", end="2020-12-09") +yUSARIMA['SARIMArecovered'] = fitUSrecovered.predict(start="2020-11-01", end="2020-12-09", dynamic=True) +yUSARIMA['SARIMAdeaths'] = fitUSdeaths.predict(start="2020-11-01", end="2020-12-09", dynamic=True) + +#预测将来七天 +forecastCNARIMA = pd.DataFrame({'Date':['2020-12-10','2020-12-11','2020-12-12','2020-12-13','2020-12-14','2020-12-15','2020-12-16']}) +forecastUSARIMA = pd.DataFrame({'Date':['2020-12-10','2020-12-11','2020-12-12','2020-12-13','2020-12-14','2020-12-15','2020-12-16']}) + +forecastCNARIMA['Date'] = pd.to_datetime(forecastCNARIMA['Date'], format='%Y/%m/%d').values.astype('datetime64[h]') +forecastCNARIMA['confirmedPred'] = fitCNconfirmed.predict(start="2020-12-10", end="2020-12-16", dynamic=True) +forecastCNARIMA['recoveredPred'] = fitCNrecovered.predict(start="2020-12-10", end="2020-12-16", dynamic=True) +forecastCNARIMA['deathsPred'] = fitCNdeaths.predict(start="2020-12-10", end="2020-12-16", dynamic=True) + +forecastUSARIMA['Date'] = pd.to_datetime(forecastUSARIMA['Date'], format='%Y/%m/%d').values.astype('datetime64[h]') +forecastUSARIMA['confirmedPred'] = fitUSconfirmed.predict(start="2020-12-10", end="2020-12-16", dynamic=True) +forecastUSARIMA['recoveredPred'] = fitUSrecovered.predict(start="2020-12-10", end="2020-12-16", dynamic=True) +forecastUSARIMA['deathsPred'] = fitUSdeaths.predict(start="2020-12-10", end="2020-12-16", dynamic=True) + +#RMSE +rmseCNARIMACon = pow(mean_squared_error(np.asarray(testCN['Confirmed']), np.asarray(yCNARIMA['SARIMAconfirmed'])),0.05) +rmseCNARIMARec = pow(mean_squared_error(np.asarray(testCN['Recovered']), np.asarray(yCNARIMA['SARIMArecovered'])),0.05) +rmseCNARIMADea = pow(mean_squared_error(np.asarray(testCN['Deaths']), np.asarray(yCNARIMA['SARIMAdeaths'])),0.5) + +rmseUSARIMACon = pow(mean_squared_error(np.asarray(testUS['Confirmed']), np.asarray(yUSARIMA['SARIMAconfirmed'])),0.05) +rmseUSARIMARec = pow(mean_squared_error(np.asarray(testUS['Recovered']), np.asarray(yUSARIMA['SARIMArecovered'])),0.05) +rmseUSARIMADea = pow(mean_squared_error(np.asarray(testUS['Deaths']), np.asarray(yUSARIMA['SARIMAdeaths'])),0.05) + +#可视化 +fig = plt.figure() +axCNARIMA = fig.add_subplot(211) +axCNARIMA.set_title("ARIMA (CN)",verticalalignment="bottom",fontsize="13") + +CN.index = pd.Index(pd.date_range('2020-01-22','2020-12-09',freq = '1D')) +yCNARIMA.index = pd.Index(pd.date_range('2020-11-01','2020-12-09',freq = '1D')) +forecastCNARIMA.index = pd.Index(pd.date_range('2020-12-10','2020-12-16',freq = '1D')) + +axCNARIMA.plot(CN['Confirmed'],label="confirmed",linestyle=":") +axCNARIMA.plot(CN['Recovered'],label="recovered",linestyle=":") +axCNARIMA.plot(CN['Deaths'],label="deaths",linestyle=":") + +axCNARIMA.plot(yCNARIMA['SARIMAconfirmed'],label="confirmed test") +axCNARIMA.plot(yCNARIMA['SARIMArecovered'],label="recovered test") +axCNARIMA.plot(yCNARIMA['SARIMAdeaths'],label="deaths test") + +axCNARIMA.plot(forecastCNARIMA['confirmedPred'],label="confirmed prediction") +axCNARIMA.plot(forecastCNARIMA['recoveredPred'],label="recovered prediction") +axCNARIMA.plot(forecastCNARIMA['deathsPred'],label="deaths prediction") + +axUSARIMA = fig.add_subplot(212) +axUSARIMA.set_title("ARIMA (US)",verticalalignment="bottom",fontsize="13") + +US.index = pd.Index(pd.date_range('2020-01-22','2020-12-09',freq = '1D')) +yUSARIMA.index = pd.Index(pd.date_range('2020-11-01','2020-12-09',freq = '1D')) +forecastUSARIMA.index = pd.Index(pd.date_range('2020-12-10','2020-12-16',freq = '1D')) + +axUSARIMA.plot(US['Confirmed'],label="confirmed",linestyle=":") +axUSARIMA.plot(US['Recovered'],label="recovered",linestyle=":") +axUSARIMA.plot(US['Deaths'],label="deaths",linestyle=":") + +axUSARIMA.plot(yUSARIMA['SARIMAconfirmed'],label="confirmed test") +axUSARIMA.plot(yUSARIMA['SARIMArecovered'],label="recovered test") +axUSARIMA.plot(yUSARIMA['SARIMAdeaths'],label="deaths test") + +axUSARIMA.plot(forecastUSARIMA['confirmedPred'],label="confirmed prediction") +axUSARIMA.plot(forecastUSARIMA['recoveredPred'],label="recovered prediction") +axUSARIMA.plot(forecastUSARIMA['deathsPred'],label="deaths prediction") + +plt.tight_layout() +plt.gcf().autofmt_xdate() +plt.legend(labelspacing=0.05) +plt.show() \ No newline at end of file diff --git a/COVID-19/Prediction/Prediction/Holt_Linear_Trend.pyproj b/COVID-19/Prediction/Prediction/Holt_Linear_Trend.pyproj new file mode 100644 index 0000000..c05dbd0 --- /dev/null +++ b/COVID-19/Prediction/Prediction/Holt_Linear_Trend.pyproj @@ -0,0 +1,37 @@ + + + Debug + 2.0 + 3f8a3b3f-c79e-412c-8ee4-da8ff5db31a6 + + + + + + + . + . + Holt_Linear_Trend + ARIMA + + + true + false + + + true + false + + + + + + + + + + + + \ No newline at end of file diff --git a/COVID-19/Prediction/Prediction/Holt_Winters.py b/COVID-19/Prediction/Prediction/Holt_Winters.py new file mode 100644 index 0000000..404db3c --- /dev/null +++ b/COVID-19/Prediction/Prediction/Holt_Winters.py @@ -0,0 +1,117 @@ +import pandas as pd +import datetime as DT +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.dates as mdates +import math +from statsmodels.tsa.api import ExponentialSmoothing +from sklearn.metrics import mean_squared_error + +#打开数据文件 +dataset = pd.read_csv('E:\dase intro\COVID-19Analysis\COVID-19\covid-19-all.csv') + +#数据预处理 +def parse_ymd(s): + year_s, mon_s, day_s = s.split('-') + return datetime.datetime(int(year_s), int(mon_s), int(day_s)).strftime("%Y-%m-%d") +dataset = dataset.fillna(0) +dataset['Date'] = pd.to_datetime(dataset['Date']) +dataset = dataset[['Country/Region','Confirmed','Recovered','Deaths','Date']].groupby(['Country/Region','Date']).sum().reset_index() + +#取出中、美的数据 +CN = dataset[dataset['Country/Region'] == 'China'] +CN.index = pd.Index(pd.date_range('2020-01-22','2020-12-09',freq = '1D')) +US = dataset[dataset['Country/Region'] == 'US'] +US.index = pd.Index(pd.date_range('2020-01-22','2020-12-09',freq = '1D')) + +#划分训练集、测试集 +trainCN = CN[CN['Date'] < '2020-11-01 '] +testCN = CN[CN['Date'] >= '2020-11-01'] + +trainUS = US[US['Date'] < '2020-11-01 '] +testUS = US[US['Date'] >= '2020-11-01'] + +#简单指数法 +yCNexp = testCN.copy() +yUSexp = testUS.copy() +#训练模型 +confirmedCNexp = ExponentialSmoothing(np.asarray(trainCN['Confirmed']), trend='add', seasonal=None).fit() +recoveredCNexp = ExponentialSmoothing(np.asarray(trainCN['Recovered']), trend='add', seasonal=None).fit() +deathsCNexp = ExponentialSmoothing(np.asarray(trainCN['Deaths']), trend='add', seasonal=None).fit() + +confirmedUSexp = ExponentialSmoothing(np.asarray(trainUS['Confirmed']), trend='add', seasonal=None).fit() +recoveredUSexp = ExponentialSmoothing(np.asarray(trainUS['Recovered']), trend='add', seasonal=None).fit() +deathsUSexp = ExponentialSmoothing(np.asarray(trainUS['Deaths']), trend='add', seasonal=None).fit() +#测试 +yCNexp['confirmedTest'] = confirmedCNexp.forecast(len(testCN)) +yCNexp['recoveredTest'] = recoveredCNexp.forecast(len(testCN)) +yCNexp['deathsTest'] = deathsCNexp.forecast(len(testCN)) + +yUSexp['confirmedTest'] = confirmedUSexp.forecast(len(testUS)) +yUSexp['recoveredTest'] = recoveredUSexp.forecast(len(testUS)) +yUSexp['deathsTest'] = deathsUSexp.forecast(len(testUS)) +#预测将来七天 +forecastCNexp = pd.DataFrame({'Date':['2020-12-10','2020-12-11','2020-12-12','2020-12-13','2020-12-14','2020-12-15','2020-12-16']}) +forecastUSexp = pd.DataFrame({'Date':['2020-12-10','2020-12-11','2020-12-12','2020-12-13','2020-12-14','2020-12-15','2020-12-16']}) + +forecastCNexp['Date'] = pd.to_datetime(forecastCNexp['Date'], format='%Y/%m/%d').values.astype('datetime64[h]') +forecastCNexp['confirmedPred'] = confirmedCNexp.forecast(len(forecastCNexp)) +forecastCNexp['recoveredPred'] = recoveredCNexp.forecast(len(forecastCNexp)) +forecastCNexp['deathsPred'] = deathsCNexp.forecast(len(forecastCNexp)) + +forecastUSexp['Date'] = pd.to_datetime(forecastUSexp['Date'], format='%Y/%m/%d').values.astype('datetime64[h]') +forecastUSexp['confirmedPred'] = confirmedUSexp.forecast(len(forecastUSexp)) +forecastUSexp['recoveredPred'] = recoveredUSexp.forecast(len(forecastUSexp)) +forecastUSexp['deathsPred'] = deathsUSexp.forecast(len(forecastUSexp)) +#RMSE +rmseCNexpCon = pow(mean_squared_error(np.asarray(testCN['Confirmed']), np.asarray(yCNexp['confirmedTest'])),0.05) +rmseCNexpRec = pow(mean_squared_error(np.asarray(testCN['Recovered']), np.asarray(yCNexp['recoveredTest'])),0.05) +rmseCNexpDea = pow(mean_squared_error(np.asarray(testCN['Deaths']), np.asarray(yCNexp['deathsTest'])),0.5) + +rmseUSexpCon = pow(mean_squared_error(np.asarray(testUS['Confirmed']), np.asarray(yUSexp['confirmedTest'])),0.05) +rmseUSexpRec = pow(mean_squared_error(np.asarray(testUS['Recovered']), np.asarray(yUSexp['recoveredTest'])),0.05) +rmseUSexpDea = pow(mean_squared_error(np.asarray(testUS['Deaths']), np.asarray(yUSexp['deathsTest'])),0.05) +#可视化 +fig = plt.figure() +axCNexp = fig.add_subplot(211) +axCNexp.set_title("Holt-Winters (CN)",verticalalignment="bottom",fontsize="13") + +CN.index = pd.Index(pd.date_range('2020-01-22','2020-12-09',freq = '1D')) +yCNexp.index = pd.Index(pd.date_range('2020-11-01','2020-12-09',freq = '1D')) +forecastCNexp.index = pd.Index(pd.date_range('2020-12-10','2020-12-16',freq = '1D')) + +axCNexp.plot(CN['Confirmed'],label="confirmed",linestyle=":") +axCNexp.plot(CN['Recovered'],label="recovered",linestyle=":") +axCNexp.plot(CN['Deaths'],label="deaths",linestyle=":") + +axCNexp.plot(yCNexp['confirmedTest'],label="confirmed test") +axCNexp.plot(yCNexp['recoveredTest'],label="recovered test") +axCNexp.plot(yCNexp['deathsTest'],label="deaths test") + +axCNexp.plot(forecastCNexp['confirmedPred'],label="confirmed prediction") +axCNexp.plot(forecastCNexp['recoveredPred'],label="recovered prediction") +axCNexp.plot(forecastCNexp['deathsPred'],label="deaths prediction") + +axUSexp = fig.add_subplot(212) +axUSexp.set_title("Holt-Winters (US)",verticalalignment="bottom",fontsize="13") + +US.index = pd.Index(pd.date_range('2020-01-22','2020-12-09',freq = '1D')) +yUSexp.index = pd.Index(pd.date_range('2020-11-01','2020-12-09',freq = '1D')) +forecastUSexp.index = pd.Index(pd.date_range('2020-12-10','2020-12-16',freq = '1D')) + +axUSexp.plot(US['Confirmed'],label="confirmed",linestyle=":") +axUSexp.plot(US['Recovered'],label="recovered",linestyle=":") +axUSexp.plot(US['Deaths'],label="deaths",linestyle=":") + +axUSexp.plot(yUSexp['confirmedTest'],label="confirmed test") +axUSexp.plot(yUSexp['recoveredTest'],label="recovered test") +axUSexp.plot(yUSexp['deathsTest'],label="deaths test") + +axUSexp.plot(forecastUSexp['confirmedPred'],label="confirmed prediction") +axUSexp.plot(forecastUSexp['recoveredPred'],label="recovered prediction") +axUSexp.plot(forecastUSexp['deathsPred'],label="deaths prediction") + +plt.tight_layout() +plt.gcf().autofmt_xdate() +plt.legend(labelspacing=0.05) +plt.show() \ No newline at end of file diff --git a/COVID-19/Prediction/Prediction/Holt_Winters.pyproj b/COVID-19/Prediction/Prediction/Holt_Winters.pyproj new file mode 100644 index 0000000..85c7ea9 --- /dev/null +++ b/COVID-19/Prediction/Prediction/Holt_Winters.pyproj @@ -0,0 +1,36 @@ + + + Debug + 2.0 + fe4194e6-8a95-4b93-968d-ed4305d7f944 + + + Holt_Winters.py + + + . + . + Holt_Winters + Prediction + + + true + false + + + true + false + + + + + + + + + + + + \ No newline at end of file diff --git a/COVID-19/Prediction/Prediction/Prediction.py b/COVID-19/Prediction/Prediction/Prediction.py deleted file mode 100644 index b48c9fd..0000000 --- a/COVID-19/Prediction/Prediction/Prediction.py +++ /dev/null @@ -1,117 +0,0 @@ -import pandas as pd -import datetime as DT -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.dates as mdates -import math -from statsmodels.tsa.api import ExponentialSmoothing -from sklearn.metrics import mean_squared_error - -#打开数据文件 -dataset = pd.read_csv('E:\dase intro\COVID-19Analysis\COVID-19\covid-19-all.csv') - -#数据预处理 -def parse_ymd(s): - year_s, mon_s, day_s = s.split('-') - return datetime.datetime(int(year_s), int(mon_s), int(day_s)).strftime("%Y-%m-%d") -dataset = dataset.fillna(0) -dataset['Date'] = pd.to_datetime(dataset['Date']) -dataset = dataset[['Country/Region','Confirmed','Recovered','Deaths','Date']].groupby(['Country/Region','Date']).sum().reset_index() - -#取出中、美、俄的数据 -CN = dataset[dataset['Country/Region'] == 'China'] -CN.index = pd.Index(pd.date_range('2020-01-22','2020-12-09',freq = '1D')) -US = dataset[dataset['Country/Region'] == 'US'] -US.index = pd.Index(pd.date_range('2020-01-22','2020-12-09',freq = '1D')) - -#划分训练集、测试集 -trainCN = CN[CN['Date'] < '2020-11-01 '] -testCN = CN[CN['Date'] >= '2020-11-01'] - -trainUS = US[US['Date'] < '2020-11-01 '] -testUS = US[US['Date'] >= '2020-11-01'] - -#简单指数法 -yCNexp = testCN.copy() -yUSexp = testUS.copy() -#训练模型 -confirmedCNexp = ExponentialSmoothing(np.asarray(trainCN['Confirmed']), trend='add', seasonal=None).fit() -recoveredCNexp = ExponentialSmoothing(np.asarray(trainCN['Recovered']), trend='add', seasonal=None).fit() -deathsCNexp = ExponentialSmoothing(np.asarray(trainCN['Deaths']), trend='add', seasonal=None).fit() - -confirmedUSexp = ExponentialSmoothing(np.asarray(trainUS['Confirmed']), trend='add', seasonal=None).fit() -recoveredUSexp = ExponentialSmoothing(np.asarray(trainUS['Recovered']), trend='add', seasonal=None).fit() -deathsUSexp = ExponentialSmoothing(np.asarray(trainUS['Deaths']), trend='add', seasonal=None).fit() -#测试 -yCNexp['confirmedTest'] = confirmedCNexp.forecast(len(testCN)) -yCNexp['recoveredTest'] = recoveredCNexp.forecast(len(testCN)) -yCNexp['deathsTest'] = deathsCNexp.forecast(len(testCN)) - -yUSexp['confirmedTest'] = confirmedUSexp.forecast(len(testUS)) -yUSexp['recoveredTest'] = recoveredUSexp.forecast(len(testUS)) -yUSexp['deathsTest'] = deathsUSexp.forecast(len(testUS)) -#预测将来七天 -forecastCNexp = pd.DataFrame({'Date':['2020-12-10','2020-12-11','2020-12-12','2020-12-13','2020-12-14','2020-12-15','2020-12-16']}) -forecastUSexp = pd.DataFrame({'Date':['2020-12-10','2020-12-11','2020-12-12','2020-12-13','2020-12-14','2020-12-15','2020-12-16']}) - -forecastCNexp['Date'] = pd.to_datetime(forecastCNexp['Date'], format='%Y/%m/%d').values.astype('datetime64[h]') -forecastCNexp['confirmedPred'] = confirmedCNexp.forecast(len(forecastCNexp)) -forecastCNexp['recoveredPred'] = recoveredCNexp.forecast(len(forecastCNexp)) -forecastCNexp['deathsPred'] = deathsCNexp.forecast(len(forecastCNexp)) - -forecastUSexp['Date'] = pd.to_datetime(forecastUSexp['Date'], format='%Y/%m/%d').values.astype('datetime64[h]') -forecastUSexp['confirmedPred'] = confirmedUSexp.forecast(len(forecastUSexp)) -forecastUSexp['recoveredPred'] = recoveredUSexp.forecast(len(forecastUSexp)) -forecastUSexp['deathsPred'] = deathsUSexp.forecast(len(forecastUSexp)) -#RMSE -rmseCNexpCon = pow(mean_squared_error(np.asarray(testCN['Confirmed']), np.asarray(yCNexp['confirmedTest'])),0.05) -rmseCNexpRec = pow(mean_squared_error(np.asarray(testCN['Recovered']), np.asarray(yCNexp['recoveredTest'])),0.05) -rmseCNexpDea = pow(mean_squared_error(np.asarray(testCN['Deaths']), np.asarray(yCNexp['deathsTest'])),0.5) - -rmseUSexpCon = pow(mean_squared_error(np.asarray(testUS['Confirmed']), np.asarray(yUSexp['confirmedTest'])),0.05) -rmseUSexpRec = pow(mean_squared_error(np.asarray(testUS['Recovered']), np.asarray(yUSexp['recoveredTest'])),0.05) -rmseUSexpDea = pow(mean_squared_error(np.asarray(testUS['Deaths']), np.asarray(yUSexp['deathsTest'])),0.05) -#可视化 -fig = plt.figure() -axCNexp = fig.add_subplot(211) -axCNexp.set_title("Holt-Winters (CN)",verticalalignment="bottom",fontsize="13") - -CN.index = pd.Index(pd.date_range('2020-01-22','2020-12-09',freq = '1D')) -yCNexp.index = pd.Index(pd.date_range('2020-11-01','2020-12-09',freq = '1D')) -forecastCNexp.index = pd.Index(pd.date_range('2020-12-10','2020-12-16',freq = '1D')) - -axCNexp.plot(CN['Confirmed'],label="confirmed",linestyle=":") -axCNexp.plot(CN['Recovered'],label="recovered",linestyle=":") -axCNexp.plot(CN['Deaths'],label="deaths",linestyle=":") - -axCNexp.plot(yCNexp['confirmedTest'],label="confirmed test") -axCNexp.plot(yCNexp['recoveredTest'],label="recovered test") -axCNexp.plot(yCNexp['deathsTest'],label="deaths test") - -axCNexp.plot(forecastCNexp['confirmedPred'],label="confirmed prediction") -axCNexp.plot(forecastCNexp['recoveredPred'],label="recovered prediction") -axCNexp.plot(forecastCNexp['deathsPred'],label="deaths prediction") - -axUSexp = fig.add_subplot(212) -axUSexp.set_title("Holt-Winters (US)",verticalalignment="bottom",fontsize="13") - -US.index = pd.Index(pd.date_range('2020-01-22','2020-12-09',freq = '1D')) -yUSexp.index = pd.Index(pd.date_range('2020-11-01','2020-12-09',freq = '1D')) -forecastUSexp.index = pd.Index(pd.date_range('2020-12-10','2020-12-16',freq = '1D')) - -axUSexp.plot(US['Confirmed'],label="confirmed",linestyle=":") -axUSexp.plot(US['Recovered'],label="recovered",linestyle=":") -axUSexp.plot(US['Deaths'],label="deaths",linestyle=":") - -axUSexp.plot(yUSexp['confirmedTest'],label="confirmed test") -axUSexp.plot(yUSexp['recoveredTest'],label="recovered test") -axUSexp.plot(yUSexp['deathsTest'],label="deaths test") - -axUSexp.plot(forecastUSexp['confirmedPred'],label="confirmed prediction") -axUSexp.plot(forecastUSexp['recoveredPred'],label="recovered prediction") -axUSexp.plot(forecastUSexp['deathsPred'],label="deaths prediction") - -plt.tight_layout() -plt.gcf().autofmt_xdate() -plt.legend(labelspacing=0.05) -plt.show() \ No newline at end of file diff --git a/COVID-19/Prediction/Prediction/Prediction.pyproj b/COVID-19/Prediction/Prediction/Prediction.pyproj deleted file mode 100644 index 6e425c5..0000000 --- a/COVID-19/Prediction/Prediction/Prediction.pyproj +++ /dev/null @@ -1,35 +0,0 @@ - - - Debug - 2.0 - fe4194e6-8a95-4b93-968d-ed4305d7f944 - . - Prediction.py - - - . - . - Prediction - Prediction - - - true - false - - - true - false - - - - - - - - - - - - \ No newline at end of file