|
|
@ -1 +1,64 @@ |
|
|
|
import pandas as pd |
|
|
|
import datetime as DT |
|
|
|
import numpy as np |
|
|
|
import matplotlib.pyplot as plt |
|
|
|
import matplotlib.dates as mdates |
|
|
|
import math |
|
|
|
from statsmodels.tsa.api import SimpleExpSmoothing |
|
|
|
from sklearn.metrics import mean_squared_error |
|
|
|
|
|
|
|
# Load the raw COVID-19 case data.
# Raw string: the Windows path contains backslashes that are not valid escape
# sequences (\d, \C, \c). The resulting path text is unchanged.
dataset = pd.read_csv(r'E:\dase intro\COVID-19Analysis\COVID-19\covid-19-all.csv')
|
|
|
|
|
|
|
# Data preprocessing
|
|
|
def parse_ymd(s: str) -> str:
    """Normalize a 'year-month-day' string to zero-padded 'YYYY-MM-DD'.

    Args:
        s: Date text made of exactly three '-'-separated integer fields.

    Returns:
        The same calendar date formatted with ``%Y-%m-%d``.

    Raises:
        ValueError: If ``s`` does not split into three integer fields, or the
            fields do not form a valid calendar date.
    """
    year_s, mon_s, day_s = s.split('-')
    # The datetime module is imported as ``DT`` in this file, so the bare name
    # ``datetime`` is not defined here.
    return DT.datetime(int(year_s), int(mon_s), int(day_s)).strftime("%Y-%m-%d")
|
|
|
# Replace missing values with 0 and parse the 'Date' column into timestamps.
dataset = dataset.fillna(0).assign(Date=lambda frame: pd.to_datetime(frame['Date']))

# Sum the three count columns over all rows that share a country and a date,
# then turn the group keys back into ordinary columns.
dataset = (
    dataset[['Country/Region', 'Confirmed', 'Recovered', 'Deaths', 'Date']]
    .groupby(['Country/Region', 'Date'])
    .sum()
    .reset_index()
)
|
|
|
|
|
|
|
# Extract the rows for China, the US and Russia.
# reset_index(drop=True) renumbers the rows from 0 and discards the old index
# directly. This avoids the reset_index() + drop('index', 1) pair, whose
# positional axis argument is rejected by pandas 2.0 and later.
CN = dataset[dataset['Country/Region'] == 'China'].reset_index(drop=True)
US = dataset[dataset['Country/Region'] == 'US'].reset_index(drop=True)
RUS = dataset[dataset['Country/Region'] == 'Russia'].reset_index(drop=True)
|
|
|
|
|
|
|
# China

# Split into training and test sets.
# One shared cutoff keeps the two masks complementary: rows before the cutoff
# are used for fitting, rows on or after it for evaluation.
splitDateCN = pd.Timestamp('2020-11-01')
trainCN = CN[CN['Date'] < splitDateCN]
testCN = CN[CN['Date'] >= splitDateCN]
|
|
|
|
|
|
|
# Simple exponential smoothing
# Fit one model per series on the training rows, with the smoothing level
# fixed at 0.4 (optimized=False is passed to fit()).
confirmedCNexp = SimpleExpSmoothing(trainCN['Confirmed'].to_numpy()).fit(
    optimized=False, smoothing_level=0.4
)
recoveredCNexp = SimpleExpSmoothing(trainCN['Recovered'].to_numpy()).fit(
    optimized=False, smoothing_level=0.4
)
deathsCNexp = SimpleExpSmoothing(trainCN['Deaths'].to_numpy()).fit(
    optimized=False, smoothing_level=0.4
)

# Forecast one value per test row and store the predictions next to the
# actual values in a copy of the test frame.
yCNexp = testCN.copy()
yCNexp['confirmedTest'] = confirmedCNexp.forecast(len(yCNexp))
yCNexp['recoveredTest'] = recoveredCNexp.forecast(len(yCNexp))
yCNexp['deathsTest'] = deathsCNexp.forecast(len(yCNexp))
|
|
|
|
|
|
|
# RMSE: square root of the mean squared error between the actual test values
# and the forecasts. np.sqrt is used for all three metrics so each one is a
# true root (exponent 0.5) and they are directly comparable.
rmseCNexpCon = np.sqrt(mean_squared_error(np.asarray(testCN['Confirmed']), np.asarray(yCNexp['confirmedTest'])))
rmseCNexpRec = np.sqrt(mean_squared_error(np.asarray(testCN['Recovered']), np.asarray(yCNexp['recoveredTest'])))
rmseCNexpDea = np.sqrt(mean_squared_error(np.asarray(testCN['Deaths']), np.asarray(yCNexp['deathsTest'])))
|
|
|
|
|
|
|
# Visualization
figCN = plt.figure()
axCNexp = figCN.add_subplot(311)
axCNexp.set_title("Simple Exponential Smoothing(CN)", verticalalignment="bottom", fontsize="13")

# Index both frames by their own 'Date' values so the x-axis follows the data
# that was actually loaded. A hard-coded calendar range raises a length
# mismatch whenever the CSV covers a different set of days.
# .to_numpy() keeps the new index unnamed, so it cannot clash with the
# 'Date' column.
CN.index = pd.DatetimeIndex(CN['Date'].to_numpy())
yCNexp.index = pd.DatetimeIndex(yCNexp['Date'].to_numpy())

# Observed series (dotted) over the full period.
axCNexp.plot(CN['Confirmed'], label="confirmed", linestyle=":")
axCNexp.plot(CN['Recovered'], label="recovered", linestyle=":")
axCNexp.plot(CN['Deaths'], label="deaths", linestyle=":")

# Forecast series (solid) over the test period.
axCNexp.plot(yCNexp['confirmedTest'], label="exp confirmed")
axCNexp.plot(yCNexp['recoveredTest'], label="exp recovered")
axCNexp.plot(yCNexp['deathsTest'], label="exp deaths")

plt.tight_layout()
plt.gcf().autofmt_xdate()
plt.legend()
plt.show()