import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import math
from statsmodels.tsa.api import ExponentialSmoothing
from sklearn.metrics import mean_squared_error

# Series that are modelled for every country.
METRICS = ('Confirmed', 'Recovered', 'Deaths')
# Rows dated before SPLIT_DATE form the training set; the rest form the test set.
SPLIT_DATE = '2020-11-01'
# Number of days to predict beyond the last observed date.
FORECAST_DAYS = 7


def country_frame(data, country):
    """Return the rows of *data* for *country*, indexed by their 'Date' values.

    The index is taken from the data itself instead of a hard-coded date
    range, so the script keeps working when the data file covers a different
    number of days.

    Raises:
        ValueError: if *data* has no rows for *country*.
    """
    frame = data[data['Country/Region'] == country].copy()
    if frame.empty:
        raise ValueError(f"no rows found for country {country!r}")
    # to_numpy() drops the column name so the index does not clash with the
    # 'Date' column that is kept in the frame.
    frame.index = pd.DatetimeIndex(frame['Date'].to_numpy())
    return frame


def fit_trend_model(series):
    """Fit exponential smoothing with an additive trend and no seasonality."""
    return ExponentialSmoothing(np.asarray(series), trend='add', seasonal=None).fit()


def rmse(actual, predicted):
    """Return the root-mean-square error between *actual* and *predicted*."""
    return math.sqrt(mean_squared_error(np.asarray(actual), np.asarray(predicted)))


def evaluate_country(frame):
    """Fit one model per metric on the training rows and forecast from it.

    Returns:
        A tuple ``(fitted, future, errors)``:
        * ``fitted`` - copy of the test rows with '<metric>Test' columns
          holding the forecasts for the test period;
        * ``future`` - frame indexed by the FORECAST_DAYS dates that follow
          the last observed date, with '<metric>Pred' columns;
        * ``errors`` - dict mapping each metric to its RMSE on the test rows.
    """
    train = frame[frame['Date'] < SPLIT_DATE]
    test = frame[frame['Date'] >= SPLIT_DATE]
    n_test = len(test)

    fitted = test.copy()
    first_future_day = frame['Date'].max() + pd.Timedelta(days=1)
    future_dates = pd.date_range(first_future_day, periods=FORECAST_DAYS, freq='D')
    future = pd.DataFrame({'Date': future_dates}, index=future_dates)
    errors = {}

    for metric in METRICS:
        model = fit_trend_model(train[metric])
        # forecast() always starts right after the training sample, so the
        # days beyond the data are the tail of one long forecast that first
        # spans the whole test period. Calling forecast(FORECAST_DAYS) alone
        # would return the first test days instead.
        path = model.forecast(n_test + FORECAST_DAYS)
        column = metric.lower()
        fitted[column + 'Test'] = path[:n_test]
        future[column + 'Pred'] = path[n_test:]
        errors[metric] = rmse(test[metric], path[:n_test])

    return fitted, future, errors


def plot_country(ax, title, frame, fitted, future):
    """Draw observed, test-period and future curves for one country on *ax*."""
    ax.set_title(title, verticalalignment="bottom", fontsize="13")
    for metric in METRICS:
        ax.plot(frame[metric], label=metric.lower(), linestyle=":")
    for metric in METRICS:
        ax.plot(fitted[metric.lower() + 'Test'], label=metric.lower() + " test")
    for metric in METRICS:
        ax.plot(future[metric.lower() + 'Pred'], label=metric.lower() + " prediction")
    # Each subplot gets its own legend; plt.legend() would only reach the
    # axes created last.
    ax.legend(labelspacing=0.05)


# Open the data file.
# NOTE(review): the statements that read the data file into `dataset`, and the
# import that provides `pd`, are not part of this excerpt - confirm they still
# precede the lines below.
dataset['Date'] = pd.to_datetime(dataset['Date'])
dataset = dataset[['Country/Region','Confirmed','Recovered','Deaths','Date']].groupby(['Country/Region','Date']).sum().reset_index()

# Extract the series for China and the US.
CN = country_frame(dataset, 'China')
US = country_frame(dataset, 'US')

# Train on the rows before SPLIT_DATE, test on the rest, then predict the
# following FORECAST_DAYS days and compute the RMSE of each metric.
yCNexp, forecastCNexp, rmseCNexp = evaluate_country(CN)
yUSexp, forecastUSexp, rmseUSexp = evaluate_country(US)

# Visualisation: one subplot per country.
fig = plt.figure()
axCNexp = fig.add_subplot(211)
plot_country(axCNexp, "Holt-Winters (CN)", CN, yCNexp, forecastCNexp)
axUSexp = fig.add_subplot(212)
plot_country(axUSexp, "Holt-Winters (US)", US, yUSexp, forecastUSexp)

plt.tight_layout()
plt.gcf().autofmt_xdate()
plt.show()