|
@ -0,0 +1,38 @@ |
|
|
|
|
|
import pandas as pd |
|
|
|
|
|
import numpy as np |
|
|
|
|
|
import matplotlib.pyplot as plt |
|
|
|
|
|
import statsmodels.api as sm |
|
|
|
|
|
from statsmodels.graphics.tsaplots import acf,pacf,plot_acf,plot_pacf |
|
|
|
|
|
from statsmodels.tsa.arima_model import ARMA |
|
|
|
|
|
from statsmodels.tsa.stattools import adfuller |
|
|
|
|
|
|
|
|
|
|
|
# Open the data file.
# The location is a machine-specific absolute Windows path. It is written as a
# raw string so the backslashes are taken literally: in a normal literal,
# sequences such as "\D" and "\C" are invalid escapes that only work by
# accident and trigger a warning on current Python versions.
dataset = pd.read_csv(r'E:\DaseIntro\COVID-19Analysis\COVID-19\covid-19-all.csv')
|
|
|
|
|
|
|
|
|
|
|
# Data preprocessing
|
|
|
|
|
def parse_ymd(s: str) -> str:
    """Normalize a ``year-month-day`` string to zero-padded ``YYYY-MM-DD``.

    Args:
        s: Date text made of exactly three integer fields separated by ``-``,
            for example ``"2020-1-5"``.

    Returns:
        The same calendar date formatted with ``"%Y-%m-%d"``.

    Raises:
        ValueError: If ``s`` does not split into exactly three fields, a field
            is not an integer, or the fields do not form a valid date.
    """
    # Imported here because the module's import block does not provide
    # ``datetime``; without it every call failed with NameError.
    import datetime

    year_s, mon_s, day_s = s.split('-')
    return datetime.datetime(int(year_s), int(mon_s), int(day_s)).strftime("%Y-%m-%d")
|
|
|
|
|
# Clean the raw table and collapse it to one row per (country, date):
#   1. replace every missing value with 0,
#   2. parse the 'Date' column into datetime values,
#   3. keep only the country, the three count columns and the date,
#   4. sum the counts within each (Country/Region, Date) group,
#   5. turn the group keys back into ordinary columns.
dataset = (
    dataset.fillna(0)
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    [['Country/Region', 'Confirmed', 'Recovered', 'Deaths', 'Date']]
    .groupby(['Country/Region', 'Date'])
    .sum()
    .reset_index()
)
|
|
|
|
|
|
|
|
|
|
|
# Extract the rows for China and for the US.
# Each subset is then given a fixed daily index running from 2020-01-22 to
# 2020-12-09; the assignment only succeeds when the subset has exactly one row
# per day of that window (pandas rejects an index of a different length).
CN = dataset.loc[dataset['Country/Region'].eq('China')]
CN.index = pd.Index(pd.date_range(start='2020-01-22', end='2020-12-09', freq='1D'))

US = dataset.loc[dataset['Country/Region'].eq('US')]
US.index = pd.Index(pd.date_range(start='2020-01-22', end='2020-12-09', freq='1D'))
|
|
|
|
|
|
|
|
|
|
|
# Examine the China series: Confirmed, then Recovered, then Deaths.
# Each column is replaced by its first difference (value minus the previous
# row's value), so the first row becomes NaN, and is drawn in its own figure.
# NOTE(review): presumably the CSV columns are running totals, which would make
# these differences daily new counts -- confirm against the data source.
CNconfirmedSeries = CN[['Confirmed']].diff()
CNconfirmedSeries.plot(figsize=(8, 6))

CNrecoveredSeries = CN[['Recovered']].diff()
CNrecoveredSeries.plot(figsize=(8, 6))

CNdeathsSeries = CN[['Deaths']].diff()
CNdeathsSeries.plot(figsize=(8, 6))

# Display all three figures at once.
plt.show()