The final project for DASE Introduction: an analysis of COVID-19
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

193 lines
9.5 KiB

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import datetime
import math
from scipy import interpolate
from pyecharts.charts import Map
from pyecharts import options
# Draw negative tick labels with the ASCII hyphen instead of the Unicode minus sign.
plt.rcParams['axes.unicode_minus'] = False
# Open the data file.
# The path is a raw string: it contains the backslash pairs \D, \C and \c,
# which are not valid escape sequences and make a normal string literal emit
# an invalid-escape warning. The resulting path text is unchanged.
dataset = pd.read_csv(r'E:\DaseIntro\COVID-19Analysis\COVID-19\covid-19-all.csv')
# Data preprocessing
def parse_ymd(s: str) -> str:
    """Normalize a 'year-month-day' string to the '%Y-%m-%d' format.

    Args:
        s: Date text with year, month and day separated by '-'
            (for example '2020-1-5').

    Returns:
        The same date formatted with ``strftime("%Y-%m-%d")``
        (for example '2020-01-05').

    Raises:
        ValueError: If ``s`` does not split into exactly three integer fields
            or the fields do not form a valid calendar date.
    """
    year_s, mon_s, day_s = s.split('-')
    return datetime.datetime(int(year_s), int(mon_s), int(day_s)).strftime("%Y-%m-%d")
# Preprocess: every missing cell becomes 0, then 'Date' is parsed to datetimes.
dataset = dataset.fillna(value=0)
dataset['Date'] = pd.to_datetime(dataset['Date'])
# Per-country subsets (with a fresh 0..n-1 index) for the China/US charts.
isChina = dataset['Country/Region'].eq('China')
isUS = dataset['Country/Region'].eq('US')
China = dataset.loc[isChina].reset_index(drop=True)
US = dataset.loc[isUS].reset_index(drop=True)
# Per-month totals of confirmed and recovered cases for China and the US,
# plus the recovered/confirmed ratio of each month.
def _split_by_month(frame):
    """Return twelve sub-frames of ``frame``, one per calendar month (Jan..Dec).

    Rows are selected on the month number of the 'Date' column only; the year
    is not considered, so the same month of different years lands in one frame.
    """
    monthOfRow = frame['Date'].dt.month
    return [frame[monthOfRow == month] for month in range(1, 13)]


def _column_sums(frames, column):
    """Return ``column`` summed over each frame in ``frames``, in order."""
    return [part[column].sum() for part in frames]


chinaByMonth = _split_by_month(China)
usByMonth = _split_by_month(US)

# Keep the individual per-month frames available under their original names.
(dfChinaJan, dfChinaFeb, dfChinaMar, dfChinaApr, dfChinaMay, dfChinaJun,
 dfChinaJul, dfChinaAug, dfChinaSept, dfChinaOct, dfChinaNov, dfChinaDec) = chinaByMonth
(dfUSJan, dfUSFeb, dfUSMar, dfUSApr, dfUSMay, dfUSJun,
 dfUSJul, dfUSAug, dfUSSept, dfUSOct, dfUSNov, dfUSDec) = usByMonth

# NOTE(review): these are sums of the daily 'Confirmed'/'Recovered' values in
# each month. If the CSV stores cumulative counts, the sums are not monthly
# case counts — confirm against the dataset.
confirmedChina = _column_sums(chinaByMonth, 'Confirmed')
recoveredChina = _column_sums(chinaByMonth, 'Recovered')
confirmedUS = _column_sums(usByMonth, 'Confirmed')
recoveredUS = _column_sums(usByMonth, 'Recovered')

# Recovered / confirmed per month.
# NOTE(review): a month with no confirmed cases divides by zero; what that
# yields depends on the scalar type returned by .sum() — confirm it is acceptable.
recoverPossChina = [recovered / confirmed
                    for recovered, confirmed in zip(recoveredChina, confirmedChina)]
recoverPossUS = [recovered / confirmed
                 for recovered, confirmed in zip(recoveredUS, confirmedUS)]
# x axis: the twelve months as zero-padded strings '01'..'12'.
# Built directly rather than through pd.date_range(..., freq="m").strftime('%m'):
# that month-end alias is deprecated in recent pandas releases, and the labels
# it produced are exactly these twelve strings.
date = pd.Index(['{:02d}'.format(month) for month in range(1, 13)])
fig = plt.figure()

# China (subplot 221): totals on the left axis, ratio on a twin right axis.
ax1 = fig.add_subplot(221)
ax1.set_title("Recovered/Confirmed/Recovery Possibility(month) of China",
              verticalalignment="bottom", fontsize=13)
data1 = ax1.plot(date, confirmedChina, color="red", linewidth=1.7, label="confirmed")
data2 = ax1.plot(date, recoveredChina, color="lime", linewidth=1.7, label="recovered")
ax1.set_ylabel('recovered/confirmed')
ax2 = ax1.twinx()
ax2.set_ylabel('recovery possibility')
data3 = ax2.plot(date, recoverPossChina, color="blue", linestyle="--",
                 linewidth=1.0, label="recovery possibility")
# Lines live on two different axes, so gather them by hand for a single legend.
datasChina = data1 + data2 + data3
labsChina = [line.get_label() for line in datasChina]
ax1.legend(datasChina, labsChina, loc="lower right")

# US (subplot 222): same layout as the China panel.
ax3 = fig.add_subplot(222)
ax3.set_title("Recovered/Confirmed/Recovery Possibility(month) of US",
              verticalalignment="bottom", fontsize=13)
# data3 is rebound here; the China legend above was already built from its old value.
data3 = ax3.plot(date, confirmedUS, color="red", linewidth=1.7, label="confirmed")
data4 = ax3.plot(date, recoveredUS, color="lime", linewidth=1.7, label="recovered")
ax3.set_ylabel('recovered/confirmed')
ax4 = ax3.twinx()
ax4.set_ylabel('recovery possibility')
data5 = ax4.plot(date, recoverPossUS, color="blue", linestyle="--",
                 linewidth=1.0, label="recovery possibility")
datasUS = data3 + data4 + data5
labsUS = [line.get_label() for line in datasUS]
ax3.legend(datasUS, labsUS, loc="upper left")
# Totals of confirmed / recovered / death counts per Chinese province and per country.
def _total_cases_by(frame, key):
    """Sum the case columns of ``frame`` per distinct value of column ``key``.

    Returns:
        A ``(totals, wide)`` pair. ``totals`` is the grouped sum with ``key``
        as an ordinary column. ``wide`` is the same data passed through a
        melt / pivot_table round trip: indexed by ``key`` with the plain
        columns 'Confirmed', 'Deaths' and 'Recovered'.
    """
    totals = (frame[[key, 'Confirmed', 'Recovered', 'Deaths']]
              .groupby([key]).sum().reset_index())
    longForm = pd.melt(totals, id_vars=[key],
                       value_vars=['Confirmed', 'Deaths', 'Recovered'],
                       var_name='group_var', value_name='Cases')
    longForm = longForm.sort_values(by=[key, 'group_var']).reset_index(drop=True)
    wide = longForm.pivot_table(index=[key], columns='group_var')
    # pivot_table leaves a two-level column index; keep only the
    # 'group_var' level and clear its name.
    wide.columns = wide.columns.droplevel().rename(None)
    return totals, wide


# Data for each Chinese province.
# (The earlier per-date melt of China/dataset was overwritten before any use
# and has been removed.)
dfChina, dataChina = _total_cases_by(China, 'Province/State')
# Data for each country.
dfWorld, dataWorld = _total_cases_by(dataset, 'Country/Region')
# Chinese provinces: grouped bar chart of the 12 lowest confirmed totals.
# Sorting is done in place, so dataChina stays in ascending 'Confirmed' order afterwards.
dataChina.sort_values('Confirmed', inplace=True)
# head() selects rows by position, avoiding integer keys on a label-indexed
# Series, and it copes with fewer than 12 rows.
bottomProvinces = dataChina.head(12)
xData = list(bottomProvinces.index)
yConfirmed = bottomProvinces['Confirmed'].tolist()
yRecovered = bottomProvinces['Recovered'].tolist()
ax5 = fig.add_subplot(212)
# Title corrected: twelve provinces are plotted, not ten.
ax5.set_title("Total Recovered/Confirmed of China(the bottom twelve)",
              verticalalignment="bottom", fontsize=13)
barWidth = 0.25
# r1: x positions of the 'Confirmed' bars; r2: the 'Recovered' bars one bar width to the right.
r1 = np.arange(len(xData))
r2 = [x + barWidth for x in r1]
ax5.bar(r1, yConfirmed, color='#FF0088', width=barWidth, edgecolor='white', label='Confirmed')
ax5.bar(r2, yRecovered, color='#00BBFF', width=barWidth, edgecolor='white', label='Recovered')
plt.xticks([r + barWidth for r in range(len(yConfirmed))], xData)
ax5.legend()
plt.show()
# Map of China: total confirmed cases per province, rendered to China.html.
# English province names as they appear in the data -> Chinese names used on the map.
provinceNameMap = {'Tibet':'西藏', 'Qinghai':'青海', 'Macau':'澳门', 'Ningxia':'宁夏', 'Jilin':'吉林', 'Guizhou':'贵州',
                   'Gansu':'甘肃', 'Hainan':'海南', 'Shanxi':'山西', 'Yunnan':'云南', 'Liaoning':'辽宁', 'Tianjin':'天津',
                   'Inner Mongolia':'内蒙古', 'Guangxi':'广西', 'Shaanxi':'陕西', 'Hebei':'河北', 'Fujian':'福建', 'Xinjiang':'新疆',
                   'Chongqing':'重庆', 'Sichuan':'四川', 'Jiangsu':'江苏', 'Beijing':'北京', 'Shanghai':'上海', 'Shandong':'山东',
                   'Heilongjiang':'黑龙江', 'Jiangxi':'江西', 'Anhui':'安徽', 'Hunan':'湖南', 'Zhejiang':'浙江', 'Henan':'河南',
                   'Guangdong':'广东', 'Hong Kong':'香港', 'Hubei':'湖北'}
ChinaConfirmed = []
ChinaProvince = []
# Walk every row instead of assuming exactly 33 of them, and pair each label
# with its value directly rather than indexing the Series by integer position.
for provinceName, confirmedTotal in dataChina['Confirmed'].items():
    chineseName = provinceNameMap.get(provinceName)
    if chineseName is None:
        # No translation for this row, so it cannot be placed on the map.
        continue
    ChinaProvince.append(chineseName)
    ChinaConfirmed.append(confirmedTotal)
map_China = Map()
map_China.set_global_opts(title_opts=options.TitleOpts(title="中国(大陆)疫情图-总确诊人数"),
                          legend_opts=options.LegendOpts(is_show=False),
                          visualmap_opts=options.VisualMapOpts(max_=200000))
map_China.add("", [list(pair) for pair in zip(ChinaProvince, ChinaConfirmed)], maptype='china')
map_China.render("China.html")
# World map: total confirmed cases per country, rendered to world.html.
# The data labels the United States as 'US'; it is renamed for the map
# (presumably the name the world map expects — confirm against pyecharts).
countryAliases = {'US': 'United States'}
worldConfirmed = []
worldName = []
# Iterate label/value pairs directly instead of indexing the Series by
# integer position, which is deprecated for a label-indexed Series.
for countryName, confirmedTotal in dataWorld['Confirmed'].items():
    worldConfirmed.append(confirmedTotal)
    worldName.append(countryAliases.get(countryName, countryName))
map_world = Map()
map_world.set_global_opts(title_opts=options.TitleOpts(title="世界疫情图-总确诊人数"),
                          legend_opts=options.LegendOpts(is_show=True),
                          visualmap_opts=options.VisualMapOpts(min_=10000, max_=80000000))
map_world.add("", [list(pair) for pair in zip(worldName, worldConfirmed)], maptype='world')
map_world.render("world.html")