|
import pandas as pd
|
|
import matplotlib.pyplot as plt
|
|
import matplotlib.dates as mdates
|
|
import numpy as np
|
|
import datetime
|
|
import math
|
|
from scipy import interpolate
|
|
from pyecharts.charts import Map
|
|
from pyecharts import options
|
|
|
|
# Use the plain ASCII hyphen for minus signs on axes instead of the Unicode
# minus character (matplotlib's `axes.unicode_minus` setting).
plt.rcParams['axes.unicode_minus'] = False

# Open the data file.
# The path is a raw string: it contains backslashes (`\D`, `\C`, `\c`) that are
# not valid escape sequences, which Python reports as a warning in a normal
# string literal. The runtime value of the path is unchanged.
dataset = pd.read_csv(r'E:\DaseIntro\COVID-19Analysis\COVID-19\covid-19-all.csv')
|
|
|
|
# Data preprocessing
|
|
def parse_ymd(s):
    """Normalise a 'year-month-day' string to zero-padded 'YYYY-MM-DD'.

    The string is split on '-' into exactly three integer fields, which are
    validated by building a ``datetime.datetime`` from them.

    Raises:
        ValueError: if *s* does not have exactly three '-'-separated fields,
            a field is not an integer, or the fields are not a valid date.
    """
    year, month, day = (int(field) for field in s.split('-'))
    parsed = datetime.datetime(year, month, day)
    return parsed.strftime("%Y-%m-%d")
|
|
|
|
# Replace every missing value with 0, then parse the 'Date' column into
# pandas datetimes so that the `.dt` accessor can be used on it later.
dataset = dataset.fillna(0).assign(
    Date=lambda frame: pd.to_datetime(frame['Date'])
)

# Slices used for the China / US visualisations: the rows of each country,
# renumbered from 0.
China = dataset.loc[dataset['Country/Region'].eq('China')].reset_index(drop=True)
US = dataset.loc[dataset['Country/Region'].eq('US')].reset_index(drop=True)
|
|
|
|
# Recovery rate / confirmed / recovered totals for China and the US, per month.
# NOTE(review): rows are bucketed by calendar month only (the year is ignored)
# and the raw column values are summed as they are; if the CSV stores
# cumulative counts these sums are not "new cases per month" -- confirm
# against the data set.


def _split_by_month(frame):
    """Return twelve sub-frames of *frame*, one per calendar month (Jan..Dec).

    A row belongs to month ``m`` when ``frame['Date'].dt.month == m``.
    """
    month_of_row = frame['Date'].dt.month
    return [frame[month_of_row == month] for month in range(1, 13)]


def _monthly_totals(monthly_frames, column):
    """Return the sum of *column* for each frame in *monthly_frames*."""
    return [frame[column].sum() for frame in monthly_frames]


def _recovery_rate(recovered, confirmed):
    """Return recovered / confirmed, or NaN when *confirmed* is zero.

    A month without rows sums to 0; the guard makes that case an explicit NaN
    instead of relying on how the operand type handles division by zero.
    """
    if not confirmed:
        return float('nan')
    return recovered / confirmed


# The per-month frames keep their individual names for any code that refers
# to them directly.
(dfChinaJan, dfChinaFeb, dfChinaMar, dfChinaApr,
 dfChinaMay, dfChinaJun, dfChinaJul, dfChinaAug,
 dfChinaSept, dfChinaOct, dfChinaNov, dfChinaDec) = _split_by_month(China)

(dfUSJan, dfUSFeb, dfUSMar, dfUSApr,
 dfUSMay, dfUSJun, dfUSJul, dfUSAug,
 dfUSSept, dfUSOct, dfUSNov, dfUSDec) = _split_by_month(US)

_chinaMonths = [dfChinaJan, dfChinaFeb, dfChinaMar, dfChinaApr,
                dfChinaMay, dfChinaJun, dfChinaJul, dfChinaAug,
                dfChinaSept, dfChinaOct, dfChinaNov, dfChinaDec]
_usMonths = [dfUSJan, dfUSFeb, dfUSMar, dfUSApr,
             dfUSMay, dfUSJun, dfUSJul, dfUSAug,
             dfUSSept, dfUSOct, dfUSNov, dfUSDec]

# Twelve-element lists, index 0 = January ... index 11 = December.
confirmedChina = _monthly_totals(_chinaMonths, 'Confirmed')
recoveredChina = _monthly_totals(_chinaMonths, 'Recovered')
recoverPossChina = [
    _recovery_rate(recovered, confirmed)
    for recovered, confirmed in zip(recoveredChina, confirmedChina)
]

confirmedUS = _monthly_totals(_usMonths, 'Confirmed')
recoveredUS = _monthly_totals(_usMonths, 'Recovered')
recoverPossUS = [
    _recovery_rate(recovered, confirmed)
    for recovered, confirmed in zip(recoveredUS, confirmedUS)
]
|
|
|
|
# X axis: the twelve calendar months as zero-padded labels '01' .. '12'.
# The labels are built directly instead of going through
# pd.date_range(..., freq="m"), because that month-end frequency alias is
# deprecated in recent pandas releases; the resulting Index is the same.
date = pd.Index([f"{month:02d}" for month in range(1, 13)])
|
|
|
|
fig = plt.figure()


def _draw_monthly_panel(position, title, confirmed, recovered, rate):
    """Draw one country's monthly panel on ``fig`` and return its parts.

    The confirmed and recovered totals are drawn against the left y axis;
    the recovery rate is drawn as a dashed line against a twin right y axis.
    All three series use the module-level ``date`` labels as x values.

    Returns:
        (left axes, right axes, confirmed handles, recovered handles,
        rate handles) -- each "handles" value is the list returned by
        ``Axes.plot``.
    """
    countAxes = fig.add_subplot(position)
    countAxes.set_title(title, verticalalignment="bottom", fontsize="13")
    confirmedLine = countAxes.plot(date, confirmed, color="red", linewidth="1.7", label="confirmed")
    recoveredLine = countAxes.plot(date, recovered, color="lime", linewidth="1.7", label="recovered")
    countAxes.set_ylabel('recovered/confirmed')

    rateAxes = countAxes.twinx()
    rateAxes.set_ylabel('recovery possibility')
    rateLine = rateAxes.plot(date, rate, color="blue", linestyle="--", linewidth="1.0",
                             label="recovery possibility")
    return countAxes, rateAxes, confirmedLine, recoveredLine, rateLine


# China (top-left panel)
ax1, ax2, data1, data2, data3 = _draw_monthly_panel(
    221,
    "Recovered/Confirmed/Recovery Possibility(month) of China",
    confirmedChina, recoveredChina, recoverPossChina,
)
# Handles from both y axes are merged so a single legend lists all three lines.
datasChina = data1 + data2 + data3
labsChina = [line.get_label() for line in datasChina]
ax1.legend(datasChina, labsChina, loc="lower right")

# US (top-right panel)
# NOTE: data3 is rebound here and from now on refers to the US "confirmed"
# line; datasChina above was built before the rebinding.
ax3, ax4, data3, data4, data5 = _draw_monthly_panel(
    222,
    "Recovered/Confirmed/Recovery Possibility(month) of US",
    confirmedUS, recoveredUS, recoverPossUS,
)
datasUS = data3 + data4 + data5
labsUS = [line.get_label() for line in datasUS]
ax3.legend(datasUS, labsUS, loc="upper left")
|
|
|
|
def _sum_cases_by(frame, key_column):
    """Sum 'Confirmed', 'Recovered' and 'Deaths' of *frame* per *key_column*.

    Returns a frame with one row per distinct key and *key_column* as an
    ordinary column.
    """
    case_columns = frame[[key_column, 'Confirmed', 'Recovered', 'Deaths']]
    return case_columns.groupby([key_column]).sum().reset_index()


def _wide_case_table(totals, key_column):
    """Re-shape per-key totals into a table indexed by *key_column*.

    The totals are melted to long form and pivoted back, which leaves one row
    per key and the columns 'Confirmed', 'Deaths', 'Recovered' with no column
    level name. ``pivot_table`` aggregates with its default (mean); each
    (key, group_var) pair occurs once in *totals*, so the values are the
    totals themselves.
    """
    long_form = pd.melt(totals, id_vars=[key_column],
                        value_vars=['Confirmed', 'Deaths', 'Recovered'],
                        var_name='group_var', value_name='Cases')
    long_form = long_form.sort_values(by=[key_column, 'group_var']).reset_index(drop=True)
    wide = long_form.pivot_table(index=[key_column], columns='group_var')
    # Drop the outer 'Cases' column level and clear the 'group_var' level name.
    wide.columns = wide.columns.droplevel().rename(None)
    return wide


# Per-province data for China.
# (An earlier per-date melt of `China` was assigned to dataChina and then
# immediately overwritten; that unused step is not performed any more.)
dfChina = _sum_cases_by(China, 'Province/State')
dataChina = _wide_case_table(dfChina, 'Province/State')

# Per-country data for the whole data set.
dfWorld = _sum_cases_by(dataset, 'Country/Region')
dataWorld = _wide_case_table(dfWorld, 'Country/Region')
|
|
|
|
# Chinese provinces with the fewest confirmed cases (bottom 12).
# Sorted in place, so dataChina stays ordered by ascending 'Confirmed' for any
# code that reads it afterwards.
dataChina.sort_values('Confirmed', inplace=True)

# head() selects by position and simply returns fewer rows when the table has
# fewer than 12 provinces. Integer keys on the label-indexed Series
# (`series[i]`) are avoided because pandas deprecates treating them as
# positions.
_bottomProvinces = dataChina.head(12)
xData = _bottomProvinces.index.tolist()
yConfirmed = _bottomProvinces['Confirmed'].tolist()
yRecovered = _bottomProvinces['Recovered'].tolist()

ax5 = fig.add_subplot(212)
# The title reports the number of provinces actually plotted.
ax5.set_title(f"Total Recovered/Confirmed of China(the bottom {len(xData)})",
              verticalalignment="bottom", fontsize="13")

# Grouped bars: 'Confirmed' at 0, 1, 2, ... and 'Recovered' shifted right by
# one bar width.
barWidth = 0.25
r1 = np.arange(len(xData))
r2 = [x + barWidth for x in r1]
ax5.bar(r1, yConfirmed, color='#FF0088', width=barWidth, edgecolor='white', label='Confirmed')
ax5.bar(r2, yRecovered, color='#00BBFF', width=barWidth, edgecolor='white', label='Recovered')

# Province names as tick labels, placed at the 'Recovered' bar positions.
ax5.set_xticks([r + barWidth for r in range(len(yConfirmed))])
ax5.set_xticklabels(xData)
ax5.legend()

plt.show()
|
|
|
|
# COVID-19 map of China.
# English province name (as used in the data set index) -> Chinese name
# passed to the pyecharts 'china' map.
provinceNameMap = {'Tibet':'西藏', 'Qinghai':'青海', 'Macau':'澳门', 'Ningxia':'宁夏', 'Jilin':'吉林', 'Guizhou':'贵州',
                   'Gansu':'甘肃', 'Hainan':'海南', 'Shanxi':'山西', 'Yunnan':'云南', 'Liaoning':'辽宁', 'Tianjin':'天津',
                   'Inner Mongolia':'内蒙古', 'Guangxi':'广西', 'Shaanxi':'陕西', 'Hebei':'河北', 'Fujian':'福建', 'Xinjiang':'新疆',
                   'Chongqing':'重庆', 'Sichuan':'四川', 'Jiangsu':'江苏', 'Beijing':'北京', 'Shanghai':'上海', 'Shandong':'山东',
                   'Heilongjiang':'黑龙江', 'Jiangxi':'江西', 'Anhui':'安徽', 'Hunan':'湖南', 'Zhejiang':'浙江', 'Henan':'河南',
                   'Guangdong':'广东', 'Hong Kong':'香港', 'Hubei':'湖北'}

ChinaConfirmed = []
ChinaProvince = []
# Walk every row of dataChina (in its current order) rather than a fixed
# number of positions, so the loop neither overruns a shorter table nor cuts
# off a longer one. Rows whose name has no entry in provinceNameMap are
# skipped because there is no Chinese name to hand to the map.
# Series.items() yields (index label, value) pairs, avoiding integer keys on
# a label-indexed Series.
for provinceName, confirmedTotal in dataChina['Confirmed'].items():
    chineseName = provinceNameMap.get(provinceName)
    if chineseName is None:
        continue
    ChinaProvince.append(chineseName)
    ChinaConfirmed.append(confirmedTotal)

map_China = Map()
map_China.set_global_opts(title_opts=options.TitleOpts(title="中国(大陆)疫情图-总确诊人数"),
                          legend_opts=options.LegendOpts(is_show=False),
                          visualmap_opts=options.VisualMapOpts(max_=200000))
# Data is passed as [name, value] pairs.
map_China.add("", [list(z) for z in zip(ChinaProvince, ChinaConfirmed)], maptype='china')
map_China.render("China.html")
|
|
|
|
# COVID-19 map of the world.
worldConfirmed = []
worldName = []
# Series.items() yields (index label, value) pairs; this replaces integer keys
# on the label-indexed Series, which pandas deprecates as positional lookups.
for countryName, confirmedTotal in dataWorld['Confirmed'].items():
    worldConfirmed.append(confirmedTotal)
    # The data set labels the United States as 'US'; it is passed to the map
    # as 'United States'. Every other name is passed through unchanged.
    worldName.append('United States' if countryName == 'US' else countryName)

map_world = Map()
map_world.set_global_opts(title_opts=options.TitleOpts(title="世界疫情图-总确诊人数"),
                          legend_opts=options.LegendOpts(is_show=True),
                          visualmap_opts=options.VisualMapOpts(min_=10000,max_=80000000))
# Data is passed as [name, value] pairs.
map_world.add("", [list(z) for z in zip(worldName, worldConfirmed)], maptype='world')
map_world.render("world.html")
|
|
|
|
|