"Corona visualizations"

"Awesome summary"

  • toc: true
  • branch: master
  • badges: true
  • comments: true
  • categories: [fastpages, jupyter]
  • image: images/some_folder/your_image.png
  • hide: false
  • search_exclude: true
  • metadata_key1: metadata_value1
  • metadata_key2: metadata_value2

In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import altair as alt
%config InlineBackend.figure_format = 'retina'

chart_width = 550
chart_height= 400

# source: https://colab.research.google.com/github/github/covid19-dashboard/blob/master/_notebooks/2020-03-19-cases-and-deaths-per-million.ipynb

In [4]:
#hide 
data = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv", error_bad_lines=False)
data = data.drop(columns=["Lat", "Long"])
data = data.melt(id_vars= ["Province/State", "Country/Region"])
data = pd.DataFrame(data.groupby(['Country/Region', "variable"]).sum())
data.reset_index(inplace=True)  
data = data.rename(columns={"Country/Region": "location", "variable": "date", "value": "total_cases"})
data['date'] =pd.to_datetime(data.date)
data = data.sort_values(by = "date")
data.loc[data.location == "US","location"] = "United States"
data.loc[data.location == "Korea, South","location"] = "South Korea"

data_pwt = pd.read_stata("https://www.rug.nl/ggdc/docs/pwt91.dta")

filter1 = data_pwt["year"] == 2017
data_pop = data_pwt[filter1]
data_pop = data_pop[["country","pop"]]
data_pop.loc[data_pop.country == "Republic of Korea","country"] = "South Korea"
data_pop.loc[data_pop.country == "Iran (Islamic Republic of)","country"] = "Iran"

# per habitant
data_pc = data.copy()
countries = ["China", "Italy", "Spain", "France", "United Kingdom", "Germany", 
             "Portugal", "United States", "Singapore","South Korea", "Japan", 
             "Brazil","Iran"]
data_countries = []
data_countries_pc = []

# compute per habitant
for i in countries:
    data_pc.loc[data_pc.location == i,"total_cases"] = data_pc.loc[data_pc.location == i,"total_cases"]/float(data_pop.loc[data_pop.country == i, "pop"])

    # get each country time series
filter1 = data_pc["total_cases"] > 1
for i in countries:
    filter_country = data_pc["location"]== i
    data_countries_pc.append(data_pc[filter_country & filter1])

In [5]:
#hide_input
# Stack data to get it to Altair dataframe format
data_countries_pc2 = data_countries_pc.copy()
for i in range(0,len(countries)):
    data_countries_pc2[i] = data_countries_pc2[i].reset_index()
    data_countries_pc2[i]['n_days'] = data_countries_pc2[i].index
    data_countries_pc2[i]['log_cases'] = np.log(data_countries_pc2[i]["total_cases"])
data_plot = data_countries_pc2[0]
for i in range(1, len(countries)):    
    data_plot = pd.concat([data_plot, data_countries_pc2[i]], axis=0)
data_plot["trend_2days"] = data_plot["n_days"]*1/2
data_plot["trend_4days"] = data_plot["n_days"]*1/4
data_plot["trend_12days"] = data_plot["n_days"]*1/12
data_plot["trend_2days_label"] = "Doubles every 2 days"
data_plot["trend_4days_label"] = "Doubles evey 4 days"
data_plot["trend_12days_label"] = "Doubles every 12 days"


# Plot it using Altair
source = data_plot

scales = alt.selection_interval(bind='scales')
selection = alt.selection_multi(fields=['location'], bind='legend')

base = alt.Chart(source, title = "COVID-19 Deaths Per Million of Inhabitants").encode(
    x = alt.X('n_days:Q', title = "Days passed since reaching 1 death per million"),
    y = alt.Y("log_cases:Q",title = "Log of deaths per million"),
    color = alt.Color('location:N', legend=alt.Legend(title="Country", labelFontSize=15, titleFontSize=17),
                     scale=alt.Scale(scheme='tableau20')),
    opacity = alt.condition(selection, alt.value(1), alt.value(0.1))
)

lines = base.mark_line().add_selection(
    scales
).add_selection(
    selection
).properties(
    width=chart_width,
    height=chart_height
)

trend_2d = alt.Chart(source).encode(
    x = "n_days:Q",
    y = alt.Y("trend_2days:Q",  scale=alt.Scale(domain=(0, max(data_plot["log_cases"])))),
).mark_line(color="grey", strokeDash=[3,3])


labels = pd.DataFrame([{'label': 'Doubles every 2 days', 'x_coord': 6, 'y_coord': 4},
                       {'label': 'Doubles every 4 days', 'x_coord': 17, 'y_coord': 3.5},
                       {'label': 'Doubles every 12 days', 'x_coord': 25, 'y_coord': 2.5},
                      ])
trend_label = (alt.Chart(labels)
                    .mark_text(align='left', dx=-55, dy=-15, fontSize=12, color="grey")
                    .encode(x='x_coord:Q',
                            y='y_coord:Q',
                            text='label:N')
                   )

trend_4d = alt.Chart(source).mark_line(color="grey", strokeDash=[3,3]).encode(
    x = "n_days:Q",
    y = alt.Y("trend_4days:Q",  scale=alt.Scale(domain=(0, max(data_plot["log_cases"])))),
)

trend_12d = alt.Chart(source).mark_line(color="grey", strokeDash=[3,3]).encode(
    x = "n_days:Q",
    y = alt.Y("trend_12days:Q",  scale=alt.Scale(domain=(0, max(data_plot["log_cases"])))),
)


(
(trend_2d + trend_4d + trend_12d + trend_label + lines)
.configure_title(fontSize=20)
.configure_axis(labelFontSize=15,titleFontSize=18)
)


---------------------------------------------------------------------------
SchemaValidationError                     Traceback (most recent call last)
~/miniconda3/envs/github_page/lib/python3.7/site-packages/IPython/core/interactiveshell.py in run_code(self, code_obj, result, async_)
   3330                 else:
-> 3331                     exec(code_obj, self.user_global_ns, self.user_ns)
   3332             finally:

<ipython-input-5-89e6e5decabe> in <module>
     22 scales = alt.selection_interval(bind='scales')
---> 23 selection = alt.selection_multi(fields=['location'], bind='legend')
     24 

~/miniconda3/envs/github_page/lib/python3.7/site-packages/altair/vegalite/v3/api.py in selection_multi(**kwargs)
    235     """Create a selection with type='multi'"""
--> 236     return selection(type='multi', **kwargs)
    237 

~/miniconda3/envs/github_page/lib/python3.7/site-packages/altair/vegalite/v3/api.py in selection(name, type, **kwds)
    223     """
--> 224     return Selection(name, core.SelectionDef(type=type, **kwds))
    225 

~/miniconda3/envs/github_page/lib/python3.7/site-packages/altair/vegalite/v3/schema/core.py in __init__(self, *args, **kwds)
  12140     def __init__(self, *args, **kwds):
> 12141         super(SelectionDef, self).__init__(*args, **kwds)
  12142 

~/miniconda3/envs/github_page/lib/python3.7/site-packages/altair/utils/schemapi.py in __init__(self, *args, **kwds)
    153         if DEBUG_MODE and self._class_is_valid_at_instantiation:
--> 154             self.to_dict(validate=True)
    155 

~/miniconda3/envs/github_page/lib/python3.7/site-packages/altair/utils/schemapi.py in to_dict(self, validate, ignore, context)
    301             except jsonschema.ValidationError as err:
--> 302                 raise SchemaValidationError(self, err)
    303         return result

<class 'str'>: (<class 'TypeError'>, TypeError('sequence item 1: expected str instance, int found'))

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
~/miniconda3/envs/github_page/lib/python3.7/site-packages/IPython/core/interactiveshell.py in run_code(self, code_obj, result, async_)
   3346             if result is not None:
   3347                 result.error_in_exec = sys.exc_info()[1]
-> 3348             self.showtraceback(running_compiled_code=True)
   3349         else:
   3350             outflag = False

~/miniconda3/envs/github_page/lib/python3.7/site-packages/IPython/core/interactiveshell.py in showtraceback(self, exc_tuple, filename, tb_offset, exception_only, running_compiled_code)
   2047                                             value, tb, tb_offset=tb_offset)
   2048 
-> 2049                     self._showtraceback(etype, value, stb)
   2050                     if self.call_pdb:
   2051                         # drop into debugger

~/miniconda3/envs/github_page/lib/python3.7/site-packages/ipykernel/zmqshell.py in _showtraceback(self, etype, evalue, stb)
    544             u'traceback' : stb,
    545             u'ename' : unicode_type(etype.__name__),
--> 546             u'evalue' : py3compat.safe_unicode(evalue),
    547         }
    548 

~/miniconda3/envs/github_page/lib/python3.7/site-packages/ipython_genutils/py3compat.py in safe_unicode(e)
     63     """
     64     try:
---> 65         return unicode_type(e)
     66     except UnicodeError:
     67         pass

~/miniconda3/envs/github_page/lib/python3.7/site-packages/altair/utils/schemapi.py in __unicode__(self)
     91         schema_path = ['{}.{}'.format(cls.__module__, cls.__name__)]
     92         schema_path.extend(self.schema_path)
---> 93         schema_path = '->'.join(val for val in schema_path[:-1]
     94                                 if val not in ('properties',
     95                                                'additionalProperties',

TypeError: sequence item 1: expected str instance, int found

In [7]:
#hide_input
label = 'Deaths'
temp = pd.concat([x.copy() for x in data_countries_pc]).loc[lambda x: x.date >= '3/1/2020']

metric_name = f'{label} per Million'
temp.columns = ['Country', 'date', metric_name]
# temp.loc[:, 'month'] = temp.date.dt.strftime('%Y-%m')
temp.loc[:, f'Log of {label} per Million'] = temp[f'{label} per Million'].apply(lambda x: np.log(x))

temp.groupby('Country').last()


Out[7]:
date Deaths per Million Log of Deaths per Million
Country
China 2020-03-20 2.307882 0.836330
France 2020-03-20 6.693804 1.901182
Iran 2020-03-20 17.655874 2.871069
Italy 2020-03-20 67.924641 4.218399
South Korea 2020-03-20 1.843780 0.611818
Spain 2020-03-20 22.500599 3.113542
United Kingdom 2020-03-20 2.689570 0.989381

In [8]:
#hide
# Get data and clean it

data = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv", error_bad_lines=False)
data = data.drop(columns=["Lat", "Long"])
data = data.melt(id_vars= ["Province/State", "Country/Region"])
data = pd.DataFrame(data.groupby(['Country/Region', "variable"]).sum())
data.reset_index(inplace=True)  
data = data.rename(columns={"Country/Region": "location", "variable": "date", "value": "total_cases"})
data['date'] =pd.to_datetime(data.date)
data = data.sort_values(by = "date")
data.loc[data.location == "US","location"] = "United States"
data.loc[data.location == "Korea, South","location"] = "South Korea"

# Population data (last year is 2017 which is what we use)
data_pwt = pd.read_stata("https://www.rug.nl/ggdc/docs/pwt91.dta")

filter1 = data_pwt["year"] == 2017
data_pop = data_pwt[filter1]
data_pop = data_pop[["country","pop"]]
data_pop.loc[data_pop.country == "Republic of Korea","country"] = "South Korea"
data_pop.loc[data_pop.country == "Iran (Islamic Republic of)","country"] = "Iran"

# per habitant
data_pc = data.copy()

# I can add more countries if needed
countries = ["China", "Italy", "Spain", "France", "United Kingdom", "Germany", 
             "Portugal", "United States", "Singapore","South Korea", "Japan", 
             "Brazil","Iran"]

data_countries = []
data_countries_pc = []

# compute per habitant
for i in countries:
    data_pc.loc[data_pc.location == i,"total_cases"] = data_pc.loc[data_pc.location == i,"total_cases"]/float(data_pop.loc[data_pop.country == i, "pop"])
    
# get each country time series
filter1 = data_pc["total_cases"] > 1

for i in countries:
    filter_country = data_pc["location"]== i
    data_countries_pc.append(data_pc[filter_country & filter1])

In [9]:
#hide_input
# Stack data to get it to Altair dataframe format
data_countries_pc2 = data_countries_pc.copy()
for i in range(0,len(countries)):
    data_countries_pc2[i] = data_countries_pc2[i].reset_index()
    data_countries_pc2[i]['n_days'] = data_countries_pc2[i].index
    data_countries_pc2[i]['log_cases'] = np.log(data_countries_pc2[i]["total_cases"])
data_plot = data_countries_pc2[0]
for i in range(1, len(countries)):    
    data_plot = pd.concat([data_plot, data_countries_pc2[i]], axis=0)
data_plot["trend_2days"] = data_plot["n_days"]*1/2
data_plot["trend_4days"] = data_plot["n_days"]*1/4
data_plot["trend_12days"] = data_plot["n_days"]*1/12
data_plot["trend_2days_label"] = "Doubles every 2 days"
data_plot["trend_4days_label"] = "Doubles evey 4 days"
data_plot["trend_12days_label"] = "Doubles every 12 days"


# Plot it using Altair
source = data_plot

scales = alt.selection_interval(bind='scales')
selection = alt.selection_multi(fields=['location'], bind='legend')

base = alt.Chart(source, title = "COVID-19 Confirmed Cases Per Million of Inhabitants").encode(
    x = alt.X('n_days:Q', title = "Days passed since reaching 1 case per million"),
    y = alt.Y("log_cases:Q",title = "Log of confirmed cases per million"),
    color = alt.Color('location:N', legend=alt.Legend(title="Country", labelFontSize=15, titleFontSize=17),
                     scale=alt.Scale(scheme='tableau20')),
    opacity = alt.condition(selection, alt.value(1), alt.value(0.1))
).properties(
    width=chart_width,
    height=chart_height
)

lines = base.mark_line().add_selection(
    scales
).add_selection(
    selection
)

trend_2d = alt.Chart(source).encode(
    x = "n_days:Q",
    y = alt.Y("trend_2days:Q",  scale=alt.Scale(domain=(0, max(data_plot["log_cases"])))),
).mark_line( strokeDash=[3,3], color="grey")

labels = pd.DataFrame([{'label': 'Doubles every 2 days', 'x_coord': 10, 'y_coord': 6},
                       {'label': 'Doubles every 4 days', 'x_coord': 30, 'y_coord': 6},
                       {'label': 'Doubles every 12 days', 'x_coord': 45, 'y_coord': 4},
                      ])
trend_label = (alt.Chart(labels)
                    .mark_text(align='left', dx=-55, dy=-15, fontSize=12, color="grey")
                    .encode(x='x_coord:Q',
                            y='y_coord:Q',
                            text='label:N')
                   )


trend_4d = alt.Chart(source).mark_line(color="grey", strokeDash=[3,3]).encode(
    x = "n_days:Q",
    y = alt.Y("trend_4days:Q",  scale=alt.Scale(domain=(0, max(data_plot["log_cases"])))),
)

trend_12d = alt.Chart(source).mark_line(color="grey", strokeDash=[3,3]).encode(
    x = "n_days:Q",
    y = alt.Y("trend_12days:Q",  scale=alt.Scale(domain=(0, max(data_plot["log_cases"])))),
)

(
(trend_2d  + trend_4d + trend_12d + trend_label + lines)
.configure_title(fontSize=20)
.configure_axis(labelFontSize=15,titleFontSize=18)
)


---------------------------------------------------------------------------
SchemaValidationError                     Traceback (most recent call last)
~/miniconda3/envs/github_page/lib/python3.7/site-packages/IPython/core/interactiveshell.py in run_code(self, code_obj, result, async_)
   3330                 else:
-> 3331                     exec(code_obj, self.user_global_ns, self.user_ns)
   3332             finally:

<ipython-input-9-f4a8f2102b29> in <module>
     22 scales = alt.selection_interval(bind='scales')
---> 23 selection = alt.selection_multi(fields=['location'], bind='legend')
     24 

~/miniconda3/envs/github_page/lib/python3.7/site-packages/altair/vegalite/v3/api.py in selection_multi(**kwargs)
    235     """Create a selection with type='multi'"""
--> 236     return selection(type='multi', **kwargs)
    237 

~/miniconda3/envs/github_page/lib/python3.7/site-packages/altair/vegalite/v3/api.py in selection(name, type, **kwds)
    223     """
--> 224     return Selection(name, core.SelectionDef(type=type, **kwds))
    225 

~/miniconda3/envs/github_page/lib/python3.7/site-packages/altair/vegalite/v3/schema/core.py in __init__(self, *args, **kwds)
  12140     def __init__(self, *args, **kwds):
> 12141         super(SelectionDef, self).__init__(*args, **kwds)
  12142 

~/miniconda3/envs/github_page/lib/python3.7/site-packages/altair/utils/schemapi.py in __init__(self, *args, **kwds)
    153         if DEBUG_MODE and self._class_is_valid_at_instantiation:
--> 154             self.to_dict(validate=True)
    155 

~/miniconda3/envs/github_page/lib/python3.7/site-packages/altair/utils/schemapi.py in to_dict(self, validate, ignore, context)
    301             except jsonschema.ValidationError as err:
--> 302                 raise SchemaValidationError(self, err)
    303         return result

<class 'str'>: (<class 'TypeError'>, TypeError('sequence item 1: expected str instance, int found'))

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
~/miniconda3/envs/github_page/lib/python3.7/site-packages/IPython/core/interactiveshell.py in run_code(self, code_obj, result, async_)
   3346             if result is not None:
   3347                 result.error_in_exec = sys.exc_info()[1]
-> 3348             self.showtraceback(running_compiled_code=True)
   3349         else:
   3350             outflag = False

~/miniconda3/envs/github_page/lib/python3.7/site-packages/IPython/core/interactiveshell.py in showtraceback(self, exc_tuple, filename, tb_offset, exception_only, running_compiled_code)
   2047                                             value, tb, tb_offset=tb_offset)
   2048 
-> 2049                     self._showtraceback(etype, value, stb)
   2050                     if self.call_pdb:
   2051                         # drop into debugger

~/miniconda3/envs/github_page/lib/python3.7/site-packages/ipykernel/zmqshell.py in _showtraceback(self, etype, evalue, stb)
    544             u'traceback' : stb,
    545             u'ename' : unicode_type(etype.__name__),
--> 546             u'evalue' : py3compat.safe_unicode(evalue),
    547         }
    548 

~/miniconda3/envs/github_page/lib/python3.7/site-packages/ipython_genutils/py3compat.py in safe_unicode(e)
     63     """
     64     try:
---> 65         return unicode_type(e)
     66     except UnicodeError:
     67         pass

~/miniconda3/envs/github_page/lib/python3.7/site-packages/altair/utils/schemapi.py in __unicode__(self)
     91         schema_path = ['{}.{}'.format(cls.__module__, cls.__name__)]
     92         schema_path.extend(self.schema_path)
---> 93         schema_path = '->'.join(val for val in schema_path[:-1]
     94                                 if val not in ('properties',
     95                                                'additionalProperties',

TypeError: sequence item 1: expected str instance, int found

In [ ]: