In [1]:
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot
# Show the installed plotly version; this notebook requires version >= 1.9.0.
print(__version__)
In [3]:
# ! pip install plotly --upgrade
In [2]:
# Run this at the start of every IPython notebook that uses plotly.offline:
# it injects the plotly.js source files into the notebook.
init_notebook_mode()
In [3]:
import plotly.plotly as py
from plotly.graph_objs import *
# Two small demo line traces, used to check that offline plotting works in
# this notebook before the real data is loaded.
trace0 = Scatter(
    x=[1, -30, 3, 10],
    y=[10, 15, 13, 17]
)
trace1 = Scatter(
    x=[1, 2, 3, 4],
    y=[16, 5, 11, 9]
)
# iplot accepts a plain list of traces. The graph_objs.Data wrapper that was
# used here before is deprecated in newer plotly releases, so it is avoided.
data = [trace0, trace1]
iplot(data)
In [4]:
# Option 1: load the data from a workspace in Azure Machine Learning Studio
# (https://studio.azureml.net, a free service):
# from azureml import Workspace
# ws = Workspace()
# ds = ws.datasets['paho-who-cases-reported-2016-03-02.csv']
# frame = ds.to_dataframe()
# frame.head(20)
# Option 2 (used here): the notebook runs locally and the CSV file is local too;
# the relative path below is resolved against the current working directory.
import pandas as pd
frame = pd.read_csv('paho-who-cases-reported-2016-03-02.csv')
# Preview the first 20 rows of the raw data.
frame.head(20)
Out[4]:
In [5]:
import pandas as pd
# Some of the measured values are strings containing a ',' (e.g. '1,234').
# Only strings get the ',' stripped: values that are already numeric (for
# example when this cell is re-run after the column has been converted, or a
# missing value read as NaN) are passed straight to float() instead of
# failing on the missing .replace method.
frame['Measure Values'] = [
    float(value.replace(',', '')) if isinstance(value, str) else float(value)
    for value in frame['Measure Values']
]

# Sum the measured values for each country and measure name, once per year
# and once per month.
df_by_year = frame.groupby(
    ['Country / territory', 'Year of Date', 'Measure Names'],
    as_index=False)['Measure Values'].sum()
df_by_month = frame.groupby(
    ['Country / territory', 'Month of Date', 'Measure Names'],
    as_index=False)['Measure Values'].sum()

# Focus on the yearly totals. Take an explicit copy so that in-place edits
# made to df later on cannot leak back into df_by_year.
df = pd.DataFrame(df_by_year, copy=True)
df.head()
Out[5]:
In [ ]:
# ! pip install pycountry
In [9]:
import pycountry
# Fix country names to match pycountry "countries" naming (found these were missing)
subs = [{'Bolivia': 'Bolivia, Plurinational State of'},
        {'Bonaire': 'Bonaire, Sint Eustatius and Saba'},
        {'Curacao': 'Curaçao'},
        {'Saint Martin': 'Saint Martin (French part)'},
        {'Sint Maarten': 'Sint Maarten (Dutch part)'},
        {'United States Virgin Islands': 'Virgin Islands, U.S.'},
        {'Venezuela': 'Venezuela, Bolivarian Republic of'}]
# Each substitution is applied to the 'Country / territory' column only.
for sub in subs:
    df.replace(to_replace={'Country / territory': sub}, inplace=True)

# Use pycountry package to translate country names to three-letter codes.
# Newer pycountry releases expose the code as `alpha_3`, older ones as
# `alpha3`; try the new name first and fall back to the old one so the cell
# works with either.
mapping = {
    country.name: getattr(country, 'alpha_3', None) or getattr(country, 'alpha3', None)
    for country in pycountry.countries
}
# Names that are not found in the mapping get None as their code.
codes = [mapping.get(name) for name in df['Country / territory']]
df['Country Code'] = codes
df.head(20)
Out[9]:
In [10]:
# Record max for plot scale later. Series.max() skips missing values, whereas
# the builtin max() gives order-dependent results when a NaN is present.
total_max = df['Measure Values'].max()
print(total_max)

# Filter data for plot by year and "suspected" or "confirmed" cases.
# The labels are compared after stripping surrounding whitespace, so the match
# works whether or not the data carries the trailing space ('Confirmed ').
# Every label other than "Confirmed" is treated as suspected.
is_confirmed = df['Measure Names'].str.strip() == 'Confirmed'
in_2015 = df['Year of Date'] == 2015
in_2016 = df['Year of Date'] == 2016

df1 = df[is_confirmed & in_2015]   # confirmed, 2015
df2 = df[is_confirmed & in_2016]   # confirmed, 2016
df3 = df[~is_confirmed & in_2015]  # suspected, 2015
df4 = df[~is_confirmed & in_2016]  # suspected, 2016
In [12]:
import plotly.plotly as py
# We are going to make four plots: one world choropleth per entry in `dfs`,
# titled with the entry at the same position in `title`.
title = ['Zika Outbreak: Confirmed 2015',
         'Zika Outbreak: Confirmed 2016',
         'Zika Outbreak: Suspected 2015',
         'Zika Outbreak: Suspected 2016']
dfs = [df1, df2, df3, df4]

for i, dfplt in enumerate(dfs):
    data = [dict(
        type='choropleth',
        locations=dfplt['Country Code'],
        z=dfplt['Measure Values'],
        text=dfplt['Country Code'],
        # Same colour range on every map so the four plots are comparable.
        zmin=0,
        zmax=total_max,
        colorscale=[[0, "rgb(5, 10, 172)"], [0.35, "rgb(40, 60, 190)"],
                    [0.5, "rgb(70, 100, 245)"], [0.6, "rgb(90, 120, 245)"],
                    [0.7, "rgb(106, 137, 247)"], [1, "rgb(220, 220, 220)"]],
        autocolorscale=False,
        reversescale=True,
        marker=dict(
            line=dict(
                color='rgb(180,180,180)',
                width=0.5
            )
        ),
        colorbar=dict(
            tickprefix='',
            title='Cases'
        )
    )]

    layout = dict(
        title=title[i],
        geo=dict(
            showframe=True,
            showcoastlines=True,
            projection=dict(
                # plotly projection names are lowercase; the capitalised
                # 'Mercator' is not a valid value.
                type='mercator'
            )
        )
    )

    # Data for plot and IPython plotly plot
    fig = dict(data=data, layout=layout)
    iplot(fig)
In [ ]: