In [1]:
# plotly's version string plus the offline-mode helpers imported for this notebook.
# NOTE(review): download_plotlyjs is imported here but not referenced by any
# visible cell -- confirm whether it is still needed.
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot

# Show the installed plotly version.
print(__version__) # requires version >= 1.9.0


1.9.10

In [3]:
# ! pip install plotly --upgrade

In [2]:
# Run at the start of every IPython notebook that uses plotly.offline:
# this injects the plotly.js source files into the notebook.
init_notebook_mode()



In [3]:
import plotly.plotly as py
from plotly.graph_objs import *

# Two small line traces, used to check that offline rendering works
trace0 = Scatter(x=[1, -30, 3, 10], y=[10, 15, 13, 17])
trace1 = Scatter(x=[1, 2, 3, 4], y=[16, 5, 11, 9])

# Bundle both traces and draw them inline in the notebook
data = Data([trace0, trace1])
iplot(data)



In [4]:
# Option A -- read the dataset from a workspace in Azure Machine Learning
# Studio: https://studio.azureml.net (a free service)
#
#     from azureml import Workspace
#
#     ws = Workspace()
#     ds = ws.datasets['paho-who-cases-reported-2016-03-02.csv']
#     frame = ds.to_dataframe()
#     frame.head(20)
#
# Option B (used here) -- the notebook is local and the file is too

import pandas as pd

zika_csv = 'paho-who-cases-reported-2016-03-02.csv'
frame = pd.read_csv(zika_csv)

# Preview the first 20 rows of the raw report
frame.head(20)


Out[4]:
Country / territory Measure Names Month of Date Report Epi Week Year of Date Country / territory.1 Date Measure Values Laboratory confirmed cases Suspected cases
0 Aruba Suspected January 1 2016 Aruba January, 2016 0 0 0
1 Aruba Confirmed January 1 2016 Aruba January, 2016 0 0 0
2 Aruba Suspected January 2 2016 Aruba January, 2016 0 0 0
3 Aruba Confirmed January 2 2016 Aruba January, 2016 0 0 0
4 Aruba Suspected January 3 2016 Aruba January, 2016 0 0 0
5 Aruba Confirmed January 3 2016 Aruba January, 2016 0 0 0
6 Aruba Suspected January 4 2016 Aruba January, 2016 0 0 0
7 Aruba Confirmed January 4 2016 Aruba January, 2016 0 0 0
8 Barbados Suspected January 1 2016 Barbados January, 2016 0 3 0
9 Barbados Confirmed January 1 2016 Barbados January, 2016 3 3 0
10 Barbados Suspected January 2 2016 Barbados January, 2016 0 0 0
11 Barbados Confirmed January 2 2016 Barbados January, 2016 0 0 0
12 Barbados Suspected January 3 2016 Barbados January, 2016 0 0 0
13 Barbados Confirmed January 3 2016 Barbados January, 2016 0 0 0
14 Barbados Suspected January 4 2016 Barbados January, 2016 0 0 0
15 Barbados Confirmed January 4 2016 Barbados January, 2016 0 0 0
16 Bolivia Suspected January 1 2016 Bolivia January, 2016 0 0 0
17 Bolivia Confirmed January 1 2016 Bolivia January, 2016 0 0 0
18 Bolivia Suspected January 2 2016 Bolivia January, 2016 0 1 0
19 Bolivia Confirmed January 2 2016 Bolivia January, 2016 1 1 0

In [5]:
import pandas as pd

# Some of the measured values have a ',' (thousands separator), so they need
# cleaning before they can be summed.  Each value goes through str() first so
# that this cell can be re-run safely: once the column already holds floats,
# calling .replace() directly on a value would raise AttributeError.
frame['Measure Values'] = [float(str(x).replace(',', '')) for x in frame['Measure Values']]

# Do a groupby for each country by year or month, summing the case counts.
# as_index=False keeps the grouping keys as ordinary columns.

df_by_year = frame.groupby(['Country / territory', 'Year of Date', 'Measure Names'],
                           as_index=False)['Measure Values'].sum()


df_by_month = frame.groupby(['Country / territory', 'Month of Date', 'Measure Names'],
                            as_index=False)['Measure Values'].sum()

# Focus on the by-year totals.  Work on an explicit copy so that edits made to
# `df` in later cells are clearly separate from `df_by_year`.

df = df_by_year.copy()
df.head()


Out[5]:
Country / territory Year of Date Measure Names Measure Values
0 Aruba 2015 Confirmed 0.0
1 Aruba 2015 Suspected 0.0
2 Aruba 2016 Confirmed 4.0
3 Aruba 2016 Suspected 0.0
4 Barbados 2015 Confirmed 0.0

In [ ]:
# ! pip install pycountry

In [9]:
import pycountry

# Fix country names to match pycountry "countries" naming (found these were missing)

subs = [{'Bolivia': 'Bolivia, Plurinational State of'},
        {'Bonaire': 'Bonaire, Sint Eustatius and Saba'},
        {'Curacao': 'Curaçao'},
        {'Saint Martin': 'Saint Martin (French part)'},
        {'Sint Maarten': 'Sint Maarten (Dutch part)'},
        {'United States Virgin Islands': 'Virgin Islands, U.S.'},
        {'Venezuela': 'Venezuela, Bolivarian Republic of'}]

# Merge the single-entry dicts into one lookup and apply it once, to the
# country column only.  Re-running the cell is harmless: names that were
# already substituted no longer match any key.
name_fixes = {old: new for sub in subs for old, new in sub.items()}
df['Country / territory'] = df['Country / territory'].replace(name_fixes)


def _alpha3_code(country):
    """Return the three-letter code of a pycountry country record.

    The attribute is spelled ``alpha_3`` in newer pycountry releases and
    ``alpha3`` in older ones; try both so the cell works with either.
    Returns None if neither attribute is present.
    """
    code = getattr(country, 'alpha_3', None)
    if code is None:
        code = getattr(country, 'alpha3', None)
    return code


# Use pycountry package to translate country names to three-letter codes
mapping = {country.name: _alpha3_code(country) for country in pycountry.countries}

codes = [mapping.get(name) for name in df['Country / territory']]
df['Country Code'] = codes

# Names without a match get a missing code; report them here so they can be
# added to `subs` instead of going unnoticed.
unmatched = sorted(set(df.loc[df['Country Code'].isnull(), 'Country / territory']))
if unmatched:
    print('No country code found for: {}'.format(unmatched))

df.head(20)


Out[9]:
Country / territory Year of Date Measure Names Measure Values Country Code
0 Aruba 2015 Confirmed 0 ABW
1 Aruba 2015 Suspected 0 ABW
2 Aruba 2016 Confirmed 4 ABW
3 Aruba 2016 Suspected 0 ABW
4 Barbados 2015 Confirmed 0 BRB
5 Barbados 2015 Suspected 0 BRB
6 Barbados 2016 Confirmed 9 BRB
7 Barbados 2016 Suspected 277 BRB
8 Bolivia, Plurinational State of 2015 Confirmed 0 BOL
9 Bolivia, Plurinational State of 2015 Suspected 0 BOL
10 Bolivia, Plurinational State of 2016 Confirmed 1 BOL
11 Bolivia, Plurinational State of 2016 Suspected 0 BOL
12 Bonaire, Sint Eustatius and Saba 2015 Confirmed 0 BES
13 Bonaire, Sint Eustatius and Saba 2015 Suspected 0 BES
14 Bonaire, Sint Eustatius and Saba 2016 Confirmed 1 BES
15 Bonaire, Sint Eustatius and Saba 2016 Suspected 0 BES
16 Brazil 2015 Confirmed 8 BRA
17 Brazil 2015 Suspected 39 BRA
18 Brazil 2016 Confirmed 526 BRA
19 Brazil 2016 Suspected 1451 BRA

In [10]:
# Record max for plot scale later (taken over every row, so one value covers
# both measure types and both years)

total_max = df['Measure Values'].max()
print(total_max)


# Filter data for plot by year and "suspected" or "confirmed" cases.
#
# Labels are compared after stripping surrounding whitespace, so the match
# works whether or not the raw 'Measure Names' values carry a trailing space
# (the original literal was 'Confirmed ' with one).
# NOTE(review): the rendered table cannot show trailing spaces -- confirm
# against the CSV which form the data actually uses.

is_confirmed = df['Measure Names'].str.strip() == 'Confirmed'
in_2015 = df['Year of Date'] == 2015
in_2016 = df['Year of Date'] == 2016

df1 = df[is_confirmed & in_2015]
df2 = df[is_confirmed & in_2016]

# Everything that is not "confirmed" is treated as "suspected"
df3 = df[~is_confirmed & in_2015]
df4 = df[~is_confirmed & in_2016]


30128.0

In [12]:
import plotly.plotly as py

# We are going to make four plots: one choropleth map per entry in `dfs`,
# each paired with the title at the same position in `title`.

title = ['Zika Outbreak:  Confirmed 2015',
         'Zika Outbreak:  Confirmed 2016',
         'Zika Outbreak:  Suspected 2015',
         'Zika Outbreak:  Suspected 2016']

dfs = [df1, df2, df3, df4]

# The colour scale is the same for every map, so build it once outside the loop
colorscale = [[0, "rgb(5, 10, 172)"], [0.35, "rgb(40, 60, 190)"], [0.5, "rgb(70, 100, 245)"],
              [0.6, "rgb(90, 120, 245)"], [0.7, "rgb(106, 137, 247)"], [1, "rgb(220, 220, 220)"]]

for plot_title, dfplt in zip(title, dfs):

    data = [ dict(
            type = 'choropleth',
            locations = dfplt['Country Code'],
            z = dfplt['Measure Values'],
            text = dfplt['Country Code'],
            # Same zmin/zmax on every map so colours are comparable across plots
            zmin = 0,
            zmax = total_max,
            colorscale = colorscale,
            autocolorscale = False,
            reversescale = True,
            marker = dict(
                line = dict (
                    color = 'rgb(180,180,180)',
                    width = 0.5
                )
            ),
            colorbar = dict(
                tickprefix = '',
                title = 'Cases'
            )
        ) ]

    layout = dict(
        title = plot_title,
        geo = dict(
            showframe = True,
            showcoastlines = True,
            projection = dict(
                # plotly's projection type names are lowercase; 'Mercator'
                # is not one of the accepted values
                type = 'mercator'
            )
        )
    )

    # Data for plot and IPython plotly plot
    fig = dict( data=data, layout=layout )
    iplot(fig)



In [ ]: