In [2]:
import pandas as pd
from bokeh.io import output_notebook, show
output_notebook()


Loading BokehJS ...

In [8]:
df = pd.read_table('UKDA-6884-tab/tab/adult_dental_health_survey_2009_end_user_licence_270712.tab')


/Users/caged/miniconda3/envs/notebooks/lib/python3.4/site-packages/IPython/core/interactiveshell.py:2723: DtypeWarning: Columns (6,477,479,480,499,500,501,502,503,504,505,506,507,508,509,510,511,512,513,1166,1167,1168,1169) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)

In [9]:
missing = df[df.nummiss != -1].copy()
counts = missing.groupby(['ageband5', 'nummiss']).size()
missing_table = counts.unstack('ageband5')
missing_table.head()


Out[9]:
ageband5 1 2 3 4 5 6 7 8
nummiss
0 69 137 128 38 16 3 NaN NaN
1 49 94 76 55 20 6 3 NaN
2 68 123 118 75 46 13 2 NaN
3 73 106 137 98 58 25 4 1
4 272 214 288 227 124 46 9 1

In [10]:
def normalize(df):
    result = df.copy()
    for feature_name in df.columns:
        result[feature_name] = df[feature_name] / df[feature_name].sum() * 100
    return result
normalized_missing = normalize(missing_table)
normalized_missing.head()


Out[10]:
ageband5 1 2 3 4 5 6 7 8
nummiss
0 10.697674 15.054945 9.984399 3.169308 1.384083 0.372671 NaN NaN
1 7.596899 10.329670 5.928237 4.587156 1.730104 0.745342 0.761421 NaN
2 10.542636 13.516484 9.204368 6.255213 3.979239 1.614907 0.507614 NaN
3 11.317829 11.648352 10.686427 8.173478 5.017301 3.105590 1.015228 1.282051
4 42.170543 23.516484 22.464899 18.932444 10.726644 5.714286 2.284264 1.282051

In [11]:
ageband5_labels = {
    1: "16 - 24",
    2: "25 - 34",
    3: "35 - 44",
    4: "45 - 54",
    5: "55 - 64",
    6: "65 - 74",
    7: "75 - 84",
    8: "85+",    
}

In [12]:
from bokeh.palettes import Spectral8
from bokeh.plotting import figure
Spectral8.reverse()

In [14]:
p = figure(
    title="Number of missing teeth by age group in UK. Source: Adult Dental Health Survey 2009",
    min_border_left=0, responsive=True, width=900, outline_line_color=None,
)
p.xaxis.axis_label = 'Number of teeth'
p.xaxis.axis_label_text_font_size = '10pt'
p.yaxis.axis_label = 'Count'
p.yaxis.axis_label_text_font_size = '10pt'
p.title_text_align = 'left'
p.title_text_font_size = '12pt'
for count in range(1, 9):
    data = counts[count]
    p.line(
        x=data.index,
        y=data.values,
        color=Spectral8[count-1],
        line_width=5,
        line_cap='round',
        line_join='round',
        line_alpha=0.8,
        legend=ageband5_labels[count]
    )
p.legend.border_line_color = None
show(p)


Out[14]:

<Bokeh Notebook handle for In[14]>


In [15]:
p = figure(
    title="Normalized number of missing teeth by age group in UK. Source: Adult Dental Health Survey 2009",
    min_border_left=0, responsive=True, width=900, outline_line_color=None,
)
p.xaxis.axis_label = 'Number of teeth'
p.xaxis.axis_label_text_font_size = '10pt'
p.yaxis.axis_label = '%age of group'
p.yaxis.axis_label_text_font_size = '10pt'
p.title_text_align = 'left'
p.title_text_font_size = '12pt'
for count in range(1, 9):
    data = counts[count]
    p.line(
        x=normalized_missing.index,
        y=normalized_missing[count],
        color=Spectral8[count-1],
        line_width=5,
        line_cap='round',
        line_join='round',
        line_alpha=0.8,
        legend=ageband5_labels[count]
    )
p.legend.border_line_color = None    
show(p)


Out[15]:

<Bokeh Notebook handle for In[15]>


In [ ]: