In [2]:
import pandas as pd
from bokeh.io import output_notebook, show
# Configure Bokeh so that later show() calls render inline in notebook cells.
output_notebook()
In [8]:
# Load the Adult Dental Health Survey 2009 end-user-licence extract.
# The path is relative to the notebook's working directory; read_table's default
# separator is a tab, which matches the .tab file.
df = pd.read_table('UKDA-6884-tab/tab/adult_dental_health_survey_2009_end_user_licence_270712.tab')
In [9]:
# Discard rows whose nummiss is -1 (presumably the survey's "not recorded" code --
# TODO confirm against the data dictionary) and keep an independent copy.
answered = df["nummiss"].ne(-1)
missing = df.loc[answered].copy()

# Number of rows for every (ageband5, nummiss) combination.
counts = missing.groupby(["ageband5", "nummiss"]).size()

# Pivot the age band out of the index: one column per ageband5 value,
# one row per nummiss value.
missing_table = counts.unstack(level="ageband5")
missing_table.head()
Out[9]:
In [10]:
def normalize(df):
    """Rescale every column of ``df`` to percentages of that column's total.

    Parameters
    ----------
    df : pandas.DataFrame
        Table of numeric columns. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index and columns in which each value is
        ``value / column_sum * 100``. Missing cells stay missing, because
        ``sum`` skips them when computing the column total.
    """
    column_totals = df.sum(axis=0)
    # One vectorized division aligned on column labels replaces the
    # column-by-column Python loop.
    return df.div(column_totals, axis="columns").mul(100)
# Express each age-band column of missing_table as a percentage of that column's
# total, so that age bands with different row counts can be compared directly.
normalized_missing = normalize(missing_table)
normalized_missing.head()
Out[10]:
In [11]:
# Legend text for each ageband5 value, keyed by the integers 1 through 8.
ageband5_labels = dict(enumerate(
    (
        "16 - 24",
        "25 - 34",
        "35 - 44",
        "45 - 54",
        "55 - 64",
        "65 - 74",
        "75 - 84",
        "85+",
    ),
    start=1,
))
In [12]:
from bokeh.palettes import Spectral8
from bokeh.plotting import figure
# Use the palette in reverse order. Rebind the notebook-level name to a reversed
# copy instead of calling .reverse() on the library's own object: slicing works
# whether the palette is a list or an immutable tuple, and because the import
# just above re-fetches the original each time this cell runs, re-running the
# cell always yields the same colour order.
Spectral8 = Spectral8[::-1]
In [14]:
# Raw counts: one line per age band, x = nummiss value, y = number of rows.
p = figure(
    title="Number of missing teeth by age group in UK. Source: Adult Dental Health Survey 2009",
    min_border_left=0,
    responsive=True,
    width=900,
    outline_line_color=None,
)

# Title and axis text.
p.title_text_align = 'left'
p.title_text_font_size = '12pt'
p.xaxis.axis_label = 'Number of teeth'
p.yaxis.axis_label = 'Count'
p.xaxis.axis_label_text_font_size = '10pt'
p.yaxis.axis_label_text_font_size = '10pt'

# Stroke settings shared by every age-band line.
line_style = dict(line_width=5, line_cap='round', line_join='round', line_alpha=0.8)

# Spectral8 holds eight colours, so enumerating it from 1 walks the age-band
# codes 1..8 together with the colour assigned to each.
for band, colour in enumerate(Spectral8, start=1):
    band_counts = counts[band]  # rows of `counts` for this age band, indexed by nummiss
    p.line(
        x=band_counts.index,
        y=band_counts.values,
        color=colour,
        legend=ageband5_labels[band],
        **line_style
    )

p.legend.border_line_color = None
show(p)
Out[14]:
In [15]:
# Normalized view: each age band is drawn as a percentage of its own total
# (the columns of normalized_missing), so bands of different sizes are comparable.
p = figure(
    title="Normalized number of missing teeth by age group in UK. Source: Adult Dental Health Survey 2009",
    min_border_left=0, responsive=True, width=900, outline_line_color=None,
)
p.xaxis.axis_label = 'Number of teeth'
p.xaxis.axis_label_text_font_size = '10pt'
p.yaxis.axis_label = '%age of group'
p.yaxis.axis_label_text_font_size = '10pt'
p.title_text_align = 'left'
p.title_text_font_size = '12pt'

# The loop variable is an age-band code (a column label of normalized_missing and
# a key of ageband5_labels). Only normalized_missing is read here; the raw
# `counts` series is not needed for this chart.
for band in range(1, 9):
    p.line(
        x=normalized_missing.index,
        y=normalized_missing[band],
        color=Spectral8[band - 1],
        line_width=5,
        line_cap='round',
        line_join='round',
        line_alpha=0.8,
        legend=ageband5_labels[band],
    )

p.legend.border_line_color = None
show(p)
Out[15]:
In [ ]: