notebook.community

Edit and run



In [2]:

    
import pandas as pd
from bokeh.io import output_notebook, show
output_notebook()









    





    
        
        Loading BokehJS ...



In [8]:

    
df = pd.read_table('UKDA-6884-tab/tab/adult_dental_health_survey_2009_end_user_licence_270712.tab')









    



/Users/caged/miniconda3/envs/notebooks/lib/python3.4/site-packages/IPython/core/interactiveshell.py:2723: DtypeWarning: Columns (6,477,479,480,499,500,501,502,503,504,505,506,507,508,509,510,511,512,513,1166,1167,1168,1169) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)



In [9]:

    
missing = df[df.nummiss != -1].copy()
counts = missing.groupby(['ageband5', 'nummiss']).size()
missing_table = counts.unstack('ageband5')
missing_table.head()



In [10]:

    
def normalize(df):
    result = df.copy()
    for feature_name in df.columns:
        result[feature_name] = df[feature_name] / df[feature_name].sum() * 100
    return result
normalized_missing = normalize(missing_table)
normalized_missing.head()



In [11]:

    
ageband5_labels = {
    1: "16 - 24",
    2: "25 - 34",
    3: "35 - 44",
    4: "45 - 54",
    5: "55 - 64",
    6: "65 - 74",
    7: "75 - 84",
    8: "85+",    
}



In [12]:

    
from bokeh.palettes import Spectral8
from bokeh.plotting import figure
Spectral8.reverse()



In [14]:

    
p = figure(
    title="Number of missing teeth by age group in UK. Source: Adult Dental Health Survey 2009",
    min_border_left=0, responsive=True, width=900, outline_line_color=None,
)
p.xaxis.axis_label = 'Number of teeth'
p.xaxis.axis_label_text_font_size = '10pt'
p.yaxis.axis_label = 'Count'
p.yaxis.axis_label_text_font_size = '10pt'
p.title_text_align = 'left'
p.title_text_font_size = '12pt'
for count in range(1, 9):
    data = counts[count]
    p.line(
        x=data.index,
        y=data.values,
        color=Spectral8[count-1],
        line_width=5,
        line_cap='round',
        line_join='round',
        line_alpha=0.8,
        legend=ageband5_labels[count]
    )
p.legend.border_line_color = None
show(p)









    






    







    Out[14]:




<Bokeh Notebook handle for In[14]>



In [15]:

    
p = figure(
    title="Normalized number of missing teeth by age group in UK. Source: Adult Dental Health Survey 2009",
    min_border_left=0, responsive=True, width=900, outline_line_color=None,
)
p.xaxis.axis_label = 'Number of teeth'
p.xaxis.axis_label_text_font_size = '10pt'
p.yaxis.axis_label = '%age of group'
p.yaxis.axis_label_text_font_size = '10pt'
p.title_text_align = 'left'
p.title_text_font_size = '12pt'
for count in range(1, 9):
    data = counts[count]
    p.line(
        x=normalized_missing.index,
        y=normalized_missing[count],
        color=Spectral8[count-1],
        line_width=5,
        line_cap='round',
        line_join='round',
        line_alpha=0.8,
        legend=ageband5_labels[count]
    )
p.legend.border_line_color = None    
show(p)









    






    







    Out[15]:




<Bokeh Notebook handle for In[15]>



In [ ]:

ageband5	1	2	3	4	5	6	7	8
nummiss
0	69	137	128	38	16	3	NaN	NaN
1	49	94	76	55	20	6	3	NaN
2	68	123	118	75	46	13	2	NaN
3	73	106	137	98	58	25	4	1
4	272	214	288	227	124	46	9	1

ageband5	1	2	3	4	5	6	7	8
nummiss
0	10.697674	15.054945	9.984399	3.169308	1.384083	0.372671	NaN	NaN
1	7.596899	10.329670	5.928237	4.587156	1.730104	0.745342	0.761421	NaN
2	10.542636	13.516484	9.204368	6.255213	3.979239	1.614907	0.507614	NaN
3	11.317829	11.648352	10.686427	8.173478	5.017301	3.105590	1.015228	1.282051
4	42.170543	23.516484	22.464899	18.932444	10.726644	5.714286	2.284264	1.282051