This notebook represents the statistics related to the state

The datasets which we use in this notebook are

  1. Education Level by Age and Sex
  2. Graduate and Above by Age and Sex
  3. Household Size
  4. Non Workers
  5. Population attending education institutions
  6. Population by religious community
  7. Main Worker, Marginal Worker and Non Worker
  8. Adolescent and Youth Population

In [3]:
import os
os.chdir("/home/archimedeas/wrkspc/anaconda/the-visual-verdict/visualizations/4_the_folk/datasets")
os.getcwd()


Out[3]:
'/home/archimedeas/wrkspc/anaconda/the-visual-verdict/visualizations/4_the_folk/datasets'

BURTIN PLOT


In [4]:
"""
Example from the bokeh gallery

http://bokeh.pydata.org/en/latest/docs/gallery/burtin.html

"""


from collections import OrderedDict
from math import log, sqrt

import numpy as np
import pandas as pd
from six.moves import cStringIO as StringIO

from bokeh.plotting import figure, show, output_file

antibiotics = """
bacteria,                        penicillin, streptomycin, neomycin, gram
Mycobacterium tuberculosis,      800,        5,            2,        negative
Salmonella schottmuelleri,       10,         0.8,          0.09,     negative
Proteus vulgaris,                3,          0.1,          0.1,      negative
Klebsiella pneumoniae,           850,        1.2,          1,        negative
Brucella abortus,                1,          2,            0.02,     negative
Pseudomonas aeruginosa,          850,        2,            0.4,      negative
Escherichia coli,                100,        0.4,          0.1,      negative
Salmonella (Eberthella) typhosa, 1,          0.4,          0.008,    negative
Aerobacter aerogenes,            870,        1,            1.6,      negative
Brucella antracis,               0.001,      0.01,         0.007,    positive
Streptococcus fecalis,           1,          1,            0.1,      positive
Staphylococcus aureus,           0.03,       0.03,         0.001,    positive
Staphylococcus albus,            0.007,      0.1,          0.001,    positive
Streptococcus hemolyticus,       0.001,      14,           10,       positive
Streptococcus viridans,          0.005,      10,           40,       positive
Diplococcus pneumoniae,          0.005,      11,           10,       positive
"""

drug_color = OrderedDict([
    ("Penicillin",   "#0d3362"),
    ("Streptomycin", "#c64737"),
    ("Neomycin",     "black"  ),
])

gram_color = {
    "positive" : "#aeaeb8",
    "negative" : "#e69584",
}

df = pd.read_csv(StringIO(antibiotics),
                 skiprows=1,
                 skipinitialspace=True,
                 engine='python')

width = 800
height = 800
inner_radius = 90
outer_radius = 300 - 10

minr = sqrt(log(.001 * 1E4))
maxr = sqrt(log(1000 * 1E4))
a = (outer_radius - inner_radius) / (minr - maxr)
b = inner_radius - a * maxr

def rad(mic):
    return a * np.sqrt(np.log(mic * 1E4)) + b

big_angle = 2.0 * np.pi / (len(df) + 1)
small_angle = big_angle / 7

x = np.zeros(len(df))
y = np.zeros(len(df))

output_file("burtin.html", title="burtin.py example")

p = figure(plot_width=width, plot_height=height, title="",
    x_axis_type=None, y_axis_type=None,
    x_range=[-420, 420], y_range=[-420, 420],
    min_border=0, outline_line_color="black",
    background_fill="#f0e1d2", border_fill="#f0e1d2")

p.line(x+1, y+1, alpha=0)

# annular wedges
angles = np.pi/2 - big_angle/2 - df.index.to_series()*big_angle
colors = [gram_color[gram] for gram in df.gram]
p.annular_wedge(
    x, y, inner_radius, outer_radius, -big_angle+angles, angles, color=colors,
)

# small wedges
p.annular_wedge(x, y, inner_radius, rad(df.penicillin),
    -big_angle+angles+5*small_angle, -big_angle+angles+6*small_angle,
    color=drug_color['Penicillin'])
p.annular_wedge(x, y, inner_radius, rad(df.streptomycin),
    -big_angle+angles+3*small_angle, -big_angle+angles+4*small_angle,
    color=drug_color['Streptomycin'])
p.annular_wedge(x, y, inner_radius, rad(df.neomycin),
    -big_angle+angles+1*small_angle, -big_angle+angles+2*small_angle,
    color=drug_color['Neomycin'])

# circular axes and lables
labels = np.power(10.0, np.arange(-3, 4))
radii = a * np.sqrt(np.log(labels * 1E4)) + b
p.circle(x, y, radius=radii, fill_color=None, line_color="white")
p.text(x[:-1], radii[:-1], [str(r) for r in labels[:-1]],
    text_font_size="8pt", text_align="center", text_baseline="middle")

# radial axes
p.annular_wedge(x, y, inner_radius-10, outer_radius+10,
    -big_angle+angles, -big_angle+angles, color="black")

# bacteria labels
xr = radii[0]*np.cos(np.array(-big_angle/2 + angles))
yr = radii[0]*np.sin(np.array(-big_angle/2 + angles))
label_angle=np.array(-big_angle/2+angles)
label_angle[label_angle < -np.pi/2] += np.pi # easier to read labels on the left side
p.text(xr, yr, df.bacteria, angle=label_angle,
    text_font_size="9pt", text_align="center", text_baseline="middle")

# OK, these hand drawn legends are pretty clunky, will be improved in future release
p.circle([-40, -40], [-370, -390], color=list(gram_color.values()), radius=5)
p.text([-30, -30], [-370, -390], text=["Gram-" + gr for gr in gram_color.keys()],
    text_font_size="7pt", text_align="left", text_baseline="middle")

p.rect([-40, -40, -40], [18, 0, -18], width=30, height=13,
    color=list(drug_color.values()))
p.text([-15, -15, -15], [18, 0, -18], text=list(drug_color.keys()),
    text_font_size="9pt", text_align="left", text_baseline="middle")

p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

show(p)

Donut Chart


In [5]:
"""

The donut graph from bokeh gallery

http://bokeh.pydata.org/en/latest/docs/gallery/donut_chart.html

"""


from collections import OrderedDict

import pandas as pd

from bokeh.charts import Donut, show, output_file
from bokeh.sampledata.olympics2014 import data

# throw the data into a pandas data frame
df = pd.io.json.json_normalize(data['data'])

# filter by countries with at least one medal and sort
df = df[df['medals.total'] > 8]
df = df.sort("medals.total", ascending=False)

# get the countries and we group the data by medal type
countries = df.abbr.values.tolist()
gold = df['medals.gold'].astype(float).values
silver = df['medals.silver'].astype(float).values
bronze = df['medals.bronze'].astype(float).values

# build a dict containing the grouped data
medals = OrderedDict()
medals['bronze'] = bronze
medals['silver'] = silver
medals['gold'] = gold

# any of the following commented are also valid Donut inputs
#medals = list(medals.values())
#medals = np.array(list(medals.values()))
#medals = pd.DataFrame(medals)

output_file("donut.html")

donut = Donut(medals, countries)

show(donut)

Charts


In [6]:
"""
http://bokeh.pydata.org/en/latest/docs/reference/charts.html

"""


from collections import OrderedDict
from bokeh.charts import HeatMap, output_file, show

# (dict, OrderedDict, lists, arrays and DataFrames are valid inputs)
xyvalues = OrderedDict()
xyvalues['apples'] = [4,5,8]
xyvalues['bananas'] = [1,2,4]
xyvalues['pears'] = [6,5,4]

hm = HeatMap(xyvalues, title='Fruits')

output_file('heatmap.html')
show(hm)

################

Image


In [7]:
"""

http://bokeh.pydata.org/en/latest/docs/gallery/image.html
"""

import numpy as np

from bokeh.plotting import figure, show, output_file

N = 1000

x = np.linspace(0, 10, N)
y = np.linspace(0, 10, N)
xx, yy = np.meshgrid(x, y)
d = np.sin(xx)*np.cos(yy)

output_file("image.html", title="image.py example")

p = figure(x_range=[0, 10], y_range=[0, 10])
p.image(image=[d], x=[0], y=[0], dw=[10], dh=[10], palette="Spectral11")

show(p)  # open a browser


################

Les Miserable


In [8]:
"""
http://bokeh.pydata.org/en/latest/docs/gallery/les_mis.html

"""


from collections import OrderedDict

import numpy as np

from bokeh.plotting import figure, show, output_file
from bokeh.models import HoverTool, ColumnDataSource
from bokeh.sampledata.les_mis import data

nodes = data['nodes']
names = [node['name'] for node in sorted(data['nodes'], key=lambda x: x['group'])]

N = len(nodes)
counts = np.zeros((N, N))
for link in data['links']:
    counts[link['source'], link['target']] = link['value']
    counts[link['target'], link['source']] = link['value']

colormap = [
    "#444444", "#a6cee3", "#1f78b4", "#b2df8a", "#33a02c", "#fb9a99",
    "#e31a1c", "#fdbf6f", "#ff7f00", "#cab2d6", "#6a3d9a"
]

xname = []
yname = []
color = []
alpha = []
for i, n1 in enumerate(nodes):
    for j, n2 in enumerate(nodes):
        xname.append(n1['name'])
        yname.append(n2['name'])

        a = min(counts[i,j]/4.0, 0.9) + 0.1
        alpha.append(a)

        if n1['group'] == n2['group']:
            color.append(colormap[n1['group']])
        else:
            color.append('lightgrey')


source = ColumnDataSource(
    data=dict(
        xname=xname,
        yname=yname,
        colors=color,
        alphas=alpha,
        count=counts.flatten(),
    )
)

output_file("les_mis.html")

p = figure(title="Les Mis Occurrences",
    x_axis_location="above", tools="resize,hover,save",
    x_range=list(reversed(names)), y_range=names)
p.plot_width = 800
p.plot_height = 800

p.rect('xname', 'yname', 0.9, 0.9, source=source,
     color='colors', alpha='alphas', line_color=None)

p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "5pt"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = np.pi/3

hover = p.select(dict(type=HoverTool))
hover.tooltips = OrderedDict([
    ('names', '@yname, @xname'),
    ('count', '@count'),
])

show(p)      # show the plot

####################

Stacked Bars


In [1]:
"""
http://bokeh.pydata.org/en/latest/docs/reference/charts.html

"""


from collections import OrderedDict
from bokeh.charts import Bar, output_file, show

# (dict, OrderedDict, lists, arrays and DataFrames are valid inputs)
xyvalues = OrderedDict()
xyvalues['python']=[-2, 5]
xyvalues['pypy']=[12, 40]
xyvalues['jython']=[22, 30]

cat = ['1st', '2nd']

bar = Bar(xyvalues, cat, title="Stacked bars",
        xlabel="category", ylabel="language")

output_file("stacked_bar.html")
show(bar)


---------------------------------------------------------------------------
StopIteration                             Traceback (most recent call last)
<ipython-input-1-49281143a8e6> in <module>()
     18 
     19 bar = Bar(xyvalues, cat, title="Stacked bars",
---> 20         xlabel="category", ylabel="language")
     21 
     22 output_file("stacked_bar.html")

C:\Anaconda3\lib\site-packages\bokeh\charts\builder\bar_builder.py in Bar(data, label, values, color, stack, group, agg, xscale, yscale, xgrid, ygrid, continuous_range, **kw)
     97     kw['y_range'] = y_range
     98 
---> 99     return create_and_build(BarBuilder, data, **kw)
    100 
    101 

C:\Anaconda3\lib\site-packages\bokeh\charts\_builder.py in create_and_build(builder_class, *data, **kws)
     57     # create the new builder
     58     builder_kws = {k: v for k, v in kws.items() if k in builder_props}
---> 59     builder = builder_class(*data, **builder_kws)
     60 
     61     # create a chart to return, since there isn't one already

C:\Anaconda3\lib\site-packages\bokeh\charts\_builder.py in __init__(self, *args, **kws)
    174 
    175             # handle input attrs and ensure attrs have access to data
--> 176             self._setup_attrs(data, kws)
    177 
    178         super(Builder, self).__init__(**kws)

C:\Anaconda3\lib\site-packages\bokeh\charts\_builder.py in _setup_attrs(self, data, kws)
    205             elif isinstance(attr, str) or isinstance(attr, list):
    206                 self.attributes[attr_name] = self.default_attributes[attr_name].clone()
--> 207                 self.attributes[attr_name].setup(data=source, columns=attr)
    208 
    209             else:

C:\Anaconda3\lib\site-packages\bokeh\charts\_attributes.py in setup(self, data, columns)
    107 
    108             if columns is not None:
--> 109                 self.set_columns(columns)
    110 
    111         if self.columns is not None and self.data is not None:

C:\Anaconda3\lib\site-packages\bokeh\charts\_attributes.py in set_columns(self, columns)
    100             # assume this is now the iterable at this point
    101             self.iterable = columns
--> 102             self._setup_default()
    103 
    104     def setup(self, data=None, columns=None):

C:\Anaconda3\lib\site-packages\bokeh\charts\_attributes.py in _setup_default(self)
     68 
     69     def _setup_default(self):
---> 70         self.default = next(self._setup_iterable())
     71 
     72     def _setup_iterable(self):

StopIteration: 

In [2]:
from collections import OrderedDict

import pandas as pd

from bokeh._legacy_charts import Bar, output_file, show
from bokeh.sampledata.olympics2014 import data

df = pd.io.json.json_normalize(data['data'])

# filter by countries with at least one medal and sort
df = df[df['medals.total'] > 0]
df = df.sort("medals.total", ascending=False)

# get the countries and we group the data by medal type
countries = df.abbr.values.tolist()
gold = df['medals.gold'].astype(float).values
silver = df['medals.silver'].astype(float).values
bronze = df['medals.bronze'].astype(float).values

# build a dict containing the grouped data
medals = OrderedDict(bronze=bronze, silver=silver, gold=gold)

# any of the following commented are also alid Bar inputs
#medals = pd.DataFrame(medals)
#medals = list(medals.values())

output_file("stacked_bar.html")

bar = Bar(medals, countries, title="Stacked bars", stacked=True)

show(bar)


C:\Anaconda3\lib\site-packages\IPython\kernel\__main__.py:12: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
C:\Anaconda3\lib\site-packages\bokeh\_legacy_charts\_chart.py:92: UserWarning: Instantiating a Legacy Chart from bokeh._legacy_charts
  warn("Instantiating a Legacy Chart from bokeh._legacy_charts")

In [ ]: