In [14]:
%matplotlib inline

sns.set_style('white')
sns.set_context('poster')
cmap = sns.diverging_palette(220, 10, as_cmap=True)

In [15]:
# Get registration application data from performance.service.gov.uk:
# summed counts per day over a duration of 30, grouped by age band.
# The query is passed via `params` so requests does the percent-encoding
# (':' -> '%3A') instead of it being hand-written into the URL.
res = requests.get(
    'https://www.performance.service.gov.uk/data/register-to-vote/volumetrics',
    params={
        'duration': 30,
        'collect': 'count:sum',
        'group_by': 'value',
        'period': 'day',
        'filter_by': 'metricName:age_band',
    },
    timeout=30,  # never hang the kernel on an unresponsive server
)
# Fail here with a clear HTTP error rather than later while parsing the body.
res.raise_for_status()
# To inspect the raw payload: res.json()['data']

In [16]:
def munge_reg_apps(res):
    """Transform the registration-applications JSON into a tidy DataFrame.

    Parameters
    ----------
    res : response-like object
        Anything exposing ``.json()`` whose result has a ``'data'`` list.
        Each entry carries the age band label under ``'value'`` and a list of
        per-period records under ``'values'``; the records need the keys
        ``'_start_at'``, ``'_end_at'``, ``'_count'`` and ``'count:sum'``.

    Returns
    -------
    DataFrame
        One row per (period, age band) with the columns ``end_date``,
        ``start_date``, ``total`` and a categorical ``age_group``, on a fresh
        0..n-1 index. An empty ``'data'`` list yields an empty frame with
        those four columns.
    """
    data = res.json()['data']

    # Build every per-band frame first and concatenate once: appending inside
    # the loop re-copies the accumulated frame on each pass, and
    # DataFrame.append is not available in current pandas releases.
    frames = []
    for age_group in data:
        age_df = DataFrame(age_group['values'])
        age_df['age_group'] = age_group['value']
        frames.append(age_df)

    if not frames:
        # Nothing to concatenate; hand back the expected (empty) schema.
        return DataFrame(columns=['end_date', 'start_date', 'total', 'age_group'])

    # ignore_index gives the same clean 0..n-1 index the caller expects.
    apps = pd.concat(frames, ignore_index=True)

    date_cols = ['_end_at', '_start_at']
    apps[date_cols] = apps[date_cols].apply(pd.to_datetime)
    apps = apps.rename(columns={'_end_at': 'end_date', '_start_at': 'start_date', 'count:sum': 'total'})
    apps = apps.drop(labels=['_count'], axis=1)
    apps['age_group'] = apps['age_group'].astype('category')

    return apps

In [17]:
# Tidy the raw response, then preview the first five rows.
apps = munge_reg_apps(res)
apps.head(n=5)


Out[17]:
end_date start_date total age_group
0 2016-05-12 2016-05-11 5.0 14-15
1 2016-05-13 2016-05-12 14.0 14-15
2 2016-05-14 2016-05-13 4.0 14-15
3 2016-05-15 2016-05-14 13.0 14-15
4 2016-05-16 2016-05-15 4.0 14-15

Total registration applications by age group


In [18]:
# Sum only the `total` column per age band; selecting it explicitly keeps the
# datetime columns out of the aggregation instead of relying on pandas to
# drop them silently.
apps.groupby('age_group')[['total']].sum()


Out[18]:
total
age_group
14-15 313.0
16-17 23840.0
18-24 678106.0
25-34 881919.0
35-44 517332.0
45-54 369395.0
55-64 190378.0
65-74 89503.0
> 75 34393.0
not_provided 26.0

In [19]:
# Bar chart of per-band totals; `total` is selected explicitly so the
# datetime columns never reach sum().
apps.groupby('age_group')[['total']].sum().plot(kind='bar', title='Registration applications by age group');



In [20]:
# Summary statistics of `total` for each age band.
# NOTE(review): the trailing unstack() assumes describe() on a groupby returns
# one row per (group, statistic); newer pandas releases may already return one
# row per group, in which case unstack() reshapes again — confirm against the
# installed pandas version.
apps.groupby('age_group')[['total']].describe().unstack()


/Users/Andrew/anaconda/envs/dev/lib/python3.5/site-packages/numpy/lib/function_base.py:3823: RuntimeWarning: Invalid value encountered in percentile
  RuntimeWarning)
Out[20]:
total
count mean std min 25% 50% 75% max
age_group
14-15 30.0 10.433333 9.719101 2.0 4.25 7.0 12.75 53.0
16-17 30.0 794.666667 607.136372 222.0 478.75 670.0 810.00 3336.0
18-24 30.0 22603.533333 25533.046481 6222.0 9182.00 13852.0 19222.25 128670.0
25-34 30.0 29397.300000 32954.802727 7626.0 12599.00 18186.0 23892.75 169916.0
35-44 30.0 17244.400000 19123.224552 3823.0 8736.75 11809.0 13198.75 99721.0
45-54 30.0 12313.166667 13325.245628 2709.0 6886.25 8484.0 10144.00 72054.0
55-64 30.0 6345.933333 6025.429411 1483.0 3398.75 4917.5 6297.75 34152.0
65-74 30.0 2983.433333 2184.714606 698.0 1697.50 2729.0 3681.25 12851.0
> 75 30.0 1146.433333 782.079354 296.0 559.50 1186.0 1379.00 4523.0
not_provided 14.0 1.857143 1.833750 1.0 NaN NaN NaN 8.0

In [21]:
# Reshape long -> wide: one row per start_date, one column per age_group,
# cells holding that period's `total`.
age_apps = apps.pivot(
    values='total',
    index='start_date',
    columns='age_group',
)
age_apps


Out[21]:
age_group 14-15 16-17 18-24 25-34 35-44 45-54 55-64 65-74 > 75 not_provided
start_date
2016-05-11 5.0 520.0 7021.0 13265.0 10408.0 8798.0 5128.0 2720.0 877.0 1.0
2016-05-12 14.0 416.0 6607.0 12377.0 9641.0 8381.0 4706.0 2619.0 878.0 NaN
2016-05-13 4.0 465.0 7832.0 11259.0 8585.0 7423.0 4371.0 2420.0 808.0 NaN
2016-05-14 13.0 268.0 6383.0 8929.0 6491.0 5673.0 3083.0 1659.0 446.0 NaN
2016-05-15 4.0 331.0 8791.0 11553.0 8440.0 7424.0 4099.0 2145.0 612.0 NaN
2016-05-16 7.0 659.0 13180.0 18447.0 13292.0 11114.0 6749.0 4118.0 1305.0 1.0
2016-05-17 12.0 585.0 12731.0 17080.0 12142.0 10093.0 6213.0 3752.0 1337.0 NaN
2016-05-18 10.0 681.0 13247.0 17831.0 12184.0 10161.0 6350.0 3774.0 1416.0 NaN
2016-05-19 6.0 636.0 14457.0 18735.0 12539.0 9915.0 5644.0 3102.0 1192.0 NaN
2016-05-20 6.0 749.0 14978.0 17229.0 11046.0 8318.0 4712.0 2738.0 1101.0 NaN
2016-05-21 7.0 528.0 10893.0 15447.0 8354.0 5731.0 3017.0 1555.0 538.0 1.0
2016-05-22 4.0 544.0 12718.0 16825.0 9192.0 6295.0 3254.0 1400.0 495.0 2.0
2016-05-23 4.0 946.0 18959.0 25871.0 15139.0 10781.0 6326.0 3469.0 1536.0 NaN
2016-05-24 7.0 708.0 16322.0 21147.0 12279.0 8587.0 5039.0 2776.0 1250.0 1.0
2016-05-25 8.0 724.0 17486.0 23318.0 12813.0 8817.0 5130.0 2794.0 1209.0 1.0
2016-05-26 7.0 540.0 12934.0 17925.0 9867.0 6757.0 3988.0 2370.0 1180.0 1.0
2016-05-27 3.0 420.0 9275.0 12287.0 6881.0 4904.0 2959.0 1813.0 966.0 NaN
2016-05-28 2.0 290.0 6732.0 8564.0 4458.0 3093.0 1808.0 866.0 346.0 NaN
2016-05-29 4.0 222.0 6222.0 7626.0 3823.0 2709.0 1483.0 698.0 296.0 NaN
2016-05-30 4.0 315.0 9151.0 11575.0 5672.0 4085.0 2228.0 1079.0 369.0 NaN
2016-05-31 19.0 891.0 18457.0 26342.0 14075.0 9973.0 5690.0 2843.0 1329.0 NaN
2016-06-01 10.0 712.0 18033.0 23889.0 12919.0 9125.0 5335.0 2870.0 1393.0 NaN
2016-06-02 5.0 778.0 19310.0 23894.0 12347.0 8236.0 4796.0 2573.0 1288.0 1.0
2016-06-03 53.0 1535.0 50150.0 67561.0 35432.0 21607.0 9713.0 3998.0 1513.0 2.0
2016-06-04 15.0 811.0 20760.0 21449.0 11476.0 7274.0 3373.0 1430.0 542.0 2.0
2016-06-05 12.0 807.0 24672.0 23863.0 11160.0 7309.0 3476.0 1169.0 464.0 NaN
2016-06-06 21.0 1779.0 71954.0 74417.0 35754.0 23711.0 11664.0 5045.0 1985.0 2.0
2016-06-07 23.0 3336.0 128670.0 169916.0 99721.0 72054.0 34152.0 12851.0 4523.0 8.0
2016-06-08 14.0 1448.0 54496.0 79649.0 51334.0 34195.0 14140.0 4736.0 1657.0 2.0
2016-06-09 10.0 1196.0 45685.0 63649.0 39868.0 26852.0 11752.0 4121.0 1542.0 1.0

In [22]:
# Replace the date index with its string form. Per the original author's
# note, plotting breaks if the index is left as-is.
# NOTE(review): this mutates `age_apps` in place, so every cell that runs
# after this one sees a string index.
age_apps.index = age_apps.index.astype('str')

Registration applications over time


In [23]:
# One line per age band across the age_apps index.
age_apps.plot.line(title='Registration applications by age over time');


Cumulative applications over time


In [24]:
# Running total of applications per age band, drawn as one line each.
age_apps.cumsum().plot.line(title='Cumulative applications by age over time');


Log applications by age over time


In [25]:
# Heatmap of natural-log counts, transposed so each row is an age band.
# The '14-15', '16-17' and 'not_provided' columns are left out.
ax = sns.heatmap(
    np.log(age_apps.drop(labels=['14-15', '16-17', 'not_provided'], axis=1)).T,
    square=True,
    linewidths=2,
    cmap=cmap,
    cbar_kws={'shrink': 0.4},
)
ax.set_title('Log applications by age over time');


Cumulative applications by age over time


In [26]:
# Heatmap of running totals, transposed so each row is an age band.
# cumsum works column by column, so dropping the '14-15', '16-17' and
# 'not_provided' columns first gives the same values for the remaining bands.
ax = sns.heatmap(
    age_apps.drop(labels=['14-15', '16-17', 'not_provided'], axis=1).cumsum().T,
    square=True,
    linewidths=2,
    cmap=cmap,
    cbar_kws={'shrink': 0.4},
)
ax.set_title('Cumulative applications by age over time');



In [ ]: