Let's prepare the data further for analysis.


In [1]:
import pandas as pd
from pandas import Series, DataFrame, Panel
import pickle

In [2]:
# Read the two pickled input frames and join them. pd.merge is called with its
# defaults, i.e. an inner join on the column names the two frames share.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
pypf, pypop = (
    pd.read_pickle(pickle_path)
    for pickle_path in ('.././data/pickle/pypf.pkl', '.././data/pickle/pypop.pkl')
)
df = pd.merge(left=pypf, right=pypop)

In [3]:
# Build a {Year timestamp: 'Meso correction factor'} dictionary from the CSV.
mesocorrection = pd.read_csv('.././data/eng/mesocorrection.csv')

# NOTE(review): if the CSV stores Year as a bare integer (e.g. 2008),
# pd.to_datetime treats it as nanoseconds since the epoch, not as a calendar
# year -- confirm the parsed timestamps match the 'Year' values in df.
mesocorrection['Year'] = pd.to_datetime(mesocorrection['Year'])

# Index the table by year while keeping 'Year' available as a column.
mesocorrection = mesocorrection.set_index('Year', drop=False)

# Only the correction-factor column is needed for the lookup.
mesocorrection_lookup = mesocorrection['Meso correction factor'].to_dict()

In [4]:
# Regions to keep, in the order their rows are stacked in the combined frame.
# Rows whose 'Region' is not listed here are dropped from the result.
REGIONS = (
    'NORTH EAST',
    'YORKSHIRE AND THE HUMBER',
    'NORTH WEST',
    'EAST MIDLANDS',
    'WEST MIDLANDS',
    'EAST',
    'SOUTH EAST',
    'SOUTH WEST',
    'WALES',
    'LONDON',
)


def add_2008_population(frame, sex, regions=REGIONS, reference_year='2008'):
    """Return the rows of ``frame`` for one sex with a '2008 population' column.

    For each region in ``regions`` the rows of the requested sex are selected,
    and every row receives the ``Population`` recorded for the same region and
    ``Agegroup`` in the reference year.

    Parameters
    ----------
    frame : DataFrame
        Must provide the columns 'Sex', 'Year', 'Region', 'Agegroup' and
        'Population'.
    sex : str
        Value of the 'Sex' column to select, e.g. 'Male' or 'Female'.
    regions : sequence of str, optional
        Region names to process; also fixes the row order of the result.
    reference_year : str, optional
        Parsed with ``pd.to_datetime`` and compared against 'Year'. The output
        column is always named '2008 population', whatever year is given.

    Returns
    -------
    DataFrame
        The per-region frames concatenated in ``regions`` order. Original
        index labels are preserved.

    Raises
    ------
    KeyError
        If a row's 'Agegroup' has no reference-year entry for its region.
    """
    by_sex = frame[frame['Sex'] == sex]
    reference = by_sex[by_sex['Year'] == pd.to_datetime(reference_year)]

    parts = []
    for region in regions:
        # Work on an explicit copy so the new column is written to an
        # independent frame instead of a filtered slice (this is what
        # triggered SettingWithCopyWarning before).
        region_rows = by_sex[by_sex['Region'] == region].copy()
        region_ref = reference[reference['Region'] == region]
        lookup = Series(
            region_ref['Population'].values, index=region_ref['Agegroup']
        ).to_dict()
        # Index the dict directly rather than passing it to .map(): a missing
        # age group then fails loudly with KeyError instead of yielding NaN.
        region_rows['2008 population'] = region_rows['Agegroup'].map(
            lambda agegroup: lookup[agegroup]
        )
        parts.append(region_rows)

    return pd.concat(parts)


# Male rows first, then female rows, each ordered by REGIONS.
df = pd.concat([
    add_2008_population(df, 'Male'),
    add_2008_population(df, 'Female'),
])

In [5]:
# Crude death rate: deaths divided by population, scaled to 100,000 people.
# No rows are filtered out here, so any 'ALL AGES' rows stay in the frame.
crude_rate_per_100k = df['Deaths'].div(df['Population']).mul(100000)
df['Rate per 100,000 population'] = crude_rate_per_100k

# Deaths expected if that rate were applied to the 2008 reference population.
df['Estimated deaths age standardised to 2008 population'] = (
    df['2008 population'].div(100000).mul(crude_rate_per_100k)
)

In [6]:
# Look up each row's correction factor by its Year; a year missing from the
# lookup raises KeyError. The estimate is divided by the factor and scaled by
# 100 (presumably the factor is a percentage -- TODO confirm).
correction_factor = df['Year'].map(lambda year: mesocorrection_lookup[year])
df['Corrected Meso Deaths'] = (
    df['Estimated deaths age standardised to 2008 population']
    .div(correction_factor)
    .mul(100)
)

In [7]:
# Express both death estimates as rates per 100,000 of the 2008 population.
reference_population = df['2008 population']

df['Corrected Meso Deaths per 100,000 (standardised)'] = (
    df['Corrected Meso Deaths'].div(reference_population).mul(100000)
)

# NOTE(review): the estimate is itself (2008 population / 100000) * crude rate,
# so this column should equal 'Rate per 100,000 population' up to float
# rounding -- confirm that is intended.
df['Rate per 100,000 (standardised)'] = (
    df['Estimated deaths age standardised to 2008 population']
    .div(reference_population)
    .mul(100000)
)

In [9]:
# Write the prepared frame, including all derived columns, to disk as a pickle.
df.to_pickle(path='.././data/pickle/pypf_prep.pkl')

In [8]: