In [1]:
import pandas as pd
from pandas import Series, DataFrame, Panel
import pickle
In [2]:
# Load the two pickled frames from the local data directory.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
pypf = pd.read_pickle('./data/pickle/pypf.pkl')
pypop = pd.read_pickle('./data/pickle/pypop.pkl')
# No `on=` is given, so pandas joins on every column name the two frames
# share, keeping only rows present in both (inner join is the default).
df = pypf.merge(pypop)
In [3]:
# Correction table, turned into a {Year timestamp: value} dict for lookups by Year.
# Year is read as text so that a bare year such as 2008 is parsed as the
# calendar year 2008; pd.to_datetime on an integer column would instead read
# the numbers as nanoseconds since 1970-01-01.
mesocorrection = pd.read_csv('./data/eng/mesocorrection.csv', dtype={'Year': str})
mesocorrection['Year'] = pd.to_datetime(mesocorrection['Year'])
# DataFrame.to_dict() is keyed by column name ({column: {row: value}}), which
# cannot be indexed with a Year timestamp, so build a flat {Year: value} dict.
# NOTE(review): assumes the first non-Year column holds the correction value;
# confirm against the CSV header.
mesocorrection_lookup = mesocorrection.set_index('Year').iloc[:, 0].to_dict()
mesocorrection_lookup
Out[3]:
In [4]:
# Give every row the 2008 population of its own sex, region and age group in a
# new '2008 population' column.

# Regions kept in the result, in the order their rows are concatenated.
REGIONS = (
    'NORTH EAST',
    'YORKSHIRE AND THE HUMBER',
    'NORTH WEST',
    'EAST MIDLANDS',
    'WEST MIDLANDS',
    'EAST',
    'SOUTH EAST',
    'SOUTH WEST',
    'WALES',
    'LONDON',
)


def add_2008_population(frame):
    """Return the REGIONS rows of `frame` with a '2008 population' column added.

    For each region, the rows whose Year equals pd.to_datetime('2008') supply
    an Agegroup -> Population mapping, and every row of that region gets the
    value for its own Agegroup.

    Args:
        frame: DataFrame with 'Year', 'Region', 'Agegroup' and 'Population'
            columns. It is not modified.

    Returns:
        A new DataFrame: the per-region rows concatenated in REGIONS order,
        keeping their original index labels. Rows whose Region is not in
        REGIONS are left out.

    Raises:
        KeyError: if a row's Agegroup has no 2008 entry for its region.
    """
    standpop08 = frame[frame['Year'] == pd.to_datetime('2008')]
    parts = []
    for region in REGIONS:
        ref_rows = standpop08[standpop08['Region'] == region]
        ref = pd.Series(ref_rows['Population'].values, index=ref_rows['Agegroup']).to_dict()
        # .copy() so the new column lands on an independent frame rather than
        # on a slice of `frame` (avoids SettingWithCopyWarning).
        part = frame[frame['Region'] == region].copy()
        # Index the dict directly so a missing age group fails loudly instead
        # of silently producing NaN.
        part['2008 population'] = part['Agegroup'].map(lambda agegroup: ref[agegroup])
        parts.append(part)
    return pd.concat(parts)


# Reference populations are sex-specific, so each sex is processed separately.
# Rows whose Sex is neither 'Male' nor 'Female' are not carried forward.
df_male = df[df['Sex'] == 'Male']
df_female = df[df['Sex'] == 'Female']
df1 = add_2008_population(df_male)
df2 = add_2008_population(df_female)
df_list = [df1, df2]
df = pd.concat(df_list)
In [ ]:
# Death rate for every row, expressed per 100,000 population.
df['Rate per 100,000 population'] = (df['Deaths'] / df['Population']) * 100000
# Drop the rows whose Agegroup is 'ALL AGES'. .copy() gives df1 its own data,
# so the column assignments on df1 do not act on a slice of df
# (avoids SettingWithCopyWarning).
df1 = df[df['Agegroup'] != 'ALL AGES'].copy()
# Apply each row's rate to the matching 2008 population.
df1['Estimated deaths age standardised to 2008 population'] = (df1['2008 population'] / 100000) * df1['Rate per 100,000 population']
In [ ]:
# Look up the correction value for each row's Year; a Year missing from the
# lookup raises KeyError.
correction_by_row = df1['Year'].map(lambda year: mesocorrection_lookup[year])
# Divide the standardised deaths by that value and scale by 100.
# NOTE(review): the factor of 100 suggests the lookup value is a percentage; confirm.
standardised_deaths = df1['Estimated deaths age standardised to 2008 population']
df1['Corrected Meso Deaths'] = (standardised_deaths / correction_by_row) * 100