In [1]:
import pandas as pd
from pandas import Series, DataFrame, Panel
import pickle

In [2]:
pypf = pd.read_pickle('./data/pickle/pypf.pkl')
pypop = pd.read_pickle('./data/pickle/pypop.pkl')
df = pd.merge(pypf, pypop)

In [3]:
mesocorrection = pd.read_csv('./data/eng/mesocorrection.csv')
mesocorrection['Year'] = pd.to_datetime(mesocorrection['Year'])
mesocorrection_lookup = mesocorrection.to_dict()
mesocorrection_lookup


Out[3]:
{'Meso correction factor': {0: 56.0,
  1: 59.600000000000001,
  2: 59.899999999999999,
  3: 63.299999999999997,
  4: 59.399999999999999,
  5: 59.0,
  6: 56.0,
  7: 53.0,
  8: 54.0,
  9: 60.899999999999999,
  10: 60.799999999999997,
  11: 55.200000000000003,
  12: 60.299999999999997,
  13: 57.200000000000003,
  14: 54.399999999999999,
  15: 52.700000000000003,
  16: 52.399999999999999,
  17: 52.200000000000003,
  18: 52.5,
  19: 40.799999999999997,
  20: 36.799999999999997,
  21: 35.200000000000003,
  22: 36.5,
  23: 35.799999999999997,
  24: 33.700000000000003,
  25: 34.100000000000001,
  26: 26.899999999999999,
  27: 100.0,
  28: 100.0,
  29: 100.0,
  30: 100.0,
  31: 100.0,
  32: 100.0,
  33: 100.0,
  34: 100.0,
  35: 100.0,
  36: 100.0,
  37: 100.0,
  38: 100.0},
 'Year': {0: Timestamp('1974-01-01 00:00:00', tz=None),
  1: Timestamp('1975-01-01 00:00:00', tz=None),
  2: Timestamp('1976-01-01 00:00:00', tz=None),
  3: Timestamp('1977-01-01 00:00:00', tz=None),
  4: Timestamp('1978-01-01 00:00:00', tz=None),
  5: Timestamp('1979-01-01 00:00:00', tz=None),
  6: Timestamp('1980-01-01 00:00:00', tz=None),
  7: Timestamp('1981-01-01 00:00:00', tz=None),
  8: Timestamp('1982-01-01 00:00:00', tz=None),
  9: Timestamp('1983-01-01 00:00:00', tz=None),
  10: Timestamp('1984-01-01 00:00:00', tz=None),
  11: Timestamp('1985-01-01 00:00:00', tz=None),
  12: Timestamp('1986-01-01 00:00:00', tz=None),
  13: Timestamp('1987-01-01 00:00:00', tz=None),
  14: Timestamp('1988-01-01 00:00:00', tz=None),
  15: Timestamp('1989-01-01 00:00:00', tz=None),
  16: Timestamp('1990-01-01 00:00:00', tz=None),
  17: Timestamp('1991-01-01 00:00:00', tz=None),
  18: Timestamp('1992-01-01 00:00:00', tz=None),
  19: Timestamp('1993-01-01 00:00:00', tz=None),
  20: Timestamp('1994-01-01 00:00:00', tz=None),
  21: Timestamp('1995-01-01 00:00:00', tz=None),
  22: Timestamp('1996-01-01 00:00:00', tz=None),
  23: Timestamp('1997-01-01 00:00:00', tz=None),
  24: Timestamp('1998-01-01 00:00:00', tz=None),
  25: Timestamp('1999-01-01 00:00:00', tz=None),
  26: Timestamp('2000-01-01 00:00:00', tz=None),
  27: Timestamp('2001-01-01 00:00:00', tz=None),
  28: Timestamp('2002-01-01 00:00:00', tz=None),
  29: Timestamp('2003-01-01 00:00:00', tz=None),
  30: Timestamp('2004-01-01 00:00:00', tz=None),
  31: Timestamp('2005-01-01 00:00:00', tz=None),
  32: Timestamp('2006-01-01 00:00:00', tz=None),
  33: Timestamp('2007-01-01 00:00:00', tz=None),
  34: Timestamp('2008-01-01 00:00:00', tz=None),
  35: Timestamp('2009-01-01 00:00:00', tz=None),
  36: Timestamp('2010-01-01 00:00:00', tz=None),
  37: Timestamp('2011-01-01 00:00:00', tz=None),
  38: Timestamp('2012-01-01 00:00:00', tz=None)}}

In [4]:
df_male = df[df['Sex'] == 'Male']

standpop08 = df_male[df_male['Year'] == pd.to_datetime('2008')]

wholeref = Series(standpop08.Population.values, index=standpop08.Agegroup).to_dict()

neref = Series(standpop08[standpop08['Region'] == 'NORTH EAST'].Population.values, index=standpop08[standpop08['Region'] == 'NORTH EAST'].Agegroup).to_dict()
yhref = Series(standpop08[standpop08['Region'] == 'YORKSHIRE AND THE HUMBER'].Population.values, index=standpop08[standpop08['Region'] == 'YORKSHIRE AND THE HUMBER'].Agegroup).to_dict()
nwref = Series(standpop08[standpop08['Region'] == 'NORTH WEST'].Population.values, index=standpop08[standpop08['Region'] == 'NORTH WEST'].Agegroup).to_dict()
emref = Series(standpop08[standpop08['Region'] == 'EAST MIDLANDS'].Population.values, index=standpop08[standpop08['Region'] == 'EAST MIDLANDS'].Agegroup).to_dict()
wmref = Series(standpop08[standpop08['Region'] == 'WEST MIDLANDS'].Population.values, index=standpop08[standpop08['Region'] == 'WEST MIDLANDS'].Agegroup).to_dict()
eeref = Series(standpop08[standpop08['Region'] == 'EAST'].Population.values, index=standpop08[standpop08['Region'] == 'EAST'].Agegroup).to_dict()
seref = Series(standpop08[standpop08['Region'] == 'SOUTH EAST'].Population.values, index=standpop08[standpop08['Region'] == 'SOUTH EAST'].Agegroup).to_dict()
swref = Series(standpop08[standpop08['Region'] == 'SOUTH WEST'].Population.values, index=standpop08[standpop08['Region'] == 'SOUTH WEST'].Agegroup).to_dict()
wwref = Series(standpop08[standpop08['Region'] == 'WALES'].Population.values, index=standpop08[standpop08['Region'] == 'WALES'].Agegroup).to_dict()
llref = Series(standpop08[standpop08['Region'] == 'LONDON'].Population.values, index=standpop08[standpop08['Region'] == 'LONDON'].Agegroup).to_dict()

ref_list = (neref, yhref, nwref, emref, wmref, eeref, seref, swref, wwref, llref)


ne = df_male[df_male['Region'] == 'NORTH EAST']
yh = df_male[df_male['Region'] == 'YORKSHIRE AND THE HUMBER']
nw = df_male[df_male['Region'] == 'NORTH WEST']
em = df_male[df_male['Region'] == 'EAST MIDLANDS']
wm = df_male[df_male['Region'] == 'WEST MIDLANDS']
ee = df_male[df_male['Region'] == 'EAST']
se = df_male[df_male['Region'] == 'SOUTH EAST']
sw = df_male[df_male['Region'] == 'SOUTH WEST']
ww = df_male[df_male['Region'] == 'WALES']
ll = df_male[df_male['Region'] == 'LONDON']

reg_list = (ne, yh, nw, em, wm, ee, se, sw, ww, ll)


for i, item in enumerate(reg_list):
    reg_list[i]['2008 population'] = reg_list[i]['Agegroup'].map(lambda x: ref_list[i][x] )
    
df1 = pd.concat(reg_list)

df_female = df[df['Sex'] == 'Female']

standpop08 = df_female[df_female['Year'] == pd.to_datetime('2008')]

neref = Series(standpop08[standpop08['Region'] == 'NORTH EAST'].Population.values, index=standpop08[standpop08['Region'] == 'NORTH EAST'].Agegroup).to_dict()
yhref = Series(standpop08[standpop08['Region'] == 'YORKSHIRE AND THE HUMBER'].Population.values, index=standpop08[standpop08['Region'] == 'YORKSHIRE AND THE HUMBER'].Agegroup).to_dict()
nwref = Series(standpop08[standpop08['Region'] == 'NORTH WEST'].Population.values, index=standpop08[standpop08['Region'] == 'NORTH WEST'].Agegroup).to_dict()
emref = Series(standpop08[standpop08['Region'] == 'EAST MIDLANDS'].Population.values, index=standpop08[standpop08['Region'] == 'EAST MIDLANDS'].Agegroup).to_dict()
wmref = Series(standpop08[standpop08['Region'] == 'WEST MIDLANDS'].Population.values, index=standpop08[standpop08['Region'] == 'WEST MIDLANDS'].Agegroup).to_dict()
eeref = Series(standpop08[standpop08['Region'] == 'EAST'].Population.values, index=standpop08[standpop08['Region'] == 'EAST'].Agegroup).to_dict()
seref = Series(standpop08[standpop08['Region'] == 'SOUTH EAST'].Population.values, index=standpop08[standpop08['Region'] == 'SOUTH EAST'].Agegroup).to_dict()
swref = Series(standpop08[standpop08['Region'] == 'SOUTH WEST'].Population.values, index=standpop08[standpop08['Region'] == 'SOUTH WEST'].Agegroup).to_dict()
wwref = Series(standpop08[standpop08['Region'] == 'WALES'].Population.values, index=standpop08[standpop08['Region'] == 'WALES'].Agegroup).to_dict()
llref = Series(standpop08[standpop08['Region'] == 'LONDON'].Population.values, index=standpop08[standpop08['Region'] == 'LONDON'].Agegroup).to_dict()

ref_list = (neref, yhref, nwref, emref, wmref, eeref, seref, swref, wwref, llref)


ne = df_female[df_female['Region'] == 'NORTH EAST']
yh = df_female[df_female['Region'] == 'YORKSHIRE AND THE HUMBER']
nw = df_female[df_female['Region'] == 'NORTH WEST']
em = df_female[df_female['Region'] == 'EAST MIDLANDS']
wm = df_female[df_female['Region'] == 'WEST MIDLANDS']
ee = df_female[df_female['Region'] == 'EAST']
se = df_female[df_female['Region'] == 'SOUTH EAST']
sw = df_female[df_female['Region'] == 'SOUTH WEST']
ww = df_female[df_female['Region'] == 'WALES']
ll = df_female[df_female['Region'] == 'LONDON']


reg_list = (ne, yh, nw, em, wm, ee, se, sw, ww, ll)

for i, item in enumerate(reg_list):
    reg_list[i]['2008 population'] = reg_list[i]['Agegroup'].map(lambda x: ref_list[i][x] )
    
df2 = pd.concat(reg_list)

df_list = [df1, df2]
df = pd.concat(df_list)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-4-1005ab90c015> in <module>()
     64 se = df_female[df_female['Region'] == 'SOUTH EAST']
     65 sw = df_female[df_female['Region'] == 'SOUTH WEST']
---> 66 ww = df_female[df_ff1emale['Region'] == 'WALES']
     67 ll = df_female[df_female['Region'] == 'LONDON']
     68 

NameError: name 'df_ff1emale' is not defined

In [ ]:
df['Rate per 100,000 population'] = (df['Deaths'] / df['Population']) * 100000
df1 = df[df['Agegroup'] != 'ALL AGES'] #lets throw away all ages rows
df1['Estimated deaths age standardised to 2008 population'] = (df1['2008 population'] / 100000) * df1['Rate per 100,000 population']

In [ ]:
df1['Corrected Meso Deaths'] = (df1['Estimated deaths age standardised to 2008 population'] / df1['Year'].map(lambda x: mesocorrection_lookup[x])) * 100