In [1]:
import pandas as pd
from pandas import DataFrame
import pickle
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
%matplotlib inline
In [2]:
df = pd.read_pickle('.././data/pickle/pypf_prep.pkl') # load data
In [3]:
df = df[df['Agegroup'] != 'ALL AGES'] # lets throw away all ages rows
agemidpoint = {'ALL AGES': 100, 'UNDER 25': 20, '25-34':30, '35-44':40,
'45-54':50, '55-64':60, '65-74':70, '75-84':80, '85+':90}
df['Agemidpoint'] = df.Agegroup.map(lambda x: agemidpoint.get(x)) # lets give everyone an age mid point
df['YearOfDeath'] = df.Year.map(lambda x: x.year)
df['dob'] = df['YearOfDeath'] - df['Agemidpoint'] # dob is appoximately year of death - age midpoint
df['birthcohort'] = pd.cut(df.dob, [1880, 1890, 1900, 1910, 1920, 1930,
1940, 1950, 1960, 1970, 1980, 1990, 2000],
labels=['1880-1890', '1890-1900', '1900-1910', '1910-1920',
'1920-1930','1930-1940','1940-1950','1950-1960',
'1960-1970', '1970-1980', '1980-1990', '1990-2000']) # add birth cohorts
In [4]:
mask = {}
mask['male-ipf-cohort'] = eval("(df['Sex'] == 'Male') & (df['Cause'] == 'IPF') & (df['birthcohort'] < '1980-1990')")
mask['male-meso-cohort'] = eval("(df['Sex'] == 'Male') & (df['Cause'] == 'All Mesothelioma') & (df['birthcohort'] < '1980-1990')")
mask['male-asb-cohort'] = eval("(df['Sex'] == 'Male') & (df['Cause'] == 'Asbestosis') & (df['birthcohort'] < '1980-1990')")
mask['male-ipf-cohort-ne'] = eval("(df['Sex'] == 'Male') & (df['Cause'] == 'IPF') & (df['birthcohort'] < '1980-1990') & (df['Region'] == 'NORTH EAST')")
mask['male-meso-cohort-ne'] = eval("(df['Sex'] == 'Male') & (df['Cause'] == 'All Mesothelioma') & (df['birthcohort'] < '1980-1990')& (df['Region'] == 'NORTH EAST')")
def plot_cohort_year(df, mask, title):
cohort_data = {}
df = df[mask]
for cohort in df['birthcohort'].unique():
grp = df[df['birthcohort'] == cohort].groupby('Year')
cohort_data[cohort] = grp['Rate per 100,000 population'].mean()
ax = pd.DataFrame(cohort_data).plot(figsize=(7,7), title=title)
ax.set_ylabel("Rate per 100,000 population")
plt.show()
def plot_cohort_age(df, mask, title):
cohort_data = {}
df = df[mask]
for cohort in df['birthcohort'].unique():
grp = df[df['birthcohort'] == cohort].groupby('Agemidpoint')
cohort_data[cohort] = grp['Rate per 100,000 population'].mean()
ax = pd.DataFrame(cohort_data).plot(figsize=(7,7), title=title)
ax.set_ylabel("Rate per 100,000 population")
plt.show()
In [5]:
# plot_cohort_year(df, mask['male-ipf-cohort'], 'IPF Mortality trends in England and Wales for Males \n by birth cohort')
# plot_cohort_year(df, mask['male-meso-cohort'], 'Mesothelioma Mortality trends in England and Wales for Males \n by birth cohort')
In [14]:
plot_cohort_age(df, mask['male-ipf-cohort'], 'IPF Mortality trends in England and Wales for Males \n by birth cohort')
In [15]:
plot_cohort_age(df, mask['male-meso-cohort'], 'Mesothelioma Mortality trends in England and Wales for Males \n by birth cohort')
In [16]:
plot_cohort_age(df, mask['male-asb-cohort'], 'Asbestosis Mortality trends in England and Wales for Males \n by birth cohort')
In [12]:
# plot_cohort_age(df, mask['male-meso-cohort-ne'], 'Mesothelioma Mortality trends in England and Wales for Males \n by birth cohort \n for the North East')
# plot_cohort_age(df, mask['male-ipf-cohort-ne'], 'IPF Mortality trends in England and Wales for Males \n by birth cohort \n for the North East')
In [ ]: