In [1]:
import pandas as pd
from pandas import DataFrame
import pickle
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.ticker import ScalarFormatter
import numpy as np
%matplotlib inline
In [2]:
# Read the prepared data table (a pickled pandas object) from the data folder.
# NOTE: unpickling can execute arbitrary code, so only load pickle files that
# were produced locally by a trusted step of this project.
df = pd.read_pickle(
    '.././data/pickle/pypf_prep.pkl'
)
In [3]:
# Keep only the age-specific rows. The explicit .copy() makes df an independent
# frame, so the column assignments below are not chained assignments on a
# filtered selection (the cause of pandas' SettingWithCopyWarning).
df = df[df['Agegroup'] != 'ALL AGES'].copy()

# Representative age assigned to each age band when estimating a year of birth.
# ('ALL AGES' is kept in the table for reference; those rows were dropped above.)
agemidpoint = {'ALL AGES': 100, 'UNDER 25': 20, '25-34': 30, '35-44': 40,
               '45-54': 50, '55-64': 60, '65-74': 70, '75-84': 80, '85+': 90}
df['Agemidpoint'] = df['Agegroup'].map(agemidpoint.get)

# Assumes every value in 'Year' exposes a .year attribute (date/Timestamp-like).
df['YearOfDeath'] = df['Year'].map(lambda x: x.year)

# Approximate year of birth = year of death - representative age at death.
df['dob'] = df['YearOfDeath'] - df['Agemidpoint']

# Ten-year birth cohorts, closed on the left: [1880, 1890), ..., [1990, 2000).
# The labels ('1880-1889' ... '1990-1999') are derived from the bin edges so the
# two can never get out of step.
cohort_edges = list(range(1880, 2010, 10))
cohort_labels = ['{}-{}'.format(start, start + 9) for start in cohort_edges[:-1]]
df['birthcohort'] = pd.cut(df['dob'], cohort_edges, labels=cohort_labels, right=False)

# 1 death per 100,000 population is 10 deaths per 1,000,000 population.
df['Rate per million population'] = df['Rate per 100,000 population'] * 10
In [4]:
def _cohort_mask(cause, before, sex=None, region=None):
    """Build a boolean row selector over the module-level ``df``.

    Parameters
    ----------
    cause : str
        Value required in the 'Cause' column.
    before : str
        Birth-cohort label; only rows whose 'birthcohort' compares strictly
        less than this label are selected.
    sex : str, optional
        Value required in the 'Sex' column. ``None`` keeps both sexes.
    region : str, optional
        Value required in the 'Region' column. ``None`` keeps every region.

    Returns
    -------
    pandas.Series
        Boolean Series aligned with ``df``.
    """
    # 'birthcohort' is produced by pd.cut, so `<` compares cohort order.
    selected = (df['Cause'] == cause) & (df['birthcohort'] < before)
    if sex is not None:
        selected = selected & (df['Sex'] == sex)
    if region is not None:
        selected = selected & (df['Region'] == region)
    return selected


# Cohort cut-offs: "< '1980-1989'" keeps the ten cohorts 1880-1889 ... 1970-1979,
# "< '1940-1949'" keeps the six cohorts born before 1940.
_BEFORE_1980 = '1980-1989'
_BEFORE_1940 = '1940-1949'

# Named row selections used by the plotting cells. Built as plain boolean
# expressions (no eval of string literals) so mistakes surface as ordinary errors.
mask = {
    'male-ipf-cohort': _cohort_mask('IPF', _BEFORE_1980, sex='Male'),
    'female-ipf-cohort': _cohort_mask('IPF', _BEFORE_1980, sex='Female'),
    'malefemale-ipf-cohort': _cohort_mask('IPF', _BEFORE_1980),
    'male-ipf-cohort-pre-1940': _cohort_mask('IPF', _BEFORE_1940, sex='Male'),
    'female-ipf-cohort-pre-1940': _cohort_mask('IPF', _BEFORE_1940, sex='Female'),
    'male-meso-cohort': _cohort_mask('All Mesothelioma', _BEFORE_1980, sex='Male'),
    'male-meso-cohort-pre-1940': _cohort_mask('All Mesothelioma', _BEFORE_1940, sex='Male'),
    'female-meso-cohort': _cohort_mask('All Mesothelioma', _BEFORE_1980, sex='Female'),
    'female-meso-cohort-pre-1940': _cohort_mask('All Mesothelioma', _BEFORE_1940, sex='Female'),
    'male-asb-cohort': _cohort_mask('Asbestosis', _BEFORE_1980, sex='Male'),
    'female-asb-cohort': _cohort_mask('Asbestosis', _BEFORE_1980, sex='Female'),
    'male-ipf-cohort-ne': _cohort_mask('IPF', _BEFORE_1980, sex='Male', region='NORTH EAST'),
    'male-meso-cohort-ne': _cohort_mask('All Mesothelioma', _BEFORE_1980, sex='Male',
                                        region='NORTH EAST'),
}
def plot_cohort_year(df, mask, title):
    """Plot the mean death rate per year of death, one line per birth cohort.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the 'birthcohort', 'Year' and
        'Rate per million population' columns.
    mask : boolean indexer
        Row selector applied as ``df[mask]`` before anything else.
    title : str
        Title drawn above the axes.

    The figure is displayed with a logarithmic y axis; nothing is returned.
    """
    selected = df[mask]

    # One Series per cohort: mean rate indexed by 'Year'.
    cohort_data = {}
    for cohort in selected['birthcohort'].unique():
        in_cohort = selected[selected['birthcohort'] == cohort]
        cohort_data[cohort] = in_cohort.groupby('Year')['Rate per million population'].mean()

    ax = pd.DataFrame(cohort_data).plot(figsize=(7, 7), title=title)
    ax.set_ylabel("Rate per million")
    # The data are grouped by 'Year', so that is what the x axis shows.
    ax.set_xlabel("Year")
    try:
        ax.set_yscale("log", nonpositive='clip')
    except (TypeError, ValueError):
        # Older Matplotlib releases only accept the axis-specific keyword.
        ax.set_yscale("log", nonposy='clip')
    ax.legend(loc='upper left', frameon=False)
    # Set after set_yscale (which installs a log formatter) so tick labels are
    # written as plain numbers rather than powers of ten.
    ax.yaxis.set_major_formatter(ScalarFormatter())
    ax.grid(True)
    plt.show()
def plot_cohort_age(df, mask, title):
    """Plot the mean death rate by age at death, one line per birth cohort.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the 'birthcohort', 'Agemidpoint' and
        'Rate per million population' columns.
    mask : boolean indexer
        Row selector applied as ``df[mask]`` before anything else.
    title : str
        Title drawn above the axes; also used verbatim as the name of the
        file written under '.././fig/'.

    The figure is saved, then displayed with a logarithmic y axis; nothing is
    returned.
    """
    selected = df[mask]

    # One Series per cohort: mean rate indexed by 'Agemidpoint'.
    cohort_data = {}
    for cohort in selected['birthcohort'].unique():
        in_cohort = selected[selected['birthcohort'] == cohort]
        cohort_data[cohort] = in_cohort.groupby('Agemidpoint')['Rate per million population'].mean()

    ax = pd.DataFrame(cohort_data).plot(figsize=(7, 7), title=title)
    ax.set_ylabel("Rate per million")
    ax.set_xlabel("Age")
    try:
        ax.set_yscale("log", nonpositive='clip')
    except (TypeError, ValueError):
        # Older Matplotlib releases only accept the axis-specific keyword.
        ax.set_yscale("log", nonposy='clip')
    ax.legend(loc='upper left', frameon=False)
    # Set after set_yscale (which installs a log formatter) so tick labels are
    # written as plain numbers rather than powers of ten.
    ax.yaxis.set_major_formatter(ScalarFormatter())
    ax.grid(True)
    # NOTE(review): the file name is the title as given, including any newline
    # characters it contains, and carries no extension. Kept unchanged so
    # existing figure paths stay valid.
    plt.savefig('.././fig/{}'.format(title))
    plt.show()
def plot_cohort_age_region(df, mask, title):
    """Plot mean death rate by age per birth cohort, one figure per region.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the 'Region', 'birthcohort', 'Agemidpoint' and
        'Rate per million population' columns.
    mask : boolean indexer
        Row selector applied as ``df[mask]`` before anything else.
    title : str
        Title drawn above every figure; combined with the region name to form
        the file name written under '.././fig/'.

    For each region present in the selection the region name is printed, and a
    figure built from that region's rows only is saved and displayed. Nothing
    is returned.
    """
    selected = df[mask]

    # Rows without a region cannot be attributed to any figure, so skip them.
    for region in selected['Region'].dropna().unique():
        print('\n{}\n'.format(region))

        # Restrict to this region; otherwise every figure would show the same
        # all-region averages.
        in_region = selected[selected['Region'] == region]

        # Start from an empty table so cohorts from a previous region cannot
        # leak into this figure.
        cohort_data = {}
        for cohort in in_region['birthcohort'].unique():
            in_cohort = in_region[in_region['birthcohort'] == cohort]
            cohort_data[cohort] = in_cohort.groupby('Agemidpoint')['Rate per million population'].mean()

        ax = pd.DataFrame(cohort_data).plot(figsize=(7, 7), title=title)
        ax.set_ylabel("Rate per million")
        ax.set_xlabel("Age")
        try:
            ax.set_yscale("log", nonpositive='clip')
        except (TypeError, ValueError):
            # Older Matplotlib releases only accept the axis-specific keyword.
            ax.set_yscale("log", nonposy='clip')
        ax.legend(loc='upper left', frameon=False)
        # Set after set_yscale (which installs a log formatter) so tick labels
        # are written as plain numbers rather than powers of ten.
        ax.yaxis.set_major_formatter(ScalarFormatter())
        ax.grid(True)
        # Name the file after the plot title and the region, not after whichever
        # cohort the inner loop happened to visit last.
        plt.savefig('.././fig/{}{}'.format(title, region))
        plt.show()
In [5]:
# black and white mode
def black_and_white_mode():
    """Switch matplotlib's default line styling to black-only.

    Installs an axes property cycle built from one colour (black), three
    markers and four line styles, i.e. twelve distinct black line
    appearances. The setting is applied through ``plt.rc`` and therefore
    affects axes created after this call. Takes no arguments and returns
    nothing.
    """
    from cycler import cycler
    color_c = cycler('color', ['k'])
    style_c = cycler('linestyle', ['-', '--', ':', '-.'])
    markr_c = cycler('marker', ['', '.', 'o'])
    # Product of the three cyclers: every marker/linestyle pairing, all in black.
    plt.rc('axes', prop_cycle=color_c * markr_c * style_c)
In [6]:
# plot_cohort_year(df, mask['male-ipf-cohort'], 'IPF Mortality trends in England and Wales for Males \n by birth cohort')
# plot_cohort_year(df, mask['male-meso-cohort'], 'Mesothelioma Mortality trends in England and Wales for Males \n by birth cohort')
In [7]:
# Figures must be monochrome (winter bts doesn't support colour), so install
# the black-and-white property cycle before the first plot is drawn.
black_and_white_mode()

# IPF, males, cohorts before '1980-1989'.
plot_cohort_age(
    df,
    mask['male-ipf-cohort'],
    'IPF Death Rates in England and Wales for Males \n for ten birth cohorts',
)
In [8]:
# IPF, females, cohorts before '1980-1989'.
plot_cohort_age(
    df,
    mask['female-ipf-cohort'],
    'IPF Death Rates in England and Wales for Females \n for ten birth cohorts',
)
In [9]:
# IPF, both sexes combined, cohorts before '1980-1989'.
plot_cohort_age(
    df,
    mask['malefemale-ipf-cohort'],
    'IPF Death Rates in England and Wales \nfor ten birth cohorts',
)
In [10]:
# Mesothelioma, males, cohorts before '1980-1989'.
plot_cohort_age(
    df,
    mask['male-meso-cohort'],
    'Mesothelioma Death Rates in England and Wales for Males \n for ten birth cohorts',
)
In [11]:
# Mesothelioma, males, cohorts before '1940-1949' only.
plot_cohort_age(
    df,
    mask['male-meso-cohort-pre-1940'],
    'Mesothelioma Death Rates in England and Wales for Males \n for pre-1940 birth cohorts',
)
In [12]:
# Mesothelioma, females, cohorts before '1980-1989'.
plot_cohort_age(
    df,
    mask['female-meso-cohort'],
    'Mesothelioma Death Rates in England and Wales for Females \n for ten birth cohorts',
)
In [13]:
# Mesothelioma, females, cohorts before '1940-1949' only.
plot_cohort_age(
    df,
    mask['female-meso-cohort-pre-1940'],
    'Mesothelioma Death Rates in England and Wales for Females \n for pre-1940 birth cohorts',
)
In [14]:
# Asbestosis, males, cohorts before '1980-1989'.
plot_cohort_age(
    df,
    mask['male-asb-cohort'],
    'Asbestosis Death Rates in England and Wales for Males \n for ten birth cohorts',
)
In [15]:
# Asbestosis, females, cohorts before '1980-1989'.
plot_cohort_age(
    df,
    mask['female-asb-cohort'],
    'Asbestosis Death Rates in England and Wales for Females \n for ten birth cohorts',
)
In [16]:
# plot_cohort_age(df, mask['male-meso-cohort-ne'], 'Mesothelioma Mortality trends in England and Wales for Males \n by birth cohort \n for the North East')
# plot_cohort_age(df, mask['male-ipf-cohort-ne'], 'IPF Mortality trends in England and Wales for Males \n by birth cohort \n for the North East')