In [1]:
import pandas as pd
from pandas import DataFrame
import pickle
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.ticker import ScalarFormatter
import numpy as np
%matplotlib inline
In [2]:
# Load the pickled DataFrame that the rest of the notebook works on.
# NOTE(review): unpickling can execute arbitrary code, so only load this file
# when it comes from a trusted source.
df = pd.read_pickle('.././data/pickle/pypf_prep.pkl') # load data
In [3]:
# Drop the aggregate 'ALL AGES' rows. The explicit copy makes the column
# assignments below act on an independent frame instead of a filtered slice,
# which avoids pandas' SettingWithCopy warning/ambiguity.
df = df[df['Agegroup'] != 'ALL AGES'].copy()
# df = df[df['Agegroup'] != 'UNDER 25'] # optionally drop the age extremes
# df = df[df['Agegroup'] != '85+'] # optionally drop the age extremes

# Representative age (band midpoint) used for every row in an age group.
agemidpoint = {'ALL AGES': 100, 'UNDER 25': 20, '25-34': 30, '35-44': 40,
               '45-54': 50, '55-64': 60, '65-74': 70, '75-84': 80, '85+': 90}

# Mapping with the dict directly yields NaN (not None) for an unknown age
# group, so the subtraction below stays numeric instead of raising.
df['Agemidpoint'] = df['Agegroup'].map(agemidpoint)
df['YearOfDeath'] = df['Year'].map(lambda x: x.year)

# Date of birth is approximated as year of death minus the age midpoint.
df['dob'] = df['YearOfDeath'] - df['Agemidpoint']

# Ten-year birth cohorts: left-closed bins [1880, 1890), ..., [1990, 2000),
# labelled '1880-1889' ... '1990-1999'. Labels are derived from the edges so
# the two cannot drift apart.
cohort_edges = list(range(1880, 2010, 10))
cohort_labels = ['{}-{}'.format(start, start + 9) for start in cohort_edges[:-1]]
df['birthcohort'] = pd.cut(df['dob'], cohort_edges, labels=cohort_labels,
                           right=False)

# Same rate expressed per million rather than per 100,000.
df['Rate per million population'] = df['Rate per 100,000 population'] * 10
In [48]:
def _cohort_mask(cause, sex=None, before=None, after=None, region=None):
    """Build a boolean row selector over the notebook-level ``df``.

    Parameters
    ----------
    cause : value that ``df['Cause']`` must equal.
    sex : optional value that ``df['Sex']`` must equal (None = both sexes).
    before : optional cohort label; keep rows with ``birthcohort`` strictly
        below it.
    after : optional cohort label; keep rows with ``birthcohort`` strictly
        above it.
    region : optional value that ``df['Region']`` must equal.

    Returns
    -------
    Boolean Series aligned with ``df``.

    The cohort comparisons rely on ``df['birthcohort']`` supporting ordering
    against its labels (it is created with ``pd.cut`` earlier in the notebook).
    """
    selector = df['Cause'] == cause
    if sex is not None:
        selector = selector & (df['Sex'] == sex)
    if before is not None:
        selector = selector & (df['birthcohort'] < before)
    if after is not None:
        selector = selector & (df['birthcohort'] > after)
    if region is not None:
        selector = selector & (df['Region'] == region)
    return selector


# Named row selections used by the plotting and summary cells. These were
# previously produced by eval() on constant strings; the expressions are now
# built directly, with identical keys and identical selections.
mask = {
    'male-ipf-cohort-1900-1960': _cohort_mask('IPF', 'Male', before='1960-1969', after='1890-1899'),
    'female-ipf-cohort-1900-1960': _cohort_mask('IPF', 'Female', before='1960-1969', after='1890-1899'),
    'male-ipf-cohort': _cohort_mask('IPF', 'Male', before='1980-1989'),
    'female-ipf-cohort': _cohort_mask('IPF', 'Female', before='1980-1989'),
    'malefemale-ipf-cohort': _cohort_mask('IPF', before='1980-1989'),
    'male-ipf-cohort-pre-1940': _cohort_mask('IPF', 'Male', before='1940-1949'),
    'female-ipf-cohort-pre-1940': _cohort_mask('IPF', 'Female', before='1940-1949'),
    'malefemale-ipf-cohort-1900-1960': _cohort_mask('IPF', before='1960-1969', after='1890-1899'),
    'male-meso-cohort-1900-1960': _cohort_mask('All Mesothelioma', 'Male', before='1960-1969', after='1890-1899'),
    'male-meso-cohort': _cohort_mask('All Mesothelioma', 'Male', before='1980-1989'),
    'male-meso-cohort-pre-1940': _cohort_mask('All Mesothelioma', 'Male', before='1940-1949'),
    'female-meso-cohort-1900-1960': _cohort_mask('All Mesothelioma', 'Female', before='1960-1969', after='1890-1899'),
    'female-meso-cohort': _cohort_mask('All Mesothelioma', 'Female', before='1980-1989'),
    'female-meso-cohort-pre-1940': _cohort_mask('All Mesothelioma', 'Female', before='1940-1949'),
    'male-asb-cohort-1900-1960': _cohort_mask('Asbestosis', 'Male', before='1960-1969', after='1890-1899'),
    'male-asb-cohort': _cohort_mask('Asbestosis', 'Male', before='1980-1989'),
    'female-asb-cohort': _cohort_mask('Asbestosis', 'Female', before='1980-1989'),
    'female-asb-cohort-1900-1960': _cohort_mask('Asbestosis', 'Female', before='1960-1969', after='1890-1899'),
    'male-ipf-cohort-ne': _cohort_mask('IPF', 'Male', before='1980-1989', region='NORTH EAST'),
    'male-meso-cohort-ne': _cohort_mask('All Mesothelioma', 'Male', before='1980-1989', region='NORTH EAST'),
}
def plot_cohort_year(df, mask, title):
    """Plot the mean death rate per million by year, one line per birth cohort.

    Parameters
    ----------
    df : DataFrame with 'birthcohort', 'Year' and
        'Rate per million population' columns.
    mask : boolean indexer applied to ``df`` before plotting.
    title : str, used as the plot title.

    Returns
    -------
    None. The figure is shown with ``plt.show()``.
    """
    selected = df[mask]
    # One Series per birth cohort: mean rate for each value of 'Year'.
    cohort_data = {
        cohort: (selected[selected['birthcohort'] == cohort]
                 .groupby('Year')['Rate per million population']
                 .mean())
        for cohort in selected['birthcohort'].unique()
    }
    ax = pd.DataFrame(cohort_data).plot(figsize=(4, 4), title=title)
    ax.set_ylabel("Rate per million")
    # The data is grouped by 'Year', so the x axis is a year, not an age.
    ax.set_xlabel("Year")
    # NOTE(review): newer matplotlib releases spell this keyword
    # `nonpositive` -- confirm against the installed version.
    ax.set_yscale("log", nonposy='clip')
    ax.legend(loc='upper left', frameon=False)
    ax.yaxis.set_major_formatter(ScalarFormatter())  # plain numbers, not powers of ten
    plt.grid(True)
    plt.show()
def plot_cohort_age(df, mask, title):
    """Plot the mean death rate per million by age midpoint, one line per birth cohort.

    Parameters
    ----------
    df : DataFrame with 'birthcohort', 'Agemidpoint' and
        'Rate per million population' columns.
    mask : boolean indexer applied to ``df`` before plotting.
    title : str, used both as the plot title and as the file name under
        '.././fig/' when the figure is saved.

    Returns
    -------
    The matplotlib Axes the curves were drawn on.
    """
    subset = df[mask]
    rates_by_cohort = {}
    for cohort_label in subset['birthcohort'].unique():
        members = subset[subset['birthcohort'] == cohort_label]
        by_age = members.groupby('Agemidpoint')
        rates_by_cohort[cohort_label] = by_age['Rate per million population'].mean()

    ax = pd.DataFrame(rates_by_cohort).plot(figsize=(8, 8), title=title)
    ax.set_xlabel("Age")
    ax.set_ylabel("Rate per million")
    # NOTE(review): newer matplotlib releases spell this keyword
    # `nonpositive` -- confirm against the installed version.
    ax.set_yscale("log", nonposy='clip')
    # Show tick values as plain numbers rather than powers of ten.
    ax.yaxis.set_major_formatter(ScalarFormatter())
    ax.legend(loc='upper left', frameon=False)
    plt.grid(True)
    plt.savefig('.././fig/{}'.format(title))
    plt.show()
    return ax
def plot_cohort_age_region(df, mask, title):
    """Plot, for each region, the mean death rate per million by age midpoint.

    One figure is produced per distinct value of 'Region' in the selected
    rows, with one line per birth cohort present in that region.

    Parameters
    ----------
    df : DataFrame with 'Region', 'birthcohort', 'Agemidpoint' and
        'Rate per million population' columns.
    mask : boolean indexer applied to ``df`` before plotting.
    title : str, used as each plot's title and, together with the region
        name, as the saved file name under '.././fig/'.

    Returns
    -------
    None. Each figure is saved and then shown.
    """
    selected = df[mask]
    for region in selected['Region'].unique():
        print('\n{}\n'.format(region))
        # Restrict to this region; previously every region re-plotted the
        # whole selection, so all regional figures were identical.
        in_region = selected[selected['Region'] == region]
        # Rebuilt for every region so cohorts from one region cannot leak
        # into the next region's figure.
        cohort_data = {
            cohort: (in_region[in_region['birthcohort'] == cohort]
                     .groupby('Agemidpoint')['Rate per million population']
                     .mean())
            for cohort in in_region['birthcohort'].unique()
        }
        ax = pd.DataFrame(cohort_data).plot(figsize=(7, 7), title=title)
        ax.set_ylabel("Rate per million")
        ax.set_xlabel("Age")
        # NOTE(review): newer matplotlib releases spell this keyword
        # `nonpositive` -- confirm against the installed version.
        ax.set_yscale("log", nonposy='clip')
        ax.legend(loc='upper left', frameon=False)
        ax.yaxis.set_major_formatter(ScalarFormatter())  # plain numbers, not powers of ten
        plt.grid(True)
        # Name the file after the plot title and region. The old name used
        # whichever cohort the inner loop happened to finish on.
        plt.savefig('.././fig/{}{}'.format(title, region))
        plt.show()
In [49]:
# black and white mode
def black_and_white_mode():
    """Set matplotlib's default axes property cycle to black-only lines.

    Lines are told apart by marker and line style instead of colour. The
    cycle is the product colour x marker x linestyle, so all four line
    styles are used with one marker before the next marker is tried.
    """
    from cycler import cycler
    color_c = cycler('color', ['k'])
    style_c = cycler('linestyle', ['-', '--', ':', '-.'])
    markr_c = cycler('marker', ['', '.', 'o'])
    plt.rc('axes', prop_cycle=color_c * markr_c * style_c)
def colour_edition():
    """Set matplotlib's default axes property cycle to four colours x two line styles."""
    from cycler import cycler
    colours = cycler('color', ['r', 'g', 'b', 'k'])
    line_styles = cycler('linestyle', ['-', '--'])
    plt.rc('axes', prop_cycle=colours * line_styles)
In [50]:
# plot_cohort_year(df, mask['male-ipf-cohort'], 'IPF Mortality trends in England and Wales for Males \n by birth cohort')
# plot_cohort_year(df, mask['male-meso-cohort'], 'Mesothelioma Mortality trends in England and Wales for Males \n by birth cohort')
In [70]:
# Age curves per birth cohort for the 'male-ipf-cohort' selection, untitled.
# NOTE(review): plot_cohort_age saves to '.././fig/{title}', so an empty
# title gives the bare path '.././fig/' -- confirm the saved file is wanted.
plot_cohort_age(
    df=df,
    mask=mask['male-ipf-cohort'],
    title='',
)
Out[70]:
In [51]:
# black_and_white_mode() # winter bts doesn't support colour
# colour_edition()
# Age curves per birth cohort for the 'male-ipf-cohort' selection.
plot_cohort_age(
    df=df,
    mask=mask['male-ipf-cohort'],
    title='IPF Death Rates in England and Wales for Males for ten birth cohorts',
)
Out[51]:
In [8]:
# Rows of the 'male-asb-cohort' selection whose approximate birth year is 1889.
df[mask['male-asb-cohort'] & (df['dob'] == 1889)]
Out[8]:
In [68]:
# Estimated male deaths summed by approximate year of birth, one line per cause.
# NOTE(review): a later cell reassigns mask['male-ipf-cohort'] without the
# birth-cohort limit, so this figure depends on whether that cell has run.
title = 'Estimated number of male deaths (age standardised to 2008 population)\n for IPF, Mesothelioma, and Asbestosis by date of birth'
mylabels = ['IPF', 'Mesothelioma', 'Asbestosis']
deaths_col = 'Estimated deaths age standardised to 2008 population'

# Draw every series on one explicit Axes and label each line as it is
# plotted, rather than relying on the implicit current axes and on the
# legend labels matching the draw order.
fig, ax = plt.subplots(figsize=(8, 8))
for label, key in zip(mylabels, ['male-ipf-cohort', 'male-meso-cohort', 'male-asb-cohort']):
    df[mask[key]].groupby('dob')[deaths_col].sum().plot(ax=ax, label=label)
ax.set_title(title)
ax.set_ylabel("Estimated number deaths")
ax.grid(True)
ax.legend(loc='upper left', frameon=False)
ax
Out[68]:
In [10]:
# Total estimated (age-standardised) deaths per birth cohort, 'male-ipf-cohort' selection.
(
    df.loc[mask['male-ipf-cohort']]
    .groupby('birthcohort')['Estimated deaths age standardised to 2008 population']
    .sum()
)
Out[10]:
In [11]:
# Median estimated (age-standardised) deaths per birth cohort, 'male-meso-cohort' selection.
(
    df.loc[mask['male-meso-cohort']]
    .groupby('birthcohort')['Estimated deaths age standardised to 2008 population']
    .median()
)
Out[11]:
In [12]:
# Median estimated (age-standardised) deaths per birth cohort, 'male-asb-cohort' selection.
(
    df.loc[mask['male-asb-cohort']]
    .groupby('birthcohort')['Estimated deaths age standardised to 2008 population']
    .median()
)
Out[12]:
In [13]:
# Age curves per birth cohort for the 'female-ipf-cohort' selection.
plot_cohort_age(
    df=df,
    mask=mask['female-ipf-cohort'],
    title='IPF Death Rates in England and Wales for Females \n for ten birth cohorts',
)
Out[13]:
In [14]:
# Age curves per birth cohort for the 'malefemale-ipf-cohort' selection (both sexes).
plot_cohort_age(
    df=df,
    mask=mask['malefemale-ipf-cohort'],
    title='IPF death rates in England and Wales for ten birth cohorts',
)
Out[14]:
In [15]:
# Age curves per birth cohort for the 'malefemale-ipf-cohort-1900-1960' selection.
plot_cohort_age(
    df=df,
    mask=mask['malefemale-ipf-cohort-1900-1960'],
    title='IPF Death Rates in England and Wales for six birth cohorts',
)
Out[15]:
In [16]:
# Age curves per birth cohort for the 'male-meso-cohort' selection.
plot_cohort_age(
    df=df,
    mask=mask['male-meso-cohort'],
    title='Mesothelioma Death Rates in England and Wales for Males \n for ten birth cohorts',
)
Out[16]:
In [17]:
# Age curves per birth cohort for the 'male-meso-cohort-pre-1940' selection.
plot_cohort_age(
    df=df,
    mask=mask['male-meso-cohort-pre-1940'],
    title='Mesothelioma Death Rates in England and Wales for Males \n for pre-1940 birth cohorts',
)
Out[17]:
In [18]:
# Age curves per birth cohort for the 'female-meso-cohort' selection.
plot_cohort_age(
    df=df,
    mask=mask['female-meso-cohort'],
    title='Mesothelioma Death Rates in England and Wales for Females \n for ten birth cohorts',
)
Out[18]:
In [19]:
# Age curves per birth cohort for the 'female-meso-cohort-pre-1940' selection.
plot_cohort_age(
    df=df,
    mask=mask['female-meso-cohort-pre-1940'],
    title='Mesothelioma Death Rates in England and Wales for Females \n for pre-1940 birth cohorts',
)
Out[19]:
In [20]:
# Age curves per birth cohort for the 'male-asb-cohort' selection.
plot_cohort_age(
    df=df,
    mask=mask['male-asb-cohort'],
    title='Asbestosis Death Rates in England and Wales for Males \n for ten birth cohorts',
)
Out[20]:
In [21]:
# Age curves per birth cohort for the 'female-asb-cohort' selection.
plot_cohort_age(
    df=df,
    mask=mask['female-asb-cohort'],
    title='Asbestosis Death Rates in England and Wales for Females \n for ten birth cohorts',
)
Out[21]:
In [22]:
# plot_cohort_age(df, mask['male-meso-cohort-ne'], 'Mesothelioma Mortality trends in England and Wales for Males \n by birth cohort \n for the North East')
# plot_cohort_age(df, mask['male-ipf-cohort-ne'], 'IPF Mortality trends in England and Wales for Males \n by birth cohort \n for the North East')
In [23]:
# Redefine the IPF masks to cover every row for each sex, with no
# birth-cohort limit. The boolean expressions are built directly; eval() on
# a constant string added nothing.
# NOTE: this overwrites the cohort-limited entries created earlier under the
# same keys, so any earlier cell re-run after this one sees the wider
# selection.
mask['male-ipf-cohort'] = (df['Sex'] == 'Male') & (df['Cause'] == 'IPF')
mask['female-ipf-cohort'] = (df['Sex'] == 'Female') & (df['Cause'] == 'IPF')
In [24]:
# Mean male IPF rate per 100,000 divided by a fixed reference value.
# NOTE(review): 5.8921637021400297 is an unexplained constant -- presumably a
# previously computed mean rate (possibly the female figure); confirm and
# replace it with the computed value.
df.loc[mask['male-ipf-cohort'], 'Rate per 100,000 population'].mean() / 5.8921637021400297
Out[24]:
In [25]:
# Mean rate per 100,000 over the 'female-ipf-cohort' selection.
df.loc[mask['female-ipf-cohort'], 'Rate per 100,000 population'].mean()
Out[25]:
In [26]:
# Total recorded deaths across all rows whose cause is IPF.
df.loc[df['Cause'] == 'IPF', 'Deaths'].sum()
Out[26]:
In [27]:
# Male-to-female ratio of IPF deaths with year of death 2012.
(
    df.loc[(df['Cause'] == 'IPF') & (df['Sex'] == 'Male') & (df['YearOfDeath'] == 2012), 'Deaths'].sum()
    / df.loc[(df['Cause'] == 'IPF') & (df['Sex'] == 'Female') & (df['YearOfDeath'] == 2012), 'Deaths'].sum()
)
Out[27]:
In [28]:
# Total recorded IPF deaths among females.
df.loc[(df['Cause'] == 'IPF') & (df['Sex'] == 'Female'), 'Deaths'].sum()
Out[28]:
In [29]:
# Ratio of the summed '2008 population' column for male rows to female rows.
(
    df.loc[df['Sex'] == 'Male', '2008 population'].sum()
    / df.loc[df['Sex'] == 'Female', '2008 population'].sum()
)
Out[29]:
In [30]:
# Every row whose year of death is 2012.
df.loc[df['YearOfDeath'] == 2012]
Out[30]:
In [31]:
# Age curves per birth cohort for the 'male-ipf-cohort-1900-1960' selection.
plot_cohort_age(
    df=df,
    mask=mask['male-ipf-cohort-1900-1960'],
    title='IPF Male',
)
Out[31]:
In [32]:
# Age curves per birth cohort for the 'male-meso-cohort-1900-1960' selection.
plot_cohort_age(
    df=df,
    mask=mask['male-meso-cohort-1900-1960'],
    title='Meso Male',
)
Out[32]:
In [33]:
# Age curves per birth cohort for the 'male-asb-cohort-1900-1960' selection.
plot_cohort_age(
    df=df,
    mask=mask['male-asb-cohort-1900-1960'],
    title='Asb Male',
)
Out[33]:
In [34]:
# Age curves per birth cohort for the 'female-ipf-cohort-1900-1960' selection.
plot_cohort_age(
    df=df,
    mask=mask['female-ipf-cohort-1900-1960'],
    title='IPF Female',
)
Out[34]:
In [35]:
# Age curves per birth cohort for the 'female-meso-cohort-1900-1960' selection.
plot_cohort_age(
    df=df,
    mask=mask['female-meso-cohort-1900-1960'],
    title='Meso Female',
)
Out[35]:
In [36]:
# Age curves per birth cohort for the 'female-asb-cohort-1900-1960' selection.
plot_cohort_age(
    df=df,
    mask=mask['female-asb-cohort-1900-1960'],
    title='Asb Female',
)
Out[36]:
In [37]:
# Lay out subplots on a 3x3 GridSpec.
# NOTE(review): pyplot is already imported in the first cell, and these
# imports sit mid-notebook -- consider moving them to the top import cell.
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
gs = gridspec.GridSpec(3, 3)
# First axes spans the whole top row of the grid.
ax1 = plt.subplot(gs[0, :])
# plot() is called with no data, so nothing is drawn on ax1 here.
ax1.plot()
# Second axes spans the middle row, all columns except the last.
ax2 = plt.subplot(gs[1,:-1])
In [38]:
# Keep the Axes returned for the 'female-asb-cohort-1900-1960' plot.
jam = plot_cohort_age(
    df=df,
    mask=mask['female-asb-cohort-1900-1960'],
    title='Asb Female',
)
In [39]:
# Keep the Axes returned for the 'female-asb-cohort-1900-1960' plot.
# NOTE(review): this repeats the previous cell exactly -- confirm both are needed.
jam = plot_cohort_age(
    df=df,
    mask=mask['female-asb-cohort-1900-1960'],
    title='Asb Female',
)
In [ ]: