Lets make pictures



In [1]:

    
import pandas as pd
from pandas import DataFrame
import pickle
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
%matplotlib inline



In [2]:

    
df = pd.read_pickle('.././data/pickle/pypf_prep.pkl')
df.index = df.Year
#df1 = df #lets save this result
#df = df[df['Agegroup'] != 'ALL AGES'] #lets throw away all ages rows



In [3]:

    
#graph a la Navaratnam 2011

grp = df[(df['Cause'] == 'IPF') & (df['Year'] < pd.to_datetime('2009')) & (df['Agegroup'] == 'ALL AGES')].groupby('Year')

data = grp['Estimated deaths age standardised to 2008 population'].sum()

major = mdates.YearLocator(2)
minor = mdates.MonthLocator()

plt.figure(figsize=(7, 7))
x = data.index
y = data.values

plt.xticks(rotation=70)
plt.ylim((0,3500))
plt.ylabel('Deaths')
plt.xlabel('Year')
plt.title('Mortality trends in England and Wales for IPF')

plt.annotate("ICD-9(1979)", (pd.to_datetime('1979'), 1000), (pd.to_datetime('1976'), 1500), 
               arrowprops=dict(arrowstyle="->")) 
plt.annotate("ICD-10(2000)", (pd.to_datetime('2000'), 2500), (pd.to_datetime('1997'), 3000), 
               arrowprops=dict(arrowstyle="->")) 
plt.grid(True)
p1, = plt.plot(x, y, 'k--', linewidth=2.0) 

ax = plt.gca()
ax.xaxis.set_major_locator(major)
ax.legend([p1], ["Expected number of deaths"], loc='upper left', frameon=False)

#ax.xaxis.set_minor_locator(minor)

plt.savefig('.././fig/IPF mortality trends in England and Wales to 2008.png')



In [4]:

    
#graph a la Navaratnam 2011

grp = df[(df['Cause'] == 'IPF') &(df['Agegroup'] == 'ALL AGES')].groupby('Year')

data = grp['Estimated deaths age standardised to 2008 population'].sum()

major = mdates.YearLocator(2)
minor = mdates.MonthLocator()

plt.figure(figsize=(7, 7))
x = data.index
y = data.values

plt.xticks(rotation=70)
plt.ylim((0,4000))
plt.ylabel('Deaths')
plt.xlabel('Year')
plt.title('Mortality trends in England and Wales for IPF')

plt.annotate("ICD-9(1979)", (pd.to_datetime('1979'), 1000), (pd.to_datetime('1976'), 1500), 
               arrowprops=dict(arrowstyle="->")) 
plt.annotate("ICD-10(2000)", (pd.to_datetime('2000'), 2500), (pd.to_datetime('1997'), 3000), 
               arrowprops=dict(arrowstyle="->")) 
plt.grid(True)
p1, = plt.plot(x, y, 'k--', linewidth=2.0) 

ax = plt.gca()
ax.xaxis.set_major_locator(major)
ax.legend([p1], ["Expected number of deaths"], loc='upper left', frameon=False)

#ax.xaxis.set_minor_locator(minor)

plt.savefig('.././fig/IPF mortality trends in England and Wales to 2012.png')



In [5]:

    
grp = df[(df['Cause'] == 'IPF') & (df['Sex'] == 'Male') & (df['Agegroup'] == 'ALL AGES')].groupby('Year')
data = grp['Estimated deaths age standardised to 2008 population'].sum()

grp = df[(df['Cause'] == 'IPF') & (df['Sex'] == 'Female') & (df['Agegroup'] == 'ALL AGES')].groupby('Year')
data1 = grp['Estimated deaths age standardised to 2008 population'].sum()

major = mdates.YearLocator(2)
minor = mdates.MonthLocator()

plt.figure(figsize=(7, 7))
x = data.index
y = data.values

x1 = data1.index
y1 = data1.values


plt.xticks(rotation=70)

plt.ylim((0,3000))
plt.ylabel('Estimated number of deaths age standardised to 2008 population')
plt.xlabel('Year')
plt.title('Mortality trends in England and Wales for Males and Females for IPF')
#plt.annotate("ICD-9(1979)", (pd.to_datetime('1979'), 1000), (pd.to_datetime('1976'), 1500), 
              # arrowprops=dict(arrowstyle="->")) 
#plt.annotate("ICD-10(2000)", (pd.to_datetime('2000'), 2500), (pd.to_datetime('1997'), 3000), 
              # arrowprops=dict(arrowstyle="->")) 
plt.grid(True)
p1, = plt.plot(x, y, 'k-', linewidth=2.0) 
p2, = plt.plot(x1, y1,'k--', linewidth=2.0) 

ax = plt.gca()
ax.xaxis.set_major_locator(major)
ax.legend([p1, p2], ["Expected number of deaths (male)", "Expected number of deaths (female)"], loc='upper left', frameon=False)

plt.savefig('.././fig/IPF mortality trends in England and Wales for Males and Females.png')



In [6]:

    
df_male = df[(df['Sex'] == 'Male') & (df['Agegroup'] == 'ALL AGES')]

grp = df_male[df_male['Cause'] == 'IPF'].groupby('Year')
data = grp['Estimated deaths age standardised to 2008 population'].sum()


grp = df_male[(df_male['Cause'] == 'All Mesothelioma') & (df_male['Year'] < pd.to_datetime('2001'))].groupby('Year') 
data1 = grp['Estimated deaths age standardised to 2008 population'].sum()#all meso

grp = df_male[(df_male['Cause'] == 'All Mesothelioma') & (df_male['Year'] > pd.to_datetime('2000'))].groupby('Year') 
data2 = grp['Estimated deaths age standardised to 2008 population'].sum()#all meso

grp = df_male[df_male['Cause'] == 'Asbestosis'].groupby('Year')
data3 = grp['Estimated deaths age standardised to 2008 population'].sum()

#grp = df_male[df_male['Cause'] == 'Mesothelioma_other'].groupby('Year') #all non pulmonary
#data = grp['Estimated deaths age standardised to 2008 population'].sum() 

#grp = df_male[df_male['Cause'] == 'Mesothelioma'].groupby('Year') #pulmonary
#data1 = grp['Estimated deaths age standardised to 2008 population'].sum()


#make an (ugly) graph a la Navaratnam
#grp = dffinal6[(dffinal6['Cause'] == 'IPF') & (dffinal6['Sex'] == 'Male')].groupby('Year')
#data = grp['Estimated deaths age standardised to 2008 population'].sum()

#grp = dffinal6[(dffinal6['Cause'] == 'IPF') & (dffinal6['Sex'] == 'Female')].groupby('Year')
#data1 = grp['Estimated deaths age standardised to 2008 population'].sum()

plt.figure(figsize=(7, 7))
x = data.index
y = data.values

x1 = data1.index
y1 = data1.values

x2 = data2.index
y2 = data2.values

x3 = data3.index
y3 = data3.values

major = mdates.YearLocator(2)
minor = mdates.MonthLocator()
plt.xticks(rotation=70)

plt.ylim((0,3000))
plt.ylabel('Estimated number of deaths age standardised to 2008 population')
plt.xlabel('Year')
plt.title('Mortality trends in England and Wales for Males \n for IPF, Mesothelioma, and Asbestosis')
plt.annotate("ICD-9(1979)", (pd.to_datetime('1979'), 500), (pd.to_datetime('1976'), 1000), 
               arrowprops=dict(arrowstyle="->")) 
plt.annotate("ICD-10(2000)", (pd.to_datetime('2000'), 1500), (pd.to_datetime('1997'), 2000), 
               arrowprops=dict(arrowstyle="->")) 
plt.grid(True)
p1, = plt.plot(x, y, 'k-', linewidth=2.0) 
p2, = plt.plot(x1, y1,'b--', linewidth=2.0) 
p3, = plt.plot(x2, y2, 'b-', linewidth=2.0) 
p4, = plt.plot(x3, y3, 'r-', linewidth=2.0) 

ax = plt.gca()
ax.xaxis.set_major_locator(major)
ax.legend([p1, p2, p3, p4], ["Idiopathic Pulmonary Fibrosis", "Mesothelioma (pre ICD-10)", "Mesothelioma (ICD-10)", "Asbestosis"], loc='upper left', frameon=False)
plt.savefig('.././fig/IPF mortality trends in England and Wales for Males for IPF, Mesothelioma, and Asbestosis.png')



In [7]:

    
df_male = df[(df['Sex'] == 'Male') & (df['Agegroup'] == 'ALL AGES')]

grp = df_male[df_male['Cause'] == 'IPF'].groupby('Year')
data = DataFrame(grp['Estimated deaths age standardised to 2008 population'].sum())
data = data.pct_change()


grp = df_male[(df_male['Cause'] == 'All Mesothelioma') & (df_male['Year'] < pd.to_datetime('2001'))].groupby('Year') 
data1 = DataFrame(grp['Estimated deaths age standardised to 2008 population'].sum())#all meso
data1 = data1.pct_change()


grp = df_male[(df_male['Cause'] == 'All Mesothelioma') & (df_male['Year'] > pd.to_datetime('2000'))].groupby('Year') 
data2 = DataFrame(grp['Estimated deaths age standardised to 2008 population'].sum())#all meso
data2 = data2.pct_change()



grp = df_male[df_male['Cause'] == 'Asbestosis'].groupby('Year')
data3 = DataFrame(grp['Estimated deaths age standardised to 2008 population'].sum())
data3 = data3.pct_change()



#grp = df_male[df_male['Cause'] == 'Mesothelioma_other'].groupby('Year') #all non pulmonary
#data = grp['Estimated deaths age standardised to 2008 population'].sum() 

#grp = df_male[df_male['Cause'] == 'Mesothelioma'].groupby('Year') #pulmonary
#data1 = grp['Estimated deaths age standardised to 2008 population'].sum()


#make an (ugly) graph a la Navaratnam
#grp = dffinal6[(dffinal6['Cause'] == 'IPF') & (dffinal6['Sex'] == 'Male')].groupby('Year')
#data = grp['Estimated deaths age standardised to 2008 population'].sum()

#grp = dffinal6[(dffinal6['Cause'] == 'IPF') & (dffinal6['Sex'] == 'Female')].groupby('Year')
#data1 = grp['Estimated deaths age standardised to 2008 population'].sum()



In [8]:

    
df1 = DataFrame(data1)



In [9]:

    
data.values









    Out[9]:





array([[        nan],
       [ 0.0293261 ],
       [ 0.10409772],
       [-0.14198728],
       [ 0.04576732],
       [ 0.15554248],
       [ 0.07613366],
       [ 0.12430156],
       [ 0.10774118],
       [ 0.01907658],
       [ 0.26515021],
       [ 0.12604908],
       [-0.00271688],
       [ 0.0195477 ],
       [ 0.03745367],
       [-0.01545349],
       [ 0.12985665],
       [-0.0236159 ],
       [ 0.03681614],
       [ 0.14705037],
       [ 0.00837537],
       [-0.01525441],
       [ 0.10592986],
       [ 0.07317156],
       [ 0.03895109],
       [ 0.03361469],
       [ 0.03121786],
       [ 0.04545206],
       [ 0.02945093],
       [ 0.05483315],
       [ 0.02212873],
       [ 0.00171588],
       [ 0.00684932],
       [ 0.12112405],
       [ 0.01568766],
       [-0.00456942],
       [ 0.08870428],
       [ 0.12925826],
       [ 0.01619492]])



In [10]:

    
np.corrcoef(data.values, data1.values)[0, 1]









    



/home/drcjar/.virtualenvs/litsearch2/lib/python3.5/site-packages/numpy/lib/function_base.py:2487: RuntimeWarning: Degrees of freedom <= 0 for slice
  warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning)
/home/drcjar/.virtualenvs/litsearch2/lib/python3.5/site-packages/numpy/lib/function_base.py:2496: RuntimeWarning: divide by zero encountered in double_scalars
  c *= 1. / np.float64(fact)
/home/drcjar/.virtualenvs/litsearch2/lib/python3.5/site-packages/numpy/lib/function_base.py:2496: RuntimeWarning: invalid value encountered in multiply
  c *= 1. / np.float64(fact)






    Out[10]:





nan



In [12]:

    
print (data.mean())
print (data1.mean())
print (data2.mean())
print (data3.mean())









    



Estimated deaths age standardised to 2008 population    0.053762
dtype: float64
Estimated deaths age standardised to 2008 population    0.092655
dtype: float64
Estimated deaths age standardised to 2008 population    0.022435
dtype: float64
Estimated deaths age standardised to 2008 population    0.094559
dtype: float64



In [13]:

    
plt.figure(figsize=(15, 10))
x = data.index
y = data.values * 100

x1 = data1.index
y1 = data1.values * 100

x2 = data2.index
y2 = data2.values * 100

x3 = data3.index
y3 = data3.values * 100

major = mdates.YearLocator(1)
minor = mdates.MonthLocator()
plt.xticks(rotation=70)

#plt.ylim((0,100))
plt.ylabel('Percent change')
plt.xlabel('Year')
plt.title('Mortality trends in England and Wales for Males \n for IPF, Mesothelioma, and Asbestosis')
#plt.annotate("ICD-9(1979)", (pd.to_datetime('1979'), 1000), (pd.to_datetime('1976'), 1500), 
              # arrowprops=dict(arrowstyle="->")) 
#plt.annotate("ICD-10(2000)", (pd.to_datetime('2000'), 2500), (pd.to_datetime('1997'), 3000), 
              # arrowprops=dict(arrowstyle="->")) 
plt.grid(True)
p1, = plt.plot(x, y, 'k-', linewidth=2.0) 
p2, = plt.plot(x1, y1,'b--', linewidth=2.0) 
p3, = plt.plot(x2, y2, 'b-', linewidth=2.0) 
p4, = plt.plot(x3, y3, 'r-', linewidth=2.0) 

ax = plt.gca()
ax.xaxis.set_major_locator(major)
ax.legend([p1, p2, p3, p4], ["Idiopathic Pulmonary Fibrosis", "Mesothelioma (pre ICD-10)", "Mesothelioma (ICD-10)", "Asbestosis"], loc='upper left', frameon=False)

plt.savefig('.././fig/IPF mortality trends in England and Wales for Males for IPF, Mesothelioma, and Asbestosis.png')

Lets make mortality rate with confidence intervals as per navaratum



In [14]:

    
import numpy as np
import scipy as sp
import scipy.stats

def mean_confidence_interval(data, confidence=0.95):
    a = 1.0*np.array(data)
    n = len(a)
    m, se = np.mean(a), scipy.stats.sem(a)
    h = se * sp.stats.t._ppf((1+confidence)/2., n-1)
    #eturn m, m-h, m+h
    return '%.2f (%.2f - %.2f)' % (m, m-h, m+h)



In [15]:

    
#for a given period and cause this function calculates the national period population (in person-years), number of deaths 
#in the period and the standardised (to a 2008 population) number of deaths using our data by rolling up the region numbers
#and years


def year_analysis(period, cause):
    perpop = []
    perdeath = []
    perstddeath = []
    
    for i, item in enumerate(period):
        regpop = df[(df['Agegroup'] == 'ALL AGES') & (df['Year'] == pd.to_datetime(period[i])) & (df['Cause'] == cause)].groupby('Region').Population.sum()
        natpop = sum(regpop)
        perpop.append(natpop)
        
        regdeath = df[(df['Agegroup'] == 'ALL AGES') & (df['Year'] == pd.to_datetime(period[i])) & (df['Cause'] == cause)].groupby('Region')['Deaths'].sum()
        natdeath = sum(regdeath)
        perdeath.append(natdeath)  
        
        regstddeath = df[(df['Agegroup'] == 'ALL AGES') & (df['Year'] == pd.to_datetime(period[i])) & (df['Cause'] == cause)].groupby('Region')['Estimated deaths age standardised to 2008 population'].sum()
        natstddeath = sum(regstddeath)
        perstddeath.append(natstddeath)  
        
    n_years = len(period)
    
    period = ('%s - %s' % (period[0], period[n_years - 1]))
    
    deaths = sum(perdeath)
    
    person_years = ('%i' % (sum(perpop) / 1000000))
     
    standardised_deaths = ('%i' % sum(perstddeath))
    
    stdperrate = [(d / 54454723) * 100000 for d in perstddeath] #54454723 is 2008 nat pop
    
    stdperrate = mean_confidence_interval(stdperrate)
    
    return period, deaths, person_years, standardised_deaths, stdperrate
    
    
#corrected meso is a special case...


def year_analysis2(period, cause):
    perpop = []
    perdeath = []
    perstddeath = []
    
    for i, item in enumerate(period):
        regpop = df[(df['Agegroup'] == 'ALL AGES') & (df['Year'] == pd.to_datetime(period[i])) & (df['Cause'] == cause)].groupby('Region').Population.sum()
        natpop = sum(regpop)
        perpop.append(natpop)
        
        regdeath = df[(df['Agegroup'] == 'ALL AGES') & (df['Year'] == pd.to_datetime(period[i])) & (df['Cause'] == cause)].groupby('Region')['Deaths'].sum()
        natdeath = sum(regdeath)
        perdeath.append(natdeath)  
        
        regstddeath = df[(df['Agegroup'] == 'ALL AGES') & (df['Year'] == pd.to_datetime(period[i])) & (df['Cause'] == cause)].groupby('Region')['Corrected Meso Deaths'].sum()
        natstddeath = sum(regstddeath)
        perstddeath.append(natstddeath)  
        
    n_years = len(period)
    
    period = ('%s - %s' % (period[0], period[n_years - 1]))
    
    deaths = sum(perdeath)
    
    person_years = ('%i' % (sum(perpop) / 1000000))
     
    standardised_deaths = ('%i' % sum(perstddeath))
    
    stdperrate = [(d / 54454723) * 100000 for d in perstddeath] #54454723 is 2008 nat pop
    
    stdperrate = mean_confidence_interval(stdperrate)
    
    return period, deaths, person_years, standardised_deaths, stdperrate



In [16]:

    
periods = {}
periods[0] = ['1979', '1980', '1981', '1982', '1983']
periods[1] = ['1984', '1985', '1986', '1987', '1988']
periods[2] = ['1989', '1990', '1991', '1992', '1993', '1994']
periods[3] = ['1995', '1996', '1997', '1998', '1999', '2000']
periods[4] = ['2001', '2002', '2003', '2004']
periods[5] = ['2005', '2006', '2007', '2008']
periods[6] = ['2009', '2010', '2011', '2012']

ipf_results = []
asbestos_results = []
all_meso_results = []
cor_meso_results = []

for i, item in enumerate(periods):
    ipf_results.append(year_analysis(periods[i], 'IPF'))
    asbestos_results.append(year_analysis(periods[i], 'Asbestosis'))
    all_meso_results.append(year_analysis(periods[i], 'All Mesothelioma'))
    
    
dfipf = pd.DataFrame(ipf_results, columns=['Period', 'Deaths', 'Person years (million)', 'Standardised Deaths (2008 population)', 'Standardised mortality rate per 100,000 (95% CI)'])
dfasb = pd.DataFrame(asbestos_results, columns=['Period', 'Deaths', 'Person years (million)', 'Standardised Deaths (2008 population)', 'Standardised mortality rate per 100,000 (95% CI)'])
dfmes1 = pd.DataFrame(all_meso_results, columns=['Period', 'Deaths', 'Person years (million)', 'Standardised Deaths (2008 population)', 'Standardised mortality rate per 100,000 (95% CI)'])



In [17]:

    
dfmes1 = dfmes1[dfmes1['Deaths'] != 0] #throw away rows we lack data for
dfmes = pd.concat([dfmes1]) #combine 
dfmes = dfmes.sort() #sort









    



/home/drcjar/.virtualenvs/litsearch2/lib/python3.5/site-packages/ipykernel/__main__.py:3: FutureWarning: sort(....) is deprecated, use sort_index(.....)
  app.launch_new_instance()



In [18]:

    
dfipf









    Out[18]:






  
    
      
      Period
      Deaths
      Person years (million)
      Standardised Deaths (2008 population)
      Standardised mortality rate per 100,000 (95% CI)
    
  
  
    
      0
      1979 - 1983
      3846.0
      247
      4029
      1.48 (1.22 - 1.74)
    
    
      1
      1984 - 1988
      5851.0
      249
      6378
      2.34 (2.19 - 2.50)
    
    
      2
      1989 - 1994
      8584.0
      304
      9192
      2.81 (2.51 - 3.12)
    
    
      3
      1995 - 2000
      11548.0
      310
      12162
      3.72 (3.35 - 4.10)
    
    
      4
      2001 - 2004
      9597.0
      210
      9917
      4.55 (4.15 - 4.96)
    
    
      5
      2005 - 2008
      11089.0
      215
      11193
      5.14 (4.60 - 5.67)
    
    
      6
      2009 - 2012
      13770.0
      222
      13470
      6.18 (5.02 - 7.35)



In [19]:

    
dfasb









    Out[19]:






  
    
      
      Period
      Deaths
      Person years (million)
      Standardised Deaths (2008 population)
      Standardised mortality rate per 100,000 (95% CI)
    
  
  
    
      0
      1979 - 1983
      172.0
      247
      170
      0.06 (0.04 - 0.09)
    
    
      1
      1984 - 1988
      208.0
      249
      227
      0.08 (0.07 - 0.10)
    
    
      2
      1989 - 1994
      325.0
      304
      349
      0.11 (0.10 - 0.12)
    
    
      3
      1995 - 2000
      348.0
      310
      367
      0.11 (0.10 - 0.12)
    
    
      4
      2001 - 2004
      364.0
      210
      376
      0.17 (0.13 - 0.22)
    
    
      5
      2005 - 2008
      495.0
      215
      500
      0.23 (0.20 - 0.26)
    
    
      6
      2009 - 2012
      723.0
      222
      708
      0.33 (0.28 - 0.37)



In [20]:

    
dfmes









    Out[20]:






  
    
      
      Period
      Deaths
      Person years (million)
      Standardised Deaths (2008 population)
      Standardised mortality rate per 100,000 (95% CI)
    
  
  
    
      0
      1979 - 1983
      2446.223216
      247
      2540
      0.93 (0.72 - 1.15)
    
    
      1
      1984 - 1988
      3192.247558
      249
      3491
      1.28 (1.17 - 1.40)
    
    
      2
      1989 - 1994
      5453.305493
      304
      5875
      1.80 (1.39 - 2.21)
    
    
      3
      1995 - 2000
      8816.424768
      310
      9298
      2.85 (2.42 - 3.27)
    
    
      4
      2001 - 2004
      6502.000000
      210
      6741
      3.10 (3.01 - 3.18)
    
    
      5
      2005 - 2008
      7368.000000
      215
      7448
      3.42 (3.20 - 3.64)
    
    
      6
      2009 - 2012
      8474.000000
      222
      8290
      3.81 (3.68 - 3.93)



In [21]:

    
df[(df['Cause'] == 'IPF') & (df['Sex'] == 'Male') & (df['Agegroup'] == 'ALL AGES')].Deaths.sum()









    Out[21]:





41249.0



In [22]:

    
df[(df['Cause'] == 'IPF') & (df['Sex'] == 'Female') & (df['Agegroup'] == 'ALL AGES')].Deaths.sum()









    Out[22]:





25992.0



In [23]:

    
df[(df['Cause'] == 'IPF') & (df['Sex'] == 'Male') & (df['Agegroup'] == 'ALL AGES')].Population.sum() / 1000000









    Out[23]:





980.56322699999998



In [24]:

    
df[(df['Cause'] == 'IPF') & (df['Sex'] == 'Female') & (df['Agegroup'] == 'ALL AGES')].Population.sum() / 1000000









    Out[24]:





1028.579778



In [25]:

    
grp = df[(df['Cause'] == 'IPF') & (df['Sex'] == 'Male') & (df['Agegroup'] == 'ALL AGES')].groupby('Year')
grp = grp['Estimated deaths age standardised to 2008 population'].sum()
grp = DataFrame(grp).reset_index()
grp['Estimated deaths age standardised to 2008 population'].map(lambda x: (x/54454723) * 100000).mean()









    Out[25]:





2.006538435815437



In [26]:

    
grp = df[(df['Cause'] == 'IPF') & (df['Sex'] == 'Female') & (df['Agegroup'] == 'ALL AGES')].groupby('Year')
grp = grp['Estimated deaths age standardised to 2008 population'].sum()
grp = DataFrame(grp).reset_index()
grp['Estimated deaths age standardised to 2008 population'].map(lambda x: (x/54454723) * 100000).mean()









    Out[26]:





1.250705869329551



In [27]:

    
2 / 1.25









    Out[27]:





1.6



In [28]:

    
grp = df[(df['Cause'] == 'IPF')].groupby('Agegroup')['Rate per 100,000 (standardised)'].mean()
grp.sort()
grp









    



/home/drcjar/.virtualenvs/litsearch2/lib/python3.5/site-packages/ipykernel/__main__.py:2: FutureWarning: sort is deprecated, use sort_values(inplace=True) for INPLACE sorting
  from ipykernel import kernelapp as app






    Out[28]:





Agegroup
UNDER 25     0.021286
25-34        0.048154
35-44        0.164044
45-54        0.731656
55-64        3.363908
ALL AGES     3.445875
65-74       11.624223
75-84       26.663022
85+         37.749784
Name: Rate per 100,000 (standardised), dtype: float64



In [29]:

    
grp = df[(df['Cause'] == 'IPF')].groupby('Region')['Rate per 100,000 (standardised)'].mean()
grp.sort()
grp









    



/home/drcjar/.virtualenvs/litsearch2/lib/python3.5/site-packages/ipykernel/__main__.py:2: FutureWarning: sort is deprecated, use sort_values(inplace=True) for INPLACE sorting
  from ipykernel import kernelapp as app






    Out[29]:





Region
EAST                         7.976114
SOUTH EAST                   8.312335
LONDON                       8.707275
YORKSHIRE AND THE HUMBER     8.908680
SOUTH WEST                   9.190130
EAST MIDLANDS                9.333159
WEST MIDLANDS                9.459794
NORTH EAST                  10.289851
WALES                       10.495049
NORTH WEST                  10.707523
Name: Rate per 100,000 (standardised), dtype: float64



In [30]:

    
grp = df[(df['Cause'] == 'IPF') & (df['Sex'] == 'Female')].groupby('Region')['Rate per 100,000 (standardised)'].mean()
grp.sort()
grp









    



/home/drcjar/.virtualenvs/litsearch2/lib/python3.5/site-packages/ipykernel/__main__.py:2: FutureWarning: sort is deprecated, use sort_values(inplace=True) for INPLACE sorting
  from ipykernel import kernelapp as app






    Out[30]:





Region
EAST                        4.737318
SOUTH EAST                  4.900534
YORKSHIRE AND THE HUMBER    5.053850
LONDON                      5.411803
SOUTH WEST                  5.640149
WEST MIDLANDS               5.641900
NORTH EAST                  5.692070
EAST MIDLANDS               5.746785
WALES                       6.168030
NORTH WEST                  6.214628
Name: Rate per 100,000 (standardised), dtype: float64



In [31]:

    
grp = df[(df['Cause'] == 'IPF') & (df['Sex'] == 'Male')].groupby('Region')['Rate per 100,000 (standardised)'].mean()
grp.sort()
grp









    



/home/drcjar/.virtualenvs/litsearch2/lib/python3.5/site-packages/ipykernel/__main__.py:2: FutureWarning: sort is deprecated, use sort_values(inplace=True) for INPLACE sorting
  from ipykernel import kernelapp as app






    Out[31]:





Region
EAST                        11.214911
SOUTH EAST                  11.724135
LONDON                      12.002747
SOUTH WEST                  12.740111
YORKSHIRE AND THE HUMBER    12.763510
EAST MIDLANDS               12.919534
WEST MIDLANDS               13.277687
WALES                       14.822068
NORTH EAST                  14.887633
NORTH WEST                  15.200417
Name: Rate per 100,000 (standardised), dtype: float64



In [32]:

    
grp = df[(df['Cause'] == 'Asbestosis') & (df['Sex'] == 'Male')].groupby('Region')['Rate per 100,000 (standardised)'].mean()
grp.sort()
grp









    



/home/drcjar/.virtualenvs/litsearch2/lib/python3.5/site-packages/ipykernel/__main__.py:2: FutureWarning: sort is deprecated, use sort_values(inplace=True) for INPLACE sorting
  from ipykernel import kernelapp as app






    Out[32]:





Region
EAST                        0.547326
WALES                       0.643478
WEST MIDLANDS               0.760362
YORKSHIRE AND THE HUMBER    0.802830
SOUTH EAST                  0.872112
EAST MIDLANDS               0.960083
SOUTH WEST                  1.098402
LONDON                      1.316267
NORTH WEST                  1.622810
NORTH EAST                  3.072687
Name: Rate per 100,000 (standardised), dtype: float64



In [33]:

    
grp = df[(df['Cause'] == 'All Mesothelioma') & (df['Sex'] == 'Male')].groupby('Region')['Rate per 100,000 (standardised)'].mean()
grp.sort()
grp









    



/home/drcjar/.virtualenvs/litsearch2/lib/python3.5/site-packages/ipykernel/__main__.py:2: FutureWarning: sort is deprecated, use sort_values(inplace=True) for INPLACE sorting
  from ipykernel import kernelapp as app






    Out[33]:





Region
WALES                        6.184247
EAST MIDLANDS                6.482373
WEST MIDLANDS                6.525374
SOUTH WEST                   7.618154
EAST                         7.861819
LONDON                       8.220906
YORKSHIRE AND THE HUMBER     8.646171
NORTH WEST                   8.965068
SOUTH EAST                   9.205414
NORTH EAST                  17.543943
Name: Rate per 100,000 (standardised), dtype: float64



In [34]:

    
df.head()









    Out[34]:






  
    
      
      Region
      Agegroup
      Deaths
      Sex
      Year
      Cause
      Population
      2008 population
      Rate per 100,000 population
      Estimated deaths age standardised to 2008 population
      Rate per 100,000 (standardised)
    
    
      Year
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1974-01-01
      NORTH EAST
      ALL AGES
      26.0
      Male
      1974-01-01
      IPF
      1536100.0
      1256065.0
      1.692598
      21.260133
      1.692598
    
    
      1974-01-01
      NORTH EAST
      ALL AGES
      1.0
      Male
      1974-01-01
      Asbestosis
      1536100.0
      1256065.0
      0.065100
      0.817697
      0.065100
    
    
      1974-01-01
      NORTH EAST
      ALL AGES
      12.5
      Male
      1974-01-01
      All Mesothelioma
      1536100.0
      1256065.0
      0.813749
      10.221218
      0.813749
    
    
      1974-01-01
      NORTH EAST
      UNDER 25
      0.0
      Male
      1974-01-01
      IPF
      615500.0
      405899.0
      0.000000
      0.000000
      0.000000
    
    
      1974-01-01
      NORTH EAST
      25-34
      0.0
      Male
      1974-01-01
      IPF
      203300.0
      149126.0
      0.000000
      0.000000
      0.000000



In [35]:

    
def makepictures(age):
    
    print(age)

    df_male = df[(df['Sex'] == 'Male') & (df['Agegroup'] == age)]

    grp = df_male[df_male['Cause'] == 'IPF'].groupby('Year')
    data = grp['Estimated deaths age standardised to 2008 population'].sum()


    grp = df_male[(df_male['Cause'] == 'All Mesothelioma') & (df_male['Year'] < pd.to_datetime('2001'))].groupby('Year') 
    data1 = grp['Estimated deaths age standardised to 2008 population'].sum()#all meso

    grp = df_male[(df_male['Cause'] == 'All Mesothelioma') & (df_male['Year'] > pd.to_datetime('2000'))].groupby('Year') 
    data2 = grp['Estimated deaths age standardised to 2008 population'].sum()#all meso

    grp = df_male[df_male['Cause'] == 'Asbestosis'].groupby('Year')
    data3 = grp['Estimated deaths age standardised to 2008 population'].sum()
  
    #grp = df_male[df_male['Cause'] == 'Mesothelioma_other'].groupby('Year') #all non pulmonary
    #data = grp['Estimated deaths age standardised to 2008 population'].sum() 

    #grp = df_male[df_male['Cause'] == 'Mesothelioma'].groupby('Year') #pulmonary
    #data1 = grp['Estimated deaths age standardised to 2008 population'].sum()
    
    #make an (ugly) graph a la Navaratnam
    #grp = dffinal6[(dffinal6['Cause'] == 'IPF') & (dffinal6['Sex'] == 'Male')].groupby('Year')
    #data = grp['Estimated deaths age standardised to 2008 population'].sum()

    #grp = dffinal6[(dffinal6['Cause'] == 'IPF') & (dffinal6['Sex'] == 'Female')].groupby('Year')
    #data1 = grp['Estimated deaths age standardised to 2008 population'].sum()

    plt.figure(figsize=(7, 7))
    x = data.index
    y = data.values

    x1 = data1.index
    y1 = data1.values

    x2 = data2.index
    y2 = data2.values

    x3 = data3.index
    y3 = data3.values

    major = mdates.YearLocator(2)
    minor = mdates.MonthLocator()
    plt.xticks(rotation=70)

    plt.ylim((0,1000))
    plt.ylabel('Estimated number of deaths age standardised to 2008 population')
    plt.xlabel('Year')
    plt.title('Mortality trends in England and Wales for Males Age %s \n for IPF, Mesothelioma, and Asbestosis' % age)
    #plt.annotate("ICD-9(1979)", (pd.to_datetime('1979'), 1000), (pd.to_datetime('1976'), 1500), 
              # arrowprops=dict(arrowstyle="->")) 
    #plt.annotate("ICD-10(2000)", (pd.to_datetime('2000'), 2500), (pd.to_datetime('1997'), 3000), 
              # arrowprops=dict(arrowstyle="->")) 
    plt.grid(True)
    p1, = plt.plot(x, y, 'k-', linewidth=2.0) 
    p2, = plt.plot(x1, y1,'b--', linewidth=2.0) 
    p3, = plt.plot(x2, y2, 'b-', linewidth=2.0) 
    p4, = plt.plot(x3, y3, 'r-', linewidth=2.0) 

    ax = plt.gca()
    ax.xaxis.set_major_locator(major)
    ax.legend([p1, p2, p3, p4], ["Idiopathic Pulmonary Fibrosis", "Mesothelioma (pre ICD-10)", "Mesothelioma (ICD-10)", "Asbestosis"], loc='upper left', frameon=False)

    plt.savefig('.././fig/IPF mortality trends in England and Wales for Males Age %s for IPF, Mesothelioma, and Asbestosis.png' % age)



In [72]:

    
ipfdeaths = (df[(df.Year == '2012') & (df.Cause == 'IPF') & (df.Agegroup == 'ALL AGES')].Deaths.sum())
print ('{} IPF deaths in 2012 in England and Wales'. format(ipfdeaths))

totaldeaths =  499331 # in 2012 from ons
print ('{} total deaths in England and Wales'.format(499331))

percentipfdeaths = ipfdeaths / totaldeaths * 100
print ('{} IPF deaths as percent of total deaths in England and Wales'.format(percentipfdeaths))

print ('\nsource: personal communication from ONS of number of IPF deaths in 2012 in England and Wales\n',
       '\ntotal number of deaths in 2012 from:\n https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/bulletins/deathsregistrationsummarytables/2013-07-10')

print ('\nn.b BLF think 5292 deaths for whole of UK in 2012 and quote 0.9% of all deaths (meaning no. of deaths would be 588000 in UK): \nhttps://statistics.blf.org.uk/pulmonary-fibrosis')









    



3902.0 IPF deaths in 2012 in England and Wales
499331 total deaths in England and Wales
0.7814455741782504 IPF deaths as percent of total deaths in England and Wales

source: personal communication from ONS of number of IPF deaths in 2012 in England and Wales
 
total number of deaths in 2012 from:
 https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/bulletins/deathsregistrationsummarytables/2013-07-10

n.b BLF think 5292 deaths for whole of UK in 2012 and quote 0.9% of all deaths (meaning no. of deaths would be 588000 in UK): 
https://statistics.blf.org.uk/pulmonary-fibrosis



In [97]:

    
# 170 + 169 # scotland 
# 156 # ireland
# would total 4397 - not clear (to me) where BLF get additional 895 deaths from

	Period	Deaths	Person years (million)	Standardised Deaths (2008 population)	Standardised mortality rate per 100,000 (95% CI)
0	1979 - 1983	3846.0	247	4029	1.48 (1.22 - 1.74)
1	1984 - 1988	5851.0	249	6378	2.34 (2.19 - 2.50)
2	1989 - 1994	8584.0	304	9192	2.81 (2.51 - 3.12)
3	1995 - 2000	11548.0	310	12162	3.72 (3.35 - 4.10)
4	2001 - 2004	9597.0	210	9917	4.55 (4.15 - 4.96)
5	2005 - 2008	11089.0	215	11193	5.14 (4.60 - 5.67)
6	2009 - 2012	13770.0	222	13470	6.18 (5.02 - 7.35)

	Period	Deaths	Person years (million)	Standardised Deaths (2008 population)	Standardised mortality rate per 100,000 (95% CI)
0	1979 - 1983	172.0	247	170	0.06 (0.04 - 0.09)
1	1984 - 1988	208.0	249	227	0.08 (0.07 - 0.10)
2	1989 - 1994	325.0	304	349	0.11 (0.10 - 0.12)
3	1995 - 2000	348.0	310	367	0.11 (0.10 - 0.12)
4	2001 - 2004	364.0	210	376	0.17 (0.13 - 0.22)
5	2005 - 2008	495.0	215	500	0.23 (0.20 - 0.26)
6	2009 - 2012	723.0	222	708	0.33 (0.28 - 0.37)

	Period	Deaths	Person years (million)	Standardised Deaths (2008 population)	Standardised mortality rate per 100,000 (95% CI)
0	1979 - 1983	2446.223216	247	2540	0.93 (0.72 - 1.15)
1	1984 - 1988	3192.247558	249	3491	1.28 (1.17 - 1.40)
2	1989 - 1994	5453.305493	304	5875	1.80 (1.39 - 2.21)
3	1995 - 2000	8816.424768	310	9298	2.85 (2.42 - 3.27)
4	2001 - 2004	6502.000000	210	6741	3.10 (3.01 - 3.18)
5	2005 - 2008	7368.000000	215	7448	3.42 (3.20 - 3.64)
6	2009 - 2012	8474.000000	222	8290	3.81 (3.68 - 3.93)

	Region	Agegroup	Deaths	Sex	Year	Cause	Population	2008 population	Rate per 100,000 population	Estimated deaths age standardised to 2008 population	Rate per 100,000 (standardised)
Year
1974-01-01	NORTH EAST	ALL AGES	26.0	Male	1974-01-01	IPF	1536100.0	1256065.0	1.692598	21.260133	1.692598
1974-01-01	NORTH EAST	ALL AGES	1.0	Male	1974-01-01	Asbestosis	1536100.0	1256065.0	0.065100	0.817697	0.065100
1974-01-01	NORTH EAST	ALL AGES	12.5	Male	1974-01-01	All Mesothelioma	1536100.0	1256065.0	0.813749	10.221218	0.813749
1974-01-01	NORTH EAST	UNDER 25	0.0	Male	1974-01-01	IPF	615500.0	405899.0	0.000000	0.000000	0.000000
1974-01-01	NORTH EAST	25-34	0.0	Male	1974-01-01	IPF	203300.0	149126.0	0.000000	0.000000	0.000000