In [15]:
# ***Chris Barber conceived of and carried out this analysis first (I am replicating it here and any bugs are my own)***
#Data sources:
#http://www.erswhitebook.org/files/public/Supplementary%20Material/Interstital%20Lung%20Disease/WB_ILD_supplement.xlsx
#http://www.erswhitebook.org/files/public/Supplementary%20Material/Occupational%20Lung%20Disease/WB_occupation_supplement.xlsx
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from scipy.stats.stats import pearsonr
# Mesothelioma mortality by country: skip the two leading rows of the CSV,
# keep the country and rate columns, and retain only the first 28 rows
# (noted by the original author as the EU, age-standardised rate per 100,000).
meso = (
    pd.read_csv('../data/ers_whitebook/eu_meso_mort.csv', skiprows=2)
    .loc[:, ['Country', 'Mesothelioma']]
    .iloc[:28]
)

# Same treatment for the IPF / other-ILD mortality table.
ipf = (
    pd.read_csv('../data/ers_whitebook/eu_ipf_mort.csv', skiprows=2)
    .loc[:, ['Country', 'Idiopathic fibrosing alveolitis and other ILD']]
    .iloc[:28]
)

# Join the two tables on their only shared column and discard incomplete rows
# (per the original note this removes Greece, which lacks IPF and Meso data),
# then give the ILD column the shorter label used in the rest of the notebook.
ipfmeso = (
    pd.merge(ipf, meso, on='Country')
    .dropna()
    .rename(columns={'Idiopathic fibrosing alveolitis and other ILD': 'Idiopathic pulmonary fibrosis'})
)
In [16]:
# Preview the first five rows of the merged table.
ipfmeso.head(5)
Out[16]:
In [17]:
# Number of distinct countries left after the merge and dropna.
ipfmeso['Country'].nunique()
Out[17]:
In [29]:
# Scatter of IPF mortality (y) against mesothelioma mortality (x), one point
# per country, with fixed axis limits and a square figure.
ax = ipfmeso.plot.scatter(
    x='Mesothelioma',
    y='Idiopathic pulmonary fibrosis',
    xlim=(0, 3),
    ylim=(0, 7),
    figsize=(7, 7),
    title='European age-standardised mortality for Mesothelioma and \n Idiopathic pulmonary fibrosis (rate per 100,000)',
)

# Write the rendered figure to disk alongside the notebook's other figures.
fig = ax.figure
fig.savefig('.././fig/EuropeanMortalityMesotheliomaIPF.png')
In [22]:
# Pearson correlation matrix of the two mortality-rate columns.
# The numeric columns are selected explicitly: the frame also holds the string
# 'Country' column, which older pandas dropped silently but pandas >= 2.0
# (numeric_only defaults to False) rejects with an error.
ipfmeso[['Idiopathic pulmonary fibrosis', 'Mesothelioma']].corr(method='pearson')
Out[22]:
In [23]:
# Pearson r and its two-sided p-value for mesothelioma vs IPF mortality.
# The frame's columns are named 'Mesothelioma' and 'Idiopathic pulmonary fibrosis'
# (set when ipfmeso is built); the previous attribute names Meso / IPF do not
# exist on it and raised AttributeError on a fresh run.
pearsonr(ipfmeso['Mesothelioma'], ipfmeso['Idiopathic pulmonary fibrosis'])
Out[23]: