In [18]:
# ***Chris Barber conceived of and carried out this analysis first (I am replicating it here and any bugs are my own)***
#Data sources:
#http://www.erswhitebook.org/files/public/Supplementary%20Material/Interstital%20Lung%20Disease/WB_ILD_supplement.xlsx
#http://www.erswhitebook.org/files/public/Supplementary%20Material/Occupational%20Lung%20Disease/WB_occupation_supplement.xlsx
import pandas as pd
%matplotlib inline
from scipy.stats.stats import pearsonr
meso = pd.read_csv('../data/ers_whitebook/eu_meso_mort.csv', skiprows=2)
ipf = pd.read_csv('../data/ers_whitebook/eu_ipf_mort.csv', skiprows=2)
meso = meso[['Country', 'Mesothelioma']][:28] #eu per 100,000 age standardised meso mort by country
ipf = ipf[['Country', 'Idiopathic fibrosing alveolitis and other ILD']][:28] #eu per 100,000 age standardised meso mort by country
ipfmeso = pd.merge(ipf,meso).dropna() #this drops Greece because don't have IPF and Meso data for Greece
ipfmeso.columns = ['Country','IPF','Meso']
In [19]:
# Preview the first five rows of the merged IPF / mesothelioma table.
ipfmeso.head(n=5)
Out[19]:
In [20]:
# Count the distinct countries that remain after the merge and dropna.
ipfmeso['Country'].nunique()
Out[20]:
In [21]:
# Scatter plot of IPF mortality (y) against mesothelioma mortality (x), one point per row.
ipfmeso.plot.scatter(x='Meso', y='IPF')
Out[21]:
In [22]:
# Pearson correlation matrix of the two mortality columns.
# The columns are selected explicitly because `ipfmeso` also holds the string
# `Country` column: pandas >= 2.0 no longer drops non-numeric columns silently
# in DataFrame.corr and raises instead. The result matches older pandas output.
ipfmeso[['IPF', 'Meso']].corr(method='pearson')
Out[22]:
In [23]:
# Pearson correlation between mesothelioma and IPF mortality via scipy,
# which reports the coefficient together with its p-value.
pearsonr(ipfmeso['Meso'], ipfmeso['IPF'])
Out[23]: