In [1]:
import pandas as pd
In [2]:
df = pd.read_csv('datas/T201501PDPI+BNFT.CSV') #frpm http://datagov.ic.nhs.uk/presentation/2015_01_January/T201501PDPI+BNFT.CSV
df.columns = [x.strip() for x in df.columns] #clean column whitespace
In [3]:
gpdetail = pd.read_csv('datas/epraccur.csv') #from http://systems.hscic.gov.uk/data/ods/datadownloads/data-files/epraccur.zip
gpdetail = gpdetail.icol([1,2,10,26,15]) #throw away columns we don't care about
gpdetail.columns = ['PRACTICE', 'NAME', 'POSTCODE', 'Prescribing Setting', 'CCG']
setting_lookup = {0:'Other' , 1:'WIC Practice' , 2:'OOH Practice' , 3:'WIC + OOH Practice' , 4:'GP Practice' ,
8:'Public Health Service' , 9:'Community Health Service' , 10:'Hospital Service' , 11:'Optometry Service' ,
12:'Urgent & Emergency Care' , 13:'Hospice' , 14:'Care Home / Nursing Home' , 15:'Border Force' ,
16:'Young Offender Institution' , 17:'Secure Training Centre' , 18:'Secure Children\'s Home' , 19:'Immigration Removal Centre' ,
20:'Court' , 21:'Police Custody' , 22:'Sexual Assault Referral Centre\'(SARC)' , 24:'Other – Justice Estate' ,
25:'Prison'}
In [4]:
lans = 'Lansoprazole|Zoton'
omep = 'Omeprzole|Losec'
pant = 'Pantoprazole|Protium'
ppi = lans + '|' + omep + '|' + pant
In [7]:
df = df[df['BNF NAME'].str.contains(ppi)] #throw away everything that's not about a ppi
In [8]:
df = pd.merge(df, gpdetail, on='PRACTICE') #add gp details
In [9]:
df['Prescribing Setting'] = df['Prescribing Setting'].map(setting_lookup.get)
In [10]:
df1 = df[df['BNF NAME'].str.contains(lans)]
df2 = df[df['BNF NAME'].str.contains(omep)]
df3 = df[df['BNF NAME'].str.contains(pant)]
In [11]:
df1['ppi'] = 'lans'
df2['ppi'] = 'omep'
df3['ppi'] = 'pant'
In [12]:
df_list = [df1[['ppi','ITEMS','CCG','Prescribing Setting']], df2[['ppi','ITEMS','CCG','Prescribing Setting']],
df3[['ppi','ITEMS','CCG','Prescribing Setting']]]
In [13]:
df4 = pd.concat(df_list)
In [14]:
df5 = df4.groupby(['ppi']).ITEMS.sum()
In [15]:
df5
Out[15]:
In [16]:
pd.DataFrame(df5).apply(lambda x: 100*x/float(x.sum()))
Out[16]:
In [17]:
df6 = df4.groupby(['Prescribing Setting', 'ppi']).ITEMS.sum()
In [25]:
df6
Out[25]:
In [20]:
pd.DataFrame(df6).to_csv('settingbyppi.csv')
In [21]:
df7 = df4.groupby(['CCG', 'ppi']).ITEMS.sum()
In [26]:
df7
Out[26]:
In [24]:
pd.DataFrame(df7).to_csv('ccgbyppi.csv')
In [ ]: