In [1]:
# Notebook-wide imports and plotting configuration.
import requests
try:
    # Python 2 location of the in-memory text buffer.
    from StringIO import StringIO
except ImportError:
    # Python 3 removed the StringIO module; io.StringIO is its text-only
    # replacement (use io.BytesIO when the payload is raw bytes).
    from io import StringIO
from numpy import nan as NA
import pandas as pd
import zipfile
import re
# Render figures inline; kept ahead of the pyplot import as in the original.
%matplotlib inline
import matplotlib.pyplot as plt
import xlrd
import statsmodels.api as sm
from patsy import dmatrices, dmatrix
import scipy
import numpy as np

In [2]:
# List the current working directory to see which files are available.
# Written as an explicit line magic so it does not depend on IPython's
# automagic setting and cannot be shadowed by a variable named `ls`.
%ls


datas/
indexplay.ipynb
learningaboutindexing.csv
LICENSE
pa_research_for_stata.csv
pa-research (herrison's conflicted copy 2015-06-17).ipynb
pa-research.ipynb
README.md
requirements.txt
Untitled.ipynb

In [3]:
import os

# Enter the data directory only when the kernel is not already inside it, so
# that re-running this cell does not fail looking for a nested datas/datas.
# NOTE(review): the working directory is kernel-wide state that later relative
# file paths in this notebook rely on; a DATA_DIR constant joined onto each
# path would make that dependency explicit.
if os.path.basename(os.getcwd()) != 'datas':
    os.chdir('datas')
os.getcwd()  # echo the resulting working directory as the cell output


/home/sam/Dropbox/Documents/pa-research/datas

In [5]:
# Load the T201501PDPI+BNFT.CSV extract into a DataFrame using pandas' default
# CSV parsing. The path is relative, so it is resolved against the kernel's
# current working directory.
df = pd.read_csv(filepath_or_buffer='T201501PDPI+BNFT.CSV')

In [6]:
# Preview the first five rows to check the column layout of the loaded frame.
df.head(n=5)


Out[6]:
SHA PCT PRACTICE BNF CODE BNF NAME ITEMS NIC ACT COST QUANTITY PERIOD
0 Q44 RXA N81646 0102000N0AAABAB Hyoscine Butylbrom_Tab 10mg 1 1.13 1.16 21 201501
1 Q44 RXA N81646 0401010Z0AAAAAA Zopiclone_Tab 7.5mg 15 3.28 4.72 57 201501
2 Q44 RXA N81646 0401020K0AAAHAH Diazepam_Tab 2mg 35 99.91 104.64 2662 201501
3 Q44 RXA N81646 040201060AAALAL Olanzapine_Tab 15mg 3 1.29 1.53 21 201501
4 Q44 RXA N81646 0403010B0AAAHAH Amitriptyline HCl_Tab 25mg 16 1.52 3.20 38 201501

In [ ]: