In [1]:
import numpy as np
import pandas as pd
import re
import datetime
import geopandas as gpd
In [2]:
# Enable inline matplotlib rendering for the plots below.
# NOTE(review): per the IPython docs, %pylab also star-imports numpy and
# matplotlib names into the notebook namespace, which can shadow builtins and
# hides where names come from.  The cells shown here do not appear to use any
# of those injected names, so `%matplotlib inline` is probably sufficient --
# confirm against the rest of the notebook before switching.
%pylab inline
In [3]:
# Remote inputs for the analysis.
#   allyears    -- per-call records; later cells use its 'Substance',
#                  'Medical Outcome', 'Start Date', 'Caller Site' and
#                  'ZIP Code 5' columns.
#   searchcodes -- lookup table whose 'Code number' column drives the substance
#                  match; read with dtype=str so codes stay text.
# NOTE(review): the first URL is a Dropbox "Public folder" style link
# (dl.dropboxusercontent.com/u/...); confirm it still resolves.
CALLS_CSV_URL = 'https://dl.dropboxusercontent.com/u/39570370/allyears.csv'
SEARCHCODES_CSV_URL = 'https://raw.githubusercontent.com/rinina/PCC-Opioid-Project/master/data/searchcodes.csv'

allyears = pd.read_csv(CALLS_CSV_URL)
searchcodes = pd.read_csv(SEARCHCODES_CSV_URL, dtype=str)
In [4]:
# Narrow the call records to the rows analysed below, in this order:
#   1. drop rows with no 'Substance',
#   2. drop rows whose 'Medical Outcome' is 'Confirmed nonexposure',
#   3. parse 'Start Date' into datetimes,
#   4. keep only rows whose 'Caller Site' is 'Own residence'.
# The date conversion goes through .assign() (which works on a fresh copy)
# instead of assigning a column onto a boolean-filtered slice; the latter
# triggers pandas' SettingWithCopyWarning.  The trailing .copy() leaves
# `allyears` as an independent frame for any later column assignments.
allyears = (
    allyears
    .dropna(subset=['Substance'])
    .loc[lambda df: df['Medical Outcome'] != 'Confirmed nonexposure']
    .assign(**{'Start Date': lambda df: pd.to_datetime(df['Start Date'])})
    .loc[lambda df: df['Caller Site'] == 'Own residence']
    .copy()
)
In [5]:
# Keep the calls whose 'Substance' text contains at least one search code.
# The codes are combined into a single regex alternation, so:
#   * each code is passed through re.escape() so that characters such as
#     '.', '+' or '(' are matched literally instead of as regex syntax;
#   * missing codes are dropped first -- str.join() raises TypeError on NaN;
#   * na=False treats a missing 'Substance' as "no match" rather than putting
#     NaN into the boolean mask.
# NOTE(review): this is substring matching, so a code also matches inside a
# longer code (e.g. '123' inside '1234') -- confirm that is intended.
code_pattern = '|'.join(
    re.escape(code) for code in searchcodes['Code number'].dropna().unique()
)
matched = allyears[allyears['Substance'].str.contains(code_pattern, na=False)]
In [6]:
# Polygon layer that the call counts are joined onto; later cells key on its
# 'ZCTA5CE00' column.
# NOTE(review): the file name suggests a 2010 TIGER/Line ZCTA layer for state
# FIPS 04 -- confirm the vintage matches the population table.
ZCTA_SHAPEFILE = './data/tl_2010_04_zcta500.shp'
zipshp = gpd.read_file(ZCTA_SHAPEFILE)
In [7]:
# Tab-separated population table; later cells read its 'Geo_NAME' and
# 'SE_T001_001' columns.
POPULATION_TSV_URL = 'https://raw.githubusercontent.com/rinina/PCC-Opioid-Project/master/data/zipcode_pop.txt'
population = pd.read_csv(POPULATION_TSV_URL, sep='\t')
In [8]:
# Derive a 'ZIPCODE' key from the first run of five digits in 'Geo_NAME'.
# Rows with no five-digit run get NaN.
#   * raw string: '\d' in a plain string literal is an invalid escape sequence
#     (a warning today, an error in future Python versions);
#   * expand=False: return a Series for the single capture group rather than a
#     one-column DataFrame, which is what a column assignment expects.
population['ZIPCODE'] = population['Geo_NAME'].str.extract(r'(\d{5})', expand=False)
In [9]:
# Total 'SE_T001_001' per ZIP code, as a Series named 'pop' indexed by ZIPCODE.
# The column is selected *before* aggregating so that text columns such as
# 'Geo_NAME' are never summed (string-concatenated, or rejected, depending on
# the pandas version).
pop_only = population.groupby('ZIPCODE')['SE_T001_001'].sum().rename('pop')
In [10]:
# Attach the per-ZIP population to the polygons (inner join: polygons with no
# population row are dropped).
# Any 'pop' column left over from a previous run of this cell is removed first;
# otherwise re-running would produce 'pop_x'/'pop_y' and later cells that read
# counts['pop'] would fail.
zipshp = (
    zipshp
    .drop(columns='pop', errors='ignore')
    .merge(pop_only.to_frame(), left_on='ZCTA5CE00', right_index=True)
)
In [11]:
# Number of matched calls per caller ZIP code.
zipcounts = matched.groupby('ZIP Code 5').size()

# Normalise the ZIP keys to text so they can be joined against 'ZCTA5CE00':
# the int cast strips any float '.0' suffix, and zfill(5) restores leading
# zeros that the int round-trip would otherwise drop (e.g. 2134 -> '02134').
zipcounts.index = (
    pd.Series(zipcounts.index).astype(int).astype(str).str.zfill(5)
)
zipcounts = zipcounts.reset_index()  # columns: 'ZIP Code 5' and 0 (the count)

# Join counts onto the polygons and give the count column a real name.
# NOTE(review): this is an inner join, so polygons with zero matched calls are
# dropped from `counts` rather than kept with a count of 0 -- confirm intended.
counts = (
    zipshp
    .merge(zipcounts, left_on='ZCTA5CE00', right_on='ZIP Code 5')
    .rename(columns={0: 'pcc_count'})
)
In [12]:
# Matched calls per 100,000 residents.
# Polygons with no recorded population are masked to NaN before dividing;
# dividing by 0 would give inf, which then passes any "rate > threshold"
# filter and cannot be stored cleanly in the exported attribute table.
PER_100K = 100000
populated = counts['pop'].where(counts['pop'] > 0)
counts['calls_100k'] = PER_100K * counts['pcc_count'] / populated
In [13]:
# Choropleth of `counts`, classed into 3 quantile bins on the 'OrRd' palette.
# NOTE(review): this maps the 'pop' column, not the 'calls_100k' rate computed
# in the previous cell -- confirm which one is meant to be shown.
# NOTE(review): `colormap=` is the legacy keyword; newer geopandas releases
# expect `cmap=` -- confirm against the installed version.
quantile_style = dict(scheme='QUANTILES', k=3, colormap='OrRd')
counts.plot(column='pop', **quantile_style)
Out[13]:
In [14]:
# ZIP codes whose rate exceeds 115 calls per 100k, with the rate alongside.
high_rate = counts['calls_100k'] > 115
report_columns = ['ZIP Code 5', 'calls_100k']
counts[high_rate][report_columns]
Out[14]:
In [15]:
# Write the joined polygons and their attributes to disk in the current
# working directory.
OUTPUT_SHAPEFILE = 'PCC-calls-per-100k.shp'
counts.to_file(OUTPUT_SHAPEFILE)