In [1]:
#2018-11-26 16:25:37.011223
# Load the Metapack IPython extension. The %mt_* line magics at the end of this
# notebook are presumably registered by it -- confirm against the metapack docs.
%load_ext metapack.jupyter.magic
In [2]:
# Build parameters for this notebook run.
# NOTE(review): the machine-specific absolute paths suggest these values are
# injected by the Metapack build rather than written by hand -- confirm before
# editing them manually.

# Cache location and the resource this notebook produces.
CACHE_DIR = "/Users/eric/Library/Application Support/metapack/"
RESOURCE_NAME = "beat_demographics"
RESOLVED_URL = "file:///Users/eric/proj/virt-proj/data-project/sdrdl-data-projects/sandiego.gov/sandiego.gov-police_regions/notebooks/BeatPopulations.ipynb#beat_demographics"

# Package directory and the Metatab document / package URLs rooted in it.
WORKING_DIR = "/Users/eric/proj/virt-proj/data-project/sdrdl-data-projects/sandiego.gov/sandiego.gov-police_regions"
METATAB_DOC = "metapack+file:///Users/eric/proj/virt-proj/data-project/sdrdl-data-projects/sandiego.gov/sandiego.gov-police_regions/metadata.csv"
METATAB_WORKING_DIR = "/Users/eric/proj/virt-proj/data-project/sdrdl-data-projects/sandiego.gov/sandiego.gov-police_regions"
METATAB_PACKAGE = "metapack+file:///Users/eric/proj/virt-proj/data-project/sdrdl-data-projects/sandiego.gov/sandiego.gov-police_regions/"

# Name, description and package-relative URL of the resource.
name = "beat_demographics"
description = "Counts of people in the beat, by race."
url = "notebooks/BeatPopulations.ipynb#beat_demographics"
In [3]:
# Flag marking that the notebook is being run as part of a Metapack build.
# Nothing else in this notebook reads it.
METAPACK_BUILDING = True
Link census tract populations, total and by race, into police beats. Attributes population from tracts to beats by the areas of the overlaps. The basic procedure is to find the overlaps between beats and Census tracts, then assign a portion of the population of the tract to the beat, based on the ratio of the size of the overlap to the size of the tract.
In [4]:
import seaborn as sns
import metapack as mp
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Use seaborn's 'notebook' plotting context for the figures below.
sns.set_context('notebook')
In [5]:
# Open the Metapack package that the cells below read resources and references
# from (presumably the package this notebook is built into -- confirm).
pkg = mp.jupyter.open_package()
# Alternative kept for reference: open the source package instead.
#pkg = mp.jupyter.open_source_package()
# Show the package as the cell output.
pkg
Out[5]:
In [6]:
# Load the police beat geometries from the package's 'pd_beats' resource.
beats = pkg.resource('pd_beats').geoframe()

# There are beats that are way off in east county. Get rid of them.
# NOTE(review): `.iloc[:6].max()` of a descending sort is simply the overall
# maximum, so the strict `<` filter below removes only the feature(s) sitting at
# the single easternmost centroid. If the intent was to drop the six easternmost
# features, `.min()` was probably meant -- confirm against the plot before changing.
centroid_x = beats.centroid.x  # computed once and reused for threshold and filter
rightmost_centroid = centroid_x.sort_values(ascending=False).iloc[:6].max()
beats = beats[centroid_x < rightmost_centroid]

# Convert to EPSG:26911 (a UTM Zone 11N CRS) so area calculations will be in
# square meters rather than square degrees. The `epsg=` keyword replaces the
# deprecated {'init': 'epsg:26911'} dict form.
beats = beats.to_crs(epsg=26911)

# It looks like the dataset has multiple rows per beat, one feature per row. We
# need one row per beat, with the features combined together.
beats = beats.dissolve(by='beat').reset_index()

# Add the area, scaled from square meters to square kilometers.
beats['beat_area'] = beats.area / 1_000_000

beats.plot()
Out[6]:
In [7]:
# Load the Census tract geometries from the package's 'tracts' reference and
# reproject them to EPSG:26911, the same CRS used for the beats, so that
# `.area` is in square meters. The `epsg=` keyword replaces the deprecated
# {'init': 'epsg:26911'} dict form.
tracts = pkg.reference('tracts').geoframe()
tracts = tracts.to_crs(epsg=26911)

# Add the area, scaled from square meters to square kilometers.
tracts['tract_area'] = tracts.area / 1_000_000

tracts.plot()
Out[7]:
In [8]:
# NOTE(review): this import would normally live in the imports cell at the top
# of the notebook.
from rowgenerators import parse_app_url
# Fetch Census table B03002 as a dataframe via a census:// application URL.
# 'CA/140' presumably selects California at summary level 140 (tracts) --
# confirm against the census URL scheme documentation.
t = parse_app_url('census://CA/140/B03002').dataframe()
In [9]:
# Short names for the B03002 estimate columns we keep.
# White, black, asian, etc. are all non-Hispanic.
estimate_names = {
    'B03002_001': 'total',
    'B03002_003': 'white',
    'B03002_004': 'black',
    'B03002_005': 'aian',
    'B03002_006': 'asian',
    'B03002_007': 'nhopi',
    'B03002_012': 'hisp',
}

# Every estimate column has a companion suffixed '_m90' (presumably the 90%
# margin of error -- confirm); map it to the same short name plus the suffix.
margin_names = {
    code + '_m90': label + '_m90'
    for code, label in estimate_names.items()
}
col_map = {**estimate_names, **margin_names}

# Keep only rows with COUNTY code '073', apply the short column names, and expose
# the index as a lower-case 'geoid' column for the joins further down.
county_rows = t[t.COUNTY == '073']
renamed = county_rows.rename(columns=col_map).reset_index()
race_tracts = renamed.rename(columns={'GEOID': 'geoid'})
In [10]:
# Keep only the tract identifier and the population estimate columns; the
# '_m90' companion columns are dropped here.
race_tracts = race_tracts[['geoid', 'total', 'white', 'black', 'aian', 'asian', 'nhopi', 'hisp']]
# `.titles` is not a plain pandas attribute; presumably it is provided by the
# dataframe type returned for the census URL -- confirm. Shown transposed.
race_tracts.titles.head().T
Out[10]:
In [11]:
# Spatially join beats to the tracts they touch and plot the result, with beat
# centroids overlaid in red as a visual sanity check.
#
# The results get their own names instead of rebinding `t`: `t` holds the
# Census table that the `race_tracts` cell reads, so overwriting it here made
# that earlier cell fail when re-run without a kernel restart.
beat_tract_join = gpd.sjoin(beats, tracts)
ax = beat_tract_join.plot()
beats.centroid.plot(ax=ax, color='red')

# One row per (tract, beat) pair, with both geometries and both areas attached.
beat_tract_pairs = (
    beat_tract_join[['geoid', 'beat']]
    .drop_duplicates()
    .merge(tracts[['geoid', 'geometry', 'tract_area']], on='geoid')
    .merge(beats[['beat', 'geometry', 'beat_area']], on='beat')
)
In [12]:
# Population columns that get apportioned from tracts to beats.
population_cols = ['total', 'white', 'asian', 'black', 'aian', 'hisp', 'nhopi']

# Intersections smaller than this are dropped. Same units as `intr_area`
# (square kilometers, assuming the frames are in a meter-based CRS).
min_intr_area = .01

# Geometric intersection of every beat with every tract it overlaps.
intr = gpd.overlay(beats, tracts, how='intersection')[['beat', 'geoid', 'geometry']]
intr['intr_area'] = intr.geometry.area / 1_000_000.0

# Get rid of really small intersections
intr = intr[intr.intr_area >= min_intr_area]

# Attach tract area, beat area and tract populations to each intersection,
# keeping a single row per (beat, tract) pair.
merged = (
    intr[['beat', 'geoid', 'intr_area']]
    .merge(tracts[['geoid', 'tract_area']], on='geoid')
    .merge(beats[['beat', 'beat_area']], on='beat')
    .merge(race_tracts, on='geoid')
    .drop_duplicates(subset=['beat', 'geoid'])
)

# Share of the tract, and of the beat, covered by each intersection.
merged['tract_overlap_proportion'] = merged.intr_area / merged.tract_area
merged['beat_overlap_proportion'] = merged.intr_area / merged.beat_area

# The intersection areas must be smaller than both of the areas being intersected
assert not (merged.intr_area > merged.beat_area).any(), \
    'an intersection is larger than its beat'
assert not (merged.intr_area > merged.tract_area).any(), \
    'an intersection is larger than its tract'

# Check that all of the areas of the beats are accounted for
beat_coverage = merged.groupby('beat').beat_overlap_proportion.sum().round(1)
assert (beat_coverage == 1).all(), \
    'some beats are not fully covered by tract intersections'

# Assign each beat the fraction of the tract's population that matches the
# fraction of the tract's area inside the beat. One vectorized multiply
# replaces a separate statement per column.
merged[population_cols] = merged[population_cols].multiply(
    merged.tract_overlap_proportion, axis=0
)

merged.head().T
Out[12]:
In [13]:
# Total the apportioned tract populations within each beat and round to whole
# people. Columns are selected before aggregating so the sum never touches the
# string `geoid` key or the area/proportion columns, which were previously
# summed and then thrown away.
demographic_cols = ['total', 'white', 'black', 'aian', 'asian', 'nhopi', 'hisp']
beat_demographics = merged.groupby('beat')[demographic_cols].sum().round()
In [14]:
# Pass the `beat_demographics` frame and a target directory to Metapack's
# materialize magic (presumably it writes the frame there as the resource's
# data -- confirm against the metapack magic docs). The absolute path is
# machine-specific.
%mt_materialize beat_demographics '/Users/eric/Library/Application Support/metapack/_materialized_data/sandiego.gov-police_regions-3'
In [15]:
# Run the "materialize all" magic against the same machine-specific directory
# (presumably it materializes every dataframe the package declares for this
# notebook -- confirm against the metapack magic docs).
%mt_materialize_all '/Users/eric/Library/Application Support/metapack/_materialized_data/sandiego.gov-police_regions-3'
In [16]:
# Metapack magic; judging by its name it displays the Metatab metadata -- confirm.
%mt_show_metatab
In [17]:
# Metapack magic; judging by its name it displays the library directories -- confirm.
%mt_show_libdirs