In [1]:
#2018-11-26 16:25:37.011223
# Load the metapack IPython extension. The %mt_* magics used in later cells are
# presumably registered by it (TODO confirm).
%load_ext metapack.jupyter.magic

In [2]:
# Parameters describing the package and the notebook resource being built.
# The timestamp comment in the first cell suggests these cells are generated by
# the metapack build rather than written by hand -- TODO confirm.
# NOTE(review): every path below is an absolute path on a single machine, so the
# notebook will not run unchanged anywhere else.
CACHE_DIR='/Users/eric/Library/Application Support/metapack/'
RESOURCE_NAME='beat_demographics'
RESOLVED_URL='file:///Users/eric/proj/virt-proj/data-project/sdrdl-data-projects/sandiego.gov/sandiego.gov-police_regions/notebooks/BeatPopulations.ipynb#beat_demographics'
WORKING_DIR='/Users/eric/proj/virt-proj/data-project/sdrdl-data-projects/sandiego.gov/sandiego.gov-police_regions'
METATAB_DOC='metapack+file:///Users/eric/proj/virt-proj/data-project/sdrdl-data-projects/sandiego.gov/sandiego.gov-police_regions/metadata.csv'
METATAB_WORKING_DIR='/Users/eric/proj/virt-proj/data-project/sdrdl-data-projects/sandiego.gov/sandiego.gov-police_regions'
METATAB_PACKAGE='metapack+file:///Users/eric/proj/virt-proj/data-project/sdrdl-data-projects/sandiego.gov/sandiego.gov-police_regions/'
# Lower-case identity of the resource: `name` repeats RESOURCE_NAME and `url` is
# the trailing notebook path and fragment of RESOLVED_URL.
name='beat_demographics'
description='Counts of people in the beat, by race.'
url='notebooks/BeatPopulations.ipynb#beat_demographics'

In [3]:
# Not read by any cell visible in this notebook; presumably checked by the
# metapack build machinery (TODO confirm).
METAPACK_BUILDING=True

Beat Populations

Link census tract populations, total and by race, into police beats. Population is attributed from tracts to beats according to the area of their overlap. The basic procedure is to find the overlaps between beats and Census tracts, then assign a portion of each tract's population to the beat, based on the ratio of the size of the overlap to the size of the tract.


In [4]:
import seaborn as sns
import metapack as mp
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display 

# Draw matplotlib figures inline in the cell output.
%matplotlib inline
# Use seaborn's 'notebook' plotting context for the figures below.
sns.set_context('notebook')

In [5]:
# Open the metapack package through the Jupyter helper; `pkg` supplies the
# resources and references that the following cells load.
pkg = mp.jupyter.open_package()
# Alternative opener, left disabled.
#pkg = mp.jupyter.open_source_package()
# Bare last expression so the notebook renders the package summary.
pkg


Out[5]:

San Diego Police Regions

sandiego.gov-police_regions-3 Last Update: 2018-11-27T00:25:36

Boundary shapes for San Diego neighborhoods, beats and divisions.

Documentation Links

Contacts

Resources

References

  • tracts. Census tracts from 2016 5 year ACS, for San Diego county
  • race. Race, by tract, in San Diego county

In [6]:
# Load the police beat boundaries from the package.
beats = pkg.resource('pd_beats').geoframe()

# There are beats that are way off in east county. Get rid of them.
# NOTE(review): the max of the six largest centroid x values is simply the overall
# maximum, so the strict `<` filter below removes only the row(s) holding that single
# largest value. If the intent was to drop all six rightmost rows this should be
# `.min()` -- confirm before changing, because it alters the published numbers.
rightmost_centroid = beats.centroid.x.sort_values(ascending=False).iloc[:6].max()

beats = beats[beats.centroid.x < rightmost_centroid]

# Reproject to EPSG:26911 (NAD83 / UTM zone 11N) so area calculations will be in
# square meters, rather than square degrees. The `epsg=` keyword selects the same CRS
# as the `{'init': 'epsg:26911'}` dict, which newer pyproj releases deprecate.
beats = beats.to_crs(epsg=26911)

# It looks like the dataset has multiple rows per beat, one feature per row. We need
# it to have one row per beat, with multiple features combined together.
beats = beats.dissolve(by='beat').reset_index()

# Add the area, converted from square meters to square kilometers.
beats['beat_area'] = beats.area / 1_000_000

beats.plot()


Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x11ee77a58>

In [7]:
# Load the Census tract boundaries from the package's references.
tracts = pkg.reference('tracts').geoframe()

# Reproject to EPSG:26911 (NAD83 / UTM zone 11N) so areas come out in square meters.
# The `epsg=` keyword selects the same CRS as the `{'init': 'epsg:26911'}` dict,
# which newer pyproj releases deprecate.
tracts = tracts.to_crs(epsg=26911)

# Add the area, converted from square meters to square kilometers.
tracts['tract_area'] = tracts.area / 1_000_000


tracts.plot()


Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x108bb6198>

In [8]:
from rowgenerators import parse_app_url

# Resolve the census:// URL and load it as a dataframe. The URL presumably names
# state CA, summary level 140 and table B03002 -- confirm against the census
# URL scheme.
census_url = parse_app_url('census://CA/140/B03002')
t = census_url.dataframe()

In [9]:
# White, black, asian, etc are all non hispanic. 
col_map = {
    'B03002_001':'total',
    'B03002_003':'white',
    'B03002_004':'black',
    'B03002_005':'aian',
    'B03002_006':'asian',
    'B03002_007':'nhopi', 
    'B03002_012':'hisp'
    
}

for k,v in list(col_map.items()):
    col_map[k+'_m90'] = col_map[k]+'_m90'
    
race_tracts = t[t.COUNTY=='073'].rename(columns=col_map).reset_index().rename(columns={'GEOID':'geoid'})

In [10]:
# Keep only the tract id and the population estimate columns.
kept_columns = ['geoid', 'total', 'white', 'black', 'aian', 'asian', 'nhopi', 'hisp']
race_tracts = race_tracts[kept_columns]

# Preview the first rows, transposed so each column reads as a line.
# NOTE(review): `.titles` is not a plain pandas attribute; presumably it is supplied
# by the frame type that the census URL returns -- confirm.
race_tracts.titles.head().T


Out[10]:
0 1 2 3 4
GEOID 14000US06073000100 14000US06073000201 14000US06073000202 14000US06073000300 14000US06073000400
total 2773 2158 4828 4946 3916
total_m90 185 241 415 405 306
white 2276 1628 3477 3437 2655
white_m90 222 236 417 326 331
black 0 0 37 177 52
black_m90 12 12 50 138 66
aian 0 8 39 0 0
aian_m90 12 12 56 12 12
asian 84 84 368 196 475
asian_m90 51 59 218 121 210
nhopi 0 0 0 8 0
nhopi_m90 12 12 12 16 12
hisp 290 389 767 886 649
hisp_m90 102 326 205 294 293

In [11]:
# Spatially join beats to tracts as a visual sanity check.
# NOTE: this rebinds `t`, which held the census table in earlier cells, so those
# cells cannot be re-run after this one without reloading the table. No later
# cell visible in this notebook reads `t`.
t = gpd.sjoin(beats, tracts)

# Plot the joined features with the beat centroids overlaid in red.
ax = t.plot()
beats.centroid.plot(ax=ax, color='red')

# One row per distinct (tract, beat) pair, with the geometry and area of each side.
pair_keys = t[['geoid', 'beat']].drop_duplicates()
tract_columns = tracts[['geoid', 'geometry', 'tract_area']]
beat_columns = beats[['beat', 'geometry', 'beat_area']]
t = pair_keys.merge(tract_columns, on='geoid').merge(beat_columns, on='beat')



In [12]:
# Intersect every beat with every tract, keeping only the two keys and the
# geometry of each overlap.
intr = gpd.overlay(beats, tracts, how='intersection')[['beat', 'geoid', 'geometry']]

# Overlap area, scaled the same way as beat_area and tract_area.
intr['intr_area'] = (intr.geometry.area / 1_000_000.0).astype(float)

# Get rid of really small intersections
min_intr_area = .01
intr = intr[intr.intr_area >= min_intr_area]

# Attach both parent areas and the tract populations to each overlap, keeping a
# single row per (beat, tract) pair.
# NOTE(review): if a pair ever had several overlap rows, drop_duplicates would keep
# only the first and discard the rest of its area -- presumably never the case here.
merged = (
    intr[['beat', 'geoid', 'intr_area']]
    .merge(tracts[['geoid', 'tract_area']], on='geoid')
    .merge(beats[['beat', 'beat_area']], on='beat')
    .merge(race_tracts, on='geoid')
    .drop_duplicates(subset=['beat', 'geoid'])
)

# Share of the tract, and share of the beat, covered by each overlap.
merged['tract_overlap_proportion'] = merged.intr_area / merged.tract_area
merged['beat_overlap_proportion'] = merged.intr_area / merged.beat_area

# The intersection areas must be smaller than both of the areas being intersected
assert not (merged.intr_area > merged.beat_area).any()
assert not (merged.intr_area > merged.tract_area).any()

# Check that all of the areas of the beats are accounted for
beat_coverage = merged.groupby('beat').beat_overlap_proportion.sum().round(1)
assert (beat_coverage == 1).all()

# Give each overlap its share of the tract's population, column by column.
for col in ['total', 'white', 'asian', 'black', 'aian', 'hisp', 'nhopi']:
    merged[col] = merged[col] * merged.tract_overlap_proportion

merged.head().T


Out[12]:
0 1 2 3 4
beat 0 721 0 0 511
geoid 14000US06073021900 14000US06073021900 14000US06073021600 14000US06073003800 14000US06073003800
intr_area 0.183666 0.0228637 0.645752 0.0366767 1.77264
tract_area 10.6162 10.6162 15.2322 1.82267 1.82267
beat_area 18.2475 7.63003 18.2475 18.2475 6.80108
total 90.741 11.2959 155.544 133.613 6457.75
total_m90 1284 1284 376 1040 1040
white 29.4973 3.67198 105.137 68.8994 3330.02
white_m90 690 690 281 468 468
black 14.6016 1.81769 8.05486 26.6422 1287.66
black_m90 384 384 109 328 328
aian 0.899625 0.11199 0.127182 1.04637 50.5728
aian_m90 69 69 6 54 54
asian 9.39416 1.16944 5.08728 10.5039 507.673
asian_m90 293 293 72 188 188
nhopi 0.70932 0.0882999 1.05985 0.523185 25.2864
nhopi_m90 42 42 27 29 29
hisp 34.255 4.26424 31.8803 24.5696 1187.49
hisp_m90 352 352 248 394 394
tract_overlap_proportion 0.0173005 0.00215366 0.042394 0.0201225 0.972553
beat_overlap_proportion 0.0100653 0.00299654 0.0353886 0.00200996 0.260641

In [13]:
# Total the apportioned tract populations for each beat and round to whole people.
# The population columns are selected before summing so the aggregation never
# touches the other columns (such as the string `geoid`), whose handling by
# GroupBy.sum differs between pandas versions.
race_columns = ['total', 'white', 'black', 'aian', 'asian', 'nhopi', 'hisp']
beat_demographics = merged.groupby('beat')[race_columns].sum().round()

In [14]:
# Materialize the `beat_demographics` dataframe into the given directory; the recorded
# output reports a `beat_demographics.csv` path inside it.
# NOTE(review): the target is an absolute path on a single machine, presumably
# supplied by the build (TODO confirm).
%mt_materialize beat_demographics '/Users/eric/Library/Application Support/metapack/_materialized_data/sandiego.gov-police_regions-3'


{
    "df_name": "beat_demographics",
    "path": "/Users/eric/Library/Application Support/metapack/_materialized_data/sandiego.gov-police_regions-3/beat_demographics.csv"
}

In [15]:
# Runs the materialize-all magic against the same machine-specific directory; the
# recorded output is an empty list.
%mt_materialize_all '/Users/eric/Library/Application Support/metapack/_materialized_data/sandiego.gov-police_regions-3'


[]

In [16]:
# Print Metatab text; the recorded output holds only a Declare line and an empty
# Resources section.
%mt_show_metatab


Declare: metatab-latest
Section: Resources

In [17]:
# Print the library directories known to the build; the recorded output is an
# empty list.
%mt_show_libdirs


[]