In [19]:
import shapefile
from pandas import DataFrame
from geopandas import GeoDataFrame
from shapely.geometry import Point
# read the shapefile
reader = shapefile.Reader("../philly/STR_Centerline/Street_Centerline.shp")
fields = reader.fields[1:]
field_names = [field[0] for field in fields]
buffer = []
for sr in reader.shapeRecords():
atr = dict(zip(field_names, sr.record))
geom = sr.shape.__geo_interface__
buffer.append(dict(type="Feature", \
geometry=geom, properties=atr))
# write the GeoJSON file
buffer = str(buffer)
from json import dumps
gjson = open("../philly/GeoJSON_data/Street_Centerline.json", "w+")
gjson.write(dumps({"type": "FeatureCollection",\
"features": buffer}, indent=2) + "\n")
gjson.close()
In [1]:
%pylab inline
import geopandas as gpd
from geopandas.tools import sjoin
import pandas as pd
from IPython.display import display
from pandas import DataFrame
from geopandas import GeoDataFrame
from shapely.geometry import Point
Populating the interactive namespace from numpy and matplotlib
In [37]:
# street_gpd_2.crs = fiona.crs.from_epsg(102729)
street_gpd_2.crs = {'init':'epsg:102729'}
In [3]:
import geojson, json
In [2]:
street_gpd = gpd.read_file('../philly/GeoJSON_data/Street_Centerline_qgis3.geojson')
street_gpd.plot()
print(street_gpd.shape)
street_gpd.head().T
(41022, 31)
Out[2]:
0
1
2
3
4
CLASS
3
3
3
5
4
FNODE_
2
2
1
6
5
LENGTH
449.863
540.083
446.104
447.261
148.216
LPOLY_
0
0
0
0
0
L_F_ADD
1500
400
1600
1600
350
L_HUNDRED
1500
400
1600
1600
300
L_T_ADD
1598
498
1698
1698
398
MULTI_REP
0
0
0
0
0
NEWSEGDATE
None
None
None
None
None
ONEWAY
FT
TF
FT
TF
TF
PRE_DIR
None
N
None
None
N
RESPONSIBL
FAM
FAM
FAM
None
None
RPOLY_
0
0
0
0
0
R_F_ADD
1501
401
1601
1601
351
R_HUNDRED
1500
400
1600
1600
300
R_T_ADD
1599
499
1699
1699
399
SEG_ID
420708
422065
420702
420732
420718
SHAPE_LEN
449.863
540.083
446.104
447.261
148.216
STCL2_
1
2
3
4
5
STCL2_ID
85205
86540
85199
85229
85215
STNAME
CALLOWHILL ST
N 15TH ST
CALLOWHILL ST
CARLTON ST
N 17TH ST
STREETLABE
CALLOWHILL ST
N 15TH ST
CALLOWHILL ST
CARLTON ST
N 17TH ST
ST_CODE
20880
88070
20880
21440
88110
ST_NAME
CALLOWHILL
15TH
CALLOWHILL
CARLTON
17TH
ST_TYPE
ST
ST
ST
ST
ST
SUF_DIR
None
None
None
None
None
TNODE_
1
3
4
5
4
UPDATE_
1997-02-10
1997-02-10
1997-02-10
1997-02-10
1997-06-27
ZIP_LEFT
19130
19130
19130
19103
19103
ZIP_RIGHT
19130
19130
19130
19103
19103
geometry
LINESTRING (-75.16371257852462 39.959816984389...
LINESTRING (-75.16371257852462 39.959816984389...
LINESTRING (-75.16529687686355 39.960013687935...
LINESTRING (-75.16537539004391 39.959601782423...
LINESTRING (-75.16694872703492 39.959805718080...
In [5]:
collision_gpd = gpd.read_file('../philly/GeoJSON_data/GIS_HEALTH.Collisions_crash_2011_2014PUBV.geojson')
print(collision_gpd.shape)
(43488, 54)
In [6]:
collision_gpd.head().T
Out[6]:
0
1
2
3
4
AUTOMOBILE_COUNT
0
1
1
1
0
BELTED_DEATH_COUNT
0
0
0
0
0
BELTED_MAJ_INJ_COUNT
0
0
0
0
0
BICYCLE_COUNT
0
1
1
0
0
BICYCLE_DEATH_COUNT
0
0
0
0
0
BICYCLE_MAJ_INJ_COUNT
0
0
0
0
0
BUS_COUNT
0
0
0
0
0
COLLISION_TYPE
1
4
4
8
8
COMM_VEH_COUNT
1
0
0
0
0
CRASH_MONTH
8
8
8
8
8
CRASH_YEAR
2014
2014
2014
2014
2014
CRN
2014087880
2014087758
2014096425
2014088141
2014091439
DAY_OF_WEEK
6
6
7
7
7
FATAL_COUNT
0
0
0
0
0
HEAVY_TRUCK_COUNT
1
0
0
0
0
HOUR_OF_DAY
15
14
19
19
19
ILLUMINATION
1
1
1
1
1
INJURY_COUNT
5
1
1
1
1
INTERSECTION
0
0
1
0
0
INTERSECT_TYPE
0
0
1
0
0
LATITUDE
40.085
39.9904
40.0293
39.9535
40.0162
LENGTH
439.229
778.344
170.622
194.532
396.315
LOCATION_TYPE
0
0
0
0
0
LONGITUDE
-75.038
-75.1028
-75.0548
-75.2405
-75.0906
MAJ_INJ_COUNT
0
0
0
0
0
MAX_SEVERITY_LEVEL
4
4
3
4
8
MCYCLE_DEATH_COUNT
0
0
0
0
0
MCYCLE_MAJ_INJ_COUNT
0
0
0
0
0
MIN_INJ_COUNT
5
1
0
1
0
MOD_INJ_COUNT
0
0
1
0
0
MOTORCYCLE_COUNT
0
0
0
0
0
OBJECTID
4001
4002
4003
4004
4005
PED_COUNT
0
0
0
1
1
PED_DEATH_COUNT
0
0
0
0
0
PED_MAJ_INJ_COUNT
0
0
0
0
0
PERSON_COUNT
7
3
2
2
2
RELATION_TO_ROAD
1
1
1
1
1
ROAD_CONDITION
1
0
0
0
0
SCH_BUS_IND
N
N
N
N
N
SCH_ZONE_IND
N
N
U
U
N
SEG_ID
960283
541001
760684
300613
640755
SMALL_TRUCK_COUNT
0
0
0
0
0
SUV_COUNT
2
0
0
0
1
TCD_TYPE
0
0
0
0
0
TIME_OF_DAY
1500
1450
1919
1910
1905
UNBELTED_OCC_COUNT
0
0
0
0
0
UNB_DEATH_COUNT
0
0
0
0
0
UNB_MAJ_INJ_COUNT
0
0
0
0
0
UNK_INJ_DEG_COUNT
0
0
0
0
1
UNK_INJ_PER_COUNT
0
0
0
0
0
VAN_COUNT
0
0
0
0
0
VEHICLE_COUNT
3
1
1
1
1
WEATHER
2
1
1
1
1
geometry
POINT (-75.03795178519658 40.08497541304385)
POINT (-75.10275539598592 39.990404578965)
POINT (-75.05481628915624 40.02929723040301)
POINT (-75.24050521636346 39.95347871767484)
POINT (-75.09056881169107 40.01618353851767)
In [21]:
collision_gpd[collision_gpd.SEG_ID==420708].T
Out[21]:
6682
11674
21114
33351
34755
36630
36938
AUTOMOBILE_COUNT
2
1
2
1
2
2
0
BELTED_DEATH_COUNT
0
0
0
0
0
0
0
BELTED_MAJ_INJ_COUNT
0
0
0
0
0
0
0
BICYCLE_COUNT
0
1
0
0
0
0
1
BICYCLE_DEATH_COUNT
0
0
0
0
0
0
0
BICYCLE_MAJ_INJ_COUNT
0
0
0
0
0
0
0
BUS_COUNT
0
0
0
0
0
0
0
COLLISION_TYPE
4
4
4
4
4
4
4
COMM_VEH_COUNT
0
0
0
0
0
0
0
CRASH_MONTH
10
10
6
1
6
10
10
CRASH_YEAR
2013
2013
2013
2012
2011
2011
2011
CRN
2013114450
2013111794
2013079452
2012015372
2011069166
2011115373
2011115250
DAY_OF_WEEK
3
6
4
1
5
4
6
FATAL_COUNT
0
0
0
0
0
0
0
HEAVY_TRUCK_COUNT
0
0
0
0
0
0
0
HOUR_OF_DAY
13
6
13
13
15
16
13
ILLUMINATION
1
1
1
1
1
1
1
INJURY_COUNT
0
1
0
0
0
3
1
INTERSECTION
1
1
1
1
1
1
0
INTERSECT_TYPE
1
1
1
1
1
1
0
LATITUDE
39.9599
39.9599
39.9599
39.9599
39.9599
39.9599
39.9599
LENGTH
449.863
449.863
449.863
449.863
449.863
449.863
449.863
LOCATION_TYPE
0
0
0
0
0
0
0
LONGITUDE
-75.1645
-75.1645
-75.1645
-75.1645
-75.1645
-75.1645
-75.1645
MAJ_INJ_COUNT
0
0
0
0
0
0
0
MAX_SEVERITY_LEVEL
0
8
0
0
0
3
8
MCYCLE_DEATH_COUNT
0
0
0
0
0
0
0
MCYCLE_MAJ_INJ_COUNT
0
0
0
0
0
0
0
MIN_INJ_COUNT
0
0
0
0
0
2
0
MOD_INJ_COUNT
0
0
0
0
0
1
0
MOTORCYCLE_COUNT
0
0
0
0
0
0
0
OBJECTID
11683
7675
21115
35352
33756
37631
37939
PED_COUNT
0
0
0
0
0
0
0
PED_DEATH_COUNT
0
0
0
0
0
0
0
PED_MAJ_INJ_COUNT
0
0
0
0
0
0
0
PERSON_COUNT
3
2
2
3
4
3
2
RELATION_TO_ROAD
1
1
1
1
1
1
1
ROAD_CONDITION
0
1
0
0
0
0
0
SCH_BUS_IND
N
N
N
N
N
N
N
SCH_ZONE_IND
N
U
N
N
N
U
N
SEG_ID
420708
420708
420708
420708
420708
420708
420708
SMALL_TRUCK_COUNT
0
0
0
0
0
0
0
SUV_COUNT
0
0
0
1
0
0
0
TCD_TYPE
2
3
2
2
3
2
0
TIME_OF_DAY
1358
640
1300
1315
1530
1618
1345
UNBELTED_OCC_COUNT
0
0
2
0
0
0
0
UNB_DEATH_COUNT
0
0
0
0
0
0
0
UNB_MAJ_INJ_COUNT
0
0
0
0
0
0
0
UNK_INJ_DEG_COUNT
0
1
0
0
0
0
1
UNK_INJ_PER_COUNT
0
0
0
0
0
0
0
VAN_COUNT
0
0
0
0
0
0
1
VEHICLE_COUNT
2
1
2
2
2
2
1
WEATHER
1
2
1
1
1
1
1
geometry
POINT (-75.16450649969386 39.95992340269341)
POINT (-75.16450649969386 39.95992340269341)
POINT (-75.16450649969386 39.95992340269341)
POINT (-75.16450649969386 39.95992340269341)
POINT (-75.16450649969386 39.95992340269341)
POINT (-75.16450649969386 39.95992340269341)
POINT (-75.16450649969386 39.95992340269341)
In [28]:
none_seg_count = 0
for seg in collision_gpd.SEG_ID:
if seg is None:
none_seg_count += 1
print(none_seg_count)
0
In [19]:
for col_name in collision_gpd.columns:
if ('_COUNT' in col_name) or ('_TYPE' in col_name) or ('CRASH' in col_name) or (col_name == 'SEG_ID'):
print(col_name)
AUTOMOBILE_COUNT
BELTED_DEATH_COUNT
BELTED_MAJ_INJ_COUNT
BICYCLE_COUNT
BICYCLE_DEATH_COUNT
BICYCLE_MAJ_INJ_COUNT
BUS_COUNT
COLLISION_TYPE
COMM_VEH_COUNT
CRASH_MONTH
CRASH_YEAR
FATAL_COUNT
HEAVY_TRUCK_COUNT
INJURY_COUNT
INTERSECT_TYPE
LOCATION_TYPE
MAJ_INJ_COUNT
MCYCLE_DEATH_COUNT
MCYCLE_MAJ_INJ_COUNT
MIN_INJ_COUNT
MOD_INJ_COUNT
MOTORCYCLE_COUNT
PED_COUNT
PED_DEATH_COUNT
PED_MAJ_INJ_COUNT
PERSON_COUNT
SEG_ID
SMALL_TRUCK_COUNT
SUV_COUNT
TCD_TYPE
UNBELTED_OCC_COUNT
UNB_DEATH_COUNT
UNB_MAJ_INJ_COUNT
UNK_INJ_DEG_COUNT
UNK_INJ_PER_COUNT
VAN_COUNT
VEHICLE_COUNT
In [6]:
street_gpd[street_gpd.SEG_ID==960283]
Out[6]:
CLASS
FNODE_
LENGTH
LPOLY_
L_F_ADD
L_HUNDRED
L_T_ADD
MULTI_REP
NEWSEGDATE
ONEWAY
...
STREETLABE
ST_CODE
ST_NAME
ST_TYPE
SUF_DIR
TNODE_
UPDATE_
ZIP_LEFT
ZIP_RIGHT
geometry
22464
2
16299
439.229039
0
9400
9400
9498
0
None
B
...
BUSTLETON AVE
20020
BUSTLETON
AVE
None
16543
1998-10-08
19115
19115
LINESTRING (-75.038401134496 40.08447396565553...
1 rows × 31 columns
In [7]:
collision_gpd[collision_gpd.CRASH_YEAR=='2014'].shape
Out[7]:
(10627, 54)
In [7]:
collision_gpd.groupby(collision_gpd.CRASH_YEAR).size()
Out[7]:
CRASH_YEAR
2011 10668
2012 11196
2013 10997
2014 10627
dtype: int64
In [9]:
def haversine(lon1, lat1, lon2, lat2):
"""
Calculate the great circle distance between two points
on the earth (specified in decimal degrees)
"""
from math import radians, cos, sin, asin, sqrt
# convert decimal degrees to radians
lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
# haversine formula
dlon = lon2 - lon1
dlat = lat2 - lat1
a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
c = 2 * asin(sqrt(a))
km = 6367 * c
m = km *1000
return m
def ptfromln(pt, ln):
n_pt = ln.interpolate(ln.project(pt))
lon1, lat1 = n_pt.coords[0]
lon2, lat2 = pt.coords[0]
return haversine(lon1, lat1, lon2, lat2)
def pts2seg(pts, gp_segs, buffer_dis=50, near_dis_thres=5):
pts_crs,gp_segs_crs = pts.to_crs(epsg=3559), gp_segs.to_crs(epsg=3559)
pts_crs_bfr = pts_crs.copy()
pts_crs_bfr.geometry = pts_crs_bfr.buffer(near_dis_thres*1.1)
close_jn = gpd.tools.sjoin(pts_crs_bfr, gp_segs_crs)[['OBJECTID','SEG_ID_right']]
handledid = set(pd.unique(close_jn.OBJECTID))
mask = (~pts_crs_bfr.OBJECTID.isin(handledid))
far_jns = []
while pts_crs_bfr[mask].shape[0]!=0:
pts_crs_bfr.loc[mask, 'geometry'] = pts_crs_bfr[mask].buffer(buffer_dis)
jn = gpd.tools.sjoin(pts_crs_bfr[mask], gp_segs_crs)[['OBJECTID','SEG_ID_right']]
far_jns.append(jn)
handledid |= set(pd.unique(jn.OBJECTID))
mask = (~pts_crs_bfr.OBJECTID.isin(handledid))
far_jns = pd.concat(far_jns)
mr = pd.merge(gp_segs[['geometry','SEG_ID']],far_jns , left_on='SEG_ID', right_on='SEG_ID_right')
#mr = pd.merge(pts[['OBJECTID','geometry','SEG_ID']],mr, left_on='OBJECTID', right_on='OBJECTID_left')
mr['dis']=mr.apply(lambda x: ptfromln(x.geometry_x, x.geometry_y),axis=1)
result = close_jn.groupby('OBJECTID')['SEG_ID_right'].apply(list).append(mr.groupby('OBJECTID').apply(lambda x: [x.ix[x.dis.idxmin()].SEG_ID_y]))
return pd.DataFrame(result, columns=['segid'])
In [8]:
philly_seg_stats = street_gpd[['SEG_ID', 'SHAPE_LEN', 'geometry']]
philly_seg_stats.shape
Out[8]:
(41022, 3)
In [49]:
collision_pt_seg = pts2seg(collision_gpd, philly_seg_stats)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-49-07a5db01d260> in <module>()
----> 1 collision_pt_seg = pts2seg(collision_gpd, philly_seg_stats)
<ipython-input-45-242f3563623a> in pts2seg(pts, gp_segs, buffer_dis, near_dis_thres)
32 while pts_crs_bfr[mask].shape[0]!=0:
33 pts_crs_bfr.loc[mask, 'geometry'] = pts_crs_bfr[mask].buffer(buffer_dis)
---> 34 jn = gpd.tools.sjoin(pts_crs_bfr[mask], gp_segs_crs)[['OBJECTID','SEG_ID_right']]
35 far_jns.append(jn)
36 handledid |= set(pd.unique(jn.OBJECTID))
//anaconda/lib/python3.5/site-packages/geopandas/tools/sjoin.py in sjoin(left_df, right_df, how, op, lsuffix, rsuffix)
55 idxmatch = idxmatch[idxmatch.apply(len) > 0]
56
---> 57 r_idx = np.concatenate(idxmatch.values)
58 l_idx = np.concatenate([[i] * len(v) for i, v in idxmatch.iteritems()])
59
ValueError: need at least one array to concatenate
In [12]:
philly_seg_buffer_gp = street_gpd[['SEG_ID','STCL2_ID','geometry']].copy()
philly_seg_buffer_gp.geometry = philly_seg_buffer_gp.buffer(0.0001)
collision_buffer_gp = collision_gpd[['SEG_ID','OBJECTID', 'geometry','CRASH_YEAR']].copy()
collision_buffer_gp.geometry = collision_buffer_gp.buffer(0.0001)
joined = sjoin(collision_buffer_gp, philly_seg_buffer_gp, how='left', op='intersects')
In [91]:
philly_seg_buffer_gp = street_gpd[['SEG_ID','STCL2_ID','geometry']].copy()
philly_seg_buffer_gp.geometry = philly_seg_buffer_gp.buffer(0.0001)
In [42]:
print(philly_seg_buffer_gp.shape)
philly_seg_buffer_gp.head().T
(41022, 3)
Out[42]:
0
1
2
3
4
SEG_ID
420708
422065
420702
420732
420718
STCL2_ID
85205
86540
85199
85229
85215
geometry
POLYGON ((2692954.035913117 238893.638507401, ...
POLYGON ((2693439.911167583 239131.2080263407,...
POLYGON ((2692511.692965856 238951.4471188142,...
POLYGON ((2692493.394445261 238804.3647359097,...
POLYGON ((2692511.692879579 238951.4472303169,...
In [55]:
collision_buffer_gp = collision_gpd[['SEG_ID','OBJECTID','geometry','CRASH_YEAR']].copy()
collision_buffer_gp.geometry = collision_buffer_gp.buffer(0.0001)
print(collision_buffer_gp.shape)
collision_buffer_gp.head().T
(43488, 4)
Out[55]:
0
1
2
3
4
SEG_ID
960283
541001
760684
300613
640755
OBJECTID
4001
4002
4003
4004
4005
geometry
POLYGON ((-75.03785178519658 40.08497541304385...
POLYGON ((-75.10265539598592 39.990404578965, ...
POLYGON ((-75.05471628915623 40.02929723040301...
POLYGON ((-75.24040521636346 39.95347871767484...
POLYGON ((-75.09046881169107 40.01618353851767...
CRASH_YEAR
2014
2014
2014
2014
2014
In [13]:
joined.shape
Out[13]:
(67230, 7)
In [47]:
joined = sjoin(collision_buffer_gp, philly_seg_buffer_gp, how='left', op='intersects')
In [48]:
joined.head().T
Out[48]:
0
1
2
3
4
SEG_ID_left
960283
541001
760684
300613
640755
OBJECTID
4001
4002
4003
4004
4005
geometry
POLYGON ((-75.03785178519658 40.08497541304385...
POLYGON ((-75.10265539598592 39.990404578965, ...
POLYGON ((-75.05471628915623 40.02929723040301...
POLYGON ((-75.24040521636346 39.95347871767484...
POLYGON ((-75.09046881169107 40.01618353851767...
CRASH_YEAR
2014
2014
2014
2014
2014
index_right
22464
14639
27112
11124
8369
SEG_ID_right
960283
541001
760684
300613
640755
STCL2_ID
1993
1001
9617
82230
4375
In [52]:
street_gpd.crs
Out[52]:
{'init': 'epsg:4326'}
In [53]:
collision_gpd.crs
Out[53]:
{'init': 'epsg:4326'}
In [49]:
joined.info()
<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 67230 entries, 0 to 43487
Data columns (total 7 columns):
SEG_ID_left 66695 non-null float64
OBJECTID 67230 non-null int64
geometry 67230 non-null object
CRASH_YEAR 67230 non-null object
index_right 66694 non-null float64
SEG_ID_right 66694 non-null float64
STCL2_ID 66694 non-null float64
dtypes: float64(4), int64(1), object(2)
memory usage: 4.1+ MB
In [17]:
street_gpd.plot()
Out[17]:
<matplotlib.axes._subplots.AxesSubplot at 0x14269c5f8>
In [50]:
join_gb = joined.reset_index().groupby('OBJECTID').count()
In [51]:
join_gb.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 43488 entries, 1 to 43488
Data columns (total 7 columns):
index 43488 non-null int64
SEG_ID_left 43488 non-null int64
geometry 43488 non-null int64
CRASH_YEAR 43488 non-null int64
index_right 43488 non-null int64
SEG_ID_right 43488 non-null int64
STCL2_ID 43488 non-null int64
dtypes: int64(7)
memory usage: 2.7 MB
In [21]:
joined.shape
Out[21]:
(67230, 7)
In [29]:
join_gb.shape
Out[29]:
(43488, 7)
In [52]:
print(join_gb[join_gb.SEG_ID_right==0].shape[0], ' points spatially match no segment')
print(join_gb[join_gb.SEG_ID_right==1].shape[0], ' points spatially match 1 segment')
print(join_gb[join_gb.SEG_ID_right>1].shape[0], ' points spatially match multiple segments')
536 points spatially match no segment
31630 points spatially match 1 segment
11322 points spatially match multiple segments
In [31]:
join_gb.head().T
Out[31]:
OBJECTID
1
2
3
4
5
index
1
2
1
1
1
SEG_ID_left
1
2
1
1
1
geometry
1
2
1
1
1
CRASH_YEAR
1
2
1
1
1
index_right
1
2
1
1
1
SEG_ID_right
1
2
1
1
1
STCL2_ID
1
2
1
1
1
In [41]:
philly_seg_stats.head().T
Out[41]:
0
1
2
3
4
SEG_ID
420708
422065
420702
420732
420718
SHAPE_LEN
449.863
540.083
446.104
447.261
148.216
geometry
LINESTRING (-75.16371257852462 39.959816984389...
LINESTRING (-75.16371257852462 39.959816984389...
LINESTRING (-75.16529687686355 39.960013687935...
LINESTRING (-75.16537539004391 39.959601782423...
LINESTRING (-75.16694872703492 39.959805718080...
In [54]:
join_df = joined[['SEG_ID_left', 'OBJECTID']]
In [62]:
join_df.columns = ['SEG_ID', 'OBJECTID']
join_df.head().T
join_df.info()
philly_seg_stats.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 67230 entries, 0 to 43487
Data columns (total 2 columns):
SEG_ID 66695 non-null float64
OBJECTID 67230 non-null int64
dtypes: float64(1), int64(1)
memory usage: 1.5 MB
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 41022 entries, 0 to 41021
Data columns (total 3 columns):
SEG_ID 41022 non-null int64
SHAPE_LEN 41022 non-null float64
geometry 41022 non-null object
dtypes: float64(1), int64(1), object(1)
memory usage: 961.5+ KB
In [77]:
join_df_numeric = join_df[['SEG_ID', 'OBJECTID']].dropna().astype(int)
In [78]:
join_df_numeric.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 66695 entries, 0 to 43487
Data columns (total 2 columns):
SEG_ID 66695 non-null int64
OBJECTID 66695 non-null int64
dtypes: int64(2)
memory usage: 1.5 MB
In [135]:
collision_count = pd.DataFrame(join_df_numeric.groupby(['SEG_ID']).size().reset_index())
collision_count.columns = ['SEG_ID', 'collision_count']
In [136]:
collision_count.head().T
Out[136]:
0
1
2
3
4
SEG_ID
100002
100003
100006
100007
100008
collision_count
1
15
1
1
1
In [137]:
collision_count[collision_count.SEG_ID == 100003]
Out[137]:
SEG_ID
collision_count
1
100003
15
In [134]:
joined.shape
Out[134]:
(67230, 7)
In [141]:
philly_collision_stats = philly_seg_stats.merge(collision_count, on = 'SEG_ID', how = 'inner')
In [142]:
philly_collision_stats.shape
Out[142]:
(13186, 4)
In [152]:
philly_collision_stats['collision_count/length'] = philly_collision_stats.collision_count/philly_collision_stats.SHAPE_LEN
In [153]:
philly_collision_stats.head().T
Out[153]:
0
1
2
3
4
SEG_ID
420708
422065
420702
420732
420696
SHAPE_LEN
449.863
540.083
446.104
447.261
319.5
geometry
LINESTRING (-75.16371257852462 39.959816984389...
LINESTRING (-75.16371257852462 39.959816984389...
LINESTRING (-75.16529687686355 39.960013687935...
LINESTRING (-75.16537539004391 39.959601782423...
LINESTRING (-75.16686813417091 39.960207835856...
collision_count
7
1
4
1
2
collsion_count/length
0.0155603
0.00185157
0.00896651
0.00223583
0.00625978
collision_count_norm
0.333648
0.111216
0.258235
0.111216
0.176273
collision_count/length
0.0155603
0.00185157
0.00896651
0.00223583
0.00625978
In [94]:
def normalize(col_name, df):
col_name_norm = col_name + '_norm'
df[col_name_norm] = df[col_name]+1
df[col_name_norm] = np.log2(df[col_name_norm])
df[col_name_norm] = df[col_name_norm]/df[col_name_norm].max()
In [154]:
normalize('collision_count', philly_collision_stats)
normalize('collision_count/length', philly_collision_stats)
In [155]:
philly_collision_stats.head().T
Out[155]:
0
1
2
3
4
SEG_ID
420708
422065
420702
420732
420696
SHAPE_LEN
449.863
540.083
446.104
447.261
319.5
geometry
LINESTRING (-75.16371257852462 39.959816984389...
LINESTRING (-75.16371257852462 39.959816984389...
LINESTRING (-75.16529687686355 39.960013687935...
LINESTRING (-75.16537539004391 39.959601782423...
LINESTRING (-75.16686813417091 39.960207835856...
collision_count
7
1
4
1
2
collsion_count/length
0.0155603
0.00185157
0.00896651
0.00223583
0.00625978
collision_count_norm
0.333648
0.111216
0.258235
0.111216
0.176273
collision_count/length
0.0155603
0.00185157
0.00896651
0.00223583
0.00625978
collision_count/length_norm
0.00826819
0.000990576
0.00478006
0.00119592
0.00334159
In [157]:
def gradient_color(percent):
min_color = np.array([255,255,255])
max_color = np.array([65,105,255])
return '#%02x%02x%02x' % tuple([int(k) for k in min_color+(max_color-min_color)*percent])
def write_var(col_name, var_name, df,f):
df['color']=df[col_name].apply(gradient_color)
f.write('var %s = %s;\n' % (var_name, df.to_json()))
with open('../visualization/Volumes of Collisions.js','w+') as f:
write_var('collision_count_norm', 'collision_count', philly_collision_stats,f)
write_var('collision_count/length_norm', 'collision_count_norm', philly_collision_stats,f)
In [162]:
philly_collision_stats[philly_collision_stats.SEG_ID==422279]
Out[162]:
SEG_ID
SHAPE_LEN
geometry
collision_count
collsion_count/length
collision_count_norm
collision_count/length
collision_count/length_norm
color
1674
422279
2360.798811
LINESTRING (-75.18428206823198 39.949500166985...
270
0.114368
0.898863
0.114368
0.057987
#f3f6ff
In [165]:
collision_gpd[collision_gpd.SEG_ID==422279.0]
Out[165]:
AUTOMOBILE_COUNT
BELTED_DEATH_COUNT
BELTED_MAJ_INJ_COUNT
BICYCLE_COUNT
BICYCLE_DEATH_COUNT
BICYCLE_MAJ_INJ_COUNT
BUS_COUNT
COLLISION_TYPE
COMM_VEH_COUNT
CRASH_MONTH
...
TIME_OF_DAY
UNBELTED_OCC_COUNT
UNB_DEATH_COUNT
UNB_MAJ_INJ_COUNT
UNK_INJ_DEG_COUNT
UNK_INJ_PER_COUNT
VAN_COUNT
VEHICLE_COUNT
WEATHER
geometry
137
1
0
0
0
0
0
0
7
0
8
...
935
0
0
0
0
0
0
1
1
POINT (-75.18744029819194 39.94735998737308)
957
1
0
0
0
0
0
0
1
0
4
...
1203
0
0
0
0
0
0
2
1
POINT (-75.18744029819194 39.94735998737308)
1321
2
0
0
0
0
0
0
5
0
2
...
106
1
0
0
0
0
0
2
1
POINT (-75.18744029819194 39.94735998737308)
1778
1
0
0
0
0
0
0
4
1
3
...
1005
0
0
0
0
1
0
2
2
POINT (-75.18744029819194 39.94735998737308)
2821
2
0
0
0
0
0
0
4
0
12
...
723
0
0
0
0
0
0
2
1
POINT (-75.18744029819194 39.94735998737308)
2876
0
0
0
0
0
0
0
5
0
3
...
2353
4
0
0
0
0
0
2
1
POINT (-75.18744029819194 39.94735998737308)
2925
1
0
0
0
0
0
0
1
0
1
...
1041
0
0
0
1
0
1
2
1
POINT (-75.18744029819194 39.94735998737308)
4094
1
0
0
0
0
0
0
5
1
10
...
1134
0
0
0
0
0
0
2
1
POINT (-75.18744029819194 39.94735998737308)
5421
2
0
0
0
0
0
0
1
0
11
...
1425
0
0
0
0
0
0
2
1
POINT (-75.18744029819194 39.94735998737308)
5693
1
0
0
0
0
0
0
1
0
12
...
1328
0
0
0
0
0
0
3
1
POINT (-75.18744029819194 39.94735998737308)
5995
3
0
0
0
0
0
0
1
0
9
...
1843
0
0
0
0
0
0
3
1
POINT (-75.18744029819194 39.94735998737308)
6425
0
0
0
0
0
0
0
1
0
8
...
1300
0
0
0
0
0
0
2
1
POINT (-75.18744029819194 39.94735998737308)
7077
0
0
0
0
0
0
0
1
0
12
...
1355
0
0
0
0
1
0
2
2
POINT (-75.18744029819194 39.94735998737308)
7806
0
0
0
0
0
0
0
1
0
12
...
1418
0
0
0
0
0
0
2
1
POINT (-75.18744029819194 39.94735998737308)
7955
2
0
0
0
0
0
0
1
0
12
...
1912
0
0
0
0
0
0
3
1
POINT (-75.18744029819194 39.94735998737308)
8105
2
0
0
0
0
0
0
1
0
11
...
1519
0
0
0
0
0
0
3
1
POINT (-75.18744029819194 39.94735998737308)
8594
1
0
0
0
0
0
0
1
0
11
...
634
0
0
0
0
0
1
2
2
POINT (-75.18744029819194 39.94735998737308)
8620
4
0
0
0
0
0
0
1
0
11
...
2125
0
0
0
0
0
0
4
1
POINT (-75.18744029819194 39.94735998737308)
8683
3
0
0
0
0
0
0
4
0
11
...
2017
0
0
0
0
0
0
3
1
POINT (-75.18744029819194 39.94735998737308)
8812
3
0
0
0
0
0
0
1
0
11
...
1742
0
0
0
0
0
0
3
1
POINT (-75.18744029819194 39.94735998737308)
8883
1
0
0
0
0
0
0
1
0
10
...
2313
0
0
0
0
0
0
2
1
POINT (-75.18744029819194 39.94735998737308)
9152
2
0
0
0
0
0
0
5
0
3
...
1510
0
0
0
0
0
0
2
1
POINT (-75.18744029819194 39.94735998737308)
9778
2
0
0
0
0
0
0
1
0
10
...
1308
0
0
0
0
0
0
2
1
POINT (-75.18744029819194 39.94735998737308)
10913
1
0
0
0
0
0
0
1
0
8
...
2145
0
0
0
0
1
0
2
1
POINT (-75.18744029819194 39.94735998737308)
11299
2
0
0
0
0
0
0
1
0
1
...
802
0
0
0
0
0
0
2
1
POINT (-75.18744029819194 39.94735998737308)
11750
3
0
0
0
0
0
0
1
0
4
...
1027
0
0
0
0
0
0
3
1
POINT (-75.18744029819194 39.94735998737308)
12242
2
0
0
0
0
0
0
1
0
8
...
2338
0
0
0
0
0
0
2
1
POINT (-75.18744029819194 39.94735998737308)
13319
1
0
0
0
0
0
0
7
0
4
...
235
0
0
0
0
0
0
1
1
POINT (-75.18744029819194 39.94735998737308)
14578
4
0
0
0
0
0
0
1
0
4
...
1314
0
0
0
0
1
0
4
1
POINT (-75.18744029819194 39.94735998737308)
14717
0
0
0
0
0
0
0
1
0
8
...
621
0
0
0
0
0
0
2
2
POINT (-75.18744029819194 39.94735998737308)
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
27576
1
0
0
0
0
0
0
1
0
4
...
1051
0
0
0
0
0
1
2
1
POINT (-75.18744029819194 39.94735998737308)
27592
1
0
0
0
0
0
0
7
0
2
...
1049
0
0
0
0
0
0
1
2
POINT (-75.18744029819194 39.94735998737308)
28374
2
0
1
0
0
0
0
1
0
4
...
1514
0
0
0
0
0
0
4
1
POINT (-75.18744029819194 39.94735998737308)
28513
0
0
0
0
0
0
0
1
0
5
...
1248
0
0
0
1
0
1
2
1
POINT (-75.18744029819194 39.94735998737308)
28795
1
0
0
0
0
0
0
1
0
12
...
1935
0
0
0
0
0
0
2
1
POINT (-75.18744029819194 39.94735998737308)
29490
0
0
0
0
0
0
0
7
0
12
...
1549
0
0
0
1
0
0
1
1
POINT (-75.18744029819194 39.94735998737308)
29874
0
0
0
0
0
0
0
7
0
4
...
629
0
0
0
0
0
0
1
1
POINT (-75.18744029819194 39.94735998737308)
29951
2
0
0
0
0
0
0
1
0
1
...
2219
0
0
0
0
0
0
2
1
POINT (-75.18744029819194 39.94735998737308)
30463
1
0
0
0
0
0
0
1
0
12
...
2107
0
0
0
0
1
1
3
1
POINT (-75.18744029819194 39.94735998737308)
30949
2
0
0
0
0
0
0
1
0
12
...
915
0
0
0
2
0
0
4
1
POINT (-75.18744029819194 39.94735998737308)
31358
3
0
0
0
0
0
0
1
0
6
...
750
1
0
0
0
0
0
3
1
POINT (-75.18744029819194 39.94735998737308)
31733
1
0
0
0
0
0
0
1
0
5
...
1706
0
0
0
1
0
0
2
2
POINT (-75.18744029819194 39.94735998737308)
32052
2
0
0
0
0
0
0
1
0
2
...
1210
0
0
0
0
0
0
2
1
POINT (-75.18744029819194 39.94735998737308)
32202
2
0
0
0
0
0
0
1
0
5
...
1012
0
0
0
0
0
0
2
1
POINT (-75.18744029819194 39.94735998737308)
32379
2
0
0
0
0
0
0
1
0
7
...
1210
0
0
0
0
0
0
2
1
POINT (-75.18744029819194 39.94735998737308)
33036
1
0
0
0
0
0
0
1
0
4
...
232
0
0
0
0
0
0
2
1
POINT (-75.18744029819194 39.94735998737308)
34231
2
0
0
0
0
0
0
1
0
7
...
1755
0
0
0
0
0
0
2
1
POINT (-75.18744029819194 39.94735998737308)
34293
3
0
0
0
0
0
0
1
0
7
...
835
0
0
0
0
0
0
3
1
POINT (-75.18744029819194 39.94735998737308)
35021
1
0
0
0
0
0
0
7
0
3
...
1853
0
0
0
0
0
0
1
2
POINT (-75.18744029819194 39.94735998737308)
35615
1
0
0
0
0
0
0
1
0
7
...
1545
0
0
0
0
0
0
2
1
POINT (-75.18744029819194 39.94735998737308)
36050
2
0
0
0
0
0
0
1
0
12
...
900
0
0
0
0
0
0
2
1
POINT (-75.18744029819194 39.94735998737308)
36717
2
0
0
0
0
0
0
1
0
12
...
725
0
0
0
0
0
0
2
1
POINT (-75.18744029819194 39.94735998737308)
37720
2
0
0
0
0
0
0
1
0
8
...
1207
0
0
0
0
0
0
3
1
POINT (-75.18744029819194 39.94735998737308)
39636
2
0
0
0
0
0
0
1
0
1
...
1702
0
0
0
0
0
0
2
1
POINT (-75.18744029819194 39.94735998737308)
40277
1
0
0
0
0
0
0
1
0
1
...
645
0
0
0
0
0
0
2
1
POINT (-75.18744029819194 39.94735998737308)
40357
1
0
0
0
0
0
0
1
0
2
...
1343
0
0
0
0
0
0
2
1
POINT (-75.18744029819194 39.94735998737308)
41201
1
0
0
0
0
0
0
1
0
7
...
1339
0
0
0
0
0
0
3
1
POINT (-75.18744029819194 39.94735998737308)
41403
1
0
0
0
0
0
0
7
0
8
...
148
0
0
0
0
1
0
1
1
POINT (-75.18744029819194 39.94735998737308)
41760
3
0
0
0
0
0
0
1
0
8
...
1816
0
0
0
0
0
0
3
1
POINT (-75.18744029819194 39.94735998737308)
43054
3
0
0
0
0
0
0
1
0
4
...
2006
0
0
0
0
0
0
3
1
POINT (-75.18744029819194 39.94735998737308)
90 rows × 54 columns
In [30]:
print(street_gpd.shape)
street_gpd.head().T
(41022, 31)
Out[30]:
0
1
2
3
4
CLASS
3
3
3
5
4
FNODE_
2
2
1
6
5
LENGTH
449.863
540.083
446.104
447.261
148.216
LPOLY_
0
0
0
0
0
L_F_ADD
1500
400
1600
1600
350
L_HUNDRED
1500
400
1600
1600
300
L_T_ADD
1598
498
1698
1698
398
MULTI_REP
0
0
0
0
0
NEWSEGDATE
None
None
None
None
None
ONEWAY
FT
TF
FT
TF
TF
PRE_DIR
None
N
None
None
N
RESPONSIBL
FAM
FAM
FAM
None
None
RPOLY_
0
0
0
0
0
R_F_ADD
1501
401
1601
1601
351
R_HUNDRED
1500
400
1600
1600
300
R_T_ADD
1599
499
1699
1699
399
SEG_ID
420708
422065
420702
420732
420718
SHAPE_LEN
449.863
540.083
446.104
447.261
148.216
STCL2_
1
2
3
4
5
STCL2_ID
85205
86540
85199
85229
85215
STNAME
CALLOWHILL ST
N 15TH ST
CALLOWHILL ST
CARLTON ST
N 17TH ST
STREETLABE
CALLOWHILL ST
N 15TH ST
CALLOWHILL ST
CARLTON ST
N 17TH ST
ST_CODE
20880
88070
20880
21440
88110
ST_NAME
CALLOWHILL
15TH
CALLOWHILL
CARLTON
17TH
ST_TYPE
ST
ST
ST
ST
ST
SUF_DIR
None
None
None
None
None
TNODE_
1
3
4
5
4
UPDATE_
1997-02-10
1997-02-10
1997-02-10
1997-02-10
1997-06-27
ZIP_LEFT
19130
19130
19130
19103
19103
ZIP_RIGHT
19130
19130
19130
19103
19103
geometry
LINESTRING (-75.16371257852462 39.959816984389...
LINESTRING (-75.16371257852462 39.959816984389...
LINESTRING (-75.16529687686355 39.960013687935...
LINESTRING (-75.16537539004391 39.959601782423...
LINESTRING (-75.16694872703492 39.959805718080...
In [31]:
vpi_data_gpd = gpd.read_file('../philly/Vehicle & Pedestrian Investigations.geojson')
print(vpi_data_gpd.shape)
vpi_data_gpd.head().T
(1296689, 29)
Out[31]:
0
1
2
3
4
:@computed_region_bbgf_pidf
10
14
14
5
5
:created_at
2016-09-28T13:00:32
2016-09-28T13:00:32
2016-09-28T13:00:32
2016-09-28T13:00:32
2016-09-28T13:00:32
:id
row-tywx_zg46-cw84
row-z92m.d7r6~4rji
row-4sb2-6s78_xnur
row-aa32.djwa-fx39
row-xwa5_hp4e-frtk
:updated_at
2016-09-28T13:00:32
2016-09-28T13:00:32
2016-09-28T13:00:32
2016-09-28T13:00:32
2016-09-28T13:00:32
:version
rv-y6zc~yrni.ujrq
rv-z6a6-xsyb~mudr
rv-fwuf_8mhe-vjan
rv-kpt5-325y_zr5u
rv-buwm-pueh_vce5
age
51
28
28
62
30
datetimeoccur
2016-09-08T10:25:00
2016-09-09T01:34:00
2016-09-09T00:38:00
2016-09-08T10:10:00
2016-09-08T11:20:00
dcnumber
None
None
None
None
None
districtoccur
14
18
18
06
06
gender
Male
Female
Female
Male
Male
geometry
POINT (-75.166124 40.030666)
POINT (-75.232809 39.960947)
POINT (-75.240836 39.961337)
POINT (-75.156342 39.953572)
POINT (-75.157916 39.953782)
id
1555287
1556358
1556159
1555244
1556582
individual_arrested
0
1
0
0
0
individual_contraband
0
1
0
0
0
individual_frisked
0
0
0
0
0
individual_searched
0
1
0
0
0
inside_or_outside
Outside
Outside
Outside
Outside
Outside
location
0 BLOCK COLLOM ST
0 BLOCK S 56TH ST
0 BLOCK S 60TH ST
100 BLOCK N 10TH ST
100 BLOCK N 11TH ST
objectid
2426243
2426326
2426332
2426344
2426346
point_x
-75.16612389
-75.23280889
-75.24083602
-75.15634199
-75.15791644
point_y
40.03066595
39.96094721
39.96133678
39.9535721
39.95378192
psa
142
182
181
062
062
race
Black - Non-Latino
Black - Non-Latino
Black - Non-Latino
Black - Non-Latino
White - Non-Latino
stopcode
2701
2701
2701
2702
2702
stoptype
pedestrian
pedestrian
pedestrian
vehicle
vehicle
vehicle_contraband
0
0
0
0
0
vehicle_frisked
0
0
0
0
0
vehicle_searched
0
0
0
0
0
weekday
THURSDAY
FRIDAY
FRIDAY
THURSDAY
THURSDAY
In [32]:
vpi_data_gpd.info()
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1296689 entries, 0 to 1296688
Data columns (total 29 columns):
:@computed_region_bbgf_pidf 1198809 non-null object
:created_at 1296689 non-null object
:id 1296689 non-null object
:updated_at 1296689 non-null object
:version 1296689 non-null object
age 1294550 non-null object
datetimeoccur 1296689 non-null object
dcnumber 0 non-null object
districtoccur 1296688 non-null object
gender 1296555 non-null object
geometry 1202146 non-null object
id 1296689 non-null object
individual_arrested 1296689 non-null object
individual_contraband 1296689 non-null object
individual_frisked 1296689 non-null object
individual_searched 1296689 non-null object
inside_or_outside 1296689 non-null object
location 1264246 non-null object
objectid 1296689 non-null object
point_x 1202146 non-null object
point_y 1202146 non-null object
psa 1296686 non-null object
race 1296689 non-null object
stopcode 1296689 non-null object
stoptype 1296689 non-null object
vehicle_contraband 1296689 non-null object
vehicle_frisked 1296689 non-null object
vehicle_searched 1296689 non-null object
weekday 1296689 non-null object
dtypes: object(29)
memory usage: 286.9+ MB
In [2]:
street_gpd = gpd.read_file('../philly/GeoJSON_data/Street_Centerline_qgis3.geojson')
In [23]:
philly311_csv = pd.read_csv('../philly/311_Requests.csv')
print(philly311_csv.shape)
philly311_csv.head().T
(1110623, 17)
Out[23]:
0
1
2
3
4
Service Request ID
10895664
10895669
10894010
10894151
10894702
Requested Date/Time
08/18/2016 06:32:14 PM
08/18/2016 06:35:32 PM
08/18/2016 09:43:32 AM
08/18/2016 10:19:40 AM
08/18/2016 12:36:47 PM
Service Name
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
Illegal Dumping
Rubbish/Recyclable Material Collection
Service Code
SR-ST03
SR-ST03
SR-ST03
SR-ST02
SR-ST03
Agency Responsible
Streets Department
Streets Department
Streets Department
Streets Department
Streets Department
Status
Closed
Closed
Closed
Closed
Closed
Service Notice
2 Business Days
2 Business Days
2 Business Days
5 Business Days
2 Business Days
Updated Date/Time
08/22/2016 06:15:23 AM
08/22/2016 06:15:26 AM
08/22/2016 06:15:29 AM
08/22/2016 06:15:32 AM
08/22/2016 06:15:35 AM
Expected Date/Time
08/21/2016 08:00:00 PM
08/21/2016 08:00:00 PM
08/21/2016 08:00:00 PM
08/24/2016 08:00:00 PM
08/21/2016 08:00:00 PM
Address
4500 N LAWRENCE ST
4400 N LAWRENCE ST
435 W CAREY ST
3255 N 11TH ST
6500 W WALNUT PARK DR
Zipcode
19140
19140
NaN
19140
NaN
Media URL
NaN
NaN
NaN
NaN
NaN
Location
POINT (-75.133247 40.019631)
POINT (-75.133589 40.01808)
POINT (-75.135892 40.008402)
POINT (-75.147295 40.002129)
POINT (-75.120826 40.050084)
Latitude
40.0196
40.0181
40.0084
40.0021
40.0501
Longitude
-75.1332
-75.1336
-75.1359
-75.1473
-75.1208
Zipcodes
41
41
41
41
9
Census Tracts 2010 - 2013
65
337
337
196
304
In [3]:
#data = DataFrame.from_csv(os.path.join(directory, filename), index_col=False)
points = [Point(row['Longitude'], row['Latitude']) for key, row in philly311_csv.iterrows()]
geo_df = GeoDataFrame(philly311_csv,geometry=points)
geo_df.to_file('../philly/GeoJSON_data/311_Requests.geojson', driver='GeoJSON')
In [3]:
philly311_gpd = gpd.read_file('../philly/GeoJSON_data/311_Requests.geojson')
In [5]:
print(philly311_gpd.shape)
philly311_gpd.head().T
(1110623, 18)
Out[5]:
0
1
2
3
4
Address
4500 N LAWRENCE ST
4400 N LAWRENCE ST
435 W CAREY ST
3255 N 11TH ST
6500 W WALNUT PARK DR
Agency Responsible
Streets Department
Streets Department
Streets Department
Streets Department
Streets Department
Census Tracts 2010 - 2013
65
337
337
196
304
Expected Date/Time
08/21/2016 08:00:00 PM
08/21/2016 08:00:00 PM
08/21/2016 08:00:00 PM
08/24/2016 08:00:00 PM
08/21/2016 08:00:00 PM
Latitude
40.0196
40.0181
40.0084
40.0021
40.0501
Location
POINT (-75.133247 40.019631)
POINT (-75.133589 40.01808)
POINT (-75.135892 40.008402)
POINT (-75.147295 40.002129)
POINT (-75.120826 40.050084)
Longitude
-75.1332
-75.1336
-75.1359
-75.1473
-75.1208
Media URL
None
None
None
None
None
Requested Date/Time
08/18/2016 06:32:14 PM
08/18/2016 06:35:32 PM
08/18/2016 09:43:32 AM
08/18/2016 10:19:40 AM
08/18/2016 12:36:47 PM
Service Code
SR-ST03
SR-ST03
SR-ST03
SR-ST02
SR-ST03
Service Name
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
Illegal Dumping
Rubbish/Recyclable Material Collection
Service Notice
2 Business Days
2 Business Days
2 Business Days
5 Business Days
2 Business Days
Service Request ID
10895664
10895669
10894010
10894151
10894702
Status
Closed
Closed
Closed
Closed
Closed
Updated Date/Time
08/22/2016 06:15:23 AM
08/22/2016 06:15:26 AM
08/22/2016 06:15:29 AM
08/22/2016 06:15:32 AM
08/22/2016 06:15:35 AM
Zipcode
19140
19140
None
19140
None
Zipcodes
41
41
41
41
9
geometry
POINT (-75.13324674099999 40.019630884)
POINT (-75.13358943099999 40.018080355)
POINT (-75.135892102 40.008401585)
POINT (-75.14729510799999 40.002128888)
POINT (-75.12082581 40.050084238)
In [54]:
len(philly311_gpd['Expected Date/Time'])
Out[54]:
1110623
In [53]:
philly311_gpd[philly311_gpd['Expected Date/Time'] == NaN]
Out[53]:
Address
Agency Responsible
Census Tracts 2010 - 2013
Expected Date/Time
Latitude
Location
Longitude
Media URL
Requested Date/Time
Service Code
Service Name
Service Notice
Service Request ID
Status
Updated Date/Time
Zipcode
Zipcodes
geometry
In [57]:
philly311_gpd[philly311_gpd['Expected Date/Time'].str.contains('2016', na=False)]
Out[57]:
Address
Agency Responsible
Census Tracts 2010 - 2013
Expected Date/Time
Latitude
Location
Longitude
Media URL
Requested Date/Time
Service Code
Service Name
Service Notice
Service Request ID
Status
Updated Date/Time
Zipcode
Zipcodes
geometry
0
4500 N LAWRENCE ST
Streets Department
65.0
08/21/2016 08:00:00 PM
40.019631
POINT (-75.133247 40.019631)
-75.133247
None
08/18/2016 06:32:14 PM
SR-ST03
Rubbish/Recyclable Material Collection
2 Business Days
10895664
Closed
08/22/2016 06:15:23 AM
19140
41.0
POINT (-75.13324674099999 40.019630884)
1
4400 N LAWRENCE ST
Streets Department
337.0
08/21/2016 08:00:00 PM
40.018080
POINT (-75.133589 40.01808)
-75.133589
None
08/18/2016 06:35:32 PM
SR-ST03
Rubbish/Recyclable Material Collection
2 Business Days
10895669
Closed
08/22/2016 06:15:26 AM
19140
41.0
POINT (-75.13358943099999 40.018080355)
2
435 W CAREY ST
Streets Department
337.0
08/21/2016 08:00:00 PM
40.008402
POINT (-75.135892 40.008402)
-75.135892
None
08/18/2016 09:43:32 AM
SR-ST03
Rubbish/Recyclable Material Collection
2 Business Days
10894010
Closed
08/22/2016 06:15:29 AM
None
41.0
POINT (-75.135892102 40.008401585)
3
3255 N 11TH ST
Streets Department
196.0
08/24/2016 08:00:00 PM
40.002129
POINT (-75.147295 40.002129)
-75.147295
None
08/18/2016 10:19:40 AM
SR-ST02
Illegal Dumping
5 Business Days
10894151
Closed
08/22/2016 06:15:32 AM
19140
41.0
POINT (-75.14729510799999 40.002128888)
4
6500 W WALNUT PARK DR
Streets Department
304.0
08/21/2016 08:00:00 PM
40.050084
POINT (-75.120826 40.050084)
-75.120826
None
08/18/2016 12:36:47 PM
SR-ST03
Rubbish/Recyclable Material Collection
2 Business Days
10894702
Closed
08/22/2016 06:15:35 AM
None
9.0
POINT (-75.12082581 40.050084238)
5
215 W ASHDALE ST
Streets Department
64.0
08/21/2016 08:00:00 PM
40.027507
POINT (-75.128343 40.027507)
-75.128343
None
08/18/2016 03:22:16 PM
SR-ST03
Rubbish/Recyclable Material Collection
2 Business Days
10895291
Closed
08/22/2016 06:15:41 AM
None
9.0
POINT (-75.12834329200001 40.027507442)
6
3119 N BROAD ST
Streets Department
197.0
08/21/2016 08:00:00 PM
40.000762
POINT (-75.1531 40.000762)
-75.153100
https://d17aqltn7cihbm.cloudfront.net/uploads/...
08/18/2016 03:35:41 PM
SR-ST03
Rubbish/Recyclable Material Collection
2 Business Days
10895342
Closed
08/22/2016 06:15:44 AM
None
33.0
POINT (-75.15310022199999 40.000761795)
7
S 22ND ST & MOORE ST
Streets Department
151.0
09/11/2016 08:00:00 PM
39.929731
POINT (-75.181946 39.929731)
-75.181946
None
09/07/2016 12:00:19 PM
SR-ST01
Street Defect
3 Business Days
10925505
Closed
10/22/2016 07:31:06 AM
None
46.0
POINT (-75.18194554300001 39.929730972)
8
158 DIAMOND ST
Streets Department
24.0
08/18/2016 08:00:00 PM
39.981899
POINT (-75.134993 39.981899)
-75.134993
None
08/17/2016 02:39:55 PM
SR-ST03
Rubbish/Recyclable Material Collection
2 Business Days
10893003
Closed
08/22/2016 06:30:26 AM
19122
11.0
POINT (-75.13499281599999 39.9818993)
9
158 DIAMOND ST
Streets Department
24.0
08/18/2016 08:00:00 PM
39.981899
POINT (-75.134993 39.981899)
-75.134993
None
08/17/2016 02:41:10 PM
SR-ST03
Rubbish/Recyclable Material Collection
2 Business Days
10893006
Closed
08/22/2016 06:30:29 AM
19122
11.0
POINT (-75.13499281599999 39.9818993)
10
1001 POPLAR ST
Streets Department
179.0
08/24/2016 08:00:00 PM
39.968628
POINT (-75.153274 39.968628)
-75.153274
None
08/19/2016 01:25:33 AM
SR-ST02
Illegal Dumping
5 Business Days
10895773
Closed
08/22/2016 06:30:32 AM
None
12.0
POINT (-75.15327389300001 39.968628368)
11
N 7TH ST & MARKET ST
Streets Department
148.0
08/21/2016 08:00:00 PM
39.950938
POINT (-75.152002 39.950938)
-75.152002
None
08/19/2016 09:09:39 AM
SR-ST25
Traffic Signal Emergency
1 Business Days
10895980
Closed
08/22/2016 06:30:34 AM
None
21.0
POINT (-75.152001849 39.950937692)
12
2410 N 6TH ST
Streets Department
30.0
08/24/2016 08:00:00 PM
39.988103
POINT (-75.143105 39.988103)
-75.143105
None
08/18/2016 10:09:52 AM
SR-ST02
Illegal Dumping
5 Business Days
10894112
Closed
08/22/2016 06:30:36 AM
19133
34.0
POINT (-75.143104624 39.988102857)
13
2236 FRANKFORD AVE
Streets Department
27.0
08/21/2016 08:00:00 PM
39.980433
POINT (-75.128643 39.980433)
-75.128643
None
08/18/2016 10:09:58 AM
SR-ST03
Rubbish/Recyclable Material Collection
2 Business Days
10894113
Closed
08/22/2016 06:30:39 AM
None
14.0
POINT (-75.12864297899999 39.980433165)
14
802 W INDIANA AVE
Streets Department
366.0
08/24/2016 08:00:00 PM
39.997314
POINT (-75.144286 39.997314)
-75.144286
None
08/18/2016 10:49:48 AM
SR-ST02
Illegal Dumping
5 Business Days
10894272
Closed
08/22/2016 06:30:41 AM
19133
34.0
POINT (-75.14428571399999 39.99731393)
15
GERMANTOWN AVE & E TULPEHOCKEN ST
Streets Department
381.0
08/18/2016 08:00:00 PM
40.042095
POINT (-75.179982 40.042095)
-75.179982
None
08/18/2016 10:36:40 AM
SR-ST25
Traffic Signal Emergency
1 Business Days
10894221
Closed
08/22/2016 06:30:43 AM
None
45.0
POINT (-75.17998167099999 40.042094772)
16
E ROOSEVELT BLVD & RYAN AVE
Streets Department
73.0
08/21/2016 08:00:00 PM
40.053993
POINT (-75.048882 40.053993)
-75.048882
None
08/19/2016 12:39:50 PM
SR-ST25
Traffic Signal Emergency
1 Business Days
10896703
Closed
08/22/2016 06:30:46 AM
None
27.0
POINT (-75.04888205100001 40.053992761)
17
2801 ORMES ST
Streets Department
127.0
08/24/2016 08:00:00 PM
39.992159
POINT (-75.125898 39.992159)
-75.125898
https://d17aqltn7cihbm.cloudfront.net/uploads/...
08/18/2016 11:59:42 AM
SR-ST02
Illegal Dumping
5 Business Days
10894554
Closed
08/22/2016 06:30:48 AM
None
35.0
POINT (-75.12589782800001 39.992159329)
18
2556 E LEHIGH AVE
Streets Department
339.0
08/21/2016 08:00:00 PM
39.979571
POINT (-75.115665 39.979571)
-75.115665
None
08/18/2016 12:23:16 PM
SR-ST03
Rubbish/Recyclable Material Collection
2 Business Days
10894654
Closed
08/22/2016 06:30:50 AM
19125
14.0
POINT (-75.11566479699999 39.979570733)
19
185 W ATLANTIC ST
Streets Department
116.0
08/24/2016 08:00:00 PM
40.004266
POINT (-75.13116 40.004266)
-75.131160
None
08/18/2016 01:15:55 PM
SR-ST02
Illegal Dumping
5 Business Days
10894823
Closed
08/22/2016 06:30:53 AM
None
41.0
POINT (-75.13115956499998 40.00426592199999)
20
358 W INDIANA AVE
Streets Department
166.0
08/24/2016 08:00:00 PM
39.996374
POINT (-75.137101 39.996374)
-75.137101
https://d17aqltn7cihbm.cloudfront.net/uploads/...
08/18/2016 01:46:09 PM
SR-ST02
Illegal Dumping
5 Business Days
10894952
Closed
08/22/2016 06:30:55 AM
None
34.0
POINT (-75.13710128599999 39.996374367)
21
N 12TH ST & CALLOWHILL ST
Streets Department
294.0
08/24/2016 08:00:00 PM
39.959114
POINT (-75.158311 39.959114)
-75.158311
None
08/18/2016 03:58:16 PM
SR-ST02
Illegal Dumping
5 Business Days
10895425
Closed
08/22/2016 06:30:58 AM
None
12.0
POINT (-75.15831109400001 39.959113891)
22
2642 BRADDOCK ST
Streets Department
27.0
08/24/2016 08:00:00 PM
39.986803
POINT (-75.124189 39.986803)
-75.124189
https://d17aqltn7cihbm.cloudfront.net/uploads/...
08/18/2016 05:37:01 PM
SR-ST02
Illegal Dumping
5 Business Days
10895601
Closed
08/22/2016 06:31:00 AM
None
14.0
POINT (-75.12418933900001 39.986802981)
23
2939 N 8TH ST
Streets Department
366.0
08/24/2016 08:00:00 PM
39.996795
POINT (-75.144216 39.996795)
-75.144216
None
08/18/2016 06:17:04 PM
SR-ST02
Illegal Dumping
5 Business Days
10895651
Closed
08/22/2016 06:31:03 AM
19133
34.0
POINT (-75.144216247 39.996795328)
24
2172 E NORRIS ST
Streets Department
24.0
06/20/2016 08:00:00 PM
39.978950
POINT (-75.131078 39.97895)
-75.131078
https://d17aqltn7cihbm.cloudfront.net/uploads/...
06/14/2016 09:13:38 AM
SR-ST02
Illegal Dumping
5 Business Days
10783983
Closed
08/22/2016 06:45:27 AM
None
14.0
POINT (-75.13107823199999 39.978950016)
25
210 E LIPPINCOTT ST
Streets Department
96.0
06/22/2016 08:00:00 PM
39.997179
POINT (-75.126095 39.997179)
-75.126095
https://d17aqltn7cihbm.cloudfront.net/uploads/...
06/16/2016 02:37:14 PM
SR-ST02
Illegal Dumping
5 Business Days
10789990
Closed
08/22/2016 06:45:30 AM
None
35.0
POINT (-75.12609503100001 39.997179121)
26
4701 WOODLAND AVE
Streets Department
347.0
06/19/2016 08:00:00 PM
39.942999
POINT (-75.211093 39.942999)
-75.211093
None
06/20/2016 04:29:59 AM
SR-ST25
Traffic Signal Emergency
None
10792903
Closed
08/22/2016 06:45:32 AM
None
44.0
POINT (-75.21109311500003 39.942999207)
27
2343 MOORE ST
Streets Department
151.0
11/29/2016 07:00:00 PM
39.930093
POINT (-75.184797 39.930093)
-75.184797
https://d17aqltn7cihbm.cloudfront.net/uploads/...
09/19/2016 03:02:11 PM
SR-ST01
Street Defect
46 Business Days
10946522
Closed
10/22/2016 07:31:07 AM
None
46.0
POINT (-75.18479705 39.930093432)
28
2336 MOORE ST
Streets Department
154.0
11/30/2016 07:00:00 PM
39.930072
POINT (-75.184722 39.930072)
-75.184722
None
09/21/2016 09:40:46 AM
SR-ST01
Street Defect
46 Business Days
10950045
Closed
10/22/2016 07:31:09 AM
None
46.0
POINT (-75.18472173500002 39.930072148)
29
2637 EARP ST
Streets Department
151.0
12/04/2016 07:00:00 PM
39.936029
POINT (-75.188489 39.936029)
-75.188489
None
09/22/2016 12:34:39 PM
SR-ST01
Street Defect
46 Business Days
10953184
Closed
10/22/2016 07:31:11 AM
None
47.0
POINT (-75.188488679 39.936029205)
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
1110589
6919 LINMORE AVE
License & Inspections
8.0
09/13/2016 08:00:00 PM
39.919788
POINT (-75.238772 39.919788)
-75.238772
None
08/17/2016 09:39:03 AM
SR-LI27
Vacant House or Commercial
20 Business Days
10891824
Closed
08/20/2016 08:47:13 PM
None
43.0
POINT (-75.23877243299999 39.919788369)
1110590
6919 LINMORE AVE
License & Inspections
8.0
09/11/2016 08:00:00 PM
39.919788
POINT (-75.238772 39.919788)
-75.238772
None
08/15/2016 09:24:23 AM
SR-LI27
Vacant House or Commercial
20 Business Days
10887126
Closed
08/20/2016 08:47:14 PM
None
43.0
POINT (-75.23877243299999 39.919788369)
1110591
5861 HAZEL AVE
License & Inspections
95.0
09/04/2016 08:00:00 PM
39.953197
POINT (-75.24043 39.953197)
-75.240430
None
08/08/2016 01:14:28 PM
SR-LI30
Zoning Residential
20 Business Days
10876974
Closed
08/20/2016 08:47:18 PM
None
44.0
POINT (-75.240430327 39.953196572)
1110592
1537 S VOGDES ST
Community Life Improvement Program
285.0
12/08/2016 07:00:00 PM
39.937538
POINT (-75.223854 39.937538)
-75.223854
None
08/05/2016 04:31:02 PM
SR-CL02
Vacant Lot Clean-Up
90 Business Days
10874798
Closed
08/20/2016 08:47:20 PM
None
44.0
POINT (-75.223853631 39.937537645)
1110593
1328 S CHADWICK ST
License & Inspections
257.0
08/30/2016 08:00:00 PM
39.933884
POINT (-75.172028 39.933884)
-75.172028
None
08/17/2016 01:12:05 PM
SR-LI06
Construction Site Task Force
10 Business Days
10892683
Closed
08/20/2016 08:47:22 PM
19146
47.0
POINT (-75.17202777999999 39.933884327)
1110594
6125 CARPENTER ST
License & Inspections
165.0
02/18/2016 07:00:00 PM
39.949032
POINT (-75.246534 39.949032)
-75.246534
None
01/22/2016 12:27:08 PM
SR-LI16
Fire Residential or Commercial
20 Business Days
10522275
Open
08/20/2016 08:47:23 PM
None
44.0
POINT (-75.24653444499999 39.949031618)
1110595
1430 S 56TH ST
Philly311 Contact Center
273.0
07/27/2016 08:00:00 PM
39.940268
POINT (-75.228646 39.940268)
-75.228646
None
06/30/2016 01:32:43 PM
SR-IR01
Information Request
None
10813962
Open
08/20/2016 08:47:24 PM
None
44.0
POINT (-75.228645574 39.940268127)
1110596
6300 CITY AVE
License & Inspections
279.0
08/31/2016 08:00:00 PM
39.989675
POINT (-75.251068 39.989675)
-75.251068
None
08/18/2016 02:37:13 PM
SR-LI06
Construction Site Task Force
10 Business Days
10895131
Closed
08/20/2016 08:47:51 PM
None
26.0
POINT (-75.25106848 39.989674543)
1110597
1432 S 56TH ST
License & Inspections
273.0
07/27/2016 08:00:00 PM
39.940228
POINT (-75.228599 39.940228)
-75.228599
None
06/30/2016 01:31:20 PM
SR-LI27
Vacant House or Commercial
20 Business Days
10813956
Closed
08/20/2016 08:47:58 PM
None
44.0
POINT (-75.228598658 39.940227542)
1110598
2145 S 58TH ST
Community Life Improvement Program
344.0
11/30/2016 07:00:00 PM
39.932697
POINT (-75.224917 39.932697)
-75.224917
https://d17aqltn7cihbm.cloudfront.net/uploads/...
07/28/2016 11:05:14 AM
SR-CL02
Vacant Lot Clean-Up
90 Business Days
10859598
Open
08/20/2016 08:48:01 PM
None
44.0
POINT (-75.224916713 39.932696859)
1110599
1504 S 17TH ST
License & Inspections
257.0
08/31/2016 08:00:00 PM
39.932008
POINT (-75.173 39.932008)
-75.173000
None
08/18/2016 12:01:18 PM
SR-LI02
Building Construction
10 Business Days
10894561
Closed
08/20/2016 08:48:27 PM
19146
47.0
POINT (-75.17300036100001 39.932008048)
1110600
2755 N 47TH ST
Streets Department
355.0
06/22/2016 08:00:00 PM
40.001945
POINT (-75.223458 40.001945)
-75.223458
None
06/20/2016 09:52:00 AM
SR-ST10
Dead Animal in Street
3 Business Days
10793368
Closed
08/20/2016 08:48:39 PM
None
32.0
POINT (-75.223457733 40.001945296)
1110601
1429 S 53RD ST
License & Inspections
347.0
09/01/2016 08:00:00 PM
39.940076
POINT (-75.22086 39.940076)
-75.220860
None
08/05/2016 09:07:51 AM
SR-LI27
Vacant House or Commercial
20 Business Days
10873182
Closed
08/20/2016 08:48:45 PM
19143
44.0
POINT (-75.220860007 39.940076284)
1110602
1415 S CHADWICK ST
License & Inspections
257.0
08/30/2016 08:00:00 PM
39.932943
POINT (-75.172203 39.932943)
-75.172203
None
08/17/2016 01:13:19 PM
SR-LI06
Construction Site Task Force
10 Business Days
10892687
Closed
08/20/2016 08:48:47 PM
19146
47.0
POINT (-75.172203258 39.93294334)
1110603
5707 CHESTER AVE
Community Life Improvement Program
285.0
12/05/2016 07:00:00 PM
39.936517
POINT (-75.227432 39.936517)
-75.227432
None
08/02/2016 10:09:38 AM
SR-CL02
Vacant Lot Clean-Up
90 Business Days
10866858
Closed
08/20/2016 08:48:48 PM
None
44.0
POINT (-75.22743167100001 39.936517469)
1110604
5443 HADFIELD ST
License & Inspections
345.0
04/06/2016 08:00:00 PM
39.943872
POINT (-75.229648 39.943872)
-75.229648
None
03/10/2016 07:48:55 PM
SR-LI27
Vacant House or Commercial
20 Business Days
10624380
Open
08/20/2016 08:48:49 PM
None
44.0
POINT (-75.229647884 39.943872024)
1110605
5215 W BERKS ST
License & Inspections
353.0
07/25/2016 08:00:00 PM
39.986730
POINT (-75.22779 39.98673)
-75.227790
None
06/28/2016 12:36:57 PM
SR-LI27
Vacant House or Commercial
20 Business Days
10808787
Closed
08/20/2016 08:48:56 PM
None
32.0
POINT (-75.22779014299999 39.986730004)
1110606
1745 N BAMBREY ST
License & Inspections
187.0
09/01/2016 08:00:00 PM
39.982231
POINT (-75.176454 39.982231)
-75.176454
None
08/19/2016 12:04:09 PM
SR-LI06
Construction Site Task Force
10 Business Days
10896576
Closed
08/20/2016 08:49:01 PM
None
10.0
POINT (-75.176453814 39.98223131)
1110607
2814 CAMBRIDGE ST
Parks & Recreation
361.0
09/08/2016 08:00:00 PM
39.973699
POINT (-75.182467 39.973699)
-75.182467
None
08/20/2016 09:49:48 PM
SR-PR08
Street Trees
15 Business Days
10897823
Open
08/20/2016 09:49:56 PM
None
31.0
POINT (-75.182467199 39.973699354)
1110608
N 8TH ST & BROWN ST
Streets Department
357.0
06/26/2016 08:00:00 PM
39.965519
POINT (-75.151054 39.965519)
-75.151054
None
06/21/2016 08:32:17 AM
SR-ST02
Illegal Dumping
5 Business Days
10795362
Closed
08/21/2016 06:30:21 AM
19123
12.0
POINT (-75.15105387600001 39.965518579)
1110609
2000 BRIDGE ST
Philly311 Contact Center
230.0
08/22/2016 04:00:10 AM
40.015825
POINT (-75.071432 40.015825)
-75.071432
None
08/21/2016 07:30:10 AM
SR-MI01
Miscellaneous
None
10897847
Open
08/21/2016 07:30:18 AM
None
13.0
POINT (-75.07143223 40.015824907)
1110610
4951 UNRUH AVE
Community Life Improvement Program
318.0
08/29/2016 08:00:00 PM
40.020947
POINT (-75.043656 40.020947)
-75.043656
https://d17aqltn7cihbm.cloudfront.net/uploads/...
08/21/2016 07:47:22 AM
SR-CL01
Graffiti Removal
None
10897850
Closed
08/21/2016 07:47:49 AM
None
36.0
POINT (-75.04365569300001 40.020946558)
1110611
1198 S 4TH ST
Parks & Recreation
215.0
08/20/2016 08:00:00 PM
39.933411
POINT (-75.151019 39.933411)
-75.151019
None
08/21/2016 11:17:13 AM
SR-PR01
Parks and Rec Safety and Maintenance
0 0
10897895
Open
08/21/2016 11:17:18 AM
None
48.0
POINT (-75.15101909500002 39.933411423)
1110612
2834 S 8TH ST
Streets Department
336.0
07/03/2016 08:00:00 PM
39.913396
POINT (-75.162031 39.913396)
-75.162031
None
06/20/2016 06:23:38 PM
SR-ST04
Street Light Outage
10 Business Days
10795126
Closed
08/21/2016 12:00:30 PM
None
22.0
POINT (-75.162030652 39.913395637)
1110613
2814 CAMBRIDGE ST
Parks & Recreation
361.0
09/08/2016 08:00:00 PM
39.973699
POINT (-75.182467 39.973699)
-75.182467
None
08/21/2016 01:02:10 PM
SR-PR08
Street Trees
15 Business Days
10897951
Open
08/21/2016 01:02:16 PM
None
31.0
POINT (-75.182467199 39.973699354)
1110614
4507 SPRUCE ST
Parks & Recreation
291.0
09/08/2016 08:00:00 PM
39.952934
POINT (-75.2126 39.952934)
-75.212600
None
08/21/2016 02:28:11 PM
SR-PR08
Street Trees
15 Business Days
10897976
Open
08/21/2016 02:28:18 PM
None
40.0
POINT (-75.212599651 39.952934083)
1110615
1045 LUKENS ST
Parks & Recreation
292.0
09/08/2016 08:00:00 PM
40.128054
POINT (-75.000268 40.128054)
-75.000268
None
08/21/2016 07:57:20 PM
SR-PR08
Street Trees
15 Business Days
10898038
Open
08/21/2016 07:57:25 PM
None
7.0
POINT (-75.000267529 40.128053663)
1110616
2814 CAMBRIDGE ST
Parks & Recreation
361.0
09/08/2016 08:00:00 PM
39.973699
POINT (-75.182467 39.973699)
-75.182467
None
08/21/2016 08:04:56 PM
SR-PR08
Street Trees
15 Business Days
10898043
Open
08/21/2016 08:05:03 PM
None
31.0
POINT (-75.182467199 39.973699354)
1110617
2017 PIERCE ST
License & Inspections
217.0
02/24/2016 07:00:00 PM
39.929778
POINT (-75.178886 39.929778)
-75.178886
None
02/04/2016 09:05:52 AM
SR-LI03
Building Dangerous
15 Business Days
10564918
Open
08/21/2016 08:45:46 PM
19145
46.0
POINT (-75.17888564899999 39.929777858)
1110622
2100 RIDGE AVE
Philly311 Contact Center
177.0
08/22/2016 04:00:11 AM
39.977982
POINT (-75.1698 39.977982)
-75.169800
None
08/21/2016 11:57:01 PM
SR-MI01
Miscellaneous
None
10898075
Open
08/21/2016 11:57:28 PM
None
10.0
POINT (-75.16979963499998 39.977982414)
277129 rows × 18 columns
In [59]:
print(philly311_gpd[philly311_gpd['Expected Date/Time'].str.contains('2013', na=False)].shape[0])
print(philly311_gpd[philly311_gpd['Expected Date/Time'].str.contains('2014', na=False)].shape[0])
print(philly311_gpd[philly311_gpd['Expected Date/Time'].str.contains('2015', na=False)].shape[0])
print(philly311_gpd[philly311_gpd['Expected Date/Time'].str.contains('2016', na=False)].shape[0])
0
2720
137971
277129
In [60]:
print(philly311_gpd[philly311_gpd['Requested Date/Time'].str.contains('2013', na=False)].shape[0])
print(philly311_gpd[philly311_gpd['Requested Date/Time'].str.contains('2014', na=False)].shape[0])
print(philly311_gpd[philly311_gpd['Requested Date/Time'].str.contains('2015', na=False)].shape[0])
print(philly311_gpd[philly311_gpd['Requested Date/Time'].str.contains('2016', na=False)].shape[0])
0
25903
569889
514831
In [4]:
philly311_gpd = philly311_gpd.drop('Media URL', 1)
philly311_gpd['Zipcode'] = philly311_gpd['Zipcode'].fillna(0)
philly311_gpd['Service Notice'] = philly311_gpd['Service Notice'].fillna('-')
In [17]:
print(sum(isnan(philly311_gpd['Latitude'])==0))
philly311_gpd.info()
329488
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1110623 entries, 0 to 1110622
Data columns (total 17 columns):
Address 329983 non-null object
Agency Responsible 1095993 non-null object
Census Tracts 2010 - 2013 329282 non-null float64
Expected Date/Time 419829 non-null object
Latitude 329488 non-null float64
Location 329488 non-null object
Longitude 329488 non-null float64
Requested Date/Time 1110623 non-null object
Service Code 1081747 non-null object
Service Name 1110623 non-null object
Service Notice 1110623 non-null object
Service Request ID 1110623 non-null int64
Status 1110623 non-null object
Updated Date/Time 1110623 non-null object
Zipcode 1110623 non-null object
Zipcodes 329279 non-null float64
geometry 329488 non-null object
dtypes: float64(4), int64(1), object(12)
memory usage: 144.0+ MB
In [18]:
philly311_gpd[isnan(philly311_gpd['Latitude'])==0].shape
Out[18]:
(329488, 17)
In [19]:
philly311_gpd[isnan(philly311_gpd['Latitude'])==0].head().T
Out[19]:
0
1
2
3
4
Address
4500 N LAWRENCE ST
4400 N LAWRENCE ST
435 W CAREY ST
3255 N 11TH ST
6500 W WALNUT PARK DR
Agency Responsible
Streets Department
Streets Department
Streets Department
Streets Department
Streets Department
Census Tracts 2010 - 2013
65
337
337
196
304
Expected Date/Time
08/21/2016 08:00:00 PM
08/21/2016 08:00:00 PM
08/21/2016 08:00:00 PM
08/24/2016 08:00:00 PM
08/21/2016 08:00:00 PM
Latitude
40.0196
40.0181
40.0084
40.0021
40.0501
Location
POINT (-75.133247 40.019631)
POINT (-75.133589 40.01808)
POINT (-75.135892 40.008402)
POINT (-75.147295 40.002129)
POINT (-75.120826 40.050084)
Longitude
-75.1332
-75.1336
-75.1359
-75.1473
-75.1208
Requested Date/Time
08/18/2016 06:32:14 PM
08/18/2016 06:35:32 PM
08/18/2016 09:43:32 AM
08/18/2016 10:19:40 AM
08/18/2016 12:36:47 PM
Service Code
SR-ST03
SR-ST03
SR-ST03
SR-ST02
SR-ST03
Service Name
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
Illegal Dumping
Rubbish/Recyclable Material Collection
Service Notice
2 Business Days
2 Business Days
2 Business Days
5 Business Days
2 Business Days
Service Request ID
10895664
10895669
10894010
10894151
10894702
Status
Closed
Closed
Closed
Closed
Closed
Updated Date/Time
08/22/2016 06:15:23 AM
08/22/2016 06:15:26 AM
08/22/2016 06:15:29 AM
08/22/2016 06:15:32 AM
08/22/2016 06:15:35 AM
Zipcode
19140
19140
0
19140
0
Zipcodes
41
41
41
41
9
geometry
POINT (-75.13324674099999 40.019630884)
POINT (-75.13358943099999 40.018080355)
POINT (-75.135892102 40.008401585)
POINT (-75.14729510799999 40.002128888)
POINT (-75.12082581 40.050084238)
In [62]:
philly311 = philly311_gpd[isnan(philly311_gpd['Latitude'])==0]
In [63]:
print(philly311[philly311['Expected Date/Time'].str.contains('2013', na=False)].shape[0])
print(philly311[philly311['Expected Date/Time'].str.contains('2014', na=False)].shape[0])
print(philly311[philly311['Expected Date/Time'].str.contains('2015', na=False)].shape[0])
print(philly311[philly311['Expected Date/Time'].str.contains('2016', na=False)].shape[0])
0
2715
137705
155087
In [64]:
print(philly311[philly311['Requested Date/Time'].str.contains('2013', na=False)].shape[0])
print(philly311[philly311['Requested Date/Time'].str.contains('2014', na=False)].shape[0])
print(philly311[philly311['Requested Date/Time'].str.contains('2015', na=False)].shape[0])
print(philly311[philly311['Requested Date/Time'].str.contains('2016', na=False)].shape[0])
0
6103
154287
169098
In [16]:
philly_seg_buffer_gp = street_gpd[['SEG_ID','STCL2_ID','geometry']].copy()
philly_seg_buffer_gp.geometry = philly_seg_buffer_gp.buffer(0.0001)
philly_311_buffer_gpd = philly311[['Agency Responsible','Service Request ID', 'geometry','Service Name']].copy()
philly_311_buffer_gpd.geometry = philly_311_buffer_gpd.buffer(0.0001)
joined = sjoin(philly_311_buffer_gpd, philly_seg_buffer_gp, how='left', op='intersects')
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-16-7f45ab8e82f0> in <module>()
1 philly_seg_buffer_gp = street_gpd[['SEG_ID','STCL2_ID','geometry', 'SHAPE_LEN']].copy()
2 philly_seg_buffer_gp.geometry = philly_seg_buffer_gp.buffer(0.0001)
----> 3 philly_311_buffer_gpd = philly311[['Agency Responsible','Service Request ID', 'geometry','Service Name']].copy()
4 philly_311_buffer_gpd.geometry = philly_311_buffer_gpd.buffer(0.0001)
5 joined = sjoin(philly_311_buffer_gpd, philly_seg_buffer_gp, how='left', op='intersects')
NameError: name 'philly311' is not defined
In [25]:
join_gb = joined.reset_index().groupby('Service Request ID').count()
print(join_gb[join_gb.SEG_ID==0].shape[0], ' points spatially match no segment')
print(join_gb[join_gb.SEG_ID==1].shape[0], ' points spatially match 1 segment')
print(join_gb[join_gb.SEG_ID>1].shape[0], ' points spatially match multiple segments')
420 points spatially match no segment
202092 points spatially match 1 segment
126976 points spatially match multiple segments
In [26]:
joined.head().T
Out[26]:
0
0
0
0
1
Agency Responsible
Streets Department
Streets Department
Streets Department
Streets Department
Streets Department
Service Request ID
10895664
10895664
10895664
10895664
10895669
geometry
POLYGON ((-75.13314674099999 40.019630884, -75...
POLYGON ((-75.13314674099999 40.019630884, -75...
POLYGON ((-75.13314674099999 40.019630884, -75...
POLYGON ((-75.13314674099999 40.019630884, -75...
POLYGON ((-75.13348943099999 40.018080355, -75...
Service Name
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
index_right
3113
3119
3117
3125
8721
SEG_ID
640566
640606
640553
640552
640725
STCL2_ID
4186
4226
4173
4172
4345
In [4]:
philly311_gpd = philly311_gpd.drop('Media URL', 1)
philly311_gpd['Zipcode'] = philly311_gpd['Zipcode'].fillna(0)
philly311_gpd['Service Notice'] = philly311_gpd['Service Notice'].fillna('-')
philly_311_filtered = philly311_gpd[philly311_gpd['Requested Date/Time'].str.contains('2015|2016', na=False)]
In [5]:
philly_311_filtered = philly311_gpd[philly311_gpd['Requested Date/Time'].str.contains('2015|2016', na=False)]
In [32]:
philly_311_filtered[pd.isnull(philly_311_filtered['Latitude'])==0].to_csv('../philly/311_Requests_2015-16.csv')
In [21]:
print(philly_311_filtered.shape)
print(philly_311_filtered[isnan(philly_311_filtered['Latitude'])==0].shape)
(1084720, 17)
(323385, 17)
In [5]:
philly311_filtered = philly_311_filtered[isnan(philly_311_filtered['Latitude'])==0]
philly311_filtered.head().T
Out[5]:
0
1
2
3
4
Address
4500 N LAWRENCE ST
4400 N LAWRENCE ST
435 W CAREY ST
3255 N 11TH ST
6500 W WALNUT PARK DR
Agency Responsible
Streets Department
Streets Department
Streets Department
Streets Department
Streets Department
Census Tracts 2010 - 2013
65
337
337
196
304
Expected Date/Time
08/21/2016 08:00:00 PM
08/21/2016 08:00:00 PM
08/21/2016 08:00:00 PM
08/24/2016 08:00:00 PM
08/21/2016 08:00:00 PM
Latitude
40.0196
40.0181
40.0084
40.0021
40.0501
Location
POINT (-75.133247 40.019631)
POINT (-75.133589 40.01808)
POINT (-75.135892 40.008402)
POINT (-75.147295 40.002129)
POINT (-75.120826 40.050084)
Longitude
-75.1332
-75.1336
-75.1359
-75.1473
-75.1208
Requested Date/Time
08/18/2016 06:32:14 PM
08/18/2016 06:35:32 PM
08/18/2016 09:43:32 AM
08/18/2016 10:19:40 AM
08/18/2016 12:36:47 PM
Service Code
SR-ST03
SR-ST03
SR-ST03
SR-ST02
SR-ST03
Service Name
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
Illegal Dumping
Rubbish/Recyclable Material Collection
Service Notice
2 Business Days
2 Business Days
2 Business Days
5 Business Days
2 Business Days
Service Request ID
10895664
10895669
10894010
10894151
10894702
Status
Closed
Closed
Closed
Closed
Closed
Updated Date/Time
08/22/2016 06:15:23 AM
08/22/2016 06:15:26 AM
08/22/2016 06:15:29 AM
08/22/2016 06:15:32 AM
08/22/2016 06:15:35 AM
Zipcode
19140
19140
0
19140
0
Zipcodes
41
41
41
41
9
geometry
POINT (-75.13324674099999 40.019630884)
POINT (-75.13358943099999 40.018080355)
POINT (-75.135892102 40.008401585)
POINT (-75.14729510799999 40.002128888)
POINT (-75.12082581 40.050084238)
In [23]:
philly311_filtered.shape
Out[23]:
(323385, 17)
In [10]:
philly_311_filtered.head().T
Out[10]:
0
1
2
3
4
Address
4500 N LAWRENCE ST
4400 N LAWRENCE ST
435 W CAREY ST
3255 N 11TH ST
6500 W WALNUT PARK DR
Agency Responsible
Streets Department
Streets Department
Streets Department
Streets Department
Streets Department
Census Tracts 2010 - 2013
65
337
337
196
304
Expected Date/Time
08/21/2016 08:00:00 PM
08/21/2016 08:00:00 PM
08/21/2016 08:00:00 PM
08/24/2016 08:00:00 PM
08/21/2016 08:00:00 PM
Latitude
40.0196
40.0181
40.0084
40.0021
40.0501
Location
POINT (-75.133247 40.019631)
POINT (-75.133589 40.01808)
POINT (-75.135892 40.008402)
POINT (-75.147295 40.002129)
POINT (-75.120826 40.050084)
Longitude
-75.1332
-75.1336
-75.1359
-75.1473
-75.1208
Requested Date/Time
08/18/2016 06:32:14 PM
08/18/2016 06:35:32 PM
08/18/2016 09:43:32 AM
08/18/2016 10:19:40 AM
08/18/2016 12:36:47 PM
Service Code
SR-ST03
SR-ST03
SR-ST03
SR-ST02
SR-ST03
Service Name
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
Illegal Dumping
Rubbish/Recyclable Material Collection
Service Notice
2 Business Days
2 Business Days
2 Business Days
5 Business Days
2 Business Days
Service Request ID
10895664
10895669
10894010
10894151
10894702
Status
Closed
Closed
Closed
Closed
Closed
Updated Date/Time
08/22/2016 06:15:23 AM
08/22/2016 06:15:26 AM
08/22/2016 06:15:29 AM
08/22/2016 06:15:32 AM
08/22/2016 06:15:35 AM
Zipcode
19140
19140
0
19140
0
Zipcodes
41
41
41
41
9
geometry
POINT (-75.13324674099999 40.019630884)
POINT (-75.13358943099999 40.018080355)
POINT (-75.135892102 40.008401585)
POINT (-75.14729510799999 40.002128888)
POINT (-75.12082581 40.050084238)
In [6]:
philly_seg_buffer_gp = street_gpd[['SEG_ID','STCL2_ID','geometry', 'SHAPE_LEN']].copy()
philly_seg_buffer_gp.geometry = philly_seg_buffer_gp.buffer(0.0001)
philly_311_filtered_buffer_gpd = philly311_filtered[['Service Request ID', 'geometry','Service Name', 'Requested Date/Time']].dropna().copy()
philly_311_filtered_buffer_gpd.geometry = philly_311_filtered_buffer_gpd.buffer(0.0001)
joined_filtered = sjoin(philly_311_filtered_buffer_gpd, philly_seg_buffer_gp, how='left', op='intersects')
In [25]:
gb = joined_filtered.reset_index().groupby('SEG_ID').count()
print(gb[gb['Service Request ID']==0].shape[0], ' points spatially match no segment')
print(gb[gb['Service Request ID']==1].shape[0], ' points spatially match 1 segment')
print(gb[gb['Service Request ID']>1].shape[0], ' points spatially match multiple segments')
0 points spatially match no segment
1610 points spatially match 1 segment
37278 points spatially match multiple segments
In [27]:
print(joined_filtered.shape)
joined_filtered.head().T
(649764, 8)
Out[27]:
0
0
0
0
1
Service Request ID
10895664
10895664
10895664
10895664
10895669
geometry
POLYGON ((-75.13314674099999 40.019630884, -75...
POLYGON ((-75.13314674099999 40.019630884, -75...
POLYGON ((-75.13314674099999 40.019630884, -75...
POLYGON ((-75.13314674099999 40.019630884, -75...
POLYGON ((-75.13348943099999 40.018080355, -75...
Service Name
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
Requested Date/Time
08/18/2016 06:32:14 PM
08/18/2016 06:32:14 PM
08/18/2016 06:32:14 PM
08/18/2016 06:32:14 PM
08/18/2016 06:35:32 PM
index_right
3113
3119
3117
3125
8721
SEG_ID
640566
640606
640553
640552
640725
STCL2_ID
4186
4226
4173
4172
4345
SHAPE_LEN
262.761
140.018
266.558
274.852
166.097
In [27]:
unique_agencies = set()
for agency in joined['Agency Responsible']:
unique_agencies.add(agency)
print(len(unique_agencies), unique_agencies)
9 {'Streets Department', 'Parks & Recreation', 'Water Department (PWD)', 'Philly311 Contact Center', 'Fire Department', None, 'License & Inspections', 'Community Life Improvement Program', 'Police Department'}
In [28]:
unique_services = set()
for service in joined['Service Name']:
unique_services.add(service)
print(len(unique_services), unique_services)
51 {'Building Construction', 'Line Striping', 'Traffic (Other)', 'Zoning Business', 'Stop Sign Repair', 'Inlet Cleaning', 'Parks and Rec Safety and Maintenance', 'Abandoned Bike', 'Newsstand/Outdoor Café', 'Dangerous Sidewalk', 'Zoning Residential', 'Miscellaneous', 'Daycare Residential or Commercial', 'Street Trees', 'Hydrant Request', 'Graffiti Removal', 'Sanitation / Dumpster Violation', 'Manhole Cover', 'Maintenance Residential or Commercial', 'Illegal Dumping', 'Building Dangerous', 'Police Complaint', 'Fire Residential or Commercial', 'Abandoned Vehicle', 'Complaint (Streets)', 'Boarding Room House', 'Smoke Detector', 'Other Dangerous', 'Tree Dangerous', 'Vacant Lot Clean-Up', 'Information Request', 'Street Defect', 'Rubbish/Recyclable Material Collection', 'License Residential', 'Newsstand Outdoor Cafe', 'Complaints against Fire or EMS', 'No Heat Residential', 'Traffic Signal Emergency', 'Emergency Air Conditioning', 'Other (Streets)', 'Dead Animal in Street', 'Construction Site Task Force', 'Alley Light Outage', 'Shoveling', 'Hydrant Knocked Down (No Water)', 'Street Paving', 'Vacant House or Commercial', 'Salting', 'Street Light Outage', 'Infestation Residential', 'No Heat (Residential)'}
In [7]:
unique_services = set()
for service in joined_filtered['Service Name']:
unique_services.add(service)
print(len(unique_services), unique_services)
51 {'Police Complaint', 'Shoveling', 'No Heat (Residential)', 'Street Trees', 'Stop Sign Repair', 'Infestation Residential', 'Street Paving', 'Traffic (Other)', 'Manhole Cover', 'Line Striping', 'Street Defect', 'Illegal Dumping', 'Vacant Lot Clean-Up', 'Other Dangerous', 'Street Light Outage', 'Other (Streets)', 'No Heat Residential', 'Newsstand/Outdoor Café', 'Dangerous Sidewalk', 'Zoning Residential', 'Information Request', 'Miscellaneous', 'Abandoned Vehicle', 'Hydrant Request', 'Building Construction', 'Complaint (Streets)', 'Newsstand Outdoor Cafe', 'Alley Light Outage', 'Zoning Business', 'Traffic Signal Emergency', 'Hydrant Knocked Down (No Water)', 'Salting', 'Complaints against Fire or EMS', 'Fire Residential or Commercial', 'Emergency Air Conditioning', 'Construction Site Task Force', 'Sanitation / Dumpster Violation', 'Parks and Rec Safety and Maintenance', 'Boarding Room House', 'Maintenance Residential or Commercial', 'Abandoned Bike', 'License Residential', 'Daycare Residential or Commercial', 'Inlet Cleaning', 'Rubbish/Recyclable Material Collection', 'Vacant House or Commercial', 'Building Dangerous', 'Graffiti Removal', 'Dead Animal in Street', 'Tree Dangerous', 'Smoke Detector'}
In [32]:
philly_311_statistic = joined[['SEG_ID', 'Agency Responsible', 'Service Name']].dropna()
In [33]:
print(philly_311_statistic.shape)
philly_311_statistic.head().T
(661603, 3)
Out[33]:
0
0
0
0
1
SEG_ID
640566
640606
640553
640552
640725
Agency Responsible
Streets Department
Streets Department
Streets Department
Streets Department
Streets Department
Service Name
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
In [8]:
philly_311_filtered_statistic = joined_filtered[['SEG_ID', 'Service Name', 'Requested Date/Time', 'SHAPE_LEN']]
print(philly_311_filtered_statistic.shape)
philly_311_filtered_statistic.head().T
(649764, 4)
Out[8]:
0
0
0
0
1
SEG_ID
640566
640606
640553
640552
640725
Service Name
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
Requested Date/Time
08/18/2016 06:32:14 PM
08/18/2016 06:32:14 PM
08/18/2016 06:32:14 PM
08/18/2016 06:32:14 PM
08/18/2016 06:35:32 PM
SHAPE_LEN
262.761
140.018
266.558
274.852
166.097
In [31]:
philly_311_filtered_statistic.drop_duplicates().shape
Out[31]:
(649144, 4)
In [34]:
philly_311_filtered_statistic.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 649764 entries, 0 to 1110622
Data columns (total 4 columns):
SEG_ID 649350 non-null float64
Service Name 649764 non-null object
Requested Date/Time 649764 non-null object
SHAPE_LEN 649350 non-null float64
dtypes: float64(2), object(2)
memory usage: 24.8+ MB
In [9]:
from pandas.tseries.resample import TimeGrouper
In [10]:
philly_311_filtered_statistic['Requested Date/Time'] = pd.DatetimeIndex(philly_311_filtered_statistic['Requested Date/Time'])
philly_311_filtered_statistic.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 649764 entries, 0 to 1110622
Data columns (total 4 columns):
SEG_ID 649350 non-null float64
Service Name 649764 non-null object
Requested Date/Time 649764 non-null datetime64[ns]
SHAPE_LEN 649350 non-null float64
dtypes: datetime64[ns](1), float64(2), object(1)
memory usage: 24.8+ MB
//anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
if __name__ == '__main__':
In [45]:
philly_311_filtered_statistic.head().T
Out[45]:
0
0
0
0
1
SEG_ID
640566
640606
640553
640552
640725
Service Name
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
Rubbish/Recyclable Material Collection
Requested Date/Time
2016-08-18 18:32:14
2016-08-18 18:32:14
2016-08-18 18:32:14
2016-08-18 18:32:14
2016-08-18 18:35:32
SHAPE_LEN
262.761
140.018
266.558
274.852
166.097
In [47]:
philly_311_filtered_statistic['Requested Date/Time'].dt.month
Out[47]:
0 8
0 8
0 8
0 8
1 8
1 8
1 8
1 8
2 8
3 8
4 8
5 8
6 8
7 9
7 9
7 9
7 9
8 8
8 8
8 8
9 8
9 8
9 8
10 8
11 8
11 8
11 8
11 8
12 8
13 8
..
1110606 8
1110607 8
1110608 6
1110608 6
1110608 6
1110608 6
1110609 8
1110609 8
1110609 8
1110609 8
1110610 8
1110611 8
1110611 8
1110611 8
1110611 8
1110612 6
1110613 8
1110614 8
1110615 8
1110616 8
1110617 2
1110618 6
1110618 6
1110618 6
1110620 10
1110621 11
1110622 8
1110622 8
1110622 8
1110622 8
Name: Requested Date/Time, dtype: int64
In [34]:
philly_311_statistic.groupby(['SEG_ID', 'Agency Responsible', 'Service Name']).size().reset_index()
Out[34]:
SEG_ID
Agency Responsible
Service Name
0
0
100006.0
Community Life Improvement Program
Vacant Lot Clean-Up
1
1
100006.0
License & Inspections
Maintenance Residential or Commercial
2
2
100006.0
Police Department
Abandoned Vehicle
1
3
100006.0
Streets Department
Illegal Dumping
6
4
100007.0
Streets Department
Illegal Dumping
2
5
100007.0
Streets Department
Rubbish/Recyclable Material Collection
1
6
100008.0
Community Life Improvement Program
Vacant Lot Clean-Up
1
7
100008.0
Streets Department
Illegal Dumping
5
8
100008.0
Streets Department
Salting
1
9
100008.0
Streets Department
Street Light Outage
6
10
100009.0
Police Department
Abandoned Vehicle
1
11
100009.0
Streets Department
Illegal Dumping
1
12
100009.0
Streets Department
Street Defect
2
13
100011.0
Streets Department
Illegal Dumping
6
14
100012.0
Community Life Improvement Program
Vacant Lot Clean-Up
1
15
100012.0
License & Inspections
Vacant House or Commercial
1
16
100012.0
License & Inspections
Zoning Business
1
17
100012.0
Streets Department
Illegal Dumping
3
18
100012.0
Streets Department
Street Light Outage
1
19
100015.0
Community Life Improvement Program
Vacant Lot Clean-Up
3
20
100015.0
Fire Department
Smoke Detector
1
21
100015.0
Parks & Recreation
Street Trees
1
22
100015.0
Streets Department
Illegal Dumping
2
23
100015.0
Streets Department
Rubbish/Recyclable Material Collection
1
24
100016.0
License & Inspections
Maintenance Residential or Commercial
1
25
100016.0
Streets Department
Illegal Dumping
5
26
100016.0
Streets Department
Stop Sign Repair
1
27
100020.0
Parks & Recreation
Parks and Rec Safety and Maintenance
1
28
100020.0
Parks & Recreation
Street Trees
2
29
100020.0
Streets Department
Illegal Dumping
1
...
...
...
...
...
285023
1180077.0
Streets Department
Street Light Outage
1
285024
1180077.0
Streets Department
Traffic (Other)
1
285025
1180080.0
Streets Department
Street Defect
2
285026
1180080.0
Streets Department
Street Light Outage
2
285027
1180081.0
Parks & Recreation
Street Trees
2
285028
1180081.0
Police Department
Abandoned Vehicle
1
285029
1180081.0
Streets Department
Dangerous Sidewalk
1
285030
1180081.0
Streets Department
Illegal Dumping
1
285031
1180081.0
Streets Department
Salting
2
285032
1180081.0
Streets Department
Street Light Outage
2
285033
1180082.0
License & Inspections
Construction Site Task Force
1
285034
1180082.0
Parks & Recreation
Parks and Rec Safety and Maintenance
1
285035
1180082.0
Police Department
Abandoned Vehicle
3
285036
1180082.0
Streets Department
Illegal Dumping
1
285037
1180082.0
Streets Department
Rubbish/Recyclable Material Collection
1
285038
1180082.0
Streets Department
Salting
1
285039
1180082.0
Streets Department
Street Light Outage
2
285040
1180083.0
Police Department
Abandoned Vehicle
1
285041
1180085.0
License & Inspections
Maintenance Residential or Commercial
2
285042
1180094.0
Streets Department
Rubbish/Recyclable Material Collection
1
285043
1180094.0
Streets Department
Street Light Outage
1
285044
1180097.0
Streets Department
Rubbish/Recyclable Material Collection
1
285045
1180098.0
Streets Department
Rubbish/Recyclable Material Collection
1
285046
1180100.0
Streets Department
Street Light Outage
4
285047
1180101.0
Streets Department
Rubbish/Recyclable Material Collection
1
285048
1180101.0
Streets Department
Street Light Outage
4
285049
1180102.0
Streets Department
Rubbish/Recyclable Material Collection
2
285050
1180102.0
Streets Department
Street Light Outage
4
285051
1180103.0
Streets Department
Rubbish/Recyclable Material Collection
2
285052
1180104.0
Streets Department
Rubbish/Recyclable Material Collection
2
285053 rows × 4 columns
In [35]:
philly_311_statistic.groupby(['Agency Responsible', 'Service Name']).size().reset_index()
Out[35]:
Agency Responsible
Service Name
0
0
Community Life Improvement Program
Building Dangerous
1
1
Community Life Improvement Program
Graffiti Removal
48511
2
Community Life Improvement Program
Illegal Dumping
1
3
Community Life Improvement Program
License Residential
1
4
Community Life Improvement Program
Maintenance Residential or Commercial
1
5
Community Life Improvement Program
Rubbish/Recyclable Material Collection
1
6
Community Life Improvement Program
Sanitation / Dumpster Violation
1
7
Community Life Improvement Program
Street Defect
7
8
Community Life Improvement Program
Vacant Lot Clean-Up
23014
9
Fire Department
Complaints against Fire or EMS
266
10
Fire Department
Smoke Detector
8151
11
License & Inspections
Alley Light Outage
2
12
License & Inspections
Boarding Room House
1951
13
License & Inspections
Building Construction
7749
14
License & Inspections
Building Dangerous
9535
15
License & Inspections
Complaint (Streets)
1
16
License & Inspections
Construction Site Task Force
12169
17
License & Inspections
Dangerous Sidewalk
5
18
License & Inspections
Daycare Residential or Commercial
170
19
License & Inspections
Dead Animal in Street
4
20
License & Inspections
Emergency Air Conditioning
2
21
License & Inspections
Fire Residential or Commercial
4140
22
License & Inspections
Illegal Dumping
70
23
License & Inspections
Infestation Residential
2515
24
License & Inspections
Information Request
63
25
License & Inspections
Inlet Cleaning
6
26
License & Inspections
License Residential
3922
27
License & Inspections
Maintenance Residential or Commercial
75609
28
License & Inspections
No Heat (Residential)
4111
29
License & Inspections
No Heat Residential
16
...
...
...
...
51
Philly311 Contact Center
Street Light Outage
3
52
Police Department
Abandoned Vehicle
47607
53
Police Department
Information Request
37
54
Police Department
Police Complaint
645
55
Streets Department
Abandoned Bike
717
56
Streets Department
Alley Light Outage
4717
57
Streets Department
Complaint (Streets)
5368
58
Streets Department
Dangerous Sidewalk
5376
59
Streets Department
Dead Animal in Street
3960
60
Streets Department
Illegal Dumping
66562
61
Streets Department
Information Request
65
62
Streets Department
Line Striping
1684
63
Streets Department
Manhole Cover
2393
64
Streets Department
Newsstand Outdoor Cafe
251
65
Streets Department
Newsstand/Outdoor Café
11
66
Streets Department
Other (Streets)
10210
67
Streets Department
Rubbish/Recyclable Material Collection
65723
68
Streets Department
Salting
29585
69
Streets Department
Sanitation / Dumpster Violation
14832
70
Streets Department
Shoveling
3604
71
Streets Department
Stop Sign Repair
5622
72
Streets Department
Street Defect
51286
73
Streets Department
Street Light Outage
27539
74
Streets Department
Street Paving
2453
75
Streets Department
Traffic (Other)
7139
76
Streets Department
Traffic Signal Emergency
28414
77
Water Department (PWD)
Hydrant Knocked Down (No Water)
97
78
Water Department (PWD)
Hydrant Request
1843
79
Water Department (PWD)
Information Request
103
80
Water Department (PWD)
Inlet Cleaning
8139
81 rows × 3 columns
In [11]:
philly_311_filtered_typemerge = philly_311_filtered_statistic.groupby(['SEG_ID', philly_311_filtered_statistic['Requested Date/Time'].dt.month, 'Service Name']).size().reset_index()
In [12]:
philly_311_filtered_typemerge.columns = ['SEG_ID', 'Month', 'Service Name', 'count']
print(philly_311_filtered_typemerge.shape)
philly_311_filtered_typemerge.head().T
(479157, 4)
Out[12]:
0
1
2
3
4
SEG_ID
100006
100006
100006
100006
100006
Month
2
3
4
5
6
Service Name
Illegal Dumping
Illegal Dumping
Illegal Dumping
Maintenance Residential or Commercial
Vacant Lot Clean-Up
count
1
3
1
1
1
In [16]:
philly_311_filtered_details = philly_311_filtered_statistic.groupby(['SEG_ID', philly_311_filtered_statistic['Requested Date/Time'].dt.month, 'SHAPE_LEN']).size().reset_index()
In [17]:
philly_311_filtered_details.columns = ['SEG_ID', 'Month', 'SHAPE_LEN', 'monthly_311_request_count']
print(philly_311_filtered_details.shape)
philly_311_filtered_details.head().T
(259881, 4)
Out[17]:
0
1
2
3
4
SEG_ID
100006.000000
100006.000000
100006.000000
100006.000000
100006.000000
Month
2.000000
3.000000
4.000000
5.000000
6.000000
SHAPE_LEN
735.818883
735.818883
735.818883
735.818883
735.818883
monthly_311_request_count
1.000000
3.000000
1.000000
1.000000
1.000000
In [18]:
philly_311_filtered_details['monthly_311_request_count/len'] = philly_311_filtered_details['monthly_311_request_count']/philly_311_filtered_details['SHAPE_LEN']
In [19]:
print(philly_311_filtered_details.shape)
philly_311_filtered_details.head().T
(259881, 5)
Out[19]:
0
1
2
3
4
SEG_ID
100006.000000
100006.000000
100006.000000
100006.000000
100006.000000
Month
2.000000
3.000000
4.000000
5.000000
6.000000
SHAPE_LEN
735.818883
735.818883
735.818883
735.818883
735.818883
monthly_311_request_count
1.000000
3.000000
1.000000
1.000000
1.000000
monthly_311_request_count/len
0.001359
0.004077
0.001359
0.001359
0.001359
In [20]:
for service in unique_services:
if(service is not None):
service_data = philly_311_filtered_typemerge[philly_311_filtered_typemerge['Service Name']==service]
service_count = service + '_count'
service_count_len = service_count + '/len'
service_data_count = service_data[['SEG_ID', 'Month', 'count']]
service_data_count.columns = ['SEG_ID', 'Month', service_count]
philly_311_filtered_details = philly_311_filtered_details.merge(service_data_count, left_on = ['SEG_ID', 'Month'], right_on = ['SEG_ID', 'Month'], right_index=False, how='left')
philly_311_filtered_details[service_count_len] = philly_311_filtered_details[service_count]/philly_311_filtered_details['SHAPE_LEN']
In [22]:
philly_311_filtered_details = philly_311_filtered_details.fillna(0)
print(philly_311_filtered_details.shape)
philly_311_filtered_details.head().T
(259881, 107)
Out[22]:
0
1
2
3
4
SEG_ID
100006.000000
100006.000000
100006.000000
100006.000000
100006.000000
Month
2.000000
3.000000
4.000000
5.000000
6.000000
SHAPE_LEN
735.818883
735.818883
735.818883
735.818883
735.818883
monthly_311_request_count
1.000000
3.000000
1.000000
1.000000
1.000000
monthly_311_request_count/len
0.001359
0.004077
0.001359
0.001359
0.001359
Police Complaint_count
0.000000
0.000000
0.000000
0.000000
0.000000
Police Complaint_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Shoveling_count
0.000000
0.000000
0.000000
0.000000
0.000000
Shoveling_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
No Heat (Residential)_count
0.000000
0.000000
0.000000
0.000000
0.000000
No Heat (Residential)_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Street Trees_count
0.000000
0.000000
0.000000
0.000000
0.000000
Street Trees_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Stop Sign Repair_count
0.000000
0.000000
0.000000
0.000000
0.000000
Stop Sign Repair_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Infestation Residential_count
0.000000
0.000000
0.000000
0.000000
0.000000
Infestation Residential_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Street Paving_count
0.000000
0.000000
0.000000
0.000000
0.000000
Street Paving_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Traffic (Other)_count
0.000000
0.000000
0.000000
0.000000
0.000000
Traffic (Other)_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Manhole Cover_count
0.000000
0.000000
0.000000
0.000000
0.000000
Manhole Cover_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Line Striping_count
0.000000
0.000000
0.000000
0.000000
0.000000
Line Striping_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Street Defect_count
0.000000
0.000000
0.000000
0.000000
0.000000
Street Defect_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Illegal Dumping_count
1.000000
3.000000
1.000000
0.000000
0.000000
Illegal Dumping_count/len
0.001359
0.004077
0.001359
0.000000
0.000000
Vacant Lot Clean-Up_count
0.000000
0.000000
0.000000
0.000000
1.000000
...
...
...
...
...
...
Sanitation / Dumpster Violation_count
0.000000
0.000000
0.000000
0.000000
0.000000
Sanitation / Dumpster Violation_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Parks and Rec Safety and Maintenance_count
0.000000
0.000000
0.000000
0.000000
0.000000
Parks and Rec Safety and Maintenance_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Boarding Room House_count
0.000000
0.000000
0.000000
0.000000
0.000000
Boarding Room House_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Maintenance Residential or Commercial_count
0.000000
0.000000
0.000000
1.000000
0.000000
Maintenance Residential or Commercial_count/len
0.000000
0.000000
0.000000
0.001359
0.000000
Abandoned Bike_count
0.000000
0.000000
0.000000
0.000000
0.000000
Abandoned Bike_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
License Residential_count
0.000000
0.000000
0.000000
0.000000
0.000000
License Residential_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Daycare Residential or Commercial_count
0.000000
0.000000
0.000000
0.000000
0.000000
Daycare Residential or Commercial_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Inlet Cleaning_count
0.000000
0.000000
0.000000
0.000000
0.000000
Inlet Cleaning_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Rubbish/Recyclable Material Collection_count
0.000000
0.000000
0.000000
0.000000
0.000000
Rubbish/Recyclable Material Collection_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Vacant House or Commercial_count
0.000000
0.000000
0.000000
0.000000
0.000000
Vacant House or Commercial_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Building Dangerous_count
0.000000
0.000000
0.000000
0.000000
0.000000
Building Dangerous_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Graffiti Removal_count
0.000000
0.000000
0.000000
0.000000
0.000000
Graffiti Removal_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Dead Animal in Street_count
0.000000
0.000000
0.000000
0.000000
0.000000
Dead Animal in Street_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Tree Dangerous_count
0.000000
0.000000
0.000000
0.000000
0.000000
Tree Dangerous_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Smoke Detector_count
0.000000
0.000000
0.000000
0.000000
0.000000
Smoke Detector_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
107 rows × 5 columns
In [23]:
philly_311_filtered_details.drop_duplicates().shape
Out[23]:
(259881, 107)
In [26]:
from statsmodels import robust
In [27]:
for column in philly_311_filtered_details.columns:
if('_count' in column):
print(column, robust.mad(philly_311_filtered_details[column]))
monthly_311_request_count 1.48260221851
monthly_311_request_count/len 0.00493050307814
Police Complaint_count 0.0
Police Complaint_count/len 0.0
Shoveling_count 0.0
Shoveling_count/len 0.0
No Heat (Residential)_count 0.0
No Heat (Residential)_count/len 0.0
Street Trees_count 0.0
Street Trees_count/len 0.0
Stop Sign Repair_count 0.0
Stop Sign Repair_count/len 0.0
Infestation Residential_count 0.0
Infestation Residential_count/len 0.0
Street Paving_count 0.0
Street Paving_count/len 0.0
Traffic (Other)_count 0.0
Traffic (Other)_count/len 0.0
Manhole Cover_count 0.0
Manhole Cover_count/len 0.0
Line Striping_count 0.0
Line Striping_count/len 0.0
Street Defect_count 0.0
Street Defect_count/len 0.0
Illegal Dumping_count 0.0
Illegal Dumping_count/len 0.0
Vacant Lot Clean-Up_count 0.0
Vacant Lot Clean-Up_count/len 0.0
Other Dangerous_count 0.0
Other Dangerous_count/len 0.0
Street Light Outage_count 0.0
Street Light Outage_count/len 0.0
Other (Streets)_count 0.0
Other (Streets)_count/len 0.0
No Heat Residential_count 0.0
No Heat Residential_count/len 0.0
Newsstand/Outdoor Café_count 0.0
Newsstand/Outdoor Café_count/len 0.0
Dangerous Sidewalk_count 0.0
Dangerous Sidewalk_count/len 0.0
Zoning Residential_count 0.0
Zoning Residential_count/len 0.0
Information Request_count 0.0
Information Request_count/len 0.0
Miscellaneous_count 0.0
Miscellaneous_count/len 0.0
Abandoned Vehicle_count 0.0
Abandoned Vehicle_count/len 0.0
Hydrant Request_count 0.0
Hydrant Request_count/len 0.0
Building Construction_count 0.0
Building Construction_count/len 0.0
Complaint (Streets)_count 0.0
Complaint (Streets)_count/len 0.0
Newsstand Outdoor Cafe_count 0.0
Newsstand Outdoor Cafe_count/len 0.0
Alley Light Outage_count 0.0
Alley Light Outage_count/len 0.0
Zoning Business_count 0.0
Zoning Business_count/len 0.0
Traffic Signal Emergency_count 0.0
Traffic Signal Emergency_count/len 0.0
Hydrant Knocked Down (No Water)_count 0.0
Hydrant Knocked Down (No Water)_count/len 0.0
Salting_count 0.0
Salting_count/len 0.0
Complaints against Fire or EMS_count 0.0
Complaints against Fire or EMS_count/len 0.0
Fire Residential or Commercial_count 0.0
Fire Residential or Commercial_count/len 0.0
Emergency Air Conditioning_count 0.0
Emergency Air Conditioning_count/len 0.0
Construction Site Task Force_count 0.0
Construction Site Task Force_count/len 0.0
Sanitation / Dumpster Violation_count 0.0
Sanitation / Dumpster Violation_count/len 0.0
Parks and Rec Safety and Maintenance_count 0.0
Parks and Rec Safety and Maintenance_count/len 0.0
Boarding Room House_count 0.0
Boarding Room House_count/len 0.0
Maintenance Residential or Commercial_count 0.0
Maintenance Residential or Commercial_count/len 0.0
Abandoned Bike_count 0.0
Abandoned Bike_count/len 0.0
License Residential_count 0.0
License Residential_count/len 0.0
Daycare Residential or Commercial_count 0.0
Daycare Residential or Commercial_count/len 0.0
Inlet Cleaning_count 0.0
Inlet Cleaning_count/len 0.0
Rubbish/Recyclable Material Collection_count 0.0
Rubbish/Recyclable Material Collection_count/len 0.0
Vacant House or Commercial_count 0.0
Vacant House or Commercial_count/len 0.0
Building Dangerous_count 0.0
Building Dangerous_count/len 0.0
Graffiti Removal_count 0.0
Graffiti Removal_count/len 0.0
Dead Animal in Street_count 0.0
Dead Animal in Street_count/len 0.0
Tree Dangerous_count 0.0
Tree Dangerous_count/len 0.0
Smoke Detector_count 0.0
Smoke Detector_count/len 0.0
In [42]:
philly_311_filtered_details.mean()
Out[42]:
SEG_ID 551829.374098
Month 6.079894
SHAPE_LEN 371.222712
monthly_311_request_count 2.498644
monthly_311_request_count/len 0.009738
Police Complaint_count 0.002463
Police Complaint_count/len 0.000009
Shoveling_count 0.013899
Shoveling_count/len 0.000055
No Heat (Residential)_count 0.014753
No Heat (Residential)_count/len 0.000053
Street Trees_count 0.045040
Street Trees_count/len 0.000158
Stop Sign Repair_count 0.021121
Stop Sign Repair_count/len 0.000086
Infestation Residential_count 0.009431
Infestation Residential_count/len 0.000032
Street Paving_count 0.009285
Street Paving_count/len 0.000034
Traffic (Other)_count 0.026993
Traffic (Other)_count/len 0.000110
Manhole Cover_count 0.008923
Manhole Cover_count/len 0.000035
Line Striping_count 0.006368
Line Striping_count/len 0.000024
Street Defect_count 0.194793
Street Defect_count/len 0.000770
Illegal Dumping_count 0.251419
Illegal Dumping_count/len 0.001091
Vacant Lot Clean-Up_count 0.087652
...
Sanitation / Dumpster Violation_count 0.056014
Sanitation / Dumpster Violation_count/len 0.000223
Parks and Rec Safety and Maintenance_count 0.020059
Parks and Rec Safety and Maintenance_count/len 0.000071
Boarding Room House_count 0.007353
Boarding Room House_count/len 0.000028
Maintenance Residential or Commercial_count 0.286320
Maintenance Residential or Commercial_count/len 0.000992
Abandoned Bike_count 0.002717
Abandoned Bike_count/len 0.000014
License Residential_count 0.014934
License Residential_count/len 0.000053
Daycare Residential or Commercial_count 0.000635
Daycare Residential or Commercial_count/len 0.000002
Inlet Cleaning_count 0.030468
Inlet Cleaning_count/len 0.000136
Rubbish/Recyclable Material Collection_count 0.248425
Rubbish/Recyclable Material Collection_count/len 0.000982
Vacant House or Commercial_count 0.076685
Vacant House or Commercial_count/len 0.000278
Building Dangerous_count 0.036055
Building Dangerous_count/len 0.000139
Graffiti Removal_count 0.180991
Graffiti Removal_count/len 0.000811
Dead Animal in Street_count 0.014872
Dead Animal in Street_count/len 0.000054
Tree Dangerous_count 0.002809
Tree Dangerous_count/len 0.000009
Smoke Detector_count 0.030660
Smoke Detector_count/len 0.000098
dtype: float64
In [44]:
philly_311_filtered_details.mad()
Out[44]:
SEG_ID 169294.574445
Month 2.765901
SHAPE_LEN 189.561113
monthly_311_request_count 1.498016
monthly_311_request_count/len 0.006895
Police Complaint_count 0.004914
Police Complaint_count/len 0.000018
Shoveling_count 0.027498
Shoveling_count/len 0.000109
No Heat (Residential)_count 0.029116
No Heat (Residential)_count/len 0.000104
Street Trees_count 0.086701
Street Trees_count/len 0.000304
Stop Sign Repair_count 0.041538
Stop Sign Repair_count/len 0.000169
Infestation Residential_count 0.018696
Infestation Residential_count/len 0.000064
Street Paving_count 0.018414
Street Paving_count/len 0.000068
Traffic (Other)_count 0.052659
Traffic (Other)_count/len 0.000215
Manhole Cover_count 0.017703
Manhole Cover_count/len 0.000070
Line Striping_count 0.012663
Line Striping_count/len 0.000048
Street Defect_count 0.336872
Street Defect_count/len 0.001333
Illegal Dumping_count 0.414588
Illegal Dumping_count/len 0.001801
Vacant Lot Clean-Up_count 0.164458
...
Sanitation / Dumpster Violation_count 0.107073
Sanitation / Dumpster Violation_count/len 0.000426
Parks and Rec Safety and Maintenance_count 0.039463
Parks and Rec Safety and Maintenance_count/len 0.000140
Boarding Room House_count 0.014605
Boarding Room House_count/len 0.000055
Maintenance Residential or Commercial_count 0.453083
Maintenance Residential or Commercial_count/len 0.001571
Abandoned Bike_count 0.005420
Abandoned Bike_count/len 0.000029
License Residential_count 0.029460
License Residential_count/len 0.000104
Daycare Residential or Commercial_count 0.001269
Daycare Residential or Commercial_count/len 0.000004
Inlet Cleaning_count 0.059361
Inlet Cleaning_count/len 0.000265
Rubbish/Recyclable Material Collection_count 0.411170
Rubbish/Recyclable Material Collection_count/len 0.001625
Vacant House or Commercial_count 0.143834
Vacant House or Commercial_count/len 0.000522
Building Dangerous_count 0.069842
Building Dangerous_count/len 0.000269
Graffiti Removal_count 0.319255
Graffiti Removal_count/len 0.001432
Dead Animal in Street_count 0.029350
Dead Animal in Street_count/len 0.000107
Tree Dangerous_count 0.005603
Tree Dangerous_count/len 0.000018
Smoke Detector_count 0.059701
Smoke Detector_count/len 0.000191
dtype: float64
In [54]:
philly_311_filtered_details.std()
Out[54]:
SEG_ID 202007.886754
Month 3.239961
SHAPE_LEN 311.557862
monthly_311_request_count 2.172181
monthly_311_request_count/len 0.012373
Police Complaint_count 0.051618
Police Complaint_count/len 0.000244
Shoveling_count 0.155597
Shoveling_count/len 0.000752
No Heat (Residential)_count 0.138143
No Heat (Residential)_count/len 0.000619
Street Trees_count 0.256032
Street Trees_count/len 0.001110
Stop Sign Repair_count 0.185982
Stop Sign Repair_count/len 0.001107
Infestation Residential_count 0.103648
Infestation Residential_count/len 0.000457
Street Paving_count 0.108982
Street Paving_count/len 0.000518
Traffic (Other)_count 0.178901
Traffic (Other)_count/len 0.001207
Manhole Cover_count 0.105094
Manhole Cover_count/len 0.000520
Line Striping_count 0.087780
Line Striping_count/len 0.000432
Street Defect_count 0.618036
Street Defect_count/len 0.003442
Illegal Dumping_count 0.710043
Illegal Dumping_count/len 0.004063
Vacant Lot Clean-Up_count 0.423710
...
Sanitation / Dumpster Violation_count 0.300362
Sanitation / Dumpster Violation_count/len 0.001528
Parks and Rec Safety and Maintenance_count 0.177224
Parks and Rec Safety and Maintenance_count/len 0.000923
Boarding Room House_count 0.091398
Boarding Room House_count/len 0.000453
Maintenance Residential or Commercial_count 0.687932
Maintenance Residential or Commercial_count/len 0.003002
Abandoned Bike_count 0.058260
Abandoned Bike_count/len 0.000436
License Residential_count 0.143198
License Residential_count/len 0.000608
Daycare Residential or Commercial_count 0.025493
Daycare Residential or Commercial_count/len 0.000104
Inlet Cleaning_count 0.208068
Inlet Cleaning_count/len 0.001244
Rubbish/Recyclable Material Collection_count 0.668857
Rubbish/Recyclable Material Collection_count/len 0.003465
Vacant House or Commercial_count 0.340648
Vacant House or Commercial_count/len 0.001537
Building Dangerous_count 0.217325
Building Dangerous_count/len 0.001093
Graffiti Removal_count 0.632787
Graffiti Removal_count/len 0.003814
Dead Animal in Street_count 0.136237
Dead Animal in Street_count/len 0.000678
Tree Dangerous_count 0.055758
Tree Dangerous_count/len 0.000214
Smoke Detector_count 0.221599
Smoke Detector_count/len 0.000826
dtype: float64
In [53]:
col_median = philly_311_filtered_details.median()
col_mad = philly_311_filtered_details.mad()
for column in philly_311_filtered_details.columns:
if('_count' in column and 'len' not in column):
# col_mad = robust.mad(philly_311_filtered_details[column])
print(column, col_median[column], col_mad[column])
outliers = philly_311_filtered_details[philly_311_filtered_details[column]>(col_median[column] + 3*col_mad[column])][[column]]
print('shape:', outliers.shape, 'min:', min(outliers[column]), 'max:', max(outliers[column]))
monthly_311_request_count 2.0 1.49801579696
shape: (13356, 1) min: 7 max: 79
Police Complaint_count 0.0 0.00491365661968
shape: (616, 1) min: 1.0 max: 3.0
Shoveling_count 0.0 0.0274975241036
shape: (2803, 1) min: 1.0 max: 21.0
No Heat (Residential)_count 0.0 0.029116497819
shape: (3429, 1) min: 1.0 max: 11.0
Street Trees_count 0.0 0.0867005017647
shape: (9749, 1) min: 1.0 max: 10.0
Stop Sign Repair_count 0.0 0.0415382651126
shape: (4332, 1) min: 1.0 max: 9.0
Infestation Residential_count 0.0 0.0186963409589
shape: (2289, 1) min: 1.0 max: 6.0
Street Paving_count 0.0 0.0184142639036
shape: (2180, 1) min: 1.0 max: 8.0
Traffic (Other)_count 0.0 0.0526592396162
shape: (6388, 1) min: 1.0 max: 5.0
Manhole Cover_count 0.0 0.0177027613045
shape: (2095, 1) min: 1.0 max: 5.0
Line Striping_count 0.0 0.0126625945567
shape: (1510, 1) min: 1.0 max: 5.0
Street Defect_count 0.0 0.336871867497
shape: (9058, 1) min: 2.0 max: 20.0
Illegal Dumping_count 0.0 0.414588085471
shape: (11577, 1) min: 2.0 max: 39.0
Vacant Lot Clean-Up_count 0.0 0.164457860911
shape: (16078, 1) min: 1.0 max: 21.0
Other Dangerous_count 0.0 0.00676677497352
shape: (802, 1) min: 1.0 max: 6.0
Street Light Outage_count 0.0 0.188608829341
shape: (21604, 1) min: 1.0 max: 18.0
Other (Streets)_count 0.0 0.0750782700913
shape: (9132, 1) min: 1.0 max: 9.0
No Heat Residential_count 0.0 6.15649817938e-05
shape: (7, 1) min: 1.0 max: 2.0
Newsstand/Outdoor Café_count 0.0 8.46505469986e-05
shape: (11, 1) min: 1.0 max: 1.0
Dangerous Sidewalk_count 0.0 0.0396699820818
shape: (4667, 1) min: 1.0 max: 6.0
Zoning Residential_count 0.0 0.02863615932
shape: (3446, 1) min: 1.0 max: 7.0
Information Request_count 0.0 0.0655910424386
shape: (7494, 1) min: 1.0 max: 25.0
Miscellaneous_count 0.0 0.0418290602945
shape: (4958, 1) min: 1.0 max: 8.0
Abandoned Vehicle_count 0.0 0.312145489476
shape: (34798, 1) min: 1.0 max: 23.0
Hydrant Request_count 0.0 0.0136864664901
shape: (1536, 1) min: 1.0 max: 7.0
Building Construction_count 0.0 0.0567196594276
shape: (6793, 1) min: 1.0 max: 12.0
Complaint (Streets)_count 0.0 0.0394599096807
shape: (4950, 1) min: 1.0 max: 5.0
Newsstand Outdoor Cafe_count 0.0 0.00188395476985
shape: (210, 1) min: 1.0 max: 3.0
Alley Light Outage_count 0.0 0.0348547981559
shape: (4064, 1) min: 1.0 max: 7.0
Zoning Business_count 0.0 0.0462912336985
shape: (5452, 1) min: 1.0 max: 6.0
Traffic Signal Emergency_count 0.0 0.200746189401
shape: (16242, 1) min: 1.0 max: 32.0
Hydrant Knocked Down (No Water)_count 0.0 0.000715458823375
shape: (92, 1) min: 1.0 max: 2.0
Salting_count 0.0 0.216359928942
shape: (12939, 1) min: 1.0 max: 39.0
Complaints against Fire or EMS_count 0.0 0.00191455836923
shape: (231, 1) min: 1.0 max: 5.0
Fire Residential or Commercial_count 0.0 0.0305753920159
shape: (3804, 1) min: 1.0 max: 6.0
Emergency Air Conditioning_count 0.0 1.53915415775e-05
shape: (2, 1) min: 1.0 max: 1.0
Construction Site Task Force_count 0.0 0.088689266981
shape: (10010, 1) min: 1.0 max: 9.0
Sanitation / Dumpster Violation_count 0.0 0.107073421673
shape: (11494, 1) min: 1.0 max: 15.0
Parks and Rec Safety and Maintenance_count 0.0 0.0394633612716
shape: (4243, 1) min: 1.0 max: 20.0
Boarding Room House_count 0.0 0.0146052648366
shape: (1793, 1) min: 1.0 max: 4.0
Maintenance Residential or Commercial_count 0.0 0.453083343125
shape: (12840, 1) min: 2.0 max: 19.0
Abandoned Bike_count 0.0 0.00541995932444
shape: (636, 1) min: 1.0 max: 4.0
License Residential_count 0.0 0.0294604425
shape: (3542, 1) min: 1.0 max: 16.0
Daycare Residential or Commercial_count 0.0 0.00126901551345
shape: (163, 1) min: 1.0 max: 2.0
Inlet Cleaning_count 0.0 0.0593608483536
shape: (6716, 1) min: 1.0 max: 17.0
Rubbish/Recyclable Material Collection_count 0.0 0.411169530741
shape: (12558, 1) min: 2.0 max: 31.0
Vacant House or Commercial_count 0.0 0.143833873954
shape: (16159, 1) min: 1.0 max: 25.0
Building Dangerous_count 0.0 0.0698418639864
shape: (8174, 1) min: 1.0 max: 7.0
Graffiti Removal_count 0.0 0.319254701063
shape: (30675, 1) min: 1.0 max: 31.0
Dead Animal in Street_count 0.0 0.029350089466
shape: (3445, 1) min: 1.0 max: 5.0
Tree Dangerous_count 0.0 0.00560295342328
shape: (694, 1) min: 1.0 max: 4.0
Smoke Detector_count 0.0 0.0597012463269
shape: (6862, 1) min: 1.0 max: 14.0
In [55]:
col_mean = philly_311_filtered_details.mean()
col_std = philly_311_filtered_details.std()
for column in philly_311_filtered_details.columns:
if('_count' in column and 'len' not in column):
# col_mad = robust.mad(philly_311_filtered_details[column])
print(column, col_mean[column], col_std[column])
outliers = philly_311_filtered_details[philly_311_filtered_details[column]>(col_mean[column] + 3*col_std[column])][[column]]
print('shape:', outliers.shape, 'min:', min(outliers[column]), 'max:', max(outliers[column]))
monthly_311_request_count 2.49864360996 2.17218073353
shape: (3954, 1) min: 10 max: 79
Police Complaint_count 0.00246266560464 0.0516178122894
shape: (616, 1) min: 1.0 max: 3.0
Shoveling_count 0.0138986690062 0.155596905932
shape: (2803, 1) min: 1.0 max: 21.0
No Heat (Residential)_count 0.0147529061378 0.138143283835
shape: (3429, 1) min: 1.0 max: 11.0
Street Trees_count 0.0450398451599 0.25603241174
shape: (9749, 1) min: 1.0 max: 10.0
Stop Sign Repair_count 0.0211212054748 0.185981972021
shape: (4332, 1) min: 1.0 max: 9.0
Infestation Residential_count 0.00943123968278 0.103648317891
shape: (2289, 1) min: 1.0 max: 6.0
Street Paving_count 0.0092850189125 0.108981658471
shape: (2180, 1) min: 1.0 max: 8.0
Traffic (Other)_count 0.0269931237759 0.178901390131
shape: (6388, 1) min: 1.0 max: 5.0
Manhole Cover_count 0.00892331490182 0.105093954328
shape: (2095, 1) min: 1.0 max: 5.0
Line Striping_count 0.006368299337 0.0877799938629
shape: (1510, 1) min: 1.0 max: 5.0
Street Defect_count 0.194793001412 0.61803606479
shape: (3251, 1) min: 3.0 max: 20.0
Illegal Dumping_count 0.251418918659 0.710043440373
shape: (4136, 1) min: 3.0 max: 39.0
Vacant Lot Clean-Up_count 0.0876516559502 0.423710380892
shape: (4013, 1) min: 2.0 max: 21.0
Other Dangerous_count 0.0033938610364 0.0658531878688
shape: (802, 1) min: 1.0 max: 6.0
Street Light Outage_count 0.102854768144 0.392816303737
shape: (3589, 1) min: 2.0 max: 18.0
Other (Streets)_count 0.0389062686383 0.216610685336
shape: (9132, 1) min: 1.0 max: 9.0
No Heat Residential_count 3.0783320058e-05 0.00620309201298
shape: (7, 1) min: 1.0 max: 2.0
Newsstand/Outdoor Café_count 4.23270650798e-05 0.00650580020935
shape: (11, 1) min: 1.0 max: 1.0
Dangerous Sidewalk_count 0.0201977058731 0.159205041853
shape: (4667, 1) min: 1.0 max: 6.0
Zoning Residential_count 0.0145104874924 0.13243845806
shape: (3446, 1) min: 1.0 max: 7.0
Information Request_count 0.0337693021037 0.231301174236
shape: (7494, 1) min: 1.0 max: 25.0
Miscellaneous_count 0.0213212970552 0.166412160404
shape: (4958, 1) min: 1.0 max: 8.0
Abandoned Vehicle_count 0.180201707705 0.552667053516
shape: (7971, 1) min: 2.0 max: 23.0
Hydrant Request_count 0.00688391994798 0.09879769511
shape: (1536, 1) min: 1.0 max: 7.0
Building Construction_count 0.0291210207749 0.189370571392
shape: (6793, 1) min: 1.0 max: 12.0
Complaint (Streets)_count 0.0201130517429 0.149158393685
shape: (4950, 1) min: 1.0 max: 5.0
Newsstand Outdoor Cafe_count 0.000942739176777 0.0355679651954
shape: (210, 1) min: 1.0 max: 3.0
Alley Light Outage_count 0.0177042569484 0.15094926549
shape: (4064, 1) min: 1.0 max: 7.0
Zoning Business_count 0.0236415898046 0.172563471605
shape: (5452, 1) min: 1.0 max: 6.0
Traffic Signal Emergency_count 0.107064387162 0.581382970666
shape: (5370, 1) min: 2.0 max: 32.0
Hydrant Knocked Down (No Water)_count 0.000357856095675 0.0191160997808
shape: (92, 1) min: 1.0 max: 2.0
Salting_count 0.11384826132 0.733178217205
shape: (3347, 1) min: 3.0 max: 39.0
Complaints against Fire or EMS_count 0.000958130836806 0.0344687147167
shape: (231, 1) min: 1.0 max: 5.0
Fire Residential or Commercial_count 0.0155147933092 0.131639423396
shape: (3804, 1) min: 1.0 max: 6.0
Emergency Air Conditioning_count 7.69583001451e-06 0.00277413056677
shape: (2, 1) min: 1.0 max: 1.0
Construction Site Task Force_count 0.0461211092769 0.252669237978
shape: (10010, 1) min: 1.0 max: 9.0
Sanitation / Dumpster Violation_count 0.0560140987606 0.300361505321
shape: (11494, 1) min: 1.0 max: 15.0
Parks and Rec Safety and Maintenance_count 0.0200591809328 0.177224330955
shape: (4243, 1) min: 1.0 max: 20.0
Boarding Room House_count 0.00735336557886 0.0913983284837
shape: (1793, 1) min: 1.0 max: 4.0
Maintenance Residential or Commercial_count 0.286319507775 0.687931799406
shape: (4056, 1) min: 3.0 max: 19.0
Abandoned Bike_count 0.00271662799512 0.0582596761004
shape: (636, 1) min: 1.0 max: 4.0
License Residential_count 0.0149337581432 0.143198393553
shape: (3542, 1) min: 1.0 max: 16.0
Daycare Residential or Commercial_count 0.000634905976197 0.0254930781071
shape: (163, 1) min: 1.0 max: 2.0
Inlet Cleaning_count 0.0304677910274 0.208067584111
shape: (6716, 1) min: 1.0 max: 17.0
Rubbish/Recyclable Material Collection_count 0.248425240783 0.668856953731
shape: (4098, 1) min: 3.0 max: 31.0
Vacant House or Commercial_count 0.0766850981796 0.340647954989
shape: (2759, 1) min: 2.0 max: 25.0
Building Dangerous_count 0.036054963618 0.217324930479
shape: (8174, 1) min: 1.0 max: 7.0
Graffiti Removal_count 0.180990530281 0.63278681462
shape: (3559, 1) min: 3.0 max: 31.0
Dead Animal in Street_count 0.014872191503 0.13623715532
shape: (3445, 1) min: 1.0 max: 5.0
Tree Dangerous_count 0.00280897795529 0.0557578044906
shape: (694, 1) min: 1.0 max: 4.0
Smoke Detector_count 0.0306601867778 0.221599017343
shape: (6862, 1) min: 1.0 max: 14.0
In [56]:
from scipy import stats
In [72]:
philly_311_filtered_details[philly_311_filtered_details['Police Complaint_count']==0].shape
Out[72]:
(259265, 107)
In [70]:
min(stats.zscore(philly_311_filtered_details).T[3])
Out[70]:
-0.68992716405346677
In [74]:
min(stats.zscore(philly_311_filtered_details['monthly_311_request_count']))
Out[74]:
-0.68992716405346677
In [28]:
philly_311_filtered_details[philly_311_filtered_details.Month==0]
Out[28]:
SEG_ID
Month
SHAPE_LEN
monthly_311_request_count
monthly_311_request_count/len
Police Complaint_count
Police Complaint_count/len
Shoveling_count
Shoveling_count/len
No Heat (Residential)_count
...
Building Dangerous_count
Building Dangerous_count/len
Graffiti Removal_count
Graffiti Removal_count/len
Dead Animal in Street_count
Dead Animal in Street_count/len
Tree Dangerous_count
Tree Dangerous_count/len
Smoke Detector_count
Smoke Detector_count/len
0 rows × 107 columns
In [29]:
philly_311_filtered_details[philly_311_filtered_details.monthly_311_request_count==0]
Out[29]:
SEG_ID
Month
SHAPE_LEN
monthly_311_request_count
monthly_311_request_count/len
Police Complaint_count
Police Complaint_count/len
Shoveling_count
Shoveling_count/len
No Heat (Residential)_count
...
Building Dangerous_count
Building Dangerous_count/len
Graffiti Removal_count
Graffiti Removal_count/len
Dead Animal in Street_count
Dead Animal in Street_count/len
Tree Dangerous_count
Tree Dangerous_count/len
Smoke Detector_count
Smoke Detector_count/len
0 rows × 107 columns
In [37]:
philly_311_typemerge = philly_311_statistic.groupby(['SEG_ID', 'Service Name']).size().reset_index()
print(philly_311_typemerge.shape)
philly_311_typemerge.head().T
(284729, 3)
Out[37]:
0
1
2
3
4
SEG_ID
100006
100006
100006
100006
100007
Service Name
Abandoned Vehicle
Illegal Dumping
Maintenance Residential or Commercial
Vacant Lot Clean-Up
Illegal Dumping
0
1
6
2
1
2
In [38]:
philly_311_typemerge.columns = ['SEG_ID', 'Service Name', 'count']
In [44]:
philly_311_details = philly_311_statistic.groupby('SEG_ID').size().reset_index()
philly_311_details.columns = ['SEG_ID', 'philly_311_count']
print(philly_311_details.shape)
philly_311_details.head().T
(38930, 2)
Out[44]:
0
1
2
3
4
SEG_ID
100006.0
100007.0
100008.0
100009.0
100011.0
philly_311_count
10.0
3.0
13.0
4.0
6.0
In [45]:
for service in unique_services:
if(service is not None):
service_data = philly_311_typemerge[philly_311_typemerge['Service Name']==service]
service_count = service + '_count'
service_data_count = service_data[['SEG_ID', 'count']]
service_data_count.columns = ['SEG_ID', service_count]
philly_311_details = philly_311_details.merge(service_data_count, left_on = 'SEG_ID', right_on = 'SEG_ID', right_index=False, how='left')
In [46]:
print(philly_311_details.shape)
philly_311_details.head().T
(38930, 53)
Out[46]:
0
1
2
3
4
SEG_ID
100006.0
100007.0
100008.0
100009.0
100011.0
philly_311_count
10.0
3.0
13.0
4.0
6.0
Building Construction_count
NaN
NaN
NaN
NaN
NaN
Line Striping_count
NaN
NaN
NaN
NaN
NaN
Traffic (Other)_count
NaN
NaN
NaN
NaN
NaN
Zoning Business_count
NaN
NaN
NaN
NaN
NaN
Stop Sign Repair_count
NaN
NaN
NaN
NaN
NaN
Inlet Cleaning_count
NaN
NaN
NaN
NaN
NaN
Parks and Rec Safety and Maintenance_count
NaN
NaN
NaN
NaN
NaN
Abandoned Bike_count
NaN
NaN
NaN
NaN
NaN
Newsstand/Outdoor Café_count
NaN
NaN
NaN
NaN
NaN
Dangerous Sidewalk_count
NaN
NaN
NaN
NaN
NaN
Zoning Residential_count
NaN
NaN
NaN
NaN
NaN
Miscellaneous_count
NaN
NaN
NaN
NaN
NaN
Daycare Residential or Commercial_count
NaN
NaN
NaN
NaN
NaN
Street Trees_count
NaN
NaN
NaN
NaN
NaN
Hydrant Request_count
NaN
NaN
NaN
NaN
NaN
Graffiti Removal_count
NaN
NaN
NaN
NaN
NaN
Sanitation / Dumpster Violation_count
NaN
NaN
NaN
NaN
NaN
Manhole Cover_count
NaN
NaN
NaN
NaN
NaN
Maintenance Residential or Commercial_count
2.0
NaN
NaN
NaN
NaN
Illegal Dumping_count
6.0
2.0
5.0
1.0
6.0
Building Dangerous_count
NaN
NaN
NaN
NaN
NaN
Police Complaint_count
NaN
NaN
NaN
NaN
NaN
Fire Residential or Commercial_count
NaN
NaN
NaN
NaN
NaN
Abandoned Vehicle_count
1.0
NaN
NaN
1.0
NaN
Complaint (Streets)_count
NaN
NaN
NaN
NaN
NaN
Boarding Room House_count
NaN
NaN
NaN
NaN
NaN
Smoke Detector_count
NaN
NaN
NaN
NaN
NaN
Other Dangerous_count
NaN
NaN
NaN
NaN
NaN
Tree Dangerous_count
NaN
NaN
NaN
NaN
NaN
Vacant Lot Clean-Up_count
1.0
NaN
1.0
NaN
NaN
Information Request_count
NaN
NaN
NaN
NaN
NaN
Street Defect_count
NaN
NaN
NaN
2.0
NaN
Rubbish/Recyclable Material Collection_count
NaN
1.0
NaN
NaN
NaN
License Residential_count
NaN
NaN
NaN
NaN
NaN
Newsstand Outdoor Cafe_count
NaN
NaN
NaN
NaN
NaN
Complaints against Fire or EMS_count
NaN
NaN
NaN
NaN
NaN
No Heat Residential_count
NaN
NaN
NaN
NaN
NaN
Traffic Signal Emergency_count
NaN
NaN
NaN
NaN
NaN
Emergency Air Conditioning_count
NaN
NaN
NaN
NaN
NaN
Other (Streets)_count
NaN
NaN
NaN
NaN
NaN
Dead Animal in Street_count
NaN
NaN
NaN
NaN
NaN
Construction Site Task Force_count
NaN
NaN
NaN
NaN
NaN
Alley Light Outage_count
NaN
NaN
NaN
NaN
NaN
Shoveling_count
NaN
NaN
NaN
NaN
NaN
Hydrant Knocked Down (No Water)_count
NaN
NaN
NaN
NaN
NaN
Street Paving_count
NaN
NaN
NaN
NaN
NaN
Vacant House or Commercial_count
NaN
NaN
NaN
NaN
NaN
Salting_count
NaN
NaN
1.0
NaN
NaN
Street Light Outage_count
NaN
NaN
6.0
NaN
NaN
Infestation Residential_count
NaN
NaN
NaN
NaN
NaN
No Heat (Residential)_count
NaN
NaN
NaN
NaN
NaN
In [47]:
philly_311_details = philly_311_details.fillna(0)
In [48]:
print(philly_311_details.shape)
philly_311_details.head().T
(38930, 53)
Out[48]:
0
1
2
3
4
SEG_ID
100006.0
100007.0
100008.0
100009.0
100011.0
philly_311_count
10.0
3.0
13.0
4.0
6.0
Building Construction_count
0.0
0.0
0.0
0.0
0.0
Line Striping_count
0.0
0.0
0.0
0.0
0.0
Traffic (Other)_count
0.0
0.0
0.0
0.0
0.0
Zoning Business_count
0.0
0.0
0.0
0.0
0.0
Stop Sign Repair_count
0.0
0.0
0.0
0.0
0.0
Inlet Cleaning_count
0.0
0.0
0.0
0.0
0.0
Parks and Rec Safety and Maintenance_count
0.0
0.0
0.0
0.0
0.0
Abandoned Bike_count
0.0
0.0
0.0
0.0
0.0
Newsstand/Outdoor Café_count
0.0
0.0
0.0
0.0
0.0
Dangerous Sidewalk_count
0.0
0.0
0.0
0.0
0.0
Zoning Residential_count
0.0
0.0
0.0
0.0
0.0
Miscellaneous_count
0.0
0.0
0.0
0.0
0.0
Daycare Residential or Commercial_count
0.0
0.0
0.0
0.0
0.0
Street Trees_count
0.0
0.0
0.0
0.0
0.0
Hydrant Request_count
0.0
0.0
0.0
0.0
0.0
Graffiti Removal_count
0.0
0.0
0.0
0.0
0.0
Sanitation / Dumpster Violation_count
0.0
0.0
0.0
0.0
0.0
Manhole Cover_count
0.0
0.0
0.0
0.0
0.0
Maintenance Residential or Commercial_count
2.0
0.0
0.0
0.0
0.0
Illegal Dumping_count
6.0
2.0
5.0
1.0
6.0
Building Dangerous_count
0.0
0.0
0.0
0.0
0.0
Police Complaint_count
0.0
0.0
0.0
0.0
0.0
Fire Residential or Commercial_count
0.0
0.0
0.0
0.0
0.0
Abandoned Vehicle_count
1.0
0.0
0.0
1.0
0.0
Complaint (Streets)_count
0.0
0.0
0.0
0.0
0.0
Boarding Room House_count
0.0
0.0
0.0
0.0
0.0
Smoke Detector_count
0.0
0.0
0.0
0.0
0.0
Other Dangerous_count
0.0
0.0
0.0
0.0
0.0
Tree Dangerous_count
0.0
0.0
0.0
0.0
0.0
Vacant Lot Clean-Up_count
1.0
0.0
1.0
0.0
0.0
Information Request_count
0.0
0.0
0.0
0.0
0.0
Street Defect_count
0.0
0.0
0.0
2.0
0.0
Rubbish/Recyclable Material Collection_count
0.0
1.0
0.0
0.0
0.0
License Residential_count
0.0
0.0
0.0
0.0
0.0
Newsstand Outdoor Cafe_count
0.0
0.0
0.0
0.0
0.0
Complaints against Fire or EMS_count
0.0
0.0
0.0
0.0
0.0
No Heat Residential_count
0.0
0.0
0.0
0.0
0.0
Traffic Signal Emergency_count
0.0
0.0
0.0
0.0
0.0
Emergency Air Conditioning_count
0.0
0.0
0.0
0.0
0.0
Other (Streets)_count
0.0
0.0
0.0
0.0
0.0
Dead Animal in Street_count
0.0
0.0
0.0
0.0
0.0
Construction Site Task Force_count
0.0
0.0
0.0
0.0
0.0
Alley Light Outage_count
0.0
0.0
0.0
0.0
0.0
Shoveling_count
0.0
0.0
0.0
0.0
0.0
Hydrant Knocked Down (No Water)_count
0.0
0.0
0.0
0.0
0.0
Street Paving_count
0.0
0.0
0.0
0.0
0.0
Vacant House or Commercial_count
0.0
0.0
0.0
0.0
0.0
Salting_count
0.0
0.0
1.0
0.0
0.0
Street Light Outage_count
0.0
0.0
6.0
0.0
0.0
Infestation Residential_count
0.0
0.0
0.0
0.0
0.0
No Heat (Residential)_count
0.0
0.0
0.0
0.0
0.0
In [49]:
street_gpd[['SEG_ID', 'LENGTH']]
Out[49]:
SEG_ID
LENGTH
0
420708
449.863074
1
422065
540.083021
2
420702
446.104120
3
420732
447.261056
4
420718
148.216438
5
420696
319.500352
6
420694
124.069018
7
420524
94.837989
8
422066
1028.697065
9
420562
446.277300
10
420568
448.559252
11
422341
223.095781
12
422975
112.226608
13
420515
441.187740
14
420502
101.568770
15
420488
445.023635
16
420525
446.787387
17
420554
449.023627
18
420586
152.766841
19
420587
447.025403
20
420573
149.831052
21
420512
103.098997
22
420503
447.499575
23
420493
101.221702
24
420402
454.642058
25
420418
444.112916
26
420440
454.073330
27
420463
447.944548
28
420478
129.313148
29
421808
253.397888
...
...
...
40992
741907
885.992828
40993
741903
512.037165
40994
741915
678.697781
40995
741908
257.293050
40996
741913
1122.488488
40997
840535
273.010041
40998
840531
247.792254
40999
840533
343.635411
41000
840529
181.356970
41001
741910
498.241151
41002
742463
511.517819
41003
742126
270.046479
41004
840017
577.410515
41005
741970
225.056910
41006
742114
187.142636
41007
742110
246.997136
41008
741820
246.796251
41009
741730
289.560017
41010
741718
182.961166
41011
741720
171.259505
41012
500021
195.506310
41013
422980
282.918303
41014
422981
281.687421
41015
400719
280.132471
41016
400657
288.947004
41017
401184
252.648180
41018
400675
261.884400
41019
400938
579.004831
41020
400824
241.523846
41021
400683
349.594759
41022 rows × 2 columns
In [50]:
philly_311_details = philly_311_details.merge(street_gpd[['SEG_ID', 'LENGTH']], left_on = 'SEG_ID', right_on = 'SEG_ID', right_index=False, how='left')
In [51]:
print(philly_311_details.shape)
philly_311_details.head().T
(38930, 54)
Out[51]:
0
1
2
3
4
SEG_ID
100006.000000
100007.000000
100008.000000
100009.000000
100011.000000
philly_311_count
10.000000
3.000000
13.000000
4.000000
6.000000
Building Construction_count
0.000000
0.000000
0.000000
0.000000
0.000000
Line Striping_count
0.000000
0.000000
0.000000
0.000000
0.000000
Traffic (Other)_count
0.000000
0.000000
0.000000
0.000000
0.000000
Zoning Business_count
0.000000
0.000000
0.000000
0.000000
0.000000
Stop Sign Repair_count
0.000000
0.000000
0.000000
0.000000
0.000000
Inlet Cleaning_count
0.000000
0.000000
0.000000
0.000000
0.000000
Parks and Rec Safety and Maintenance_count
0.000000
0.000000
0.000000
0.000000
0.000000
Abandoned Bike_count
0.000000
0.000000
0.000000
0.000000
0.000000
Newsstand/Outdoor Café_count
0.000000
0.000000
0.000000
0.000000
0.000000
Dangerous Sidewalk_count
0.000000
0.000000
0.000000
0.000000
0.000000
Zoning Residential_count
0.000000
0.000000
0.000000
0.000000
0.000000
Miscellaneous_count
0.000000
0.000000
0.000000
0.000000
0.000000
Daycare Residential or Commercial_count
0.000000
0.000000
0.000000
0.000000
0.000000
Street Trees_count
0.000000
0.000000
0.000000
0.000000
0.000000
Hydrant Request_count
0.000000
0.000000
0.000000
0.000000
0.000000
Graffiti Removal_count
0.000000
0.000000
0.000000
0.000000
0.000000
Sanitation / Dumpster Violation_count
0.000000
0.000000
0.000000
0.000000
0.000000
Manhole Cover_count
0.000000
0.000000
0.000000
0.000000
0.000000
Maintenance Residential or Commercial_count
2.000000
0.000000
0.000000
0.000000
0.000000
Illegal Dumping_count
6.000000
2.000000
5.000000
1.000000
6.000000
Building Dangerous_count
0.000000
0.000000
0.000000
0.000000
0.000000
Police Complaint_count
0.000000
0.000000
0.000000
0.000000
0.000000
Fire Residential or Commercial_count
0.000000
0.000000
0.000000
0.000000
0.000000
Abandoned Vehicle_count
1.000000
0.000000
0.000000
1.000000
0.000000
Complaint (Streets)_count
0.000000
0.000000
0.000000
0.000000
0.000000
Boarding Room House_count
0.000000
0.000000
0.000000
0.000000
0.000000
Smoke Detector_count
0.000000
0.000000
0.000000
0.000000
0.000000
Other Dangerous_count
0.000000
0.000000
0.000000
0.000000
0.000000
Tree Dangerous_count
0.000000
0.000000
0.000000
0.000000
0.000000
Vacant Lot Clean-Up_count
1.000000
0.000000
1.000000
0.000000
0.000000
Information Request_count
0.000000
0.000000
0.000000
0.000000
0.000000
Street Defect_count
0.000000
0.000000
0.000000
2.000000
0.000000
Rubbish/Recyclable Material Collection_count
0.000000
1.000000
0.000000
0.000000
0.000000
License Residential_count
0.000000
0.000000
0.000000
0.000000
0.000000
Newsstand Outdoor Cafe_count
0.000000
0.000000
0.000000
0.000000
0.000000
Complaints against Fire or EMS_count
0.000000
0.000000
0.000000
0.000000
0.000000
No Heat Residential_count
0.000000
0.000000
0.000000
0.000000
0.000000
Traffic Signal Emergency_count
0.000000
0.000000
0.000000
0.000000
0.000000
Emergency Air Conditioning_count
0.000000
0.000000
0.000000
0.000000
0.000000
Other (Streets)_count
0.000000
0.000000
0.000000
0.000000
0.000000
Dead Animal in Street_count
0.000000
0.000000
0.000000
0.000000
0.000000
Construction Site Task Force_count
0.000000
0.000000
0.000000
0.000000
0.000000
Alley Light Outage_count
0.000000
0.000000
0.000000
0.000000
0.000000
Shoveling_count
0.000000
0.000000
0.000000
0.000000
0.000000
Hydrant Knocked Down (No Water)_count
0.000000
0.000000
0.000000
0.000000
0.000000
Street Paving_count
0.000000
0.000000
0.000000
0.000000
0.000000
Vacant House or Commercial_count
0.000000
0.000000
0.000000
0.000000
0.000000
Salting_count
0.000000
0.000000
1.000000
0.000000
0.000000
Street Light Outage_count
0.000000
0.000000
6.000000
0.000000
0.000000
Infestation Residential_count
0.000000
0.000000
0.000000
0.000000
0.000000
No Heat (Residential)_count
0.000000
0.000000
0.000000
0.000000
0.000000
LENGTH
735.818898
735.209046
728.727137
281.855704
571.366785
In [52]:
street_gpd[street_gpd.SEG_ID==100007]
Out[52]:
CLASS
FNODE_
LENGTH
LPOLY_
L_F_ADD
L_HUNDRED
L_T_ADD
MULTI_REP
NEWSEGDATE
ONEWAY
...
STREETLABE
ST_CODE
ST_NAME
ST_TYPE
SUF_DIR
TNODE_
UPDATE_
ZIP_LEFT
ZIP_RIGHT
geometry
12299
5
7085
735.209046
0
8500
8500
8598
0
None
B
...
HARLEY PL
40570
HARLEY
PL
None
7083
2006-04-07
19153
19153
LINESTRING (-75.24647412253933 39.892658786204...
1 rows × 31 columns
In [55]:
for service in unique_services:
if(service is not None):
service_count = service + '_count'
service_countlen = service + '_count/len'
philly_311_details[service_countlen] = philly_311_details[service_count]/philly_311_details.LENGTH
In [56]:
philly_311_details['philly_311_count/len'] = philly_311_details['philly_311_count']/philly_311_details.LENGTH
In [58]:
normalize('philly_311_count', philly_311_details)
normalize('philly_311_count/len', philly_311_details)
In [59]:
for service in unique_services:
if(service is not None):
service_count = service + '_count'
service_countlen = service + '_count/len'
normalize(service_count, philly_311_details)
normalize(service_countlen, philly_311_details)
In [60]:
print(philly_311_details.shape)
philly_311_details.head().T
(38930, 210)
Out[60]:
0
1
2
3
4
SEG_ID
100006.0
100007.0
100008.000000
100009.0
100011.0
philly_311_count
10.0
3.0
13.000000
4.0
6.0
Building Construction_count
0.0
0.0
0.000000
0.0
0.0
Line Striping_count
0.0
0.0
0.000000
0.0
0.0
Traffic (Other)_count
0.0
0.0
0.000000
0.0
0.0
Zoning Business_count
0.0
0.0
0.000000
0.0
0.0
Stop Sign Repair_count
0.0
0.0
0.000000
0.0
0.0
Inlet Cleaning_count
0.0
0.0
0.000000
0.0
0.0
Parks and Rec Safety and Maintenance_count
0.0
0.0
0.000000
0.0
0.0
Abandoned Bike_count
0.0
0.0
0.000000
0.0
0.0
Newsstand/Outdoor Café_count
0.0
0.0
0.000000
0.0
0.0
Dangerous Sidewalk_count
0.0
0.0
0.000000
0.0
0.0
Zoning Residential_count
0.0
0.0
0.000000
0.0
0.0
Miscellaneous_count
0.0
0.0
0.000000
0.0
0.0
Daycare Residential or Commercial_count
0.0
0.0
0.000000
0.0
0.0
Street Trees_count
0.0
0.0
0.000000
0.0
0.0
Hydrant Request_count
0.0
0.0
0.000000
0.0
0.0
Graffiti Removal_count
0.0
0.0
0.000000
0.0
0.0
Sanitation / Dumpster Violation_count
0.0
0.0
0.000000
0.0
0.0
Manhole Cover_count
0.0
0.0
0.000000
0.0
0.0
Maintenance Residential or Commercial_count
2.0
0.0
0.000000
0.0
0.0
Illegal Dumping_count
6.0
2.0
5.000000
1.0
6.0
Building Dangerous_count
0.0
0.0
0.000000
0.0
0.0
Police Complaint_count
0.0
0.0
0.000000
0.0
0.0
Fire Residential or Commercial_count
0.0
0.0
0.000000
0.0
0.0
Abandoned Vehicle_count
1.0
0.0
0.000000
1.0
0.0
Complaint (Streets)_count
0.0
0.0
0.000000
0.0
0.0
Boarding Room House_count
0.0
0.0
0.000000
0.0
0.0
Smoke Detector_count
0.0
0.0
0.000000
0.0
0.0
Other Dangerous_count
0.0
0.0
0.000000
0.0
0.0
...
...
...
...
...
...
No Heat Residential_count_norm
0.0
0.0
0.000000
0.0
0.0
No Heat Residential_count/len_norm
0.0
0.0
0.000000
0.0
0.0
Traffic Signal Emergency_count_norm
0.0
0.0
0.000000
0.0
0.0
Traffic Signal Emergency_count/len_norm
0.0
0.0
0.000000
0.0
0.0
Emergency Air Conditioning_count_norm
0.0
0.0
0.000000
0.0
0.0
Emergency Air Conditioning_count/len_norm
0.0
0.0
0.000000
0.0
0.0
Other (Streets)_count_norm
0.0
0.0
0.000000
0.0
0.0
Other (Streets)_count/len_norm
0.0
0.0
0.000000
0.0
0.0
Dead Animal in Street_count_norm
0.0
0.0
0.000000
0.0
0.0
Dead Animal in Street_count/len_norm
0.0
0.0
0.000000
0.0
0.0
Construction Site Task Force_count_norm
0.0
0.0
0.000000
0.0
0.0
Construction Site Task Force_count/len_norm
0.0
0.0
0.000000
0.0
0.0
Alley Light Outage_count_norm
0.0
0.0
0.000000
0.0
0.0
Alley Light Outage_count/len_norm
0.0
0.0
0.000000
0.0
0.0
Shoveling_count_norm
0.0
0.0
0.000000
0.0
0.0
Shoveling_count/len_norm
0.0
0.0
0.000000
0.0
0.0
Hydrant Knocked Down (No Water)_count_norm
0.0
0.0
0.000000
0.0
0.0
Hydrant Knocked Down (No Water)_count/len_norm
0.0
0.0
0.000000
0.0
0.0
Street Paving_count_norm
0.0
0.0
0.000000
0.0
0.0
Street Paving_count/len_norm
0.0
0.0
0.000000
0.0
0.0
Vacant House or Commercial_count_norm
0.0
0.0
0.000000
0.0
0.0
Vacant House or Commercial_count/len_norm
0.0
0.0
0.000000
0.0
0.0
Salting_count_norm
0.0
0.0
0.186652
0.0
0.0
Salting_count/len_norm
0.0
0.0
0.004128
0.0
0.0
Street Light Outage_count_norm
0.0
0.0
0.527507
0.0
0.0
Street Light Outage_count/len_norm
0.0
0.0
0.008452
0.0
0.0
Infestation Residential_count_norm
0.0
0.0
0.000000
0.0
0.0
Infestation Residential_count/len_norm
0.0
0.0
0.000000
0.0
0.0
No Heat (Residential)_count_norm
0.0
0.0
0.000000
0.0
0.0
No Heat (Residential)_count/len_norm
0.0
0.0
0.000000
0.0
0.0
210 rows × 5 columns
In [61]:
philly_311_details.to_csv('../philly/Philly_311_Details.csv')
In [24]:
philly_311_filtered_details.to_csv('../philly/Philly_311_Details_2015-16.csv')
In [75]:
for column in philly_311_filtered_details.columns:
if('_count' in column):
col_norm = column + '_norm'
philly_311_filtered_details[col_norm] = stats.zscore(philly_311_filtered_details[column])
In [76]:
print(philly_311_filtered_details.shape)
philly_311_filtered_details.head().T
(259881, 211)
Out[76]:
0
1
2
3
4
SEG_ID
100006.000000
100006.000000
100006.000000
100006.000000
100006.000000
Month
2.000000
3.000000
4.000000
5.000000
6.000000
SHAPE_LEN
735.818883
735.818883
735.818883
735.818883
735.818883
monthly_311_request_count
1.000000
3.000000
1.000000
1.000000
1.000000
monthly_311_request_count/len
0.001359
0.004077
0.001359
0.001359
0.001359
Police Complaint_count
0.000000
0.000000
0.000000
0.000000
0.000000
Police Complaint_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Shoveling_count
0.000000
0.000000
0.000000
0.000000
0.000000
Shoveling_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
No Heat (Residential)_count
0.000000
0.000000
0.000000
0.000000
0.000000
No Heat (Residential)_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Street Trees_count
0.000000
0.000000
0.000000
0.000000
0.000000
Street Trees_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Stop Sign Repair_count
0.000000
0.000000
0.000000
0.000000
0.000000
Stop Sign Repair_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Infestation Residential_count
0.000000
0.000000
0.000000
0.000000
0.000000
Infestation Residential_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Street Paving_count
0.000000
0.000000
0.000000
0.000000
0.000000
Street Paving_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Traffic (Other)_count
0.000000
0.000000
0.000000
0.000000
0.000000
Traffic (Other)_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Manhole Cover_count
0.000000
0.000000
0.000000
0.000000
0.000000
Manhole Cover_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Line Striping_count
0.000000
0.000000
0.000000
0.000000
0.000000
Line Striping_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Street Defect_count
0.000000
0.000000
0.000000
0.000000
0.000000
Street Defect_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Illegal Dumping_count
1.000000
3.000000
1.000000
0.000000
0.000000
Illegal Dumping_count/len
0.001359
0.004077
0.001359
0.000000
0.000000
Vacant Lot Clean-Up_count
0.000000
0.000000
0.000000
0.000000
1.000000
...
...
...
...
...
...
Sanitation / Dumpster Violation_count_norm
-0.186489
-0.186489
-0.186489
-0.186489
-0.186489
Sanitation / Dumpster Violation_count/len_norm
-0.145794
-0.145794
-0.145794
-0.145794
-0.145794
Parks and Rec Safety and Maintenance_count_norm
-0.113185
-0.113185
-0.113185
-0.113185
-0.113185
Parks and Rec Safety and Maintenance_count/len_norm
-0.077007
-0.077007
-0.077007
-0.077007
-0.077007
Boarding Room House_count_norm
-0.080454
-0.080454
-0.080454
-0.080454
-0.080454
Boarding Room House_count/len_norm
-0.060947
-0.060947
-0.060947
-0.060947
-0.060947
Maintenance Residential or Commercial_count_norm
-0.416204
-0.416204
-0.416204
1.037431
-0.416204
Maintenance Residential or Commercial_count/len_norm
-0.330482
-0.330482
-0.330482
0.122176
-0.330482
Abandoned Bike_count_norm
-0.046630
-0.046630
-0.046630
-0.046630
-0.046630
Abandoned Bike_count/len_norm
-0.032966
-0.032966
-0.032966
-0.032966
-0.032966
License Residential_count_norm
-0.104287
-0.104287
-0.104287
-0.104287
-0.104287
License Residential_count/len_norm
-0.086630
-0.086630
-0.086630
-0.086630
-0.086630
Daycare Residential or Commercial_count_norm
-0.024905
-0.024905
-0.024905
-0.024905
-0.024905
Daycare Residential or Commercial_count/len_norm
-0.021147
-0.021147
-0.021147
-0.021147
-0.021147
Inlet Cleaning_count_norm
-0.146432
-0.146432
-0.146432
-0.146432
-0.146432
Inlet Cleaning_count/len_norm
-0.109299
-0.109299
-0.109299
-0.109299
-0.109299
Rubbish/Recyclable Material Collection_count_norm
-0.371418
-0.371418
-0.371418
-0.371418
-0.371418
Rubbish/Recyclable Material Collection_count/len_norm
-0.283251
-0.283251
-0.283251
-0.283251
-0.283251
Vacant House or Commercial_count_norm
-0.225116
-0.225116
-0.225116
-0.225116
-0.225116
Vacant House or Commercial_count/len_norm
-0.181217
-0.181217
-0.181217
-0.181217
-0.181217
Building Dangerous_count_norm
-0.165904
-0.165904
-0.165904
-0.165904
-0.165904
Building Dangerous_count/len_norm
-0.127278
-0.127278
-0.127278
-0.127278
-0.127278
Graffiti Removal_count_norm
-0.286022
-0.286022
-0.286022
-0.286022
-0.286022
Graffiti Removal_count/len_norm
-0.212658
-0.212658
-0.212658
-0.212658
-0.212658
Dead Animal in Street_count_norm
-0.109164
-0.109164
-0.109164
-0.109164
-0.109164
Dead Animal in Street_count/len_norm
-0.080124
-0.080124
-0.080124
-0.080124
-0.080124
Tree Dangerous_count_norm
-0.050378
-0.050378
-0.050378
-0.050378
-0.050378
Tree Dangerous_count/len_norm
-0.041106
-0.041106
-0.041106
-0.041106
-0.041106
Smoke Detector_count_norm
-0.138359
-0.138359
-0.138359
-0.138359
-0.138359
Smoke Detector_count/len_norm
-0.119088
-0.119088
-0.119088
-0.119088
-0.119088
211 rows × 5 columns
In [78]:
for column in philly_311_filtered_details.columns:
if('_norm' in column):
print(column, min(philly_311_filtered_details[column]), max(philly_311_filtered_details[column]))
monthly_311_request_count_norm -0.689927164053 35.2187561537
monthly_311_request_count/len_norm -0.779613092677 42.3705330788
Police Complaint_count_norm -0.0477097000725 58.0718741962
Police Complaint_count/len_norm -0.0365367943396 139.545227568
Shoveling_count_norm -0.0893250136533 134.87505797
Shoveling_count/len_norm -0.0729810036323 120.600319623
No Heat (Residential)_count_norm -0.106794439167 79.5208273908
No Heat (Residential)_count/len_norm -0.0850329274997 100.795963715
Street Trees_count_norm -0.175914961348 38.8817151711
Street Trees_count/len_norm -0.142219561423 63.4074541247
Stop Sign Repair_count_norm -0.11356609397 48.2783141396
Stop Sign Repair_count/len_norm -0.077681619574 253.774281143
Infestation Residential_count_norm -0.0909928691569 57.7971780718
Infestation Residential_count/len_norm -0.0708922696913 189.874606044
Street Paving_count_norm -0.0851981600099 73.321790722
Street Paving_count/len_norm -0.066479650637 150.539561166
Traffic (Other)_count_norm -0.150882984698 27.7975282387
Traffic (Other)_count/len_norm -0.0912494097477 290.894189486
Manhole Cover_count_norm -0.0849081388838 47.491659436
Manhole Cover_count/len_norm -0.0681539855817 103.206987297
Line Striping_count_norm -0.0725485535954 56.8881482951
Line Striping_count/len_norm -0.0561436307958 91.1827138265
Street Defect_count_norm -0.315181244728 32.0454520884
Street Defect_count/len_norm -0.223820981313 99.1461195226
Illegal Dumping_count_norm -0.354090169818 54.572232386
Illegal Dumping_count/len_norm -0.268451477023 60.3373109526
Vacant Lot Clean-Up_count_norm -0.206867304984 49.3553840589
Vacant Lot Clean-Up_count/len_norm -0.170259413745 82.6539386603
Other Dangerous_count_norm -0.0515368758278 91.0604007228
Other Dangerous_count/len_norm -0.0410178757012 207.438566183
Street Light Outage_count_norm -0.261839860143 45.5611936038
Street Light Outage_count/len_norm -0.1709886269 155.18371743
Other (Streets)_count_norm -0.179614147068 41.3696626197
Other (Streets)_count/len_norm -0.134652026566 75.9071345802
No Heat Residential_count_norm -0.00496258627467 322.415508326
No Heat Residential_count/len_norm -0.00442989470239 339.493509286
Newsstand/Outdoor Café_count_norm -0.00650606307501 153.702782846
Newsstand/Outdoor Café_count/len_norm -0.00406602418767 309.999280543
Dangerous Sidewalk_count_norm -0.126866238014 37.5604549295
Dangerous Sidewalk_count/len_norm -0.0966154770161 62.9263730743
Zoning Residential_count_norm -0.109564212862 52.7452754633
Zoning Residential_count/len_norm -0.088698798232 68.6964349438
Information Request_count_norm -0.145997387113 107.938400289
Information Request_count/len_norm -0.119044302357 101.856299531
Miscellaneous_count_norm -0.12812367813 47.9453787167
Miscellaneous_count/len_norm -0.0974914036087 153.389412489
Abandoned Vehicle_count_norm -0.32605897757 41.2903972682
Abandoned Vehicle_count/len_norm -0.244258676943 118.163122367
Hydrant Request_count_norm -0.0696770626552 70.7823145748
Hydrant Request_count/len_norm -0.0577214944687 76.1783915521
Building Construction_count_norm -0.153778259149 63.2141621731
Building Construction_count/len_norm -0.108211905281 218.007219637
Complaint (Streets)_count_norm -0.134843839108 33.3866328697
Complaint (Streets)_count/len_norm -0.101644524342 75.9938876334
Newsstand Outdoor Cafe_count_norm -0.0265053394365 84.3192185555
Newsstand Outdoor Cafe_count/len_norm -0.0208176928129 159.427640962
Alley Light Outage_count_norm -0.117286367398 46.2559996837
Alley Light Outage_count/len_norm -0.0854695099406 181.317233055
Zoning Business_count_norm -0.137002547933 34.6328794437
Zoning Business_count/len_norm -0.105893773368 133.619629989
Traffic Signal Emergency_count_norm -0.184155020962 54.8571227279
Traffic Signal Emergency_count/len_norm -0.131789112463 99.4042726687
Hydrant Knocked Down (No Water)_count_norm -0.0187201776659 104.605333414
Hydrant Knocked Down (No Water)_count/len_norm -0.015961216061 172.197705606
Salting_count_norm -0.155280773061 53.0378912543
Salting_count/len_norm -0.129431112944 117.384971593
Complaints against Fire or EMS_count_norm -0.0277971687686 145.031560597
Complaints against Fire or EMS_count/len_norm -0.022241283162 236.882622388
Fire Residential or Commercial_count_norm -0.11785848615 45.4612802627
Fire Residential or Commercial_count/len_norm -0.0908852685176 95.3223871801
Emergency Air Conditioning_count_norm -0.00277414657882 360.471219378
Emergency Air Conditioning_count/len_norm -0.00275720150833 398.050783935
Construction Site Task Force_count_norm -0.182535865391 35.4372229452
Construction Site Task Force_count/len_norm -0.139693250622 88.8054084638
Sanitation / Dumpster Violation_count_norm -0.186489298853 49.7534284127
Sanitation / Dumpster Violation_count/len_norm -0.145794420518 73.9825456726
Parks and Rec Safety and Maintenance_count_norm -0.11318547187 112.738353431
Parks and Rec Safety and Maintenance_count/len_norm -0.0770067773422 186.964623433
Boarding Room House_count_norm -0.0804542035772 43.6841065078
Boarding Room House_count/len_norm -0.0609471522109 159.553224
Maintenance Residential or Commercial_count_norm -0.416204133738 27.2028659131
Maintenance Residential or Commercial_count/len_norm -0.33048177417 76.9865384404
Abandoned Bike_count_norm -0.0466297343833 68.6116252304
Abandoned Bike_count/len_norm -0.0329661655272 218.308354334
License Residential_count_norm -0.104287391112 111.629024599
License Residential_count/len_norm -0.0866297439132 103.895879792
Daycare Residential or Commercial_count_norm -0.024905081884 78.4279141313
Daycare Residential or Commercial_count/len_norm -0.0211468545545 121.085527003
Inlet Cleaning_count_norm -0.146432467011 81.5579463291
Inlet Cleaning_count/len_norm -0.109299054031 142.655896164
Rubbish/Recyclable Material Collection_count_norm -0.371418309039 45.9763986193
Rubbish/Recyclable Material Collection_count/len_norm -0.28325084876 79.8759066076
Vacant House or Commercial_count_norm -0.225115825872 73.1645750058
Vacant House or Commercial_count/len_norm -0.181217234627 59.3781243153
Building Dangerous_count_norm -0.165903805454 32.0439924652
Building Dangerous_count/len_norm -0.127278239611 88.6309655539
Graffiti Removal_count_norm -0.286021886549 48.7037151414
Graffiti Removal_count/len_norm -0.212657672673 139.783174099
Dead Animal in Street_count_norm -0.109164200336 36.5916139982
Dead Animal in Street_count/len_norm -0.080123656397 134.208952019
Tree Dangerous_count_norm -0.0503782992411 71.6885958656
Tree Dangerous_count/len_norm -0.0411060326281 157.072526058
Smoke Detector_count_norm -0.138359123314 63.0389378853
Smoke Detector_count/len_norm -0.119087816845 74.5557075665
In [82]:
for column in philly_311_filtered_details.columns:
if('_count' in column):
zscore_nonzero = stats.zscore(philly_311_filtered_details[philly_311_filtered_details[column]>0][column])
print(column, min(zscore_nonzero), max(zscore_nonzero))
monthly_311_request_count -0.689927164053 35.2187561537
monthly_311_request_count/len -0.779613092677 42.3705330788
Police Complaint_count -0.179364382667 9.02800726089
Police Complaint_count/len -1.05513089828 9.13275478528
Shoveling_count -0.371975372009 25.4042166612
Shoveling_count/len -0.947297113637 16.5405448416
No Heat (Residential)_count -0.256128713431 21.4294356904
No Heat (Residential)_count/len -1.00273225035 16.0074091349
Street Trees_count -0.334397270157 14.6657821168
Street Trees_count/len -1.02481487056 16.6877814458
Stop Sign Repair_count -0.379115313581 10.9766498614
Stop Sign Repair_count/len -0.723466532879 40.090368779
Infestation Residential_count -0.245304232852 17.0849855511
Infestation Residential_count/len -1.04400604562 25.89997719
Street Paving_count -0.238419975602 15.3765535338
Street Paving_count/len -1.0259313961 18.910073485
Traffic (Other)_count -0.276760076888 11.0019855126
Traffic (Other)_count/len -0.690869864194 55.0411816463
Manhole Cover_count -0.271863079292 9.89872890493
Manhole Cover_count/len -1.11155146176 13.0065018324
Line Striping_count -0.264466033854 10.7519122039
Line Striping_count/len -1.0613128437 9.16172565428
Street Defect_count -0.432997403288 18.2805347201
Street Defect_count/len -0.726030067635 43.5929918352
Illegal Dumping_count -0.398101741377 34.5748809805
Illegal Dumping_count/len -0.776248027367 30.4305339284
Vacant Lot Clean-Up_count -0.412918779025 19.4017600127
Vacant Lot Clean-Up_count/len -0.891611218403 26.6041948007
Other Dangerous_count -0.223313890363 10.9702948641
Other Dangerous_count/len -0.98274733096 15.9663227625
Street Light Outage_count -0.352689121573 24.9168052799
Street Light Outage_count/len -0.704647966384 53.6969878016
Other (Streets)_count -0.274582569867 20.2156158205
Other (Streets)_count/len -0.992249210352 19.1018537383
No Heat Residential_count -0.408248290464 2.44948974278
No Heat Residential_count/len -0.819936625974 1.74353061612
Newsstand/Outdoor Café_count nan nan
Newsstand/Outdoor Café_count/len -0.729879347468 1.78296677
Dangerous Sidewalk_count -0.3032142567 11.8540102795
Dangerous Sidewalk_count/len -0.991539890586 11.0403561095
Zoning Residential_count -0.251045201764 15.7200643111
Zoning Residential_count/len -1.11798256102 11.105809008
Information Request_count -0.236449988055 32.9358971661
Information Request_count/len -0.950794736914 22.9816690753
Miscellaneous_count -0.247133999506 14.4647572301
Miscellaneous_count/len -0.95692242021 28.660944895
Abandoned Vehicle_count -0.409676114241 25.6544813954
Abandoned Vehicle_count/len -0.827792727306 54.4390512642
Hydrant Request_count -0.299257859911 10.6017715351
Hydrant Request_count/len -1.07031251783 7.70719398418
Building Construction_count -0.282414524025 26.9470828679
Building Construction_count/len -0.862290444537 46.0759108452
Complaint (Streets)_count -0.205375661542 14.4749062826
Complaint (Streets)_count/len -1.03998870486 14.2763268942
Newsstand Outdoor Cafe_count -0.367607311047 4.04368042152
Newsstand Outdoor Cafe_count/len -0.91822278622 5.57820033749
Alley Light Outage_count -0.298934065533 13.2750105414
Alley Light Outage_count/len -0.873459337144 29.9352109464
Zoning Business_count -0.302450787885 11.6120123303
Zoning Business_count/len -1.01069306406 26.9943233391
Traffic Signal Emergency_count -0.437469985045 18.5805763291
Traffic Signal Emergency_count/len -0.600541765821 28.324005926
Hydrant Knocked Down (No Water)_count -0.104828483672 9.53939201417
Hydrant Knocked Down (No Water)_count/len -1.45970240845 4.51535486109
Salting_count -0.532956843312 15.2073895426
Salting_count/len -0.686870665176 31.0881950086
Complaints against Fire or EMS_count -0.185873463243 9.35563098321
Complaints against Fire or EMS_count/len -0.917562830112 9.48061098326
Fire Residential or Commercial_count -0.216209435292 17.8202092457
Fire Residential or Commercial_count/len -1.0457518013 16.197695002
Emergency Air Conditioning_count nan nan
Emergency Air Conditioning_count/len -1.0 1.0
Construction Site Task Force_count -0.373781193938 14.7741935078
Construction Site Task Force_count/len -0.965572127472 23.3809868874
Sanitation / Dumpster Violation_count -0.374345595647 19.2920585405
Sanitation / Dumpster Violation_count/len -0.908651302935 20.2589699908
Parks and Rec Safety and Maintenance_count -0.345077491452 28.3344195481
Parks and Rec Safety and Maintenance_count/len -0.731552868476 29.0592926647
Boarding Room House_count -0.228885136308 10.2047856111
Boarding Room House_count/len -0.993298650287 18.35997407
Maintenance Residential or Commercial_count -0.420877888292 19.9775075185
Maintenance Residential or Commercial_count/len -0.911582610524 45.2015057225
Abandoned Bike_count -0.277158720172 7.27739610966
Abandoned Bike_count/len -0.816515301425 13.579640126
License Residential_count -0.16910742886 26.3343665987
License Residential_count/len -1.0108615941 16.861869389
Daycare Residential or Commercial_count -0.111455642515 8.97217922246
Daycare Residential or Commercial_count/len -1.37772740098 4.08206748073
Inlet Cleaning_count -0.315818090142 27.9175834858
Inlet Cleaning_count/len -0.89260234252 30.0384093149
Rubbish/Recyclable Material Collection_count -0.470508779765 31.5674575012
Rubbish/Recyclable Material Collection_count/len -0.84188335565 41.5775715908
Vacant House or Commercial_count -0.351810043066 35.8385251457
Vacant House or Commercial_count/len -0.920862200213 19.8824217263
Building Dangerous_count -0.305827466116 12.2351443133
Building Dangerous_count/len -0.957757149328 21.2221127408
Graffiti Removal_count -0.464467226767 25.6602174155
Graffiti Removal_count/len -0.750524036235 58.349466663
Dead Animal in Street_count -0.3065758768 9.75203265249
Dead Animal in Street_count/len -0.930120253613 20.4342357966
Tree Dangerous_count -0.21052991573 11.9651168773
Tree Dangerous_count/len -1.21262970204 12.057104392
Smoke Detector_count -0.217920361094 17.3587557074
Smoke Detector_count/len -0.984220940936 16.5067592569
monthly_311_request_count_norm -0.686662202655 30.1718206539
monthly_311_request_count/len_norm -0.634281887245 29.6022629217
Police Complaint_count_norm -0.179364382667 9.02800726089
Police Complaint_count/len_norm -1.05513089828 9.13275478528
Shoveling_count_norm -0.371975372009 25.4042166612
Shoveling_count/len_norm -0.947297113637 16.5405448416
No Heat (Residential)_count_norm -0.256128713431 21.4294356904
No Heat (Residential)_count/len_norm -1.00273225035 16.0074091349
Street Trees_count_norm -0.334397270157 14.6657821168
Street Trees_count/len_norm -1.00843309163 16.6875892758
Stop Sign Repair_count_norm -0.379115313581 10.9766498614
Stop Sign Repair_count/len_norm -0.723466532879 40.090368779
Infestation Residential_count_norm -0.245304232852 17.0849855511
Infestation Residential_count/len_norm -1.04400604562 25.89997719
Street Paving_count_norm -0.238419975602 15.3765535338
Street Paving_count/len_norm -1.0259313961 18.910073485
Traffic (Other)_count_norm -0.276760076888 11.0019855126
Traffic (Other)_count/len_norm -0.690869864194 55.0411816463
Manhole Cover_count_norm -0.271863079292 9.89872890493
Manhole Cover_count/len_norm -1.11155146176 13.0065018324
Line Striping_count_norm -0.264466033854 10.7519122039
Line Striping_count/len_norm -1.0613128437 9.16172565428
Street Defect_count_norm -0.432997403288 18.2805347201
Street Defect_count/len_norm -0.645008523538 43.4142380396
Illegal Dumping_count_norm -0.398101741377 34.5748809805
Illegal Dumping_count/len_norm -0.658577887584 30.2839920611
Vacant Lot Clean-Up_count_norm -0.412918779025 19.4017600127
Vacant Lot Clean-Up_count/len_norm -0.854276488857 26.5991094837
Other Dangerous_count_norm -0.223313890363 10.9702948641
Other Dangerous_count/len_norm -0.98274733096 15.9663227625
Street Light Outage_count_norm -0.352689121573 24.9168052799
Street Light Outage_count/len_norm -0.660529961606 53.6557444596
Other (Streets)_count_norm -0.274582569867 20.2156158205
Other (Streets)_count/len_norm -0.971685757172 19.1011592028
No Heat Residential_count_norm -0.408248290464 2.44948974278
No Heat Residential_count/len_norm -0.819936625974 1.74353061612
Newsstand/Outdoor Café_count_norm 1.0 1.0
Newsstand/Outdoor Café_count/len_norm -0.729879347468 1.78296677
Dangerous Sidewalk_count_norm -0.3032142567 11.8540102795
Dangerous Sidewalk_count/len_norm -0.991539890586 11.0403561095
Zoning Residential_count_norm -0.251045201764 15.7200643111
Zoning Residential_count/len_norm -1.11798256102 11.105809008
Information Request_count_norm -0.236449988055 32.9358971661
Information Request_count/len_norm -0.940997423865 22.9808218047
Miscellaneous_count_norm -0.247133999506 14.4647572301
Miscellaneous_count/len_norm -0.95692242021 28.660944895
Abandoned Vehicle_count_norm -0.409676114241 25.6544813954
Abandoned Vehicle_count/len_norm -0.739536955282 54.4009752643
Hydrant Request_count_norm -0.299257859911 10.6017715351
Hydrant Request_count/len_norm -1.07031251783 7.70719398418
Building Construction_count_norm -0.282414524025 26.9470828679
Building Construction_count/len_norm -0.862290444537 46.0759108452
Complaint (Streets)_count_norm -0.205375661542 14.4749062826
Complaint (Streets)_count/len_norm -1.03998870486 14.2763268942
Newsstand Outdoor Cafe_count_norm -0.367607311047 4.04368042152
Newsstand Outdoor Cafe_count/len_norm -0.91822278622 5.57820033749
Alley Light Outage_count_norm -0.298934065533 13.2750105414
Alley Light Outage_count/len_norm -0.873459337144 29.9352109464
Zoning Business_count_norm -0.302450787885 11.6120123303
Zoning Business_count/len_norm -1.01069306406 26.9943233391
Traffic Signal Emergency_count_norm -0.437469985045 18.5805763291
Traffic Signal Emergency_count/len_norm -0.576748568145 28.2686126944
Hydrant Knocked Down (No Water)_count_norm -0.104828483672 9.53939201417
Hydrant Knocked Down (No Water)_count/len_norm -1.45970240845 4.51535486109
Salting_count_norm -0.532956843312 15.2073895426
Salting_count/len_norm -0.668367788821 31.0818987293
Complaints against Fire or EMS_count_norm -0.185873463243 9.35563098321
Complaints against Fire or EMS_count/len_norm -0.917562830112 9.48061098326
Fire Residential or Commercial_count_norm -0.216209435292 17.8202092457
Fire Residential or Commercial_count/len_norm -1.0457518013 16.197695002
Emergency Air Conditioning_count_norm nan nan
Emergency Air Conditioning_count/len_norm -1.0 1.0
Construction Site Task Force_count_norm -0.373781193938 14.7741935078
Construction Site Task Force_count/len_norm -0.942947850427 23.3802307412
Sanitation / Dumpster Violation_count_norm -0.374345595647 19.2920585405
Sanitation / Dumpster Violation_count/len_norm -0.885551883495 20.2587373274
Parks and Rec Safety and Maintenance_count_norm -0.345077491452 28.3344195481
Parks and Rec Safety and Maintenance_count/len_norm -0.731552868476 29.0592926647
Boarding Room House_count_norm -0.228885136308 10.2047856111
Boarding Room House_count/len_norm -0.993298650287 18.35997407
Maintenance Residential or Commercial_count_norm -0.420877888292 19.9775075185
Maintenance Residential or Commercial_count/len_norm -0.755452575262 45.0891170217
Abandoned Bike_count_norm -0.277158720172 7.27739610966
Abandoned Bike_count/len_norm -0.816515301425 13.579640126
License Residential_count_norm -0.16910742886 26.3343665987
License Residential_count/len_norm -1.0108615941 16.861869389
Daycare Residential or Commercial_count_norm -0.111455642515 8.97217922246
Daycare Residential or Commercial_count/len_norm -1.37772740098 4.08206748073
Inlet Cleaning_count_norm -0.315818090142 27.9175834858
Inlet Cleaning_count/len_norm -0.89260234252 30.0384093149
Rubbish/Recyclable Material Collection_count_norm -0.470508779765 31.5674575012
Rubbish/Recyclable Material Collection_count/len_norm -0.725772449361 41.4797479607
Vacant House or Commercial_count_norm -0.351810043066 35.8385251457
Vacant House or Commercial_count/len_norm -0.920862200213 19.8824217263
Building Dangerous_count_norm -0.305827466116 12.2351443133
Building Dangerous_count/len_norm -0.957757149328 21.2221127408
Graffiti Removal_count_norm -0.464467226767 25.6602174155
Graffiti Removal_count/len_norm -0.675614755487 58.2044814528
Dead Animal in Street_count_norm -0.3065758768 9.75203265249
Dead Animal in Street_count/len_norm -0.930120253613 20.4342357966
Tree Dangerous_count_norm -0.21052991573 11.9651168773
Tree Dangerous_count/len_norm -1.21262970204 12.057104392
Smoke Detector_count_norm -0.217920361094 17.3587557074
Smoke Detector_count/len_norm -0.984220940936 16.5067592569
In [85]:
stats.zscore(philly_311_filtered_details[philly_311_filtered_details['Police Complaint_count']>0]['Police Complaint_count'])
Out[85]:
array([-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, 4.42432144, 4.42432144,
-0.17936438, 4.42432144, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
4.42432144, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
4.42432144, -0.17936438, 4.42432144, -0.17936438, 4.42432144,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, 4.42432144, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, 4.42432144, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, 9.02800726, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, 9.02800726, 9.02800726, 4.42432144, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, 4.42432144, 4.42432144,
4.42432144, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, 4.42432144, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, 4.42432144, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, 4.42432144, 4.42432144,
4.42432144, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
-0.17936438])
In [86]:
stats.zscore(philly_311_filtered_details['Police Complaint_count'])
Out[86]:
array([-0.0477097, -0.0477097, -0.0477097, ..., -0.0477097, -0.0477097,
-0.0477097])
In [88]:
philly_311_filtered_details[philly_311_filtered_details['Police Complaint_count']>0]['Police Complaint_count'].shape
Out[88]:
(616,)
In [89]:
philly_311_filtered_details[philly_311_filtered_details['Police Complaint_count']==0]['Police Complaint_count'].shape
Out[89]:
(259265,)
In [91]:
for column in philly_311_filtered_details.columns:
if('_count' in column and 'len' not in column):
print(column, philly_311_filtered_details[philly_311_filtered_details[column]>0][column].shape[0], philly_311_filtered_details[philly_311_filtered_details[column]==0][column].shape[0])
monthly_311_request_count 259881 0
Police Complaint_count 616 259265
Shoveling_count 2803 257078
No Heat (Residential)_count 3429 256452
Street Trees_count 9749 250132
Stop Sign Repair_count 4332 255549
Infestation Residential_count 2289 257592
Street Paving_count 2180 257701
Traffic (Other)_count 6388 253493
Manhole Cover_count 2095 257786
Line Striping_count 1510 258371
Street Defect_count 35164 224717
Illegal Dumping_count 45610 214271
Vacant Lot Clean-Up_count 16078 243803
Other Dangerous_count 802 259079
Street Light Outage_count 21604 238277
Other (Streets)_count 9132 250749
No Heat Residential_count 7 259874
Newsstand/Outdoor Café_count 11 259870
Dangerous Sidewalk_count 4667 255214
Zoning Residential_count 3446 256435
Information Request_count 7494 252387
Miscellaneous_count 4958 254923
Abandoned Vehicle_count 34798 225083
Hydrant Request_count 1536 258345
Building Construction_count 6793 253088
Complaint (Streets)_count 4950 254931
Newsstand Outdoor Cafe_count 210 259671
Alley Light Outage_count 4064 255817
Zoning Business_count 5452 254429
Traffic Signal Emergency_count 16242 243639
Hydrant Knocked Down (No Water)_count 92 259789
Salting_count 12939 246942
Complaints against Fire or EMS_count 231 259650
Fire Residential or Commercial_count 3804 256077
Emergency Air Conditioning_count 2 259879
Construction Site Task Force_count 10010 249871
Sanitation / Dumpster Violation_count 11494 248387
Parks and Rec Safety and Maintenance_count 4243 255638
Boarding Room House_count 1793 258088
Maintenance Residential or Commercial_count 54258 205623
Abandoned Bike_count 636 259245
License Residential_count 3542 256339
Daycare Residential or Commercial_count 163 259718
Inlet Cleaning_count 6716 253165
Rubbish/Recyclable Material Collection_count 44816 215065
Vacant House or Commercial_count 16159 243722
Building Dangerous_count 8174 251707
Graffiti Removal_count 30675 229206
Dead Animal in Street_count 3445 256436
Tree Dangerous_count 694 259187
Smoke Detector_count 6862 253019
monthly_311_request_count_norm 88781 0
Police Complaint_count_norm 616 0
Shoveling_count_norm 2803 0
No Heat (Residential)_count_norm 3429 0
Street Trees_count_norm 9749 0
Stop Sign Repair_count_norm 4332 0
Infestation Residential_count_norm 2289 0
Street Paving_count_norm 2180 0
Traffic (Other)_count_norm 6388 0
Manhole Cover_count_norm 2095 0
Line Striping_count_norm 1510 0
Street Defect_count_norm 35164 0
Illegal Dumping_count_norm 45610 0
Vacant Lot Clean-Up_count_norm 16078 0
Other Dangerous_count_norm 802 0
Street Light Outage_count_norm 21604 0
Other (Streets)_count_norm 9132 0
No Heat Residential_count_norm 7 0
Newsstand/Outdoor Café_count_norm 11 0
Dangerous Sidewalk_count_norm 4667 0
Zoning Residential_count_norm 3446 0
Information Request_count_norm 7494 0
Miscellaneous_count_norm 4958 0
Abandoned Vehicle_count_norm 34798 0
Hydrant Request_count_norm 1536 0
Building Construction_count_norm 6793 0
Complaint (Streets)_count_norm 4950 0
Newsstand Outdoor Cafe_count_norm 210 0
Alley Light Outage_count_norm 4064 0
Zoning Business_count_norm 5452 0
Traffic Signal Emergency_count_norm 16242 0
Hydrant Knocked Down (No Water)_count_norm 92 0
Salting_count_norm 12939 0
Complaints against Fire or EMS_count_norm 231 0
Fire Residential or Commercial_count_norm 3804 0
Emergency Air Conditioning_count_norm 2 0
Construction Site Task Force_count_norm 10010 0
Sanitation / Dumpster Violation_count_norm 11494 0
Parks and Rec Safety and Maintenance_count_norm 4243 0
Boarding Room House_count_norm 1793 0
Maintenance Residential or Commercial_count_norm 54258 0
Abandoned Bike_count_norm 636 0
License Residential_count_norm 3542 0
Daycare Residential or Commercial_count_norm 163 0
Inlet Cleaning_count_norm 6716 0
Rubbish/Recyclable Material Collection_count_norm 44816 0
Vacant House or Commercial_count_norm 16159 0
Building Dangerous_count_norm 8174 0
Graffiti Removal_count_norm 30675 0
Dead Animal in Street_count_norm 3445 0
Tree Dangerous_count_norm 694 0
Smoke Detector_count_norm 6862 0
In [97]:
for column in philly_311_filtered_details.columns:
if('_count' in column and 'len' not in column):
zscore_nonzero = stats.zscore(np.log2(philly_311_filtered_details[column]+1))
print(column, min(zscore_nonzero), max(zscore_nonzero))
monthly_311_request_count -0.923544502178 6.95645249993
Police Complaint_count -0.048410620452 39.9211347329
Shoveling_count -0.100325364848 36.0384429999
No Heat (Residential)_count -0.113403351964 28.8604463223
Street Trees_count -0.19109924165 15.7693184311
Stop Sign Repair_count -0.125273588941 21.8256784877
Infestation Residential_count -0.0932051619362 28.4661263482
Street Paving_count -0.0901284920828 32.1872963468
Traffic (Other)_count -0.156207925832 15.4208571784
Manhole Cover_count -0.088533644599 26.7422642289
Line Striping_count -0.0752102460994 31.7138040298
Street Defect_count -0.371526403533 9.57104298809
Illegal Dumping_count -0.432383878588 10.4171143647
Vacant Lot Clean-Up_count -0.242856028912 14.3106934957
Other Dangerous_count -0.0545739426539 47.1821755956
Street Light Outage_count -0.289900877533 12.906451024
Other (Streets)_count -0.187409715684 16.5470916086
No Heat Residential_count -0.00509980189539 276.939810921
Newsstand/Outdoor Café_count -0.00650606307501 153.702782846
Dangerous Sidewalk_count -0.132544947542 19.2609831298
Zoning Residential_count -0.114053021506 24.4307315757
Information Request_count -0.167099164029 24.9612386929
Miscellaneous_count -0.136542308117 21.2392837197
Abandoned Vehicle_count -0.373682794142 10.5139648433
Hydrant Request_count -0.0750135599273 35.0021250365
Building Construction_count -0.160809552785 21.2617308424
Complaint (Streets)_count -0.137964052341 18.0206397415
Newsstand Outdoor Cafe_count -0.0277574312344 62.8952751448
Alley Light Outage_count -0.123381109976 21.9627393382
Zoning Business_count -0.143404004184 17.8019245715
Traffic Signal Emergency_count -0.238136464498 14.4230757117
Hydrant Knocked Down (No Water)_count -0.0187843373125 83.5509350833
Salting_count -0.206672999106 14.495028202
Complaints against Fire or EMS_count -0.0293542041274 82.1291396886
Fire Residential or Commercial_count -0.120636858185 22.2644147996
Emergency Air Conditioning_count -0.00277414657882 360.471219378
Construction Site Task Force_count -0.194358325198 14.9526925579
Sanitation / Dumpster Violation_count -0.206633749717 16.2392988914
Parks and Rec Safety and Maintenance_count -0.124639597162 29.8182598729
Boarding Room House_count -0.0824287925195 26.6721727311
Maintenance Residential or Commercial_count -0.484087703852 7.96781381276
Abandoned Bike_count -0.0486017411687 43.4772164044
License Residential_count -0.11535513076 32.9382462194
Daycare Residential or Commercial_count -0.0250009381577 62.7022459669
Inlet Cleaning_count -0.158265603853 23.181405547
Rubbish/Recyclable Material Collection_count -0.429717918698 9.77679119544
Vacant House or Commercial_count -0.24858280355 16.4939678986
Building Dangerous_count -0.1759191436 15.3825501254
Graffiti Removal_count -0.341613907045 11.2042338611
Dead Animal in Street_count -0.113686548925 20.6575843987
Tree Dangerous_count -0.0513015677762 43.277690625
Smoke Detector_count -0.159063069875 21.7891784268
monthly_311_request_count_norm -1.0162417246 4.88532472294
Police Complaint_count_norm -0.0486975016795 27.4349396718
Shoveling_count_norm -0.103273791874 21.2952377654
No Heat (Residential)_count_norm -0.115094805465 17.1344757164
Street Trees_count_norm -0.195204902167 10.7093223124
Stop Sign Repair_count_norm -0.128697015149 14.6742961735
Infestation Residential_count_norm -0.0940445508694 17.7392349192
Street Paving_count_norm -0.091597645764 19.4345283591
Traffic (Other)_count_norm -0.158029193845 10.7165723628
Manhole Cover_count_norm -0.0898133696128 17.6297810545
Line Striping_count_norm -0.0762148554853 20.4327439822
Street Defect_count_norm -0.381574648119 7.42413035658
Illegal Dumping_count_norm -0.443617258135 7.97325860526
Vacant Lot Clean-Up_count_norm -0.249870519774 10.4478270266
Other Dangerous_count_norm -0.0554763226156 28.4136244587
Street Light Outage_count_norm -0.296182635419 8.87731732627
Other (Streets)_count_norm -0.18977415368 10.7248039471
No Heat Residential_count_norm -0.00518439951586 214.395859573
Newsstand/Outdoor Café_count_norm -0.00650606307501 153.702782846
Dangerous Sidewalk_count_norm -0.134510086318 12.9065972524
Zoning Residential_count_norm -0.11548464135 15.3844705987
Information Request_count_norm -0.170653048318 15.0436403969
Miscellaneous_count_norm -0.138695561235 13.6105316665
Abandoned Vehicle_count_norm -0.382791635076 7.70082663486
Hydrant Request_count_norm -0.0766853546068 21.894644656
Building Construction_count_norm -0.162949164853 13.0336557191
Complaint (Streets)_count_norm -0.139004194951 12.0647582798
Newsstand Outdoor Cafe_count_norm -0.0283528231896 44.806925838
Alley Light Outage_count_norm -0.125353607076 14.2699552774
Zoning Business_count_norm -0.145552627922 12.0547873848
Traffic Signal Emergency_count_norm -0.245507616006 11.6247859043
Hydrant Knocked Down (No Water)_count_norm -0.0188154819595 62.1147206228
Salting_count_norm -0.211889202115 12.8073387023
Complaints against Fire or EMS_count_norm -0.0297781377442 48.3328334663
Fire Residential or Commercial_count_norm -0.121590929329 14.199993126
Emergency Air Conditioning_count_norm -0.00277414657882 360.471219378
Construction Site Task Force_count_norm -0.198092780211 10.2306114991
Sanitation / Dumpster Violation_count_norm -0.211769198962 11.0285721011
Parks and Rec Safety and Maintenance_count_norm -0.127598491104 17.7630905763
Boarding Room House_count_norm -0.0831736556451 17.9393860177
Maintenance Residential or Commercial_count_norm -0.497043093724 6.02190517024
Abandoned Bike_count_norm -0.0493902893526 28.7174300014
License Residential_count_norm -0.117051607177 18.5587929949
Daycare Residential or Commercial_count_norm -0.0250469536871 47.1145786846
Inlet Cleaning_count_norm -0.161434246313 14.2057243379
Rubbish/Recyclable Material Collection_count_norm -0.440766488436 7.36148421904
Vacant House or Commercial_count_norm -0.253785183768 10.7940410355
Building Dangerous_count_norm -0.178852957592 10.5246987018
Graffiti Removal_count_norm -0.350978707966 8.74543300621
Dead Animal in Street_count_norm -0.115358591037 14.0727422025
Tree Dangerous_count_norm -0.0516802550769 27.7155021965
Smoke Detector_count_norm -0.163017715867 13.8282352565
In [98]:
for column in philly_311_filtered_details.columns:
if('_count' in column):
normalize(column, philly_311_filtered_details)
In [100]:
for column in philly_311_filtered_details.columns:
if('_norm_norm' in column):
philly_311_filtered_details = philly_311_filtered_details.drop(column,axis=1)
In [101]:
print(philly_311_filtered_details.shape)
philly_311_filtered_details.head().T
(259881, 211)
Out[101]:
0
1
2
3
4
SEG_ID
100006.000000
100006.000000
100006.000000
100006.000000
100006.000000
Month
2.000000
3.000000
4.000000
5.000000
6.000000
SHAPE_LEN
735.818883
735.818883
735.818883
735.818883
735.818883
monthly_311_request_count
1.000000
3.000000
1.000000
1.000000
1.000000
monthly_311_request_count/len
0.001359
0.004077
0.001359
0.001359
0.001359
Police Complaint_count
0.000000
0.000000
0.000000
0.000000
0.000000
Police Complaint_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Shoveling_count
0.000000
0.000000
0.000000
0.000000
0.000000
Shoveling_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
No Heat (Residential)_count
0.000000
0.000000
0.000000
0.000000
0.000000
No Heat (Residential)_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Street Trees_count
0.000000
0.000000
0.000000
0.000000
0.000000
Street Trees_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Stop Sign Repair_count
0.000000
0.000000
0.000000
0.000000
0.000000
Stop Sign Repair_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Infestation Residential_count
0.000000
0.000000
0.000000
0.000000
0.000000
Infestation Residential_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Street Paving_count
0.000000
0.000000
0.000000
0.000000
0.000000
Street Paving_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Traffic (Other)_count
0.000000
0.000000
0.000000
0.000000
0.000000
Traffic (Other)_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Manhole Cover_count
0.000000
0.000000
0.000000
0.000000
0.000000
Manhole Cover_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Line Striping_count
0.000000
0.000000
0.000000
0.000000
0.000000
Line Striping_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Street Defect_count
0.000000
0.000000
0.000000
0.000000
0.000000
Street Defect_count/len
0.000000
0.000000
0.000000
0.000000
0.000000
Illegal Dumping_count
1.000000
3.000000
1.000000
0.000000
0.000000
Illegal Dumping_count/len
0.001359
0.004077
0.001359
0.000000
0.000000
Vacant Lot Clean-Up_count
0.000000
0.000000
0.000000
0.000000
1.000000
...
...
...
...
...
...
Sanitation / Dumpster Violation_count_norm
0.000000
0.000000
0.000000
0.000000
0.000000
Sanitation / Dumpster Violation_count/len_norm
0.000000
0.000000
0.000000
0.000000
0.000000
Parks and Rec Safety and Maintenance_count_norm
0.000000
0.000000
0.000000
0.000000
0.000000
Parks and Rec Safety and Maintenance_count/len_norm
0.000000
0.000000
0.000000
0.000000
0.000000
Boarding Room House_count_norm
0.000000
0.000000
0.000000
0.000000
0.000000
Boarding Room House_count/len_norm
0.000000
0.000000
0.000000
0.000000
0.000000
Maintenance Residential or Commercial_count_norm
0.000000
0.000000
0.000000
0.231378
0.000000
Maintenance Residential or Commercial_count/len_norm
0.000000
0.000000
0.000000
0.006506
0.000000
Abandoned Bike_count_norm
0.000000
0.000000
0.000000
0.000000
0.000000
Abandoned Bike_count/len_norm
0.000000
0.000000
0.000000
0.000000
0.000000
License Residential_count_norm
0.000000
0.000000
0.000000
0.000000
0.000000
License Residential_count/len_norm
0.000000
0.000000
0.000000
0.000000
0.000000
Daycare Residential or Commercial_count_norm
0.000000
0.000000
0.000000
0.000000
0.000000
Daycare Residential or Commercial_count/len_norm
0.000000
0.000000
0.000000
0.000000
0.000000
Inlet Cleaning_count_norm
0.000000
0.000000
0.000000
0.000000
0.000000
Inlet Cleaning_count/len_norm
0.000000
0.000000
0.000000
0.000000
0.000000
Rubbish/Recyclable Material Collection_count_norm
0.000000
0.000000
0.000000
0.000000
0.000000
Rubbish/Recyclable Material Collection_count/len_norm
0.000000
0.000000
0.000000
0.000000
0.000000
Vacant House or Commercial_count_norm
0.000000
0.000000
0.000000
0.000000
0.000000
Vacant House or Commercial_count/len_norm
0.000000
0.000000
0.000000
0.000000
0.000000
Building Dangerous_count_norm
0.000000
0.000000
0.000000
0.000000
0.000000
Building Dangerous_count/len_norm
0.000000
0.000000
0.000000
0.000000
0.000000
Graffiti Removal_count_norm
0.000000
0.000000
0.000000
0.000000
0.000000
Graffiti Removal_count/len_norm
0.000000
0.000000
0.000000
0.000000
0.000000
Dead Animal in Street_count_norm
0.000000
0.000000
0.000000
0.000000
0.000000
Dead Animal in Street_count/len_norm
0.000000
0.000000
0.000000
0.000000
0.000000
Tree Dangerous_count_norm
0.000000
0.000000
0.000000
0.000000
0.000000
Tree Dangerous_count/len_norm
0.000000
0.000000
0.000000
0.000000
0.000000
Smoke Detector_count_norm
0.000000
0.000000
0.000000
0.000000
0.000000
Smoke Detector_count/len_norm
0.000000
0.000000
0.000000
0.000000
0.000000
211 rows × 5 columns
In [124]:
for column in philly_311_filtered_details.columns:
if('Sign' in column):
print(column)
Stop Sign Repair_count
Stop Sign Repair_count/len
Traffic Signal Emergency_count
Traffic Signal Emergency_count/len
Stop Sign Repair_count_norm
Stop Sign Repair_count/len_norm
Traffic Signal Emergency_count_norm
Traffic Signal Emergency_count/len_norm
In [103]:
for column in philly_311_filtered_details.columns:
if('_norm' in column):
print(column, philly_311_filtered_details[column].mean(), philly_311_filtered_details[column].median())
monthly_311_request_count_norm 0.25684188870433294 0.2507087200189668
monthly_311_request_count/len_norm 0.022484177587232104 0.014774571629217578
Police Complaint_count_norm 0.0012111876686117507 0.0
Police Complaint_count/len_norm 0.0002653119882305808 0.0
Shoveling_count_norm 0.00277611466543034 0.0
Shoveling_count/len_norm 0.0006285981300833229 0.0
No Heat (Residential)_count_norm 0.003913989795570009 0.0
No Heat (Residential)_count/len_norm 0.0008658065386161155 0.0
Street Trees_count_norm 0.011973323353346095 0.0
Street Trees_count/len_norm 0.002306862547113694 0.0
Stop Sign Repair_count_norm 0.005706977469752205 0.0
Stop Sign Repair_count/len_norm 0.0003448937779402623 0.0
Infestation Residential_count_norm 0.0032635624507890057 0.0
Infestation Residential_count/len_norm 0.0003879514406374133 0.0
Street Paving_count_norm 0.0027923073954205196 0.0
Street Paving_count/len_norm 0.0004566454129026689 0.0
Traffic (Other)_count_norm 0.010028071705865779 0.0
Traffic (Other)_count/len_norm 0.00036361386180594534 0.0
Manhole Cover_count_norm 0.003299702268129981 0.0
Manhole Cover_count/len_norm 0.000674929988478828 0.0
Line Striping_count_norm 0.002365919416267349 0.0
Line Striping_count/len_norm 0.000625009686712782 0.0
Street Defect_count_norm 0.03736724270150047 0.0
Street Defect_count/len_norm 0.002598504423035878 0.0
Illegal Dumping_count_norm 0.039852891709118335 0.0
Illegal Dumping_count/len_norm 0.0049159242276973405 0.0
Vacant Lot Clean-Up_count_norm 0.016687065138417972 0.0
Vacant Lot Clean-Up_count/len_norm 0.0022115546501957836 0.0
Other Dangerous_count_norm 0.0011553280695098319 0.0
Other Dangerous_count/len_norm 0.00020287693218577679 0.0
Street Light Outage_count_norm 0.021968259083796255 0.0
Street Light Outage_count/len_norm 0.0012759383806447938 0.0
Other (Streets)_count_norm 0.011199002112561926 0.0
Other (Streets)_count/len_norm 0.001838343968752919 0.0
No Heat Residential_count_norm 1.8414499410994825e-05 0.0
No Heat Residential_count/len_norm 1.306448748990345e-05 0.0
Newsstand/Outdoor Café_count_norm 4.232706507978652e-05 0.0
Newsstand/Outdoor Café_count/len_norm 1.3140028267990387e-05 0.0
Dangerous Sidewalk_count_norm 0.006834493807090921 0.0
Dangerous Sidewalk_count/len_norm 0.001567262409360834 0.0
Zoning Residential_count_norm 0.004646731408643151 0.0
Zoning Residential_count/len_norm 0.0013107023216245089 0.0
Information Request_count_norm 0.006649829566153646 0.0
Information Request_count/len_norm 0.0012284935474887607 0.0
Miscellaneous_count_norm 0.006387697389557205 0.0
Miscellaneous_count/len_norm 0.0006748046187454325 0.0
Abandoned Vehicle_count_norm 0.034321720043375074 0.0
Abandoned Vehicle_count/len_norm 0.0023659370190422004 0.0
Hydrant Request_count_norm 0.0021385313320560306 0.0
Hydrant Request_count/len_norm 0.0007680103042972108 0.0
Building Construction_count_norm 0.0075065585042168636 0.0
Building Construction_count/len_norm 0.0005475486111158329 0.0
Complaint (Streets)_count_norm 0.007597723586424906 0.0
Complaint (Streets)_count/len_norm 0.0013680521111033214 0.0
Newsstand Outdoor Cafe_count_norm 0.0004411330811316418 0.0
Newsstand Outdoor Cafe_count/len_norm 0.00013217833098126526 0.0
Alley Light Outage_count_norm 0.005586364081679968 0.0
Alley Light Outage_count/len_norm 0.000499824653006062 0.0
Zoning Business_count_norm 0.007991160684494618 0.0
Zoning Business_count/len_norm 0.0008339077736951383 0.0
Traffic Signal Emergency_count_norm 0.016242617706945166 0.0
Traffic Signal Emergency_count/len_norm 0.0015016383744905048 0.0
Hydrant Knocked Down (No Water)_count_norm 0.00022477444513066617 0.0
Hydrant Knocked Down (No Water)_count/len_norm 9.309711527794624e-05 0.0
Salting_count_norm 0.0140577608182034 0.0
Salting_count/len_norm 0.0012901012872440407 0.0
Complaints against Fire or EMS_count_norm 0.0003572875151012169 0.0
Complaints against Fire or EMS_count/len_norm 9.51675990055193e-05 0.0
Fire Residential or Commercial_count_norm 0.00538917041736114 0.0
Fire Residential or Commercial_count/len_norm 0.0009771315600753218 0.0
Emergency Air Conditioning_count_norm 7.695830014506639e-06 0.0
Emergency Air Conditioning_count/len_norm 6.927363882248013e-06 0.0
Construction Site Task Force_count_norm 0.012831430137717462 0.0
Construction Site Task Force_count/len_norm 0.0016552599041455762 0.0
Sanitation / Dumpster Violation_count_norm 0.012564428799885135 0.0
Sanitation / Dumpster Violation_count/len_norm 0.002065244926036989 0.0
Parks and Rec Safety and Maintenance_count_norm 0.00416257608206416 0.0
Parks and Rec Safety and Maintenance_count/len_norm 0.0004437139475274355 0.0
Boarding Room House_count_norm 0.003080920209056384 0.0
Boarding Room House_count/len_norm 0.00039402577409075455 0.0
Maintenance Residential or Commercial_count_norm 0.05727559684652339 0.0
Maintenance Residential or Commercial_count/len_norm 0.004729767258142824 0.0
Abandoned Bike_count_norm 0.001116618670007509 0.0
Abandoned Bike_count/len_norm 0.0001570415785726755 0.0
License Residential_count_norm 0.0034899413694246084 0.0
License Residential_count/len_norm 0.0008561963191240621 0.0
Daycare Residential or Commercial_count_norm 0.0003985658448482354 0.0
Daycare Residential or Commercial_count/len_norm 0.00017527762233645957 0.0
Inlet Cleaning_count_norm 0.006780969741606498 0.0
Inlet Cleaning_count/len_norm 0.0008270412624120675 0.0
Rubbish/Recyclable Material Collection_count_norm 0.042102340172558754 0.0
Rubbish/Recyclable Material Collection_count/len_norm 0.003978565972688632 0.0
Vacant House or Commercial_count_norm 0.014847367523203809 0.0
Vacant House or Commercial_count/len_norm 0.0031660933632440195 0.0
Building Dangerous_count_norm 0.011306969892617197 0.0
Building Dangerous_count/len_norm 0.0014959916813386779 0.0
Graffiti Removal_count_norm 0.029587598408089555 0.0
Graffiti Removal_count/len_norm 0.0018788699070412353 0.0
Dead Animal in Street_count_norm 0.005473259157405738 0.0
Dead Animal in Street_count/len_norm 0.0006208096593470852 0.0
Tree Dangerous_count_norm 0.0011840009467091763 0.0
Tree Dangerous_count/len_norm 0.00026532453745931576 0.0
Smoke Detector_count_norm 0.007247189707624379 0.0
Smoke Detector_count/len_norm 0.0016377102606168752 0.0
In [62]:
norm_count = 0
for column in philly_311_details.columns:
if('_norm' in column):
print(column)
norm_count +=1
print(norm_count)
philly_311_count_norm
philly_311_count/len_norm
Building Construction_count_norm
Building Construction_count/len_norm
Line Striping_count_norm
Line Striping_count/len_norm
Traffic (Other)_count_norm
Traffic (Other)_count/len_norm
Zoning Business_count_norm
Zoning Business_count/len_norm
Stop Sign Repair_count_norm
Stop Sign Repair_count/len_norm
Inlet Cleaning_count_norm
Inlet Cleaning_count/len_norm
Parks and Rec Safety and Maintenance_count_norm
Parks and Rec Safety and Maintenance_count/len_norm
Abandoned Bike_count_norm
Abandoned Bike_count/len_norm
Newsstand/Outdoor Café_count_norm
Newsstand/Outdoor Café_count/len_norm
Dangerous Sidewalk_count_norm
Dangerous Sidewalk_count/len_norm
Zoning Residential_count_norm
Zoning Residential_count/len_norm
Miscellaneous_count_norm
Miscellaneous_count/len_norm
Daycare Residential or Commercial_count_norm
Daycare Residential or Commercial_count/len_norm
Street Trees_count_norm
Street Trees_count/len_norm
Hydrant Request_count_norm
Hydrant Request_count/len_norm
Graffiti Removal_count_norm
Graffiti Removal_count/len_norm
Sanitation / Dumpster Violation_count_norm
Sanitation / Dumpster Violation_count/len_norm
Manhole Cover_count_norm
Manhole Cover_count/len_norm
Maintenance Residential or Commercial_count_norm
Maintenance Residential or Commercial_count/len_norm
Illegal Dumping_count_norm
Illegal Dumping_count/len_norm
Building Dangerous_count_norm
Building Dangerous_count/len_norm
Police Complaint_count_norm
Police Complaint_count/len_norm
Fire Residential or Commercial_count_norm
Fire Residential or Commercial_count/len_norm
Abandoned Vehicle_count_norm
Abandoned Vehicle_count/len_norm
Complaint (Streets)_count_norm
Complaint (Streets)_count/len_norm
Boarding Room House_count_norm
Boarding Room House_count/len_norm
Smoke Detector_count_norm
Smoke Detector_count/len_norm
Other Dangerous_count_norm
Other Dangerous_count/len_norm
Tree Dangerous_count_norm
Tree Dangerous_count/len_norm
Vacant Lot Clean-Up_count_norm
Vacant Lot Clean-Up_count/len_norm
Information Request_count_norm
Information Request_count/len_norm
Street Defect_count_norm
Street Defect_count/len_norm
Rubbish/Recyclable Material Collection_count_norm
Rubbish/Recyclable Material Collection_count/len_norm
License Residential_count_norm
License Residential_count/len_norm
Newsstand Outdoor Cafe_count_norm
Newsstand Outdoor Cafe_count/len_norm
Complaints against Fire or EMS_count_norm
Complaints against Fire or EMS_count/len_norm
No Heat Residential_count_norm
No Heat Residential_count/len_norm
Traffic Signal Emergency_count_norm
Traffic Signal Emergency_count/len_norm
Emergency Air Conditioning_count_norm
Emergency Air Conditioning_count/len_norm
Other (Streets)_count_norm
Other (Streets)_count/len_norm
Dead Animal in Street_count_norm
Dead Animal in Street_count/len_norm
Construction Site Task Force_count_norm
Construction Site Task Force_count/len_norm
Alley Light Outage_count_norm
Alley Light Outage_count/len_norm
Shoveling_count_norm
Shoveling_count/len_norm
Hydrant Knocked Down (No Water)_count_norm
Hydrant Knocked Down (No Water)_count/len_norm
Street Paving_count_norm
Street Paving_count/len_norm
Vacant House or Commercial_count_norm
Vacant House or Commercial_count/len_norm
Salting_count_norm
Salting_count/len_norm
Street Light Outage_count_norm
Street Light Outage_count/len_norm
Infestation Residential_count_norm
Infestation Residential_count/len_norm
No Heat (Residential)_count_norm
No Heat (Residential)_count/len_norm
104
In [63]:
def gradient_color(percent):
min_color = np.array([255,255,255])
max_color = np.array([178,34,34])
return '#%02x%02x%02x' % tuple([int(k) for k in min_color+(max_color-min_color)*percent])
def write_var(col_name, var_name, df,f):
df['color']=df[col_name].apply(gradient_color)
f.write('var %s = %s;\n' % (var_name, df.to_json()))
with open('../visualization/Volumes of Philly 311 Requests.js','w+') as f:
for column in philly_311_details.columns:
if('_norm' in column):
write_var(column, column +'_var', philly_311_details,f)
In [104]:
print(philly_311_filtered_details.shape)
print(philly_311_filtered_details.drop_duplicates().shape)
(259881, 211)
(259881, 211)
In [105]:
philly_311_filtered_details.to_csv('../philly/Philly_311_Details_2015-16.csv')
In [3]:
philly_311_details = pd.read_csv('../philly/Philly_311_Details.csv')
philly_311_details.shape
Out[3]:
(38930, 211)
In [4]:
philly_311_details.drop_duplicates().shape
Out[4]:
(38930, 211)
In [2]:
street_gpd = gpd.read_file('../philly/GeoJSON_data/Street_Centerline_qgis3.geojson')
print(street_gpd.shape)
street_gpd.head().T
(41022, 31)
Out[2]:
0
1
2
3
4
CLASS
3
3
3
5
4
FNODE_
2
2
1
6
5
LENGTH
449.863
540.083
446.104
447.261
148.216
LPOLY_
0
0
0
0
0
L_F_ADD
1500
400
1600
1600
350
L_HUNDRED
1500
400
1600
1600
300
L_T_ADD
1598
498
1698
1698
398
MULTI_REP
0
0
0
0
0
NEWSEGDATE
None
None
None
None
None
ONEWAY
FT
TF
FT
TF
TF
PRE_DIR
None
N
None
None
N
RESPONSIBL
FAM
FAM
FAM
None
None
RPOLY_
0
0
0
0
0
R_F_ADD
1501
401
1601
1601
351
R_HUNDRED
1500
400
1600
1600
300
R_T_ADD
1599
499
1699
1699
399
SEG_ID
420708
422065
420702
420732
420718
SHAPE_LEN
449.863
540.083
446.104
447.261
148.216
STCL2_
1
2
3
4
5
STCL2_ID
85205
86540
85199
85229
85215
STNAME
CALLOWHILL ST
N 15TH ST
CALLOWHILL ST
CARLTON ST
N 17TH ST
STREETLABE
CALLOWHILL ST
N 15TH ST
CALLOWHILL ST
CARLTON ST
N 17TH ST
ST_CODE
20880
88070
20880
21440
88110
ST_NAME
CALLOWHILL
15TH
CALLOWHILL
CARLTON
17TH
ST_TYPE
ST
ST
ST
ST
ST
SUF_DIR
None
None
None
None
None
TNODE_
1
3
4
5
4
UPDATE_
1997-02-10
1997-02-10
1997-02-10
1997-02-10
1997-06-27
ZIP_LEFT
19130
19130
19130
19103
19103
ZIP_RIGHT
19130
19130
19130
19103
19103
geometry
LINESTRING (-75.16371257852462 39.959816984389...
LINESTRING (-75.16371257852462 39.959816984389...
LINESTRING (-75.16529687686355 39.960013687935...
LINESTRING (-75.16537539004391 39.959601782423...
LINESTRING (-75.16694872703492 39.959805718080...
In [110]:
collision_gpd = gpd.read_file('../philly/GeoJSON_data/GIS_HEALTH.Collisions_crash_2011_2014PUBV.geojson')
print(collision_gpd.shape)
(43488, 54)
In [4]:
collision_gpd.head().T
Out[4]:
0
1
2
3
4
AUTOMOBILE_COUNT
0
1
1
1
0
BELTED_DEATH_COUNT
0
0
0
0
0
BELTED_MAJ_INJ_COUNT
0
0
0
0
0
BICYCLE_COUNT
0
1
1
0
0
BICYCLE_DEATH_COUNT
0
0
0
0
0
BICYCLE_MAJ_INJ_COUNT
0
0
0
0
0
BUS_COUNT
0
0
0
0
0
COLLISION_TYPE
1
4
4
8
8
COMM_VEH_COUNT
1
0
0
0
0
CRASH_MONTH
8
8
8
8
8
CRASH_YEAR
2014
2014
2014
2014
2014
CRN
2014087880
2014087758
2014096425
2014088141
2014091439
DAY_OF_WEEK
6
6
7
7
7
FATAL_COUNT
0
0
0
0
0
HEAVY_TRUCK_COUNT
1
0
0
0
0
HOUR_OF_DAY
15
14
19
19
19
ILLUMINATION
1
1
1
1
1
INJURY_COUNT
5
1
1
1
1
INTERSECTION
0
0
1
0
0
INTERSECT_TYPE
0
0
1
0
0
LATITUDE
40.085
39.9904
40.0293
39.9535
40.0162
LENGTH
439.229
778.344
170.622
194.532
396.315
LOCATION_TYPE
0
0
0
0
0
LONGITUDE
-75.038
-75.1028
-75.0548
-75.2405
-75.0906
MAJ_INJ_COUNT
0
0
0
0
0
MAX_SEVERITY_LEVEL
4
4
3
4
8
MCYCLE_DEATH_COUNT
0
0
0
0
0
MCYCLE_MAJ_INJ_COUNT
0
0
0
0
0
MIN_INJ_COUNT
5
1
0
1
0
MOD_INJ_COUNT
0
0
1
0
0
MOTORCYCLE_COUNT
0
0
0
0
0
OBJECTID
4001
4002
4003
4004
4005
PED_COUNT
0
0
0
1
1
PED_DEATH_COUNT
0
0
0
0
0
PED_MAJ_INJ_COUNT
0
0
0
0
0
PERSON_COUNT
7
3
2
2
2
RELATION_TO_ROAD
1
1
1
1
1
ROAD_CONDITION
1
0
0
0
0
SCH_BUS_IND
N
N
N
N
N
SCH_ZONE_IND
N
N
U
U
N
SEG_ID
960283
541001
760684
300613
640755
SMALL_TRUCK_COUNT
0
0
0
0
0
SUV_COUNT
2
0
0
0
1
TCD_TYPE
0
0
0
0
0
TIME_OF_DAY
1500
1450
1919
1910
1905
UNBELTED_OCC_COUNT
0
0
0
0
0
UNB_DEATH_COUNT
0
0
0
0
0
UNB_MAJ_INJ_COUNT
0
0
0
0
0
UNK_INJ_DEG_COUNT
0
0
0
0
1
UNK_INJ_PER_COUNT
0
0
0
0
0
VAN_COUNT
0
0
0
0
0
VEHICLE_COUNT
3
1
1
1
1
WEATHER
2
1
1
1
1
geometry
POINT (-75.03795178519658 40.08497541304385)
POINT (-75.10275539598592 39.990404578965)
POINT (-75.05481628915624 40.02929723040301)
POINT (-75.24050521636346 39.95347871767484)
POINT (-75.09056881169107 40.01618353851767)
In [47]:
collision_gpd.groupby(['CRASH_YEAR', 'CRASH_MONTH']).sum()
Out[47]:
AUTOMOBILE_COUNT
BELTED_DEATH_COUNT
BELTED_MAJ_INJ_COUNT
BICYCLE_COUNT
BICYCLE_DEATH_COUNT
BICYCLE_MAJ_INJ_COUNT
BUS_COUNT
COLLISION_TYPE
COMM_VEH_COUNT
CRN
...
SUV_COUNT
TCD_TYPE
UNBELTED_OCC_COUNT
UNB_DEATH_COUNT
UNB_MAJ_INJ_COUNT
UNK_INJ_DEG_COUNT
UNK_INJ_PER_COUNT
VAN_COUNT
VEHICLE_COUNT
WEATHER
CRASH_YEAR
CRASH_MONTH
2011
1
790
0
2
3
0
0
22
3076
44
1367495703371
...
169
749
103
2
1
264
102
48
1282
1427
10
1149
1
2
53
0
1
25
4394
46
1999071846869
...
223
1156
217
3
8
399
235
87
1816
1456
11
1036
0
1
37
1
0
24
3897
45
1777940339108
...
211
1183
277
0
2
371
217
94
1654
1201
12
1110
1
2
27
0
0
27
4389
63
1931386765695
...
206
1251
152
4
2
357
213
80
1767
1341
2
855
0
6
10
0
2
23
3167
45
1449956436090
...
177
750
125
4
2
288
122
59
1384
1172
3
985
0
1
20
0
2
20
3642
53
1695306387606
...
236
995
129
0
0
329
131
74
1606
1032
4
1105
0
0
31
0
0
20
4168
40
1922565157714
...
255
1075
169
1
0
405
192
89
1814
1270
5
1187
0
2
51
0
2
27
4460
54
2029162368316
...
214
1214
197
0
7
423
172
101
1902
1285
6
1009
0
0
84
0
4
34
3956
56
1884380358546
...
242
1076
199
0
2
352
168
98
1741
1121
7
1037
0
5
66
0
0
33
3675
65
1771768116370
...
201
1054
256
1
8
372
197
80
1673
970
8
1035
0
6
74
1
2
20
4094
51
1878362850573
...
221
1092
264
0
3
364
201
83
1709
1251
9
993
0
2
52
0
3
26
3813
53
1747655310845
...
210
1048
250
0
1
334
212
75
1634
1206
2012
1
1020
4
7
15
0
0
18
3764
54
1760515032423
...
211
1018
255
3
7
315
242
87
1660
1367
10
1163
3
2
42
1
0
24
4212
56
1998044231386
...
240
1158
135
0
4
360
162
80
1903
1395
11
961
1
1
20
0
0
32
3507
45
1698381279543
...
212
957
136
1
8
301
151
61
1598
1037
12
1032
1
4
27
0
0
23
4163
51
1839777951991
...
227
1055
119
2
2
339
183
70
1696
1535
2
936
0
3
22
0
0
19
3485
35
1649864792851
...
220
971
151
0
1
300
93
77
1562
1168
3
1062
1
4
54
1
0
24
4216
66
1951678281469
...
253
1152
136
2
3
382
128
88
1868
1216
4
1059
0
5
42
0
1
12
4219
47
1977852708925
...
268
1154
164
1
6
343
152
105
1894
1227
5
1170
2
4
56
1
1
23
4466
50
2120714086555
...
280
1115
172
1
0
394
158
97
2012
1382
6
1037
0
4
66
0
0
22
4262
49
1991957502864
...
299
1069
188
0
1
390
129
119
1864
1205
7
986
0
4
70
0
3
15
3646
37
1798819296841
...
227
1027
137
0
2
320
130
108
1704
1034
8
1024
0
5
30
0
1
22
4111
54
1869239995372
...
200
1029
152
4
2
322
135
84
1655
1147
9
1081
0
1
44
1
0
22
3825
51
1871259469188
...
209
1060
140
1
4
361
153
72
1785
1213
2013
1
899
0
3
10
0
1
18
3568
39
1616453105334
...
218
891
126
0
6
284
129
70
1499
1257
10
991
0
2
46
0
3
37
3694
54
1775571834452
...
260
904
96
0
0
332
138
76
1727
1136
11
1001
1
3
32
0
1
23
4048
55
1880472971569
...
289
1045
124
0
2
352
99
90
1809
1223
12
886
0
1
16
0
0
16
3734
54
1669514817772
...
247
969
115
1
0
271
119
69
1578
1554
2
854
0
1
8
0
0
19
3383
43
1531915607237
...
177
896
108
0
3
272
119
71
1454
1289
3
1038
0
3
15
0
0
18
4130
50
1890247667462
...
287
1049
151
0
1
360
98
80
1840
1256
4
1070
0
2
48
0
1
17
4347
45
1984868500024
...
252
1088
152
0
2
370
125
92
1870
1172
5
1138
1
5
59
0
3
19
4627
51
2117743637438
...
268
1139
134
1
3
354
119
127
1955
1361
6
1152
0
1
70
0
2
18
4421
50
2061386235225
...
253
1153
178
1
0
346
106
101
1954
1387
7
1048
0
4
71
0
1
23
3755
54
1809760878123
...
229
997
126
1
1
351
122
76
1726
1088
8
932
0
3
82
0
2
16
3889
40
1805745319046
...
231
991
153
0
1
365
120
88
1663
1058
9
1007
2
7
81
0
1
27
4413
60
1994991006644
...
279
1150
125
1
2
385
134
94
1814
1123
2014
1
775
0
2
7
0
0
20
3451
63
1560869568345
...
257
832
88
0
1
230
87
84
1493
1618
10
999
0
7
65
0
2
21
4497
70
2064492934243
...
384
1231
157
2
1
379
138
126
1961
1310
11
1009
0
12
33
0
0
11
4201
52
1929564568815
...
326
1087
143
3
0
360
109
122
1797
1298
12
970
0
4
36
0
1
19
4171
44
1887647014226
...
362
994
109
2
0
310
138
100
1783
1419
2
596
2
0
9
0
0
27
2639
65
1228561495801
...
176
640
74
0
2
249
76
67
1145
1220
3
881
0
4
13
0
0
19
3777
50
1740134421473
...
278
905
133
1
4
307
99
105
1719
1247
4
944
0
3
44
1
2
24
4008
59
1836815049715
...
300
965
156
1
4
343
125
112
1742
1127
5
936
0
2
48
1
2
16
3928
49
1796545967202
...
311
924
149
1
4
362
111
108
1705
1137
6
934
0
3
81
0
2
20
3866
41
1836844430272
...
348
1047
119
0
2
351
100
111
1777
1107
7
907
0
4
68
0
1
9
3777
46
1790529422735
...
324
1034
112
1
3
324
136
103
1717
1052
8
936
1
5
74
0
2
11
3835
50
1836857351951
...
305
1072
109
0
1
307
162
101
1702
1104
9
990
2
5
73
1
1
15
4048
49
1895289839787
...
328
1115
127
2
3
371
155
104
1773
1098
48 rows × 46 columns
In [94]:
collision_gpd['CRASH_YEAR'].isin(['2013', '2014'])
Out[94]:
0 True
1 True
2 True
3 True
4 True
5 True
6 True
7 True
8 True
9 True
10 True
11 True
12 True
13 True
14 True
15 True
16 True
17 True
18 True
19 True
20 True
21 True
22 True
23 True
24 True
25 True
26 True
27 True
28 True
29 True
...
43458 False
43459 False
43460 False
43461 False
43462 False
43463 False
43464 False
43465 False
43466 False
43467 False
43468 False
43469 False
43470 False
43471 False
43472 False
43473 False
43474 False
43475 False
43476 False
43477 False
43478 False
43479 False
43480 False
43481 False
43482 False
43483 False
43484 False
43485 False
43486 False
43487 False
Name: CRASH_YEAR, dtype: bool
In [95]:
collision_gpd[collision_gpd['CRASH_YEAR'].isin(['2013', '2014'])].groupby(collision_gpd.CRASH_YEAR).size()
Out[95]:
CRASH_YEAR
2013 10997
2014 10627
dtype: int64
In [111]:
collision_filtered_gpd = collision_gpd[collision_gpd['CRASH_YEAR'].isin(['2013', '2014'])]
print(collision_filtered_gpd.shape)
collision_filtered_gpd.head().T
(21624, 54)
Out[111]:
0
1
2
3
4
AUTOMOBILE_COUNT
0
1
1
1
0
BELTED_DEATH_COUNT
0
0
0
0
0
BELTED_MAJ_INJ_COUNT
0
0
0
0
0
BICYCLE_COUNT
0
1
1
0
0
BICYCLE_DEATH_COUNT
0
0
0
0
0
BICYCLE_MAJ_INJ_COUNT
0
0
0
0
0
BUS_COUNT
0
0
0
0
0
COLLISION_TYPE
1
4
4
8
8
COMM_VEH_COUNT
1
0
0
0
0
CRASH_MONTH
8
8
8
8
8
CRASH_YEAR
2014
2014
2014
2014
2014
CRN
2014087880
2014087758
2014096425
2014088141
2014091439
DAY_OF_WEEK
6
6
7
7
7
FATAL_COUNT
0
0
0
0
0
HEAVY_TRUCK_COUNT
1
0
0
0
0
HOUR_OF_DAY
15
14
19
19
19
ILLUMINATION
1
1
1
1
1
INJURY_COUNT
5
1
1
1
1
INTERSECTION
0
0
1
0
0
INTERSECT_TYPE
0
0
1
0
0
LATITUDE
40.085
39.9904
40.0293
39.9535
40.0162
LENGTH
439.229
778.344
170.622
194.532
396.315
LOCATION_TYPE
0
0
0
0
0
LONGITUDE
-75.038
-75.1028
-75.0548
-75.2405
-75.0906
MAJ_INJ_COUNT
0
0
0
0
0
MAX_SEVERITY_LEVEL
4
4
3
4
8
MCYCLE_DEATH_COUNT
0
0
0
0
0
MCYCLE_MAJ_INJ_COUNT
0
0
0
0
0
MIN_INJ_COUNT
5
1
0
1
0
MOD_INJ_COUNT
0
0
1
0
0
MOTORCYCLE_COUNT
0
0
0
0
0
OBJECTID
4001
4002
4003
4004
4005
PED_COUNT
0
0
0
1
1
PED_DEATH_COUNT
0
0
0
0
0
PED_MAJ_INJ_COUNT
0
0
0
0
0
PERSON_COUNT
7
3
2
2
2
RELATION_TO_ROAD
1
1
1
1
1
ROAD_CONDITION
1
0
0
0
0
SCH_BUS_IND
N
N
N
N
N
SCH_ZONE_IND
N
N
U
U
N
SEG_ID
960283
541001
760684
300613
640755
SMALL_TRUCK_COUNT
0
0
0
0
0
SUV_COUNT
2
0
0
0
1
TCD_TYPE
0
0
0
0
0
TIME_OF_DAY
1500
1450
1919
1910
1905
UNBELTED_OCC_COUNT
0
0
0
0
0
UNB_DEATH_COUNT
0
0
0
0
0
UNB_MAJ_INJ_COUNT
0
0
0
0
0
UNK_INJ_DEG_COUNT
0
0
0
0
1
UNK_INJ_PER_COUNT
0
0
0
0
0
VAN_COUNT
0
0
0
0
0
VEHICLE_COUNT
3
1
1
1
1
WEATHER
2
1
1
1
1
geometry
POINT (-75.03795178519658 40.08497541304385)
POINT (-75.10275539598592 39.990404578965)
POINT (-75.05481628915624 40.02929723040301)
POINT (-75.24050521636346 39.95347871767484)
POINT (-75.09056881169107 40.01618353851767)
In [14]:
collision_gpd[collision_gpd.SEG_ID==960283][['AUTOMOBILE_COUNT', 'CRASH_MONTH', 'CRASH_YEAR']]
Out[14]:
AUTOMOBILE_COUNT
CRASH_MONTH
CRASH_YEAR
0
0
8
2014
4447
1
10
2014
5149
1
1
2014
12654
2
10
2013
42203
2
8
2011
In [29]:
collision_gpd[['SEG_ID']].drop_duplicates().shape
Out[29]:
(13206, 1)
In [6]:
collision_filtered_gpd[['SEG_ID', 'CRASH_YEAR']].drop_duplicates().shape
Out[6]:
(11697, 2)
In [6]:
collision_col_count = collision_gpd.groupby('SEG_ID')['AUTOMOBILE_COUNT'].sum().reset_index()
print(collision_col_count.shape)
collision_col_count.head().T
(13205, 2)
Out[6]:
0
1
2
3
4
SEG_ID
100002.0
100003.0
100006.0
100007.0
100008.0
AUTOMOBILE_COUNT
1.0
20.0
2.0
0.0
1.0
In [7]:
collision_gpd.groupby(['SEG_ID', 'CRASH_YEAR'])['AUTOMOBILE_COUNT'].sum().reset_index()
Out[7]:
SEG_ID
CRASH_YEAR
AUTOMOBILE_COUNT
0
100002.0
2013
1
1
100003.0
2012
7
2
100003.0
2013
6
3
100003.0
2014
7
4
100006.0
2012
2
5
100007.0
2013
0
6
100008.0
2014
1
7
100023.0
2011
0
8
100024.0
2013
1
9
100040.0
2011
10
10
100040.0
2012
15
11
100040.0
2013
4
12
100040.0
2014
7
13
100044.0
2011
3
14
100045.0
2011
1
15
100045.0
2012
4
16
100045.0
2013
2
17
100045.0
2014
1
18
100047.0
2014
1
19
100048.0
2013
0
20
100057.0
2011
0
21
100063.0
2011
1
22
100066.0
2011
1
23
100066.0
2012
0
24
100066.0
2013
3
25
100066.0
2014
1
26
100070.0
2012
1
27
100070.0
2013
6
28
100075.0
2013
2
29
100098.0
2013
1
...
...
...
...
23463
1160159.0
2011
2
23464
1160159.0
2013
1
23465
1160159.0
2014
1
23466
1160179.0
2011
2
23467
1160179.0
2012
4
23468
1160179.0
2013
0
23469
1160179.0
2014
1
23470
1160180.0
2012
1
23471
1160180.0
2014
1
23472
1160196.0
2011
1
23473
1160196.0
2014
1
23474
1160212.0
2011
2
23475
1160212.0
2012
7
23476
1160212.0
2013
1
23477
1160214.0
2012
4
23478
1160214.0
2013
4
23479
1160214.0
2014
1
23480
1160215.0
2013
1
23481
1160215.0
2014
1
23482
1180024.0
2012
1
23483
1180024.0
2013
2
23484
1180041.0
2011
0
23485
1180041.0
2014
1
23486
1180042.0
2012
0
23487
1180058.0
2011
0
23488
1180058.0
2012
0
23489
1180058.0
2013
1
23490
1180073.0
2014
2
23491
1180077.0
2012
1
23492
1180092.0
2014
1
23493 rows × 3 columns
In [7]:
collision_col_count[collision_col_count.SEG_ID==960283]
Out[7]:
SEG_ID
AUTOMOBILE_COUNT
12466
960283.0
6
In [30]:
collision_agg_count = collision_gpd[['SEG_ID', 'LENGTH']].drop_duplicates()
for column in collision_gpd.columns:
if('_COUNT' in column):
collision_col_count = collision_gpd.groupby('SEG_ID')[column].sum().reset_index()
collision_agg_count = collision_agg_count.merge(collision_col_count, left_on = 'SEG_ID', right_on = 'SEG_ID', right_index=False, how='left')
print(collision_agg_count.shape)
collision_agg_count.head().T
(13206, 32)
Out[30]:
0
1
2
3
4
SEG_ID
960283.000000
541001.000000
760684.000000
300613.000000
640755.000000
LENGTH
439.229039
778.344098
170.621503
194.531579
396.314637
AUTOMOBILE_COUNT
6.000000
9.000000
3.000000
1.000000
0.000000
BELTED_DEATH_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
BELTED_MAJ_INJ_COUNT
0.000000
2.000000
0.000000
0.000000
0.000000
BICYCLE_COUNT
0.000000
1.000000
1.000000
0.000000
0.000000
BICYCLE_DEATH_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
BICYCLE_MAJ_INJ_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
BUS_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
COMM_VEH_COUNT
1.000000
0.000000
0.000000
0.000000
0.000000
FATAL_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
HEAVY_TRUCK_COUNT
1.000000
0.000000
0.000000
0.000000
0.000000
INJURY_COUNT
9.000000
8.000000
2.000000
1.000000
1.000000
MAJ_INJ_COUNT
0.000000
2.000000
0.000000
0.000000
0.000000
MCYCLE_DEATH_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
MCYCLE_MAJ_INJ_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
MIN_INJ_COUNT
9.000000
4.000000
0.000000
1.000000
0.000000
MOD_INJ_COUNT
0.000000
2.000000
1.000000
0.000000
0.000000
MOTORCYCLE_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
PED_COUNT
0.000000
1.000000
0.000000
1.000000
1.000000
PED_DEATH_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
PED_MAJ_INJ_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
PERSON_COUNT
17.000000
16.000000
3.000000
2.000000
2.000000
SMALL_TRUCK_COUNT
0.000000
1.000000
0.000000
0.000000
0.000000
SUV_COUNT
4.000000
0.000000
0.000000
0.000000
1.000000
UNBELTED_OCC_COUNT
0.000000
1.000000
0.000000
0.000000
0.000000
UNB_DEATH_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
UNB_MAJ_INJ_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
UNK_INJ_DEG_COUNT
0.000000
0.000000
1.000000
0.000000
1.000000
UNK_INJ_PER_COUNT
0.000000
1.000000
0.000000
0.000000
0.000000
VAN_COUNT
0.000000
1.000000
0.000000
0.000000
0.000000
VEHICLE_COUNT
11.000000
11.000000
3.000000
1.000000
1.000000
In [112]:
collision_filtered_agg_count = collision_filtered_gpd[['SEG_ID', 'LENGTH', 'CRASH_MONTH']].drop_duplicates()
for column in collision_filtered_gpd.columns:
if('_COUNT' in column):
collision_col_count = collision_filtered_gpd.groupby(['SEG_ID', 'CRASH_MONTH'])[column].sum().reset_index()
collision_filtered_agg_count = collision_filtered_agg_count.merge(collision_col_count, left_on = ['SEG_ID', 'CRASH_MONTH'], right_on = ['SEG_ID', 'CRASH_MONTH'], right_index=False, how='left')
print(collision_filtered_agg_count.shape)
collision_filtered_agg_count.head().T
(17599, 33)
Out[112]:
0
1
2
3
4
SEG_ID
960283
541001
760684
300613
640755
LENGTH
439.229
778.344
170.622
194.532
396.315
CRASH_MONTH
8
8
8
8
8
AUTOMOBILE_COUNT
0
1
1
1
0
BELTED_DEATH_COUNT
0
0
0
0
0
BELTED_MAJ_INJ_COUNT
0
0
0
0
0
BICYCLE_COUNT
0
1
1
0
0
BICYCLE_DEATH_COUNT
0
0
0
0
0
BICYCLE_MAJ_INJ_COUNT
0
0
0
0
0
BUS_COUNT
0
0
0
0
0
COMM_VEH_COUNT
1
0
0
0
0
FATAL_COUNT
0
0
0
0
0
HEAVY_TRUCK_COUNT
1
0
0
0
0
INJURY_COUNT
5
1
1
1
1
MAJ_INJ_COUNT
0
0
0
0
0
MCYCLE_DEATH_COUNT
0
0
0
0
0
MCYCLE_MAJ_INJ_COUNT
0
0
0
0
0
MIN_INJ_COUNT
5
1
0
1
0
MOD_INJ_COUNT
0
0
1
0
0
MOTORCYCLE_COUNT
0
0
0
0
0
PED_COUNT
0
0
0
1
1
PED_DEATH_COUNT
0
0
0
0
0
PED_MAJ_INJ_COUNT
0
0
0
0
0
PERSON_COUNT
7
3
2
2
2
SMALL_TRUCK_COUNT
0
0
0
0
0
SUV_COUNT
2
0
0
0
1
UNBELTED_OCC_COUNT
0
0
0
0
0
UNB_DEATH_COUNT
0
0
0
0
0
UNB_MAJ_INJ_COUNT
0
0
0
0
0
UNK_INJ_DEG_COUNT
0
0
0
0
1
UNK_INJ_PER_COUNT
0
0
0
0
0
VAN_COUNT
0
0
0
0
0
VEHICLE_COUNT
3
1
1
1
1
In [113]:
collision_filtered_agg_count[collision_filtered_agg_count['SEG_ID']==960283]
Out[113]:
SEG_ID
LENGTH
CRASH_MONTH
AUTOMOBILE_COUNT
BELTED_DEATH_COUNT
BELTED_MAJ_INJ_COUNT
BICYCLE_COUNT
BICYCLE_DEATH_COUNT
BICYCLE_MAJ_INJ_COUNT
BUS_COUNT
...
PERSON_COUNT
SMALL_TRUCK_COUNT
SUV_COUNT
UNBELTED_OCC_COUNT
UNB_DEATH_COUNT
UNB_MAJ_INJ_COUNT
UNK_INJ_DEG_COUNT
UNK_INJ_PER_COUNT
VAN_COUNT
VEHICLE_COUNT
0
960283.0
439.229039
8
0.0
0.0
0.0
0.0
0.0
0.0
0.0
...
7.0
0.0
2.0
0.0
0.0
0.0
0.0
0.0
0.0
3.0
4125
960283.0
439.229039
10
3.0
0.0
0.0
0.0
0.0
0.0
0.0
...
5.0
0.0
2.0
0.0
0.0
0.0
0.0
0.0
0.0
5.0
4720
960283.0
439.229039
1
1.0
0.0
0.0
0.0
0.0
0.0
0.0
...
2.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
3 rows × 33 columns
In [13]:
collision_agg_count.AUTOMOBILE_COUNT + collision_agg_count.BICYCLE_COUNT
Out[13]:
0 6.0
1 10.0
2 4.0
3 1.0
4 0.0
5 10.0
6 11.0
7 4.0
8 14.0
9 3.0
10 25.0
11 2.0
12 3.0
13 1.0
14 1.0
15 0.0
16 17.0
17 93.0
18 4.0
19 0.0
20 30.0
21 11.0
22 27.0
23 8.0
24 192.0
25 3.0
26 2.0
27 3.0
28 88.0
29 6.0
...
43458 2.0
43459 5.0
43460 2.0
43461 2.0
43462 62.0
43463 0.0
43464 1.0
43465 1.0
43466 7.0
43467 2.0
43468 2.0
43469 7.0
43470 3.0
43471 9.0
43472 3.0
43473 11.0
43474 4.0
43475 21.0
43476 4.0
43477 9.0
43478 1.0
43479 8.0
43480 4.0
43481 1.0
43482 16.0
43483 6.0
43484 2.0
43485 1.0
43486 4.0
43487 6.0
dtype: float64
In [15]:
range(2, collision_agg_count.shape[0])
Out[15]:
range(2, 43488)
In [115]:
col_list= list(collision_filtered_agg_count)
col_list.remove('SEG_ID')
col_list.remove('LENGTH')
col_list.remove('CRASH_MONTH')
col_list
Out[115]:
['AUTOMOBILE_COUNT',
'BELTED_DEATH_COUNT',
'BELTED_MAJ_INJ_COUNT',
'BICYCLE_COUNT',
'BICYCLE_DEATH_COUNT',
'BICYCLE_MAJ_INJ_COUNT',
'BUS_COUNT',
'COMM_VEH_COUNT',
'FATAL_COUNT',
'HEAVY_TRUCK_COUNT',
'INJURY_COUNT',
'MAJ_INJ_COUNT',
'MCYCLE_DEATH_COUNT',
'MCYCLE_MAJ_INJ_COUNT',
'MIN_INJ_COUNT',
'MOD_INJ_COUNT',
'MOTORCYCLE_COUNT',
'PED_COUNT',
'PED_DEATH_COUNT',
'PED_MAJ_INJ_COUNT',
'PERSON_COUNT',
'SMALL_TRUCK_COUNT',
'SUV_COUNT',
'UNBELTED_OCC_COUNT',
'UNB_DEATH_COUNT',
'UNB_MAJ_INJ_COUNT',
'UNK_INJ_DEG_COUNT',
'UNK_INJ_PER_COUNT',
'VAN_COUNT',
'VEHICLE_COUNT']
In [32]:
collision_agg_count['TOTAL_COLLISION_COUNT'] = collision_agg_count[col_list].sum(axis=1)
In [33]:
print(collision_agg_count.shape)
collision_agg_count.head().T
(13206, 33)
Out[33]:
0
1
2
3
4
SEG_ID
960283.000000
541001.000000
760684.000000
300613.000000
640755.000000
LENGTH
439.229039
778.344098
170.621503
194.531579
396.314637
AUTOMOBILE_COUNT
6.000000
9.000000
3.000000
1.000000
0.000000
BELTED_DEATH_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
BELTED_MAJ_INJ_COUNT
0.000000
2.000000
0.000000
0.000000
0.000000
BICYCLE_COUNT
0.000000
1.000000
1.000000
0.000000
0.000000
BICYCLE_DEATH_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
BICYCLE_MAJ_INJ_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
BUS_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
COMM_VEH_COUNT
1.000000
0.000000
0.000000
0.000000
0.000000
FATAL_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
HEAVY_TRUCK_COUNT
1.000000
0.000000
0.000000
0.000000
0.000000
INJURY_COUNT
9.000000
8.000000
2.000000
1.000000
1.000000
MAJ_INJ_COUNT
0.000000
2.000000
0.000000
0.000000
0.000000
MCYCLE_DEATH_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
MCYCLE_MAJ_INJ_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
MIN_INJ_COUNT
9.000000
4.000000
0.000000
1.000000
0.000000
MOD_INJ_COUNT
0.000000
2.000000
1.000000
0.000000
0.000000
MOTORCYCLE_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
PED_COUNT
0.000000
1.000000
0.000000
1.000000
1.000000
PED_DEATH_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
PED_MAJ_INJ_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
PERSON_COUNT
17.000000
16.000000
3.000000
2.000000
2.000000
SMALL_TRUCK_COUNT
0.000000
1.000000
0.000000
0.000000
0.000000
SUV_COUNT
4.000000
0.000000
0.000000
0.000000
1.000000
UNBELTED_OCC_COUNT
0.000000
1.000000
0.000000
0.000000
0.000000
UNB_DEATH_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
UNB_MAJ_INJ_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
UNK_INJ_DEG_COUNT
0.000000
0.000000
1.000000
0.000000
1.000000
UNK_INJ_PER_COUNT
0.000000
1.000000
0.000000
0.000000
0.000000
VAN_COUNT
0.000000
1.000000
0.000000
0.000000
0.000000
VEHICLE_COUNT
11.000000
11.000000
3.000000
1.000000
1.000000
TOTAL_COLLISION_COUNT
58.000000
60.000000
14.000000
7.000000
7.000000
In [116]:
collision_filtered_agg_count['TOTAL_COLLISION_COUNT'] = collision_filtered_agg_count[col_list].sum(axis=1)
print(collision_filtered_agg_count.shape)
collision_filtered_agg_count.head().T
(17599, 34)
Out[116]:
0
1
2
3
4
SEG_ID
960283
541001
760684
300613
640755
LENGTH
439.229
778.344
170.622
194.532
396.315
CRASH_MONTH
8
8
8
8
8
AUTOMOBILE_COUNT
0
1
1
1
0
BELTED_DEATH_COUNT
0
0
0
0
0
BELTED_MAJ_INJ_COUNT
0
0
0
0
0
BICYCLE_COUNT
0
1
1
0
0
BICYCLE_DEATH_COUNT
0
0
0
0
0
BICYCLE_MAJ_INJ_COUNT
0
0
0
0
0
BUS_COUNT
0
0
0
0
0
COMM_VEH_COUNT
1
0
0
0
0
FATAL_COUNT
0
0
0
0
0
HEAVY_TRUCK_COUNT
1
0
0
0
0
INJURY_COUNT
5
1
1
1
1
MAJ_INJ_COUNT
0
0
0
0
0
MCYCLE_DEATH_COUNT
0
0
0
0
0
MCYCLE_MAJ_INJ_COUNT
0
0
0
0
0
MIN_INJ_COUNT
5
1
0
1
0
MOD_INJ_COUNT
0
0
1
0
0
MOTORCYCLE_COUNT
0
0
0
0
0
PED_COUNT
0
0
0
1
1
PED_DEATH_COUNT
0
0
0
0
0
PED_MAJ_INJ_COUNT
0
0
0
0
0
PERSON_COUNT
7
3
2
2
2
SMALL_TRUCK_COUNT
0
0
0
0
0
SUV_COUNT
2
0
0
0
1
UNBELTED_OCC_COUNT
0
0
0
0
0
UNB_DEATH_COUNT
0
0
0
0
0
UNB_MAJ_INJ_COUNT
0
0
0
0
0
UNK_INJ_DEG_COUNT
0
0
0
0
1
UNK_INJ_PER_COUNT
0
0
0
0
0
VAN_COUNT
0
0
0
0
0
VEHICLE_COUNT
3
1
1
1
1
TOTAL_COLLISION_COUNT
24
8
7
7
7
In [18]:
collision_filtered_agg_count[collision_filtered_agg_count['SEG_ID']==960283].T
Out[18]:
0
4125
4720
SEG_ID
960283
960283
960283
LENGTH
439.229
439.229
439.229
CRASH_MONTH
8
10
1
AUTOMOBILE_COUNT
0
3
1
BELTED_DEATH_COUNT
0
0
0
BELTED_MAJ_INJ_COUNT
0
0
0
BICYCLE_COUNT
0
0
0
BICYCLE_DEATH_COUNT
0
0
0
BICYCLE_MAJ_INJ_COUNT
0
0
0
BUS_COUNT
0
0
0
COMM_VEH_COUNT
1
0
0
FATAL_COUNT
0
0
0
HEAVY_TRUCK_COUNT
1
0
0
INJURY_COUNT
5
2
0
MAJ_INJ_COUNT
0
0
0
MCYCLE_DEATH_COUNT
0
0
0
MCYCLE_MAJ_INJ_COUNT
0
0
0
MIN_INJ_COUNT
5
2
0
MOD_INJ_COUNT
0
0
0
MOTORCYCLE_COUNT
0
0
0
PED_COUNT
0
0
0
PED_DEATH_COUNT
0
0
0
PED_MAJ_INJ_COUNT
0
0
0
PERSON_COUNT
7
5
2
SMALL_TRUCK_COUNT
0
0
0
SUV_COUNT
2
2
0
UNBELTED_OCC_COUNT
0
0
0
UNB_DEATH_COUNT
0
0
0
UNB_MAJ_INJ_COUNT
0
0
0
UNK_INJ_DEG_COUNT
0
0
0
UNK_INJ_PER_COUNT
0
0
0
VAN_COUNT
0
0
0
VEHICLE_COUNT
3
5
1
TOTAL_COLLISION_COUNT
24
19
4
In [117]:
for column in collision_filtered_agg_count.columns:
if('_COUNT' in column):
col_len = column + '/LEN'
collision_filtered_agg_count[col_len] = collision_filtered_agg_count[column]/collision_filtered_agg_count.LENGTH
normalize(column, collision_filtered_agg_count)
normalize(col_len, collision_filtered_agg_count)
In [118]:
print(collision_filtered_agg_count.shape)
print(collision_filtered_agg_count.drop_duplicates().shape)
collision_filtered_agg_count[collision_filtered_agg_count['SEG_ID']==960283].T
(17599, 127)
(17599, 127)
Out[118]:
0
4125
4720
SEG_ID
960283
960283
960283
LENGTH
439.229
439.229
439.229
CRASH_MONTH
8
10
1
AUTOMOBILE_COUNT
0
3
1
BELTED_DEATH_COUNT
0
0
0
BELTED_MAJ_INJ_COUNT
0
0
0
BICYCLE_COUNT
0
0
0
BICYCLE_DEATH_COUNT
0
0
0
BICYCLE_MAJ_INJ_COUNT
0
0
0
BUS_COUNT
0
0
0
COMM_VEH_COUNT
1
0
0
FATAL_COUNT
0
0
0
HEAVY_TRUCK_COUNT
1
0
0
INJURY_COUNT
5
2
0
MAJ_INJ_COUNT
0
0
0
MCYCLE_DEATH_COUNT
0
0
0
MCYCLE_MAJ_INJ_COUNT
0
0
0
MIN_INJ_COUNT
5
2
0
MOD_INJ_COUNT
0
0
0
MOTORCYCLE_COUNT
0
0
0
PED_COUNT
0
0
0
PED_DEATH_COUNT
0
0
0
PED_MAJ_INJ_COUNT
0
0
0
PERSON_COUNT
7
5
2
SMALL_TRUCK_COUNT
0
0
0
SUV_COUNT
2
2
0
UNBELTED_OCC_COUNT
0
0
0
UNB_DEATH_COUNT
0
0
0
UNB_MAJ_INJ_COUNT
0
0
0
UNK_INJ_DEG_COUNT
0
0
0
...
...
...
...
SMALL_TRUCK_COUNT/LEN
0
0
0
SMALL_TRUCK_COUNT_norm
0
0
0
SMALL_TRUCK_COUNT/LEN_norm
0
0
0
SUV_COUNT/LEN
0.00455343
0.00455343
0
SUV_COUNT_norm
0.41629
0.41629
0
SUV_COUNT/LEN_norm
0.0389145
0.0389145
0
UNBELTED_OCC_COUNT/LEN
0
0
0
UNBELTED_OCC_COUNT_norm
0
0
0
UNBELTED_OCC_COUNT/LEN_norm
0
0
0
UNB_DEATH_COUNT/LEN
0
0
0
UNB_DEATH_COUNT_norm
0
0
0
UNB_DEATH_COUNT/LEN_norm
0
0
0
UNB_MAJ_INJ_COUNT/LEN
0
0
0
UNB_MAJ_INJ_COUNT_norm
0
0
0
UNB_MAJ_INJ_COUNT/LEN_norm
0
0
0
UNK_INJ_DEG_COUNT/LEN
0
0
0
UNK_INJ_DEG_COUNT_norm
0
0
0
UNK_INJ_DEG_COUNT/LEN_norm
0
0
0
UNK_INJ_PER_COUNT/LEN
0
0
0
UNK_INJ_PER_COUNT_norm
0
0
0
UNK_INJ_PER_COUNT/LEN_norm
0
0
0
VAN_COUNT/LEN
0
0
0
VAN_COUNT_norm
0
0
0
VAN_COUNT/LEN_norm
0
0
0
VEHICLE_COUNT/LEN
0.00683015
0.0113836
0.00227672
VEHICLE_COUNT_norm
0.35085
0.453467
0.175425
VEHICLE_COUNT/LEN_norm
0.0291109
0.0484087
0.00972568
TOTAL_COLLISION_COUNT/LEN
0.0546412
0.0432576
0.00910687
TOTAL_COLLISION_COUNT_norm
0.591442
0.550441
0.295721
TOTAL_COLLISION_COUNT/LEN_norm
0.066306
0.0527801
0.0112989
127 rows × 3 columns
In [184]:
[col for col in collision_filtered_agg_count.columns if 'LEN_norm' in col]
Out[184]:
['AUTOMOBILE_COUNT/LEN_norm',
'BELTED_DEATH_COUNT/LEN_norm',
'BELTED_MAJ_INJ_COUNT/LEN_norm',
'BICYCLE_COUNT/LEN_norm',
'BICYCLE_DEATH_COUNT/LEN_norm',
'BICYCLE_MAJ_INJ_COUNT/LEN_norm',
'BUS_COUNT/LEN_norm',
'COMM_VEH_COUNT/LEN_norm',
'FATAL_COUNT/LEN_norm',
'HEAVY_TRUCK_COUNT/LEN_norm',
'INJURY_COUNT/LEN_norm',
'MAJ_INJ_COUNT/LEN_norm',
'MCYCLE_DEATH_COUNT/LEN_norm',
'MCYCLE_MAJ_INJ_COUNT/LEN_norm',
'MIN_INJ_COUNT/LEN_norm',
'MOD_INJ_COUNT/LEN_norm',
'MOTORCYCLE_COUNT/LEN_norm',
'PED_COUNT/LEN_norm',
'PED_DEATH_COUNT/LEN_norm',
'PED_MAJ_INJ_COUNT/LEN_norm',
'PERSON_COUNT/LEN_norm',
'SMALL_TRUCK_COUNT/LEN_norm',
'SUV_COUNT/LEN_norm',
'UNBELTED_OCC_COUNT/LEN_norm',
'UNB_DEATH_COUNT/LEN_norm',
'UNB_MAJ_INJ_COUNT/LEN_norm',
'UNK_INJ_DEG_COUNT/LEN_norm',
'UNK_INJ_PER_COUNT/LEN_norm',
'VAN_COUNT/LEN_norm',
'VEHICLE_COUNT/LEN_norm',
'TOTAL_COLLISION_COUNT/LEN_norm']
In [186]:
collision_filtered_agg_count[['AUTOMOBILE_COUNT', 'BICYCLE_COUNT', 'BUS_COUNT', 'COMM_VEH_COUNT', 'HEAVY_TRUCK_COUNT', 'MOTORCYCLE_COUNT', 'SMALL_TRUCK_COUNT', 'SUV_COUNT', 'VAN_COUNT', 'VEHICLE_COUNT']].T
Out[186]:
0
1
2
3
4
5
6
7
8
9
...
17589
17590
17591
17592
17593
17594
17595
17596
17597
17598
AUTOMOBILE_COUNT
0.0
1.0
1.0
1.0
0.0
1.0
2.0
2.0
1.0
0.0
...
0.0
1.0
2.0
1.0
0.0
0.0
0.0
1.0
2.0
0.0
BICYCLE_COUNT
0.0
1.0
1.0
0.0
0.0
0.0
1.0
0.0
0.0
0.0
...
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
BUS_COUNT
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
...
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
COMM_VEH_COUNT
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
...
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
HEAVY_TRUCK_COUNT
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
...
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
MOTORCYCLE_COUNT
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
...
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
SMALL_TRUCK_COUNT
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
...
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
SUV_COUNT
2.0
0.0
0.0
0.0
1.0
0.0
1.0
0.0
0.0
2.0
...
1.0
1.0
0.0
0.0
0.0
0.0
2.0
1.0
0.0
0.0
VAN_COUNT
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
0.0
...
0.0
0.0
0.0
0.0
1.0
1.0
0.0
0.0
0.0
1.0
VEHICLE_COUNT
3.0
1.0
1.0
1.0
1.0
2.0
3.0
2.0
2.0
2.0
...
2.0
2.0
2.0
2.0
1.0
1.0
2.0
3.0
2.0
1.0
10 rows × 17599 columns
In [188]:
collision_filtered_agg_count[['PED_COUNT', 'PED_DEATH_COUNT', 'PED_MAJ_INJ_COUNT', 'PERSON_COUNT']].T
Out[188]:
0
1
2
3
4
5
6
7
8
9
...
17589
17590
17591
17592
17593
17594
17595
17596
17597
17598
PED_COUNT
0.0
0.0
0.0
1.0
1.0
0.0
0.0
0.0
0.0
0.0
...
0.0
0.0
0.0
0.0
0.0
1.0
0.0
0.0
0.0
1.0
PED_DEATH_COUNT
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
...
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
PED_MAJ_INJ_COUNT
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
...
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
PERSON_COUNT
7.0
3.0
2.0
2.0
2.0
3.0
6.0
4.0
2.0
2.0
...
2.0
5.0
2.0
2.0
1.0
2.0
2.0
2.0
2.0
2.0
4 rows × 17599 columns
In [23]:
def normalize(col_name, df):
col_name_norm = col_name + '_norm'
df[col_name_norm] = df[col_name]+1
df[col_name_norm] = np.log2(df[col_name_norm])
df[col_name_norm] = df[col_name_norm]/df[col_name_norm].max()
In [34]:
for column in collision_agg_count.columns:
if('_COUNT' in column):
col_len = column + '/LEN'
collision_agg_count[col_len] = collision_agg_count[column]/collision_agg_count.LENGTH
normalize(column, collision_agg_count)
normalize(col_len, collision_agg_count)
In [35]:
print(collision_agg_count.shape)
print(collision_agg_count.drop_duplicates().shape)
(13206, 126)
(13206, 126)
In [119]:
print(collision_filtered_agg_count.shape)
print(collision_filtered_agg_count.drop_duplicates().shape)
print(collision_filtered_agg_count.dropna().shape)
(17599, 127)
(17599, 127)
(17587, 127)
In [122]:
print(set(collision_filtered_agg_count.columns)-set(collision_filtered_agg_count.dropna().columns))
set()
In [120]:
collision_filtered_agg_count[pd.isnull(collision_filtered_agg_count.CRASH_MONTH)==1].shape
Out[120]:
(0, 127)
In [121]:
collision_filtered_agg_count.head().T
Out[121]:
0
1
2
3
4
SEG_ID
960283
541001
760684
300613
640755
LENGTH
439.229
778.344
170.622
194.532
396.315
CRASH_MONTH
8
8
8
8
8
AUTOMOBILE_COUNT
0
1
1
1
0
BELTED_DEATH_COUNT
0
0
0
0
0
BELTED_MAJ_INJ_COUNT
0
0
0
0
0
BICYCLE_COUNT
0
1
1
0
0
BICYCLE_DEATH_COUNT
0
0
0
0
0
BICYCLE_MAJ_INJ_COUNT
0
0
0
0
0
BUS_COUNT
0
0
0
0
0
COMM_VEH_COUNT
1
0
0
0
0
FATAL_COUNT
0
0
0
0
0
HEAVY_TRUCK_COUNT
1
0
0
0
0
INJURY_COUNT
5
1
1
1
1
MAJ_INJ_COUNT
0
0
0
0
0
MCYCLE_DEATH_COUNT
0
0
0
0
0
MCYCLE_MAJ_INJ_COUNT
0
0
0
0
0
MIN_INJ_COUNT
5
1
0
1
0
MOD_INJ_COUNT
0
0
1
0
0
MOTORCYCLE_COUNT
0
0
0
0
0
PED_COUNT
0
0
0
1
1
PED_DEATH_COUNT
0
0
0
0
0
PED_MAJ_INJ_COUNT
0
0
0
0
0
PERSON_COUNT
7
3
2
2
2
SMALL_TRUCK_COUNT
0
0
0
0
0
SUV_COUNT
2
0
0
0
1
UNBELTED_OCC_COUNT
0
0
0
0
0
UNB_DEATH_COUNT
0
0
0
0
0
UNB_MAJ_INJ_COUNT
0
0
0
0
0
UNK_INJ_DEG_COUNT
0
0
0
0
1
...
...
...
...
...
...
SMALL_TRUCK_COUNT/LEN
0
0
0
0
0
SMALL_TRUCK_COUNT_norm
0
0
0
0
0
SMALL_TRUCK_COUNT/LEN_norm
0
0
0
0
0
SUV_COUNT/LEN
0.00455343
0
0
0
0.00252325
SUV_COUNT_norm
0.41629
0
0
0
0.26265
SUV_COUNT/LEN_norm
0.0389145
0
0
0
0.021586
UNBELTED_OCC_COUNT/LEN
0
0
0
0
0
UNBELTED_OCC_COUNT_norm
0
0
0
0
0
UNBELTED_OCC_COUNT/LEN_norm
0
0
0
0
0
UNB_DEATH_COUNT/LEN
0
0
0
0
0
UNB_DEATH_COUNT_norm
0
0
0
0
0
UNB_DEATH_COUNT/LEN_norm
0
0
0
0
0
UNB_MAJ_INJ_COUNT/LEN
0
0
0
0
0
UNB_MAJ_INJ_COUNT_norm
0
0
0
0
0
UNB_MAJ_INJ_COUNT/LEN_norm
0
0
0
0
0
UNK_INJ_DEG_COUNT/LEN
0
0
0
0
0.00252325
UNK_INJ_DEG_COUNT_norm
0
0
0
0
0.224244
UNK_INJ_DEG_COUNT/LEN_norm
0
0
0
0
0.0287781
UNK_INJ_PER_COUNT/LEN
0
0
0
0
0
UNK_INJ_PER_COUNT_norm
0
0
0
0
0
UNK_INJ_PER_COUNT/LEN_norm
0
0
0
0
0
VAN_COUNT/LEN
0
0
0
0
0
VAN_COUNT_norm
0
0
0
0
0
VAN_COUNT/LEN_norm
0
0
0
0
0
VEHICLE_COUNT/LEN
0.00683015
0.00128478
0.00586093
0.00514055
0.00252325
VEHICLE_COUNT_norm
0.35085
0.175425
0.175425
0.175425
0.175425
VEHICLE_COUNT/LEN_norm
0.0291109
0.00549104
0.024992
0.0219281
0.0107775
TOTAL_COLLISION_COUNT/LEN
0.0546412
0.0102782
0.0410265
0.0359839
0.0176627
TOTAL_COLLISION_COUNT_norm
0.591442
0.403722
0.38208
0.38208
0.38208
TOTAL_COLLISION_COUNT/LEN_norm
0.066306
0.0127448
0.0501118
0.04406
0.0218216
127 rows × 5 columns
In [36]:
collision_agg_count.head().T
Out[36]:
0
1
2
3
4
SEG_ID
960283.000000
541001.000000
760684.000000
300613.000000
640755.000000
LENGTH
439.229039
778.344098
170.621503
194.531579
396.314637
AUTOMOBILE_COUNT
6.000000
9.000000
3.000000
1.000000
0.000000
BELTED_DEATH_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
BELTED_MAJ_INJ_COUNT
0.000000
2.000000
0.000000
0.000000
0.000000
BICYCLE_COUNT
0.000000
1.000000
1.000000
0.000000
0.000000
BICYCLE_DEATH_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
BICYCLE_MAJ_INJ_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
BUS_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
COMM_VEH_COUNT
1.000000
0.000000
0.000000
0.000000
0.000000
FATAL_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
HEAVY_TRUCK_COUNT
1.000000
0.000000
0.000000
0.000000
0.000000
INJURY_COUNT
9.000000
8.000000
2.000000
1.000000
1.000000
MAJ_INJ_COUNT
0.000000
2.000000
0.000000
0.000000
0.000000
MCYCLE_DEATH_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
MCYCLE_MAJ_INJ_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
MIN_INJ_COUNT
9.000000
4.000000
0.000000
1.000000
0.000000
MOD_INJ_COUNT
0.000000
2.000000
1.000000
0.000000
0.000000
MOTORCYCLE_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
PED_COUNT
0.000000
1.000000
0.000000
1.000000
1.000000
PED_DEATH_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
PED_MAJ_INJ_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
PERSON_COUNT
17.000000
16.000000
3.000000
2.000000
2.000000
SMALL_TRUCK_COUNT
0.000000
1.000000
0.000000
0.000000
0.000000
SUV_COUNT
4.000000
0.000000
0.000000
0.000000
1.000000
UNBELTED_OCC_COUNT
0.000000
1.000000
0.000000
0.000000
0.000000
UNB_DEATH_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
UNB_MAJ_INJ_COUNT
0.000000
0.000000
0.000000
0.000000
0.000000
UNK_INJ_DEG_COUNT
0.000000
0.000000
1.000000
0.000000
1.000000
UNK_INJ_PER_COUNT
0.000000
1.000000
0.000000
0.000000
0.000000
...
...
...
...
...
...
SMALL_TRUCK_COUNT/LEN
0.000000
0.001285
0.000000
0.000000
0.000000
SMALL_TRUCK_COUNT_norm
0.000000
0.173765
0.000000
0.000000
0.000000
SMALL_TRUCK_COUNT/LEN_norm
0.000000
0.010017
0.000000
0.000000
0.000000
SUV_COUNT/LEN
0.009107
0.000000
0.000000
0.000000
0.002523
SUV_COUNT_norm
0.330128
0.000000
0.000000
0.000000
0.142178
SUV_COUNT/LEN_norm
0.032025
0.000000
0.000000
0.000000
0.008902
UNBELTED_OCC_COUNT/LEN
0.000000
0.001285
0.000000
0.000000
0.000000
UNBELTED_OCC_COUNT_norm
0.000000
0.182088
0.000000
0.000000
0.000000
UNBELTED_OCC_COUNT/LEN_norm
0.000000
0.007687
0.000000
0.000000
0.000000
UNB_DEATH_COUNT/LEN
0.000000
0.000000
0.000000
0.000000
0.000000
UNB_DEATH_COUNT_norm
0.000000
0.000000
0.000000
0.000000
0.000000
UNB_DEATH_COUNT/LEN_norm
0.000000
0.000000
0.000000
0.000000
0.000000
UNB_MAJ_INJ_COUNT/LEN
0.000000
0.000000
0.000000
0.000000
0.000000
UNB_MAJ_INJ_COUNT_norm
0.000000
0.000000
0.000000
0.000000
0.000000
UNB_MAJ_INJ_COUNT/LEN_norm
0.000000
0.000000
0.000000
0.000000
0.000000
UNK_INJ_DEG_COUNT/LEN
0.000000
0.000000
0.005861
0.000000
0.002523
UNK_INJ_DEG_COUNT_norm
0.000000
0.000000
0.160053
0.000000
0.160053
UNK_INJ_DEG_COUNT/LEN_norm
0.000000
0.000000
0.015892
0.000000
0.006853
UNK_INJ_PER_COUNT/LEN
0.000000
0.001285
0.000000
0.000000
0.000000
UNK_INJ_PER_COUNT_norm
0.000000
0.187902
0.000000
0.000000
0.000000
UNK_INJ_PER_COUNT/LEN_norm
0.000000
0.006881
0.000000
0.000000
0.000000
VAN_COUNT/LEN
0.000000
0.001285
0.000000
0.000000
0.000000
VAN_COUNT_norm
0.000000
0.227670
0.000000
0.000000
0.000000
VAN_COUNT/LEN_norm
0.000000
0.018292
0.000000
0.000000
0.000000
VEHICLE_COUNT/LEN
0.025044
0.014133
0.017583
0.005141
0.002523
VEHICLE_COUNT_norm
0.389064
0.389064
0.217053
0.108527
0.108527
VEHICLE_COUNT/LEN_norm
0.025339
0.014376
0.017856
0.005253
0.002582
TOTAL_COLLISION_COUNT/LEN
0.132050
0.077087
0.082053
0.035984
0.017663
TOTAL_COLLISION_COUNT_norm
0.520493
0.524748
0.345679
0.265438
0.265438
TOTAL_COLLISION_COUNT/LEN_norm
0.055549
0.033259
0.035319
0.015833
0.007842
126 rows × 5 columns
In [38]:
collision_agg_count.dropna().to_csv('../philly/Philly_Collision_Details.csv')
In [123]:
collision_filtered_agg_count.dropna().to_csv('../philly/Philly_Collision_Details_2013-14.csv')
In [48]:
parking_vio = pd.read_csv('../philly/Parking_Violations.csv')
In [49]:
print(parking_vio.shape)
parking_vio.head().T
(6863434, 11)
Out[49]:
0
1
2
3
4
Issue Date and Time
01/01/2012 12:00:00 AM
01/01/2012 12:00:00 AM
01/01/2012 12:00:00 AM
01/01/2012 12:01:00 AM
01/01/2012 12:07:00 AM
State
PA
PA
PA
TX
PA
Plate ID
1048932
679582
1082165
1720567
1172258
Division
0
9
5
2
63
Location
1000 VINE
400 S 25TH
7016 HENRY AVE
300 DEVEREAU
450 N 6TH
Location Standardized
1000 VINE ST, 19107
400 S 25TH ST, 19146
7016 HENRY AVE, 19128
NaN
450 N 6TH ST, 19123
Coordinates
(39.9568850255,-75.1563742867)
(39.9473721746,-75.1814961216)
(40.0483515634,-75.2185249487)
NaN
(39.9598882869,-75.1484695504)
Violation Description
PARKING PROHBITED CC
SCHOOL ZONE
SIDEWALK
BLOCKING DRIVEWAY
PARKING PROHBITED CC
Fine
$51.00
$36.00
$51.00
$51.00
$51.00
Issuing Agency
POLICE
POLICE
POLICE
POLICE
POLICE
Violation Location
(39.9568850255, -75.1563742867)
(39.9473721746, -75.1814961216)
(40.0483515634, -75.2185249487)
NaN
(39.9598882869, -75.1484695504)
In [52]:
parking_vio = parking_vio.dropna(subset=['Coordinates'])
print(parking_vio.shape)
parking_vio.head().T
(6522895, 11)
Out[52]:
0
1
2
4
5
Issue Date and Time
01/01/2012 12:00:00 AM
01/01/2012 12:00:00 AM
01/01/2012 12:00:00 AM
01/01/2012 12:07:00 AM
01/01/2012 12:07:00 AM
State
PA
PA
PA
PA
PA
Plate ID
1048932
679582
1082165
1172258
978500
Division
0
9
5
63
16
Location
1000 VINE
400 S 25TH
7016 HENRY AVE
450 N 6TH
1000 N 42ND
Location Standardized
1000 VINE ST, 19107
400 S 25TH ST, 19146
7016 HENRY AVE, 19128
450 N 6TH ST, 19123
1000 N 42ND ST, 19104
Coordinates
(39.9568850255,-75.1563742867)
(39.9473721746,-75.1814961216)
(40.0483515634,-75.2185249487)
(39.9598882869,-75.1484695504)
(39.9722105836,-75.2086555318)
Violation Description
PARKING PROHBITED CC
SCHOOL ZONE
SIDEWALK
PARKING PROHBITED CC
DOUBLE PARKED
Fine
$51.00
$36.00
$51.00
$51.00
$51.00
Issuing Agency
POLICE
POLICE
POLICE
POLICE
POLICE
Violation Location
(39.9568850255, -75.1563742867)
(39.9473721746, -75.1814961216)
(40.0483515634, -75.2185249487)
(39.9598882869, -75.1484695504)
(39.9722105836, -75.2086555318)
In [55]:
parking_vio['Violation Location'][0]
Out[55]:
'(39.9568850255, -75.1563742867)'
In [60]:
parking_vio['Coordinates'][0].replace('(', '').replace(')', '').split(',')[0]
Out[60]:
'39.9568850255'
In [57]:
parking_vio.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 6522895 entries, 0 to 6863433
Data columns (total 11 columns):
Issue Date and Time object
State object
Plate ID int64
Division float64
Location object
Location Standardized object
Coordinates object
Violation Description object
Fine object
Issuing Agency object
Violation Location object
dtypes: float64(1), int64(1), object(9)
memory usage: 597.2+ MB
In [66]:
points = [Point(float(row['Coordinates'].replace('(', '').replace(')', '').split(',')[1]), float(row['Coordinates'].replace('(', '').replace(')', '').split(',')[0])) for key, row in parking_vio.iterrows()]
geo_df = GeoDataFrame(parking_vio,geometry=points)
geo_df.to_file('../philly/Parking_Violations.geojson', driver='GeoJSON')
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-66-1df58c4ba068> in <module>()
1 points = [Point(float(row['Coordinates'].replace('(', '').replace(')', '').split(',')[1]), float(row['Coordinates'].replace('(', '').replace(')', '').split(',')[0])) for key, row in parking_vio.iterrows()]
2 geo_df = GeoDataFrame(parking_vio,geometry=points)
----> 3 geo_df.to_file('../philly/Parking_Violations.geojson', driver='GeoJSON')
//anaconda/lib/python3.5/site-packages/geopandas/geodataframe.py in to_file(self, filename, driver, schema, **kwargs)
341 """
342 from geopandas.io.file import to_file
--> 343 to_file(self, filename, driver, schema, **kwargs)
344
345 def to_crs(self, crs=None, epsg=None, inplace=False):
//anaconda/lib/python3.5/site-packages/geopandas/io/file.py in to_file(df, filename, driver, schema, **kwargs)
59 filename = os.path.abspath(os.path.expanduser(filename))
60 with fiona.open(filename, 'w', driver=driver, crs=df.crs,
---> 61 schema=schema, **kwargs) as c:
62 for feature in df.iterfeatures():
63 c.write(feature)
//anaconda/lib/python3.5/site-packages/fiona/__init__.py in open(path, mode, driver, schema, crs, encoding, layer, vfs, enabled_drivers, crs_wkt)
176 c = Collection(path, mode, crs=crs, driver=driver, schema=this_schema,
177 encoding=encoding, layer=layer, vsi=vsi, archive=archive,
--> 178 enabled_drivers=enabled_drivers, crs_wkt=crs_wkt)
179 else:
180 raise ValueError(
//anaconda/lib/python3.5/site-packages/fiona/collection.py in __init__(self, path, mode, driver, schema, crs, encoding, layer, vsi, archive, enabled_drivers, crs_wkt, **kwargs)
153 self.encoding = encoding
154 self.session = WritingSession()
--> 155 self.session.start(self, **kwargs)
156 self.encoding = encoding or self.session.get_fileencoding().lower()
157
fiona/ogrext.pyx in fiona.ogrext.WritingSession.start (fiona/ogrext.c:15539)()
ValueError: Null layer
In [ ]:
parking_vio_gpd = gpd.read_file('../philly/Parking_Violations.geojson')
print(parking_vio_gpd.shape)
parking_vio.head().T
In [67]:
geo_df.shape
Out[67]:
(6522895, 12)
In [68]:
geo_df.head().T
Out[68]:
0
1
2
4
5
Issue Date and Time
01/01/2012 12:00:00 AM
01/01/2012 12:00:00 AM
01/01/2012 12:00:00 AM
01/01/2012 12:07:00 AM
01/01/2012 12:07:00 AM
State
PA
PA
PA
PA
PA
Plate ID
1048932
679582
1082165
1172258
978500
Division
0
9
5
63
16
Location
1000 VINE
400 S 25TH
7016 HENRY AVE
450 N 6TH
1000 N 42ND
Location Standardized
1000 VINE ST, 19107
400 S 25TH ST, 19146
7016 HENRY AVE, 19128
450 N 6TH ST, 19123
1000 N 42ND ST, 19104
Coordinates
(39.9568850255,-75.1563742867)
(39.9473721746,-75.1814961216)
(40.0483515634,-75.2185249487)
(39.9598882869,-75.1484695504)
(39.9722105836,-75.2086555318)
Violation Description
PARKING PROHBITED CC
SCHOOL ZONE
SIDEWALK
PARKING PROHBITED CC
DOUBLE PARKED
Fine
$51.00
$36.00
$51.00
$51.00
$51.00
Issuing Agency
POLICE
POLICE
POLICE
POLICE
POLICE
Violation Location
(39.9568850255, -75.1563742867)
(39.9473721746, -75.1814961216)
(40.0483515634, -75.2185249487)
(39.9598882869, -75.1484695504)
(39.9722105836, -75.2086555318)
geometry
POINT (-75.1563742867 39.9568850255)
POINT (-75.18149612160001 39.9473721746)
POINT (-75.21852494869999 40.0483515634)
POINT (-75.14846955039999 39.9598882869)
POINT (-75.20865553180001 39.9722105836)
In [69]:
geo_df[['geometry']].dropna().shape
Out[69]:
(6522895, 1)
In [ ]:
geo_df.to_file('../philly/GeoJSON_data/Parking_Violations.geojson', driver='GeoJSON')
In [ ]:
philly_seg_buffer_gp = street_gpd[['SEG_ID','geometry']].copy()
philly_seg_buffer_gp.geometry = philly_seg_buffer_gp.buffer(0.0001)
park_vio_buffer_gp = geo_df[['Plate ID','Coordinates','geometry']].dropna().copy()
park_vio_buffer_gp.geometry = park_vio_buffer_gp.buffer(0.0001)
joined = sjoin(park_vio_buffer_gp, philly_seg_buffer_gp, how='left', op='intersects')
gb = joined.reset_index().groupby(['Plate ID', 'Coordinates']).count()
print(gb[gb.SEG_ID==0].shape[0], 'points spatially match no segment')
print(gb[gb.SEG_ID==1].shape[0], 'points spatially match 1 segment')
print(gb[gb.SEG_ID>1].shape[0], 'points spatially match multiple segments')
Exception ignored in: <bound method Collection.__del__ of <open Collection '/Users/kiranjavkar/Documents/Cycling Safety/philly/Parking_Violations.geojson:OgrGeoJSON', mode 'w' at 0x3e6aac438>>
Traceback (most recent call last):
File "//anaconda/lib/python3.5/site-packages/fiona/collection.py", line 416, in __del__
self.__exit__(None, None, None)
File "//anaconda/lib/python3.5/site-packages/fiona/collection.py", line 411, in __exit__
self.close()
File "//anaconda/lib/python3.5/site-packages/fiona/collection.py", line 395, in close
self.flush()
File "//anaconda/lib/python3.5/site-packages/fiona/collection.py", line 384, in flush
if self.session is not None and self.session.get_length() > 0:
File "fiona/ogrext.pyx", line 495, in fiona.ogrext.Session.get_length (fiona/ogrext.c:9385)
ValueError: Null layer
In [127]:
bike_network = pd.read_csv('../philly/Bike_Network.csv')
print(bike_network.shape)
bike_network.head().T
(4065, 7)
Out[127]:
0
1
2
3
4
OBJECTID
1
2
3
4
5
SEG_ID
420562
420568
420554
421801
421802
STREETNAME
SPRING GARDEN ST
SPRING GARDEN ST
SPRING GARDEN ST
N 21ST ST
N 21ST ST
ST_CODE
73800
73800
73800
88190
88190
ONEWAY
TF
TF
TF
TF
TF
CLASS
2
2
2
4
4
TYPE
Conventional
Conventional
Conventional
Sharrow
Sharrow
In [126]:
street_gpd = gpd.read_file('../philly/GeoJSON_data/Street_Centerline_qgis3.geojson')
In [128]:
bike_lane = bike_network.groupby('SEG_ID').size().reset_index()
bike_lane.columns = ['SEG_ID', 'BIKELANE']
print(bike_lane.shape)
bike_lane.head().T
(4065, 2)
Out[128]:
0
1
2
3
4
SEG_ID
100002
100003
100004
100045
100066
BIKELANE
1
1
1
1
1
In [129]:
street_gpd = street_gpd.merge(bike_lane, left_on='SEG_ID', right_on='SEG_ID', right_index=False, how='left')
In [130]:
street_gpd.BIKELANE = street_gpd.BIKELANE.fillna(0)
print(street_gpd.shape)
street_gpd.head().T
(41022, 32)
Out[130]:
0
1
2
3
4
CLASS
3
3
3
5
4
FNODE_
2
2
1
6
5
LENGTH
449.863
540.083
446.104
447.261
148.216
LPOLY_
0
0
0
0
0
L_F_ADD
1500
400
1600
1600
350
L_HUNDRED
1500
400
1600
1600
300
L_T_ADD
1598
498
1698
1698
398
MULTI_REP
0
0
0
0
0
NEWSEGDATE
None
None
None
None
None
ONEWAY
FT
TF
FT
TF
TF
PRE_DIR
None
N
None
None
N
RESPONSIBL
FAM
FAM
FAM
None
None
RPOLY_
0
0
0
0
0
R_F_ADD
1501
401
1601
1601
351
R_HUNDRED
1500
400
1600
1600
300
R_T_ADD
1599
499
1699
1699
399
SEG_ID
420708
422065
420702
420732
420718
SHAPE_LEN
449.863
540.083
446.104
447.261
148.216
STCL2_
1
2
3
4
5
STCL2_ID
85205
86540
85199
85229
85215
STNAME
CALLOWHILL ST
N 15TH ST
CALLOWHILL ST
CARLTON ST
N 17TH ST
STREETLABE
CALLOWHILL ST
N 15TH ST
CALLOWHILL ST
CARLTON ST
N 17TH ST
ST_CODE
20880
88070
20880
21440
88110
ST_NAME
CALLOWHILL
15TH
CALLOWHILL
CARLTON
17TH
ST_TYPE
ST
ST
ST
ST
ST
SUF_DIR
None
None
None
None
None
TNODE_
1
3
4
5
4
UPDATE_
1997-02-10
1997-02-10
1997-02-10
1997-02-10
1997-06-27
ZIP_LEFT
19130
19130
19130
19103
19103
ZIP_RIGHT
19130
19130
19130
19103
19103
geometry
LINESTRING (-75.16371257852462 39.959816984389...
LINESTRING (-75.16371257852462 39.959816984389...
LINESTRING (-75.16529687686355 39.960013687935...
LINESTRING (-75.16537539004391 39.959601782423...
LINESTRING (-75.16694872703492 39.959805718080...
BIKELANE
0
0
0
0
0
In [149]:
unique(street_gpd.BIKELANE)
Out[149]:
array([ 0., 1.])
In [132]:
street_gpd[street_gpd.BIKELANE==1].shape
Out[132]:
(4061, 32)
In [131]:
set(bike_lane.SEG_ID).difference(set(street_gpd.SEG_ID))
Out[131]:
{422130, 422142, 500805, 500807}
In [28]:
street_gpd[street_gpd.SEG_ID==500807]
Out[28]:
CLASS
FNODE_
LENGTH
LPOLY_
L_F_ADD
L_HUNDRED
L_T_ADD
MULTI_REP
NEWSEGDATE
ONEWAY
...
ST_CODE
ST_NAME
ST_TYPE
SUF_DIR
TNODE_
UPDATE_
ZIP_LEFT
ZIP_RIGHT
geometry
BIKELANE
0 rows × 32 columns
In [133]:
set(bike_network.ST_CODE).difference(set(street_gpd.ST_CODE))
Out[133]:
{36585}
In [134]:
bike_network[bike_network.SEG_ID==422130].ST_CODE.iloc[0]
Out[134]:
88550
In [59]:
bike_network.loc[bike_network['SEG_ID']==422130, 'ST_CODE'].iloc[0]
Out[59]:
88550
In [63]:
remaining_segids = set(bike_lane.SEG_ID).difference(set(street_gpd.SEG_ID))
for segid in remaining_segids:
print(segid)
print(street_gpd[street_gpd.ST_CODE==bike_network[bike_network.SEG_ID==segid].ST_CODE.iloc[0]].SEG_ID)
print()
422130
28908 420969
28909 420955
28914 420907
29007 420928
39237 423108
Name: SEG_ID, dtype: int64
500805
39239 500804
39986 500802
Name: SEG_ID, dtype: int64
422142
28523 421124
28995 420876
29005 420699
29324 421122
29325 420762
29838 422653
33314 422518
33319 422517
33321 422522
33422 422494
33514 420314
33616 420413
33622 420360
33623 420395
33717 421103
33818 422840
33819 422839
33836 422652
33929 422655
33933 420633
33935 422654
39153 423440
39154 423441
39285 423405
Name: SEG_ID, dtype: int64
500807
Series([], Name: SEG_ID, dtype: int64)
In [61]:
street_gpd[street_gpd.ST_CODE==bike_network[bike_network.SEG_ID==422130].ST_CODE.iloc[0]].SEG_ID
Out[61]:
28908 420969
28909 420955
28914 420907
29007 420928
39237 423108
Name: SEG_ID, dtype: int64
In [32]:
unique(bike_network.TYPE)
Out[32]:
array(['Buffered', 'Buffered w Conventional',
'Contraflow w Conventional, same', 'Conventional',
'Conventional w Sharrows', 'Sharrow'], dtype=object)
In [33]:
unique(bike_network.ONEWAY)
Out[33]:
array(['B', 'FT', 'TF'], dtype=object)
In [147]:
bike_lane_type = bike_network.groupby(['SEG_ID','TYPE']).size().reset_index()
bike_lane_type.columns = ['SEG_ID', 'TYPE', 'BIKELANE']
print(bike_lane_type.shape)
bike_lane_type.head().T
(4065, 3)
Out[147]:
0
1
2
3
4
SEG_ID
100002
100003
100004
100045
100066
TYPE
Conventional
Conventional
Conventional
Conventional
Conventional
BIKELANE
1
1
1
1
1
In [145]:
unique(bike_lane.BIKELANE)
Out[145]:
array([1])
In [148]:
unique(bike_lane_type.BIKELANE)
Out[148]:
array([1])
In [67]:
len(street_gpd.SEG_ID)
Out[67]:
41022
In [68]:
import geopandas as gpd
from geopandas.tools import sjoin
import pandas as pd
from IPython.display import display
from pandas import DataFrame
from geopandas import GeoDataFrame
from shapely.geometry import Point
In [151]:
for column in collision_filtered_agg_count.columns:
if('norm' not in column and 'LEN' not in column):
print(column)
SEG_ID
CRASH_MONTH
AUTOMOBILE_COUNT
BELTED_DEATH_COUNT
BELTED_MAJ_INJ_COUNT
BICYCLE_COUNT
BICYCLE_DEATH_COUNT
BICYCLE_MAJ_INJ_COUNT
BUS_COUNT
COMM_VEH_COUNT
FATAL_COUNT
HEAVY_TRUCK_COUNT
INJURY_COUNT
MAJ_INJ_COUNT
MCYCLE_DEATH_COUNT
MCYCLE_MAJ_INJ_COUNT
MIN_INJ_COUNT
MOD_INJ_COUNT
MOTORCYCLE_COUNT
PED_COUNT
PED_DEATH_COUNT
PED_MAJ_INJ_COUNT
PERSON_COUNT
SMALL_TRUCK_COUNT
SUV_COUNT
UNBELTED_OCC_COUNT
UNB_DEATH_COUNT
UNB_MAJ_INJ_COUNT
UNK_INJ_DEG_COUNT
UNK_INJ_PER_COUNT
VAN_COUNT
VEHICLE_COUNT
TOTAL_COLLISION_COUNT
In [153]:
street_gpd[['SEG_ID', 'BIKELANE']].shape
Out[153]:
(41022, 2)
In [154]:
unique(street_gpd[['SEG_ID', 'BIKELANE']].BIKELANE)
Out[154]:
array([ 0., 1.])
In [157]:
sum(street_gpd.BIKELANE>=1)
Out[157]:
4061
In [158]:
street_gpd[['SEG_ID', 'BIKELANE']].to_csv('../philly/Philly_Bikelanes.csv')
In [159]:
street_gpd.head().T
Out[159]:
0
1
2
3
4
CLASS
3
3
3
5
4
FNODE_
2
2
1
6
5
LENGTH
449.863
540.083
446.104
447.261
148.216
LPOLY_
0
0
0
0
0
L_F_ADD
1500
400
1600
1600
350
L_HUNDRED
1500
400
1600
1600
300
L_T_ADD
1598
498
1698
1698
398
MULTI_REP
0
0
0
0
0
NEWSEGDATE
None
None
None
None
None
ONEWAY
FT
TF
FT
TF
TF
PRE_DIR
None
N
None
None
N
RESPONSIBL
FAM
FAM
FAM
None
None
RPOLY_
0
0
0
0
0
R_F_ADD
1501
401
1601
1601
351
R_HUNDRED
1500
400
1600
1600
300
R_T_ADD
1599
499
1699
1699
399
SEG_ID
420708
422065
420702
420732
420718
SHAPE_LEN
449.863
540.083
446.104
447.261
148.216
STCL2_
1
2
3
4
5
STCL2_ID
85205
86540
85199
85229
85215
STNAME
CALLOWHILL ST
N 15TH ST
CALLOWHILL ST
CARLTON ST
N 17TH ST
STREETLABE
CALLOWHILL ST
N 15TH ST
CALLOWHILL ST
CARLTON ST
N 17TH ST
ST_CODE
20880
88070
20880
21440
88110
ST_NAME
CALLOWHILL
15TH
CALLOWHILL
CARLTON
17TH
ST_TYPE
ST
ST
ST
ST
ST
SUF_DIR
None
None
None
None
None
TNODE_
1
3
4
5
4
UPDATE_
1997-02-10
1997-02-10
1997-02-10
1997-02-10
1997-06-27
ZIP_LEFT
19130
19130
19130
19103
19103
ZIP_RIGHT
19130
19130
19130
19103
19103
geometry
LINESTRING (-75.16371257852462 39.959816984389...
LINESTRING (-75.16371257852462 39.959816984389...
LINESTRING (-75.16529687686355 39.960013687935...
LINESTRING (-75.16537539004391 39.959601782423...
LINESTRING (-75.16694872703492 39.959805718080...
BIKELANE
0
0
0
0
0
In [174]:
street_gpd[['OXFORD' in stname for stname in street_gpd.STNAME]].ST_NAME
Out[174]:
3093 OXFORD
3175 OXFORD
3176 OXFORD
3178 OXFORD
3257 OXFORD
3270 OXFORD
3271 OXFORD
3275 OXFORD
3277 OXFORD
3493 OXFORD
3494 OXFORD
3967 OXFORD
4706 OXFORD
4707 OXFORD
4708 OXFORD
4709 OXFORD
4805 OXFORD
4807 OXFORD
4814 OXFORD
4816 OXFORD
4817 OXFORD
4819 OXFORD
4834 OXFORD
4835 OXFORD
4836 OXFORD
4838 OXFORD
4840 OXFORD
4841 OXFORD
4842 OXFORD
4844 OXFORD
...
31134 OXFORD
31499 OXFORD
31507 OXFORD
31645 OXFORD
31646 OXFORD
31647 OXFORD
32282 OXFORD
32284 OXFORD
33067 OXFORD
33073 OXFORD
33074 OXFORD
33160 OXFORD
33859 OXFORD
33860 OXFORD
35346 OXFORD
35347 OXFORD
35791 OXFORD
35861 OXFORD
35862 OXFORD
35866 OXFORD
35868 OXFORD
36660 OXFORD
37047 OXFORD
37110 OXFORD
37113 OXFORD
37118 OXFORD
38468 OXFORD
40531 OXFORD
40534 OXFORD
40618 OXFORD
Name: ST_NAME, dtype: object
In [ ]:
Content source: KiranJavkar/Cycling_Safety
Similar notebooks: