In [19]:
import shapefile
from pandas import DataFrame
from geopandas import GeoDataFrame
from shapely.geometry import Point
# read the shapefile
reader = shapefile.Reader("../philly/STR_Centerline/Street_Centerline.shp")
fields = reader.fields[1:]
field_names = [field[0] for field in fields]
buffer = []
for sr in reader.shapeRecords():
    atr = dict(zip(field_names, sr.record))
    geom = sr.shape.__geo_interface__
    buffer.append(dict(type="Feature", \
    geometry=geom, properties=atr))

# write the GeoJSON file
buffer = str(buffer)
from json import dumps
gjson = open("../philly/GeoJSON_data/Street_Centerline.json", "w+")
gjson.write(dumps({"type": "FeatureCollection",\
"features": buffer}, indent=2) + "\n")
gjson.close()

In [1]:
%pylab inline
import geopandas as gpd
from geopandas.tools import sjoin
import pandas as pd
from IPython.display import display
from pandas import DataFrame
from geopandas import GeoDataFrame
from shapely.geometry import Point


Populating the interactive namespace from numpy and matplotlib

In [37]:
# street_gpd_2.crs = fiona.crs.from_epsg(102729)
street_gpd_2.crs = {'init':'epsg:102729'}

In [3]:
import geojson, json

In [2]:
street_gpd = gpd.read_file('../philly/GeoJSON_data/Street_Centerline_qgis3.geojson')
street_gpd.plot()
print(street_gpd.shape)
street_gpd.head().T


(41022, 31)
Out[2]:
0 1 2 3 4
CLASS 3 3 3 5 4
FNODE_ 2 2 1 6 5
LENGTH 449.863 540.083 446.104 447.261 148.216
LPOLY_ 0 0 0 0 0
L_F_ADD 1500 400 1600 1600 350
L_HUNDRED 1500 400 1600 1600 300
L_T_ADD 1598 498 1698 1698 398
MULTI_REP 0 0 0 0 0
NEWSEGDATE None None None None None
ONEWAY FT TF FT TF TF
PRE_DIR None N None None N
RESPONSIBL FAM FAM FAM None None
RPOLY_ 0 0 0 0 0
R_F_ADD 1501 401 1601 1601 351
R_HUNDRED 1500 400 1600 1600 300
R_T_ADD 1599 499 1699 1699 399
SEG_ID 420708 422065 420702 420732 420718
SHAPE_LEN 449.863 540.083 446.104 447.261 148.216
STCL2_ 1 2 3 4 5
STCL2_ID 85205 86540 85199 85229 85215
STNAME CALLOWHILL ST N 15TH ST CALLOWHILL ST CARLTON ST N 17TH ST
STREETLABE CALLOWHILL ST N 15TH ST CALLOWHILL ST CARLTON ST N 17TH ST
ST_CODE 20880 88070 20880 21440 88110
ST_NAME CALLOWHILL 15TH CALLOWHILL CARLTON 17TH
ST_TYPE ST ST ST ST ST
SUF_DIR None None None None None
TNODE_ 1 3 4 5 4
UPDATE_ 1997-02-10 1997-02-10 1997-02-10 1997-02-10 1997-06-27
ZIP_LEFT 19130 19130 19130 19103 19103
ZIP_RIGHT 19130 19130 19130 19103 19103
geometry LINESTRING (-75.16371257852462 39.959816984389... LINESTRING (-75.16371257852462 39.959816984389... LINESTRING (-75.16529687686355 39.960013687935... LINESTRING (-75.16537539004391 39.959601782423... LINESTRING (-75.16694872703492 39.959805718080...

In [5]:
collision_gpd = gpd.read_file('../philly/GeoJSON_data/GIS_HEALTH.Collisions_crash_2011_2014PUBV.geojson')
print(collision_gpd.shape)


(43488, 54)

In [6]:
collision_gpd.head().T


Out[6]:
0 1 2 3 4
AUTOMOBILE_COUNT 0 1 1 1 0
BELTED_DEATH_COUNT 0 0 0 0 0
BELTED_MAJ_INJ_COUNT 0 0 0 0 0
BICYCLE_COUNT 0 1 1 0 0
BICYCLE_DEATH_COUNT 0 0 0 0 0
BICYCLE_MAJ_INJ_COUNT 0 0 0 0 0
BUS_COUNT 0 0 0 0 0
COLLISION_TYPE 1 4 4 8 8
COMM_VEH_COUNT 1 0 0 0 0
CRASH_MONTH 8 8 8 8 8
CRASH_YEAR 2014 2014 2014 2014 2014
CRN 2014087880 2014087758 2014096425 2014088141 2014091439
DAY_OF_WEEK 6 6 7 7 7
FATAL_COUNT 0 0 0 0 0
HEAVY_TRUCK_COUNT 1 0 0 0 0
HOUR_OF_DAY 15 14 19 19 19
ILLUMINATION 1 1 1 1 1
INJURY_COUNT 5 1 1 1 1
INTERSECTION 0 0 1 0 0
INTERSECT_TYPE 0 0 1 0 0
LATITUDE 40.085 39.9904 40.0293 39.9535 40.0162
LENGTH 439.229 778.344 170.622 194.532 396.315
LOCATION_TYPE 0 0 0 0 0
LONGITUDE -75.038 -75.1028 -75.0548 -75.2405 -75.0906
MAJ_INJ_COUNT 0 0 0 0 0
MAX_SEVERITY_LEVEL 4 4 3 4 8
MCYCLE_DEATH_COUNT 0 0 0 0 0
MCYCLE_MAJ_INJ_COUNT 0 0 0 0 0
MIN_INJ_COUNT 5 1 0 1 0
MOD_INJ_COUNT 0 0 1 0 0
MOTORCYCLE_COUNT 0 0 0 0 0
OBJECTID 4001 4002 4003 4004 4005
PED_COUNT 0 0 0 1 1
PED_DEATH_COUNT 0 0 0 0 0
PED_MAJ_INJ_COUNT 0 0 0 0 0
PERSON_COUNT 7 3 2 2 2
RELATION_TO_ROAD 1 1 1 1 1
ROAD_CONDITION 1 0 0 0 0
SCH_BUS_IND N N N N N
SCH_ZONE_IND N N U U N
SEG_ID 960283 541001 760684 300613 640755
SMALL_TRUCK_COUNT 0 0 0 0 0
SUV_COUNT 2 0 0 0 1
TCD_TYPE 0 0 0 0 0
TIME_OF_DAY 1500 1450 1919 1910 1905
UNBELTED_OCC_COUNT 0 0 0 0 0
UNB_DEATH_COUNT 0 0 0 0 0
UNB_MAJ_INJ_COUNT 0 0 0 0 0
UNK_INJ_DEG_COUNT 0 0 0 0 1
UNK_INJ_PER_COUNT 0 0 0 0 0
VAN_COUNT 0 0 0 0 0
VEHICLE_COUNT 3 1 1 1 1
WEATHER 2 1 1 1 1
geometry POINT (-75.03795178519658 40.08497541304385) POINT (-75.10275539598592 39.990404578965) POINT (-75.05481628915624 40.02929723040301) POINT (-75.24050521636346 39.95347871767484) POINT (-75.09056881169107 40.01618353851767)

In [21]:
collision_gpd[collision_gpd.SEG_ID==420708].T


Out[21]:
6682 11674 21114 33351 34755 36630 36938
AUTOMOBILE_COUNT 2 1 2 1 2 2 0
BELTED_DEATH_COUNT 0 0 0 0 0 0 0
BELTED_MAJ_INJ_COUNT 0 0 0 0 0 0 0
BICYCLE_COUNT 0 1 0 0 0 0 1
BICYCLE_DEATH_COUNT 0 0 0 0 0 0 0
BICYCLE_MAJ_INJ_COUNT 0 0 0 0 0 0 0
BUS_COUNT 0 0 0 0 0 0 0
COLLISION_TYPE 4 4 4 4 4 4 4
COMM_VEH_COUNT 0 0 0 0 0 0 0
CRASH_MONTH 10 10 6 1 6 10 10
CRASH_YEAR 2013 2013 2013 2012 2011 2011 2011
CRN 2013114450 2013111794 2013079452 2012015372 2011069166 2011115373 2011115250
DAY_OF_WEEK 3 6 4 1 5 4 6
FATAL_COUNT 0 0 0 0 0 0 0
HEAVY_TRUCK_COUNT 0 0 0 0 0 0 0
HOUR_OF_DAY 13 6 13 13 15 16 13
ILLUMINATION 1 1 1 1 1 1 1
INJURY_COUNT 0 1 0 0 0 3 1
INTERSECTION 1 1 1 1 1 1 0
INTERSECT_TYPE 1 1 1 1 1 1 0
LATITUDE 39.9599 39.9599 39.9599 39.9599 39.9599 39.9599 39.9599
LENGTH 449.863 449.863 449.863 449.863 449.863 449.863 449.863
LOCATION_TYPE 0 0 0 0 0 0 0
LONGITUDE -75.1645 -75.1645 -75.1645 -75.1645 -75.1645 -75.1645 -75.1645
MAJ_INJ_COUNT 0 0 0 0 0 0 0
MAX_SEVERITY_LEVEL 0 8 0 0 0 3 8
MCYCLE_DEATH_COUNT 0 0 0 0 0 0 0
MCYCLE_MAJ_INJ_COUNT 0 0 0 0 0 0 0
MIN_INJ_COUNT 0 0 0 0 0 2 0
MOD_INJ_COUNT 0 0 0 0 0 1 0
MOTORCYCLE_COUNT 0 0 0 0 0 0 0
OBJECTID 11683 7675 21115 35352 33756 37631 37939
PED_COUNT 0 0 0 0 0 0 0
PED_DEATH_COUNT 0 0 0 0 0 0 0
PED_MAJ_INJ_COUNT 0 0 0 0 0 0 0
PERSON_COUNT 3 2 2 3 4 3 2
RELATION_TO_ROAD 1 1 1 1 1 1 1
ROAD_CONDITION 0 1 0 0 0 0 0
SCH_BUS_IND N N N N N N N
SCH_ZONE_IND N U N N N U N
SEG_ID 420708 420708 420708 420708 420708 420708 420708
SMALL_TRUCK_COUNT 0 0 0 0 0 0 0
SUV_COUNT 0 0 0 1 0 0 0
TCD_TYPE 2 3 2 2 3 2 0
TIME_OF_DAY 1358 640 1300 1315 1530 1618 1345
UNBELTED_OCC_COUNT 0 0 2 0 0 0 0
UNB_DEATH_COUNT 0 0 0 0 0 0 0
UNB_MAJ_INJ_COUNT 0 0 0 0 0 0 0
UNK_INJ_DEG_COUNT 0 1 0 0 0 0 1
UNK_INJ_PER_COUNT 0 0 0 0 0 0 0
VAN_COUNT 0 0 0 0 0 0 1
VEHICLE_COUNT 2 1 2 2 2 2 1
WEATHER 1 2 1 1 1 1 1
geometry POINT (-75.16450649969386 39.95992340269341) POINT (-75.16450649969386 39.95992340269341) POINT (-75.16450649969386 39.95992340269341) POINT (-75.16450649969386 39.95992340269341) POINT (-75.16450649969386 39.95992340269341) POINT (-75.16450649969386 39.95992340269341) POINT (-75.16450649969386 39.95992340269341)

In [28]:
none_seg_count = 0
for seg in collision_gpd.SEG_ID:
    if seg is None:
        none_seg_count += 1
print(none_seg_count)


0

In [19]:
for col_name in collision_gpd.columns:
    if ('_COUNT' in col_name) or ('_TYPE' in col_name) or ('CRASH' in col_name) or (col_name == 'SEG_ID'):
        print(col_name)


AUTOMOBILE_COUNT
BELTED_DEATH_COUNT
BELTED_MAJ_INJ_COUNT
BICYCLE_COUNT
BICYCLE_DEATH_COUNT
BICYCLE_MAJ_INJ_COUNT
BUS_COUNT
COLLISION_TYPE
COMM_VEH_COUNT
CRASH_MONTH
CRASH_YEAR
FATAL_COUNT
HEAVY_TRUCK_COUNT
INJURY_COUNT
INTERSECT_TYPE
LOCATION_TYPE
MAJ_INJ_COUNT
MCYCLE_DEATH_COUNT
MCYCLE_MAJ_INJ_COUNT
MIN_INJ_COUNT
MOD_INJ_COUNT
MOTORCYCLE_COUNT
PED_COUNT
PED_DEATH_COUNT
PED_MAJ_INJ_COUNT
PERSON_COUNT
SEG_ID
SMALL_TRUCK_COUNT
SUV_COUNT
TCD_TYPE
UNBELTED_OCC_COUNT
UNB_DEATH_COUNT
UNB_MAJ_INJ_COUNT
UNK_INJ_DEG_COUNT
UNK_INJ_PER_COUNT
VAN_COUNT
VEHICLE_COUNT

In [6]:
street_gpd[street_gpd.SEG_ID==960283]


Out[6]:
CLASS FNODE_ LENGTH LPOLY_ L_F_ADD L_HUNDRED L_T_ADD MULTI_REP NEWSEGDATE ONEWAY ... STREETLABE ST_CODE ST_NAME ST_TYPE SUF_DIR TNODE_ UPDATE_ ZIP_LEFT ZIP_RIGHT geometry
22464 2 16299 439.229039 0 9400 9400 9498 0 None B ... BUSTLETON AVE 20020 BUSTLETON AVE None 16543 1998-10-08 19115 19115 LINESTRING (-75.038401134496 40.08447396565553...

1 rows × 31 columns


In [7]:
collision_gpd[collision_gpd.CRASH_YEAR=='2014'].shape


Out[7]:
(10627, 54)

In [7]:
collision_gpd.groupby(collision_gpd.CRASH_YEAR).size()


Out[7]:
CRASH_YEAR
2011    10668
2012    11196
2013    10997
2014    10627
dtype: int64

In [9]:
def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between two points 
    on the earth (specified in decimal degrees)
    """
    from math import radians, cos, sin, asin, sqrt
    # convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a)) 
    km = 6367 * c
    m = km *1000
    return m   
def ptfromln(pt, ln):
    n_pt = ln.interpolate(ln.project(pt))
    lon1, lat1 = n_pt.coords[0]
    lon2, lat2 = pt.coords[0]
    return haversine(lon1, lat1, lon2, lat2)
def pts2seg(pts, gp_segs, buffer_dis=50, near_dis_thres=5):
    pts_crs,gp_segs_crs = pts.to_crs(epsg=3559), gp_segs.to_crs(epsg=3559)
    pts_crs_bfr = pts_crs.copy()

    pts_crs_bfr.geometry = pts_crs_bfr.buffer(near_dis_thres*1.1)
    close_jn = gpd.tools.sjoin(pts_crs_bfr, gp_segs_crs)[['OBJECTID','SEG_ID_right']]
    handledid = set(pd.unique(close_jn.OBJECTID))
    mask = (~pts_crs_bfr.OBJECTID.isin(handledid))

    far_jns = []
    while pts_crs_bfr[mask].shape[0]!=0:
        pts_crs_bfr.loc[mask, 'geometry'] = pts_crs_bfr[mask].buffer(buffer_dis)
        jn = gpd.tools.sjoin(pts_crs_bfr[mask], gp_segs_crs)[['OBJECTID','SEG_ID_right']]
        far_jns.append(jn)
        handledid |= set(pd.unique(jn.OBJECTID))
        mask = (~pts_crs_bfr.OBJECTID.isin(handledid))
        
    far_jns = pd.concat(far_jns)
    mr = pd.merge(gp_segs[['geometry','SEG_ID']],far_jns , left_on='SEG_ID', right_on='SEG_ID_right')
    #mr = pd.merge(pts[['OBJECTID','geometry','SEG_ID']],mr, left_on='OBJECTID', right_on='OBJECTID_left')
    mr['dis']=mr.apply(lambda x: ptfromln(x.geometry_x, x.geometry_y),axis=1)
    result = close_jn.groupby('OBJECTID')['SEG_ID_right'].apply(list).append(mr.groupby('OBJECTID').apply(lambda x: [x.ix[x.dis.idxmin()].SEG_ID_y]))
    return pd.DataFrame(result, columns=['segid'])

In [8]:
philly_seg_stats = street_gpd[['SEG_ID', 'SHAPE_LEN', 'geometry']]
philly_seg_stats.shape


Out[8]:
(41022, 3)

In [49]:
collision_pt_seg = pts2seg(collision_gpd, philly_seg_stats)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-49-07a5db01d260> in <module>()
----> 1 collision_pt_seg = pts2seg(collision_gpd, philly_seg_stats)

<ipython-input-45-242f3563623a> in pts2seg(pts, gp_segs, buffer_dis, near_dis_thres)
     32     while pts_crs_bfr[mask].shape[0]!=0:
     33         pts_crs_bfr.loc[mask, 'geometry'] = pts_crs_bfr[mask].buffer(buffer_dis)
---> 34         jn = gpd.tools.sjoin(pts_crs_bfr[mask], gp_segs_crs)[['OBJECTID','SEG_ID_right']]
     35         far_jns.append(jn)
     36         handledid |= set(pd.unique(jn.OBJECTID))

//anaconda/lib/python3.5/site-packages/geopandas/tools/sjoin.py in sjoin(left_df, right_df, how, op, lsuffix, rsuffix)
     55     idxmatch = idxmatch[idxmatch.apply(len) > 0]
     56 
---> 57     r_idx = np.concatenate(idxmatch.values)
     58     l_idx = np.concatenate([[i] * len(v) for i, v in idxmatch.iteritems()])
     59 

ValueError: need at least one array to concatenate

In [12]:
philly_seg_buffer_gp = street_gpd[['SEG_ID','STCL2_ID','geometry']].copy()
philly_seg_buffer_gp.geometry = philly_seg_buffer_gp.buffer(0.0001)
collision_buffer_gp = collision_gpd[['SEG_ID','OBJECTID', 'geometry','CRASH_YEAR']].copy()
collision_buffer_gp.geometry = collision_buffer_gp.buffer(0.0001)
joined = sjoin(collision_buffer_gp, philly_seg_buffer_gp, how='left', op='intersects')

In [91]:
philly_seg_buffer_gp = street_gpd[['SEG_ID','STCL2_ID','geometry']].copy()
philly_seg_buffer_gp.geometry = philly_seg_buffer_gp.buffer(0.0001)

In [42]:
print(philly_seg_buffer_gp.shape)
philly_seg_buffer_gp.head().T


(41022, 3)
Out[42]:
0 1 2 3 4
SEG_ID 420708 422065 420702 420732 420718
STCL2_ID 85205 86540 85199 85229 85215
geometry POLYGON ((2692954.035913117 238893.638507401, ... POLYGON ((2693439.911167583 239131.2080263407,... POLYGON ((2692511.692965856 238951.4471188142,... POLYGON ((2692493.394445261 238804.3647359097,... POLYGON ((2692511.692879579 238951.4472303169,...

In [55]:
collision_buffer_gp = collision_gpd[['SEG_ID','OBJECTID','geometry','CRASH_YEAR']].copy()
collision_buffer_gp.geometry = collision_buffer_gp.buffer(0.0001)
print(collision_buffer_gp.shape)
collision_buffer_gp.head().T


(43488, 4)
Out[55]:
0 1 2 3 4
SEG_ID 960283 541001 760684 300613 640755
OBJECTID 4001 4002 4003 4004 4005
geometry POLYGON ((-75.03785178519658 40.08497541304385... POLYGON ((-75.10265539598592 39.990404578965, ... POLYGON ((-75.05471628915623 40.02929723040301... POLYGON ((-75.24040521636346 39.95347871767484... POLYGON ((-75.09046881169107 40.01618353851767...
CRASH_YEAR 2014 2014 2014 2014 2014

In [13]:
joined.shape


Out[13]:
(67230, 7)

In [47]:
joined = sjoin(collision_buffer_gp, philly_seg_buffer_gp, how='left', op='intersects')

In [48]:
joined.head().T


Out[48]:
0 1 2 3 4
SEG_ID_left 960283 541001 760684 300613 640755
OBJECTID 4001 4002 4003 4004 4005
geometry POLYGON ((-75.03785178519658 40.08497541304385... POLYGON ((-75.10265539598592 39.990404578965, ... POLYGON ((-75.05471628915623 40.02929723040301... POLYGON ((-75.24040521636346 39.95347871767484... POLYGON ((-75.09046881169107 40.01618353851767...
CRASH_YEAR 2014 2014 2014 2014 2014
index_right 22464 14639 27112 11124 8369
SEG_ID_right 960283 541001 760684 300613 640755
STCL2_ID 1993 1001 9617 82230 4375

In [52]:
street_gpd.crs


Out[52]:
{'init': 'epsg:4326'}

In [53]:
collision_gpd.crs


Out[53]:
{'init': 'epsg:4326'}

In [49]:
joined.info()


<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 67230 entries, 0 to 43487
Data columns (total 7 columns):
SEG_ID_left     66695 non-null float64
OBJECTID        67230 non-null int64
geometry        67230 non-null object
CRASH_YEAR      67230 non-null object
index_right     66694 non-null float64
SEG_ID_right    66694 non-null float64
STCL2_ID        66694 non-null float64
dtypes: float64(4), int64(1), object(2)
memory usage: 4.1+ MB

In [17]:
street_gpd.plot()


Out[17]:
<matplotlib.axes._subplots.AxesSubplot at 0x14269c5f8>

In [50]:
join_gb = joined.reset_index().groupby('OBJECTID').count()

In [51]:
join_gb.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 43488 entries, 1 to 43488
Data columns (total 7 columns):
index           43488 non-null int64
SEG_ID_left     43488 non-null int64
geometry        43488 non-null int64
CRASH_YEAR      43488 non-null int64
index_right     43488 non-null int64
SEG_ID_right    43488 non-null int64
STCL2_ID        43488 non-null int64
dtypes: int64(7)
memory usage: 2.7 MB

In [21]:
joined.shape


Out[21]:
(67230, 7)

In [29]:
join_gb.shape


Out[29]:
(43488, 7)

In [52]:
print(join_gb[join_gb.SEG_ID_right==0].shape[0], ' points spatially match no segment')
print(join_gb[join_gb.SEG_ID_right==1].shape[0], ' points spatially match 1 segment')
print(join_gb[join_gb.SEG_ID_right>1].shape[0], ' points spatially match multiple segments')


536  points spatially match no segment
31630  points spatially match 1 segment
11322  points spatially match multiple segments

In [31]:
join_gb.head().T


Out[31]:
OBJECTID 1 2 3 4 5
index 1 2 1 1 1
SEG_ID_left 1 2 1 1 1
geometry 1 2 1 1 1
CRASH_YEAR 1 2 1 1 1
index_right 1 2 1 1 1
SEG_ID_right 1 2 1 1 1
STCL2_ID 1 2 1 1 1

In [41]:
philly_seg_stats.head().T


Out[41]:
0 1 2 3 4
SEG_ID 420708 422065 420702 420732 420718
SHAPE_LEN 449.863 540.083 446.104 447.261 148.216
geometry LINESTRING (-75.16371257852462 39.959816984389... LINESTRING (-75.16371257852462 39.959816984389... LINESTRING (-75.16529687686355 39.960013687935... LINESTRING (-75.16537539004391 39.959601782423... LINESTRING (-75.16694872703492 39.959805718080...

In [54]:
join_df = joined[['SEG_ID_left', 'OBJECTID']]

In [62]:
join_df.columns = ['SEG_ID', 'OBJECTID']
join_df.head().T
join_df.info()
philly_seg_stats.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 67230 entries, 0 to 43487
Data columns (total 2 columns):
SEG_ID      66695 non-null float64
OBJECTID    67230 non-null int64
dtypes: float64(1), int64(1)
memory usage: 1.5 MB
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 41022 entries, 0 to 41021
Data columns (total 3 columns):
SEG_ID       41022 non-null int64
SHAPE_LEN    41022 non-null float64
geometry     41022 non-null object
dtypes: float64(1), int64(1), object(1)
memory usage: 961.5+ KB

In [77]:
join_df_numeric = join_df[['SEG_ID', 'OBJECTID']].dropna().astype(int)

In [78]:
join_df_numeric.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 66695 entries, 0 to 43487
Data columns (total 2 columns):
SEG_ID      66695 non-null int64
OBJECTID    66695 non-null int64
dtypes: int64(2)
memory usage: 1.5 MB

In [135]:
collision_count = pd.DataFrame(join_df_numeric.groupby(['SEG_ID']).size().reset_index())
collision_count.columns = ['SEG_ID', 'collision_count']

In [136]:
collision_count.head().T


Out[136]:
0 1 2 3 4
SEG_ID 100002 100003 100006 100007 100008
collision_count 1 15 1 1 1

In [137]:
collision_count[collision_count.SEG_ID == 100003]


Out[137]:
SEG_ID collision_count
1 100003 15

In [134]:
joined.shape


Out[134]:
(67230, 7)

In [141]:
philly_collision_stats = philly_seg_stats.merge(collision_count, on = 'SEG_ID', how = 'inner')

In [142]:
philly_collision_stats.shape


Out[142]:
(13186, 4)

In [152]:
philly_collision_stats['collision_count/length'] = philly_collision_stats.collision_count/philly_collision_stats.SHAPE_LEN

In [153]:
philly_collision_stats.head().T


Out[153]:
0 1 2 3 4
SEG_ID 420708 422065 420702 420732 420696
SHAPE_LEN 449.863 540.083 446.104 447.261 319.5
geometry LINESTRING (-75.16371257852462 39.959816984389... LINESTRING (-75.16371257852462 39.959816984389... LINESTRING (-75.16529687686355 39.960013687935... LINESTRING (-75.16537539004391 39.959601782423... LINESTRING (-75.16686813417091 39.960207835856...
collision_count 7 1 4 1 2
collsion_count/length 0.0155603 0.00185157 0.00896651 0.00223583 0.00625978
collision_count_norm 0.333648 0.111216 0.258235 0.111216 0.176273
collision_count/length 0.0155603 0.00185157 0.00896651 0.00223583 0.00625978

In [94]:
def normalize(col_name, df):
    col_name_norm = col_name + '_norm'
    df[col_name_norm] = df[col_name]+1
    df[col_name_norm] = np.log2(df[col_name_norm])
    df[col_name_norm] = df[col_name_norm]/df[col_name_norm].max()

In [154]:
normalize('collision_count', philly_collision_stats)
normalize('collision_count/length', philly_collision_stats)

In [155]:
philly_collision_stats.head().T


Out[155]:
0 1 2 3 4
SEG_ID 420708 422065 420702 420732 420696
SHAPE_LEN 449.863 540.083 446.104 447.261 319.5
geometry LINESTRING (-75.16371257852462 39.959816984389... LINESTRING (-75.16371257852462 39.959816984389... LINESTRING (-75.16529687686355 39.960013687935... LINESTRING (-75.16537539004391 39.959601782423... LINESTRING (-75.16686813417091 39.960207835856...
collision_count 7 1 4 1 2
collsion_count/length 0.0155603 0.00185157 0.00896651 0.00223583 0.00625978
collision_count_norm 0.333648 0.111216 0.258235 0.111216 0.176273
collision_count/length 0.0155603 0.00185157 0.00896651 0.00223583 0.00625978
collision_count/length_norm 0.00826819 0.000990576 0.00478006 0.00119592 0.00334159

In [157]:
def gradient_color(percent):
    min_color = np.array([255,255,255])
    max_color = np.array([65,105,255])
    return '#%02x%02x%02x' % tuple([int(k) for k in min_color+(max_color-min_color)*percent])

def write_var(col_name, var_name, df,f):
    df['color']=df[col_name].apply(gradient_color)
    f.write('var %s = %s;\n' % (var_name, df.to_json()))

with open('../visualization/Volumes of Collisions.js','w+') as f:
    write_var('collision_count_norm', 'collision_count', philly_collision_stats,f)
    write_var('collision_count/length_norm', 'collision_count_norm', philly_collision_stats,f)

In [162]:
philly_collision_stats[philly_collision_stats.SEG_ID==422279]


Out[162]:
SEG_ID SHAPE_LEN geometry collision_count collsion_count/length collision_count_norm collision_count/length collision_count/length_norm color
1674 422279 2360.798811 LINESTRING (-75.18428206823198 39.949500166985... 270 0.114368 0.898863 0.114368 0.057987 #f3f6ff

In [165]:
collision_gpd[collision_gpd.SEG_ID==422279.0]


Out[165]:
AUTOMOBILE_COUNT BELTED_DEATH_COUNT BELTED_MAJ_INJ_COUNT BICYCLE_COUNT BICYCLE_DEATH_COUNT BICYCLE_MAJ_INJ_COUNT BUS_COUNT COLLISION_TYPE COMM_VEH_COUNT CRASH_MONTH ... TIME_OF_DAY UNBELTED_OCC_COUNT UNB_DEATH_COUNT UNB_MAJ_INJ_COUNT UNK_INJ_DEG_COUNT UNK_INJ_PER_COUNT VAN_COUNT VEHICLE_COUNT WEATHER geometry
137 1 0 0 0 0 0 0 7 0 8 ... 935 0 0 0 0 0 0 1 1 POINT (-75.18744029819194 39.94735998737308)
957 1 0 0 0 0 0 0 1 0 4 ... 1203 0 0 0 0 0 0 2 1 POINT (-75.18744029819194 39.94735998737308)
1321 2 0 0 0 0 0 0 5 0 2 ... 106 1 0 0 0 0 0 2 1 POINT (-75.18744029819194 39.94735998737308)
1778 1 0 0 0 0 0 0 4 1 3 ... 1005 0 0 0 0 1 0 2 2 POINT (-75.18744029819194 39.94735998737308)
2821 2 0 0 0 0 0 0 4 0 12 ... 723 0 0 0 0 0 0 2 1 POINT (-75.18744029819194 39.94735998737308)
2876 0 0 0 0 0 0 0 5 0 3 ... 2353 4 0 0 0 0 0 2 1 POINT (-75.18744029819194 39.94735998737308)
2925 1 0 0 0 0 0 0 1 0 1 ... 1041 0 0 0 1 0 1 2 1 POINT (-75.18744029819194 39.94735998737308)
4094 1 0 0 0 0 0 0 5 1 10 ... 1134 0 0 0 0 0 0 2 1 POINT (-75.18744029819194 39.94735998737308)
5421 2 0 0 0 0 0 0 1 0 11 ... 1425 0 0 0 0 0 0 2 1 POINT (-75.18744029819194 39.94735998737308)
5693 1 0 0 0 0 0 0 1 0 12 ... 1328 0 0 0 0 0 0 3 1 POINT (-75.18744029819194 39.94735998737308)
5995 3 0 0 0 0 0 0 1 0 9 ... 1843 0 0 0 0 0 0 3 1 POINT (-75.18744029819194 39.94735998737308)
6425 0 0 0 0 0 0 0 1 0 8 ... 1300 0 0 0 0 0 0 2 1 POINT (-75.18744029819194 39.94735998737308)
7077 0 0 0 0 0 0 0 1 0 12 ... 1355 0 0 0 0 1 0 2 2 POINT (-75.18744029819194 39.94735998737308)
7806 0 0 0 0 0 0 0 1 0 12 ... 1418 0 0 0 0 0 0 2 1 POINT (-75.18744029819194 39.94735998737308)
7955 2 0 0 0 0 0 0 1 0 12 ... 1912 0 0 0 0 0 0 3 1 POINT (-75.18744029819194 39.94735998737308)
8105 2 0 0 0 0 0 0 1 0 11 ... 1519 0 0 0 0 0 0 3 1 POINT (-75.18744029819194 39.94735998737308)
8594 1 0 0 0 0 0 0 1 0 11 ... 634 0 0 0 0 0 1 2 2 POINT (-75.18744029819194 39.94735998737308)
8620 4 0 0 0 0 0 0 1 0 11 ... 2125 0 0 0 0 0 0 4 1 POINT (-75.18744029819194 39.94735998737308)
8683 3 0 0 0 0 0 0 4 0 11 ... 2017 0 0 0 0 0 0 3 1 POINT (-75.18744029819194 39.94735998737308)
8812 3 0 0 0 0 0 0 1 0 11 ... 1742 0 0 0 0 0 0 3 1 POINT (-75.18744029819194 39.94735998737308)
8883 1 0 0 0 0 0 0 1 0 10 ... 2313 0 0 0 0 0 0 2 1 POINT (-75.18744029819194 39.94735998737308)
9152 2 0 0 0 0 0 0 5 0 3 ... 1510 0 0 0 0 0 0 2 1 POINT (-75.18744029819194 39.94735998737308)
9778 2 0 0 0 0 0 0 1 0 10 ... 1308 0 0 0 0 0 0 2 1 POINT (-75.18744029819194 39.94735998737308)
10913 1 0 0 0 0 0 0 1 0 8 ... 2145 0 0 0 0 1 0 2 1 POINT (-75.18744029819194 39.94735998737308)
11299 2 0 0 0 0 0 0 1 0 1 ... 802 0 0 0 0 0 0 2 1 POINT (-75.18744029819194 39.94735998737308)
11750 3 0 0 0 0 0 0 1 0 4 ... 1027 0 0 0 0 0 0 3 1 POINT (-75.18744029819194 39.94735998737308)
12242 2 0 0 0 0 0 0 1 0 8 ... 2338 0 0 0 0 0 0 2 1 POINT (-75.18744029819194 39.94735998737308)
13319 1 0 0 0 0 0 0 7 0 4 ... 235 0 0 0 0 0 0 1 1 POINT (-75.18744029819194 39.94735998737308)
14578 4 0 0 0 0 0 0 1 0 4 ... 1314 0 0 0 0 1 0 4 1 POINT (-75.18744029819194 39.94735998737308)
14717 0 0 0 0 0 0 0 1 0 8 ... 621 0 0 0 0 0 0 2 2 POINT (-75.18744029819194 39.94735998737308)
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
27576 1 0 0 0 0 0 0 1 0 4 ... 1051 0 0 0 0 0 1 2 1 POINT (-75.18744029819194 39.94735998737308)
27592 1 0 0 0 0 0 0 7 0 2 ... 1049 0 0 0 0 0 0 1 2 POINT (-75.18744029819194 39.94735998737308)
28374 2 0 1 0 0 0 0 1 0 4 ... 1514 0 0 0 0 0 0 4 1 POINT (-75.18744029819194 39.94735998737308)
28513 0 0 0 0 0 0 0 1 0 5 ... 1248 0 0 0 1 0 1 2 1 POINT (-75.18744029819194 39.94735998737308)
28795 1 0 0 0 0 0 0 1 0 12 ... 1935 0 0 0 0 0 0 2 1 POINT (-75.18744029819194 39.94735998737308)
29490 0 0 0 0 0 0 0 7 0 12 ... 1549 0 0 0 1 0 0 1 1 POINT (-75.18744029819194 39.94735998737308)
29874 0 0 0 0 0 0 0 7 0 4 ... 629 0 0 0 0 0 0 1 1 POINT (-75.18744029819194 39.94735998737308)
29951 2 0 0 0 0 0 0 1 0 1 ... 2219 0 0 0 0 0 0 2 1 POINT (-75.18744029819194 39.94735998737308)
30463 1 0 0 0 0 0 0 1 0 12 ... 2107 0 0 0 0 1 1 3 1 POINT (-75.18744029819194 39.94735998737308)
30949 2 0 0 0 0 0 0 1 0 12 ... 915 0 0 0 2 0 0 4 1 POINT (-75.18744029819194 39.94735998737308)
31358 3 0 0 0 0 0 0 1 0 6 ... 750 1 0 0 0 0 0 3 1 POINT (-75.18744029819194 39.94735998737308)
31733 1 0 0 0 0 0 0 1 0 5 ... 1706 0 0 0 1 0 0 2 2 POINT (-75.18744029819194 39.94735998737308)
32052 2 0 0 0 0 0 0 1 0 2 ... 1210 0 0 0 0 0 0 2 1 POINT (-75.18744029819194 39.94735998737308)
32202 2 0 0 0 0 0 0 1 0 5 ... 1012 0 0 0 0 0 0 2 1 POINT (-75.18744029819194 39.94735998737308)
32379 2 0 0 0 0 0 0 1 0 7 ... 1210 0 0 0 0 0 0 2 1 POINT (-75.18744029819194 39.94735998737308)
33036 1 0 0 0 0 0 0 1 0 4 ... 232 0 0 0 0 0 0 2 1 POINT (-75.18744029819194 39.94735998737308)
34231 2 0 0 0 0 0 0 1 0 7 ... 1755 0 0 0 0 0 0 2 1 POINT (-75.18744029819194 39.94735998737308)
34293 3 0 0 0 0 0 0 1 0 7 ... 835 0 0 0 0 0 0 3 1 POINT (-75.18744029819194 39.94735998737308)
35021 1 0 0 0 0 0 0 7 0 3 ... 1853 0 0 0 0 0 0 1 2 POINT (-75.18744029819194 39.94735998737308)
35615 1 0 0 0 0 0 0 1 0 7 ... 1545 0 0 0 0 0 0 2 1 POINT (-75.18744029819194 39.94735998737308)
36050 2 0 0 0 0 0 0 1 0 12 ... 900 0 0 0 0 0 0 2 1 POINT (-75.18744029819194 39.94735998737308)
36717 2 0 0 0 0 0 0 1 0 12 ... 725 0 0 0 0 0 0 2 1 POINT (-75.18744029819194 39.94735998737308)
37720 2 0 0 0 0 0 0 1 0 8 ... 1207 0 0 0 0 0 0 3 1 POINT (-75.18744029819194 39.94735998737308)
39636 2 0 0 0 0 0 0 1 0 1 ... 1702 0 0 0 0 0 0 2 1 POINT (-75.18744029819194 39.94735998737308)
40277 1 0 0 0 0 0 0 1 0 1 ... 645 0 0 0 0 0 0 2 1 POINT (-75.18744029819194 39.94735998737308)
40357 1 0 0 0 0 0 0 1 0 2 ... 1343 0 0 0 0 0 0 2 1 POINT (-75.18744029819194 39.94735998737308)
41201 1 0 0 0 0 0 0 1 0 7 ... 1339 0 0 0 0 0 0 3 1 POINT (-75.18744029819194 39.94735998737308)
41403 1 0 0 0 0 0 0 7 0 8 ... 148 0 0 0 0 1 0 1 1 POINT (-75.18744029819194 39.94735998737308)
41760 3 0 0 0 0 0 0 1 0 8 ... 1816 0 0 0 0 0 0 3 1 POINT (-75.18744029819194 39.94735998737308)
43054 3 0 0 0 0 0 0 1 0 4 ... 2006 0 0 0 0 0 0 3 1 POINT (-75.18744029819194 39.94735998737308)

90 rows × 54 columns


In [30]:
print(street_gpd.shape)
street_gpd.head().T


(41022, 31)
Out[30]:
0 1 2 3 4
CLASS 3 3 3 5 4
FNODE_ 2 2 1 6 5
LENGTH 449.863 540.083 446.104 447.261 148.216
LPOLY_ 0 0 0 0 0
L_F_ADD 1500 400 1600 1600 350
L_HUNDRED 1500 400 1600 1600 300
L_T_ADD 1598 498 1698 1698 398
MULTI_REP 0 0 0 0 0
NEWSEGDATE None None None None None
ONEWAY FT TF FT TF TF
PRE_DIR None N None None N
RESPONSIBL FAM FAM FAM None None
RPOLY_ 0 0 0 0 0
R_F_ADD 1501 401 1601 1601 351
R_HUNDRED 1500 400 1600 1600 300
R_T_ADD 1599 499 1699 1699 399
SEG_ID 420708 422065 420702 420732 420718
SHAPE_LEN 449.863 540.083 446.104 447.261 148.216
STCL2_ 1 2 3 4 5
STCL2_ID 85205 86540 85199 85229 85215
STNAME CALLOWHILL ST N 15TH ST CALLOWHILL ST CARLTON ST N 17TH ST
STREETLABE CALLOWHILL ST N 15TH ST CALLOWHILL ST CARLTON ST N 17TH ST
ST_CODE 20880 88070 20880 21440 88110
ST_NAME CALLOWHILL 15TH CALLOWHILL CARLTON 17TH
ST_TYPE ST ST ST ST ST
SUF_DIR None None None None None
TNODE_ 1 3 4 5 4
UPDATE_ 1997-02-10 1997-02-10 1997-02-10 1997-02-10 1997-06-27
ZIP_LEFT 19130 19130 19130 19103 19103
ZIP_RIGHT 19130 19130 19130 19103 19103
geometry LINESTRING (-75.16371257852462 39.959816984389... LINESTRING (-75.16371257852462 39.959816984389... LINESTRING (-75.16529687686355 39.960013687935... LINESTRING (-75.16537539004391 39.959601782423... LINESTRING (-75.16694872703492 39.959805718080...

In [31]:
vpi_data_gpd = gpd.read_file('../philly/Vehicle & Pedestrian Investigations.geojson')
print(vpi_data_gpd.shape)
vpi_data_gpd.head().T


(1296689, 29)
Out[31]:
0 1 2 3 4
:@computed_region_bbgf_pidf 10 14 14 5 5
:created_at 2016-09-28T13:00:32 2016-09-28T13:00:32 2016-09-28T13:00:32 2016-09-28T13:00:32 2016-09-28T13:00:32
:id row-tywx_zg46-cw84 row-z92m.d7r6~4rji row-4sb2-6s78_xnur row-aa32.djwa-fx39 row-xwa5_hp4e-frtk
:updated_at 2016-09-28T13:00:32 2016-09-28T13:00:32 2016-09-28T13:00:32 2016-09-28T13:00:32 2016-09-28T13:00:32
:version rv-y6zc~yrni.ujrq rv-z6a6-xsyb~mudr rv-fwuf_8mhe-vjan rv-kpt5-325y_zr5u rv-buwm-pueh_vce5
age 51 28 28 62 30
datetimeoccur 2016-09-08T10:25:00 2016-09-09T01:34:00 2016-09-09T00:38:00 2016-09-08T10:10:00 2016-09-08T11:20:00
dcnumber None None None None None
districtoccur 14 18 18 06 06
gender Male Female Female Male Male
geometry POINT (-75.166124 40.030666) POINT (-75.232809 39.960947) POINT (-75.240836 39.961337) POINT (-75.156342 39.953572) POINT (-75.157916 39.953782)
id 1555287 1556358 1556159 1555244 1556582
individual_arrested 0 1 0 0 0
individual_contraband 0 1 0 0 0
individual_frisked 0 0 0 0 0
individual_searched 0 1 0 0 0
inside_or_outside Outside Outside Outside Outside Outside
location 0 BLOCK COLLOM ST 0 BLOCK S 56TH ST 0 BLOCK S 60TH ST 100 BLOCK N 10TH ST 100 BLOCK N 11TH ST
objectid 2426243 2426326 2426332 2426344 2426346
point_x -75.16612389 -75.23280889 -75.24083602 -75.15634199 -75.15791644
point_y 40.03066595 39.96094721 39.96133678 39.9535721 39.95378192
psa 142 182 181 062 062
race Black - Non-Latino Black - Non-Latino Black - Non-Latino Black - Non-Latino White - Non-Latino
stopcode 2701 2701 2701 2702 2702
stoptype pedestrian pedestrian pedestrian vehicle vehicle
vehicle_contraband 0 0 0 0 0
vehicle_frisked 0 0 0 0 0
vehicle_searched 0 0 0 0 0
weekday THURSDAY FRIDAY FRIDAY THURSDAY THURSDAY

In [32]:
vpi_data_gpd.info()


<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1296689 entries, 0 to 1296688
Data columns (total 29 columns):
:@computed_region_bbgf_pidf    1198809 non-null object
:created_at                    1296689 non-null object
:id                            1296689 non-null object
:updated_at                    1296689 non-null object
:version                       1296689 non-null object
age                            1294550 non-null object
datetimeoccur                  1296689 non-null object
dcnumber                       0 non-null object
districtoccur                  1296688 non-null object
gender                         1296555 non-null object
geometry                       1202146 non-null object
id                             1296689 non-null object
individual_arrested            1296689 non-null object
individual_contraband          1296689 non-null object
individual_frisked             1296689 non-null object
individual_searched            1296689 non-null object
inside_or_outside              1296689 non-null object
location                       1264246 non-null object
objectid                       1296689 non-null object
point_x                        1202146 non-null object
point_y                        1202146 non-null object
psa                            1296686 non-null object
race                           1296689 non-null object
stopcode                       1296689 non-null object
stoptype                       1296689 non-null object
vehicle_contraband             1296689 non-null object
vehicle_frisked                1296689 non-null object
vehicle_searched               1296689 non-null object
weekday                        1296689 non-null object
dtypes: object(29)
memory usage: 286.9+ MB

In [2]:
street_gpd = gpd.read_file('../philly/GeoJSON_data/Street_Centerline_qgis3.geojson')

In [23]:
philly311_csv = pd.read_csv('../philly/311_Requests.csv')
print(philly311_csv.shape)
philly311_csv.head().T


(1110623, 17)
Out[23]:
0 1 2 3 4
Service Request ID 10895664 10895669 10894010 10894151 10894702
Requested Date/Time 08/18/2016 06:32:14 PM 08/18/2016 06:35:32 PM 08/18/2016 09:43:32 AM 08/18/2016 10:19:40 AM 08/18/2016 12:36:47 PM
Service Name Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection Illegal Dumping Rubbish/Recyclable Material Collection
Service Code SR-ST03 SR-ST03 SR-ST03 SR-ST02 SR-ST03
Agency Responsible Streets Department Streets Department Streets Department Streets Department Streets Department
Status Closed Closed Closed Closed Closed
Service Notice 2 Business Days 2 Business Days 2 Business Days 5 Business Days 2 Business Days
Updated Date/Time 08/22/2016 06:15:23 AM 08/22/2016 06:15:26 AM 08/22/2016 06:15:29 AM 08/22/2016 06:15:32 AM 08/22/2016 06:15:35 AM
Expected Date/Time 08/21/2016 08:00:00 PM 08/21/2016 08:00:00 PM 08/21/2016 08:00:00 PM 08/24/2016 08:00:00 PM 08/21/2016 08:00:00 PM
Address 4500 N LAWRENCE ST 4400 N LAWRENCE ST 435 W CAREY ST 3255 N 11TH ST 6500 W WALNUT PARK DR
Zipcode 19140 19140 NaN 19140 NaN
Media URL NaN NaN NaN NaN NaN
Location POINT (-75.133247 40.019631) POINT (-75.133589 40.01808) POINT (-75.135892 40.008402) POINT (-75.147295 40.002129) POINT (-75.120826 40.050084)
Latitude 40.0196 40.0181 40.0084 40.0021 40.0501
Longitude -75.1332 -75.1336 -75.1359 -75.1473 -75.1208
Zipcodes 41 41 41 41 9
Census Tracts 2010 - 2013 65 337 337 196 304

In [3]:
#data = DataFrame.from_csv(os.path.join(directory, filename), index_col=False)
points = [Point(row['Longitude'], row['Latitude']) for key, row in philly311_csv.iterrows()]
geo_df = GeoDataFrame(philly311_csv,geometry=points)
geo_df.to_file('../philly/GeoJSON_data/311_Requests.geojson', driver='GeoJSON')

In [3]:
philly311_gpd = gpd.read_file('../philly/GeoJSON_data/311_Requests.geojson')

In [5]:
print(philly311_gpd.shape)
philly311_gpd.head().T


(1110623, 18)
Out[5]:
0 1 2 3 4
Address 4500 N LAWRENCE ST 4400 N LAWRENCE ST 435 W CAREY ST 3255 N 11TH ST 6500 W WALNUT PARK DR
Agency Responsible Streets Department Streets Department Streets Department Streets Department Streets Department
Census Tracts 2010 - 2013 65 337 337 196 304
Expected Date/Time 08/21/2016 08:00:00 PM 08/21/2016 08:00:00 PM 08/21/2016 08:00:00 PM 08/24/2016 08:00:00 PM 08/21/2016 08:00:00 PM
Latitude 40.0196 40.0181 40.0084 40.0021 40.0501
Location POINT (-75.133247 40.019631) POINT (-75.133589 40.01808) POINT (-75.135892 40.008402) POINT (-75.147295 40.002129) POINT (-75.120826 40.050084)
Longitude -75.1332 -75.1336 -75.1359 -75.1473 -75.1208
Media URL None None None None None
Requested Date/Time 08/18/2016 06:32:14 PM 08/18/2016 06:35:32 PM 08/18/2016 09:43:32 AM 08/18/2016 10:19:40 AM 08/18/2016 12:36:47 PM
Service Code SR-ST03 SR-ST03 SR-ST03 SR-ST02 SR-ST03
Service Name Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection Illegal Dumping Rubbish/Recyclable Material Collection
Service Notice 2 Business Days 2 Business Days 2 Business Days 5 Business Days 2 Business Days
Service Request ID 10895664 10895669 10894010 10894151 10894702
Status Closed Closed Closed Closed Closed
Updated Date/Time 08/22/2016 06:15:23 AM 08/22/2016 06:15:26 AM 08/22/2016 06:15:29 AM 08/22/2016 06:15:32 AM 08/22/2016 06:15:35 AM
Zipcode 19140 19140 None 19140 None
Zipcodes 41 41 41 41 9
geometry POINT (-75.13324674099999 40.019630884) POINT (-75.13358943099999 40.018080355) POINT (-75.135892102 40.008401585) POINT (-75.14729510799999 40.002128888) POINT (-75.12082581 40.050084238)

In [54]:
len(philly311_gpd['Expected Date/Time'])


Out[54]:
1110623

In [53]:
philly311_gpd[philly311_gpd['Expected Date/Time'] == NaN]


Out[53]:
Address Agency Responsible Census Tracts 2010 - 2013 Expected Date/Time Latitude Location Longitude Media URL Requested Date/Time Service Code Service Name Service Notice Service Request ID Status Updated Date/Time Zipcode Zipcodes geometry

In [57]:
philly311_gpd[philly311_gpd['Expected Date/Time'].str.contains('2016', na=False)]


Out[57]:
Address Agency Responsible Census Tracts 2010 - 2013 Expected Date/Time Latitude Location Longitude Media URL Requested Date/Time Service Code Service Name Service Notice Service Request ID Status Updated Date/Time Zipcode Zipcodes geometry
0 4500 N LAWRENCE ST Streets Department 65.0 08/21/2016 08:00:00 PM 40.019631 POINT (-75.133247 40.019631) -75.133247 None 08/18/2016 06:32:14 PM SR-ST03 Rubbish/Recyclable Material Collection 2 Business Days 10895664 Closed 08/22/2016 06:15:23 AM 19140 41.0 POINT (-75.13324674099999 40.019630884)
1 4400 N LAWRENCE ST Streets Department 337.0 08/21/2016 08:00:00 PM 40.018080 POINT (-75.133589 40.01808) -75.133589 None 08/18/2016 06:35:32 PM SR-ST03 Rubbish/Recyclable Material Collection 2 Business Days 10895669 Closed 08/22/2016 06:15:26 AM 19140 41.0 POINT (-75.13358943099999 40.018080355)
2 435 W CAREY ST Streets Department 337.0 08/21/2016 08:00:00 PM 40.008402 POINT (-75.135892 40.008402) -75.135892 None 08/18/2016 09:43:32 AM SR-ST03 Rubbish/Recyclable Material Collection 2 Business Days 10894010 Closed 08/22/2016 06:15:29 AM None 41.0 POINT (-75.135892102 40.008401585)
3 3255 N 11TH ST Streets Department 196.0 08/24/2016 08:00:00 PM 40.002129 POINT (-75.147295 40.002129) -75.147295 None 08/18/2016 10:19:40 AM SR-ST02 Illegal Dumping 5 Business Days 10894151 Closed 08/22/2016 06:15:32 AM 19140 41.0 POINT (-75.14729510799999 40.002128888)
4 6500 W WALNUT PARK DR Streets Department 304.0 08/21/2016 08:00:00 PM 40.050084 POINT (-75.120826 40.050084) -75.120826 None 08/18/2016 12:36:47 PM SR-ST03 Rubbish/Recyclable Material Collection 2 Business Days 10894702 Closed 08/22/2016 06:15:35 AM None 9.0 POINT (-75.12082581 40.050084238)
5 215 W ASHDALE ST Streets Department 64.0 08/21/2016 08:00:00 PM 40.027507 POINT (-75.128343 40.027507) -75.128343 None 08/18/2016 03:22:16 PM SR-ST03 Rubbish/Recyclable Material Collection 2 Business Days 10895291 Closed 08/22/2016 06:15:41 AM None 9.0 POINT (-75.12834329200001 40.027507442)
6 3119 N BROAD ST Streets Department 197.0 08/21/2016 08:00:00 PM 40.000762 POINT (-75.1531 40.000762) -75.153100 https://d17aqltn7cihbm.cloudfront.net/uploads/... 08/18/2016 03:35:41 PM SR-ST03 Rubbish/Recyclable Material Collection 2 Business Days 10895342 Closed 08/22/2016 06:15:44 AM None 33.0 POINT (-75.15310022199999 40.000761795)
7 S 22ND ST & MOORE ST Streets Department 151.0 09/11/2016 08:00:00 PM 39.929731 POINT (-75.181946 39.929731) -75.181946 None 09/07/2016 12:00:19 PM SR-ST01 Street Defect 3 Business Days 10925505 Closed 10/22/2016 07:31:06 AM None 46.0 POINT (-75.18194554300001 39.929730972)
8 158 DIAMOND ST Streets Department 24.0 08/18/2016 08:00:00 PM 39.981899 POINT (-75.134993 39.981899) -75.134993 None 08/17/2016 02:39:55 PM SR-ST03 Rubbish/Recyclable Material Collection 2 Business Days 10893003 Closed 08/22/2016 06:30:26 AM 19122 11.0 POINT (-75.13499281599999 39.9818993)
9 158 DIAMOND ST Streets Department 24.0 08/18/2016 08:00:00 PM 39.981899 POINT (-75.134993 39.981899) -75.134993 None 08/17/2016 02:41:10 PM SR-ST03 Rubbish/Recyclable Material Collection 2 Business Days 10893006 Closed 08/22/2016 06:30:29 AM 19122 11.0 POINT (-75.13499281599999 39.9818993)
10 1001 POPLAR ST Streets Department 179.0 08/24/2016 08:00:00 PM 39.968628 POINT (-75.153274 39.968628) -75.153274 None 08/19/2016 01:25:33 AM SR-ST02 Illegal Dumping 5 Business Days 10895773 Closed 08/22/2016 06:30:32 AM None 12.0 POINT (-75.15327389300001 39.968628368)
11 N 7TH ST & MARKET ST Streets Department 148.0 08/21/2016 08:00:00 PM 39.950938 POINT (-75.152002 39.950938) -75.152002 None 08/19/2016 09:09:39 AM SR-ST25 Traffic Signal Emergency 1 Business Days 10895980 Closed 08/22/2016 06:30:34 AM None 21.0 POINT (-75.152001849 39.950937692)
12 2410 N 6TH ST Streets Department 30.0 08/24/2016 08:00:00 PM 39.988103 POINT (-75.143105 39.988103) -75.143105 None 08/18/2016 10:09:52 AM SR-ST02 Illegal Dumping 5 Business Days 10894112 Closed 08/22/2016 06:30:36 AM 19133 34.0 POINT (-75.143104624 39.988102857)
13 2236 FRANKFORD AVE Streets Department 27.0 08/21/2016 08:00:00 PM 39.980433 POINT (-75.128643 39.980433) -75.128643 None 08/18/2016 10:09:58 AM SR-ST03 Rubbish/Recyclable Material Collection 2 Business Days 10894113 Closed 08/22/2016 06:30:39 AM None 14.0 POINT (-75.12864297899999 39.980433165)
14 802 W INDIANA AVE Streets Department 366.0 08/24/2016 08:00:00 PM 39.997314 POINT (-75.144286 39.997314) -75.144286 None 08/18/2016 10:49:48 AM SR-ST02 Illegal Dumping 5 Business Days 10894272 Closed 08/22/2016 06:30:41 AM 19133 34.0 POINT (-75.14428571399999 39.99731393)
15 GERMANTOWN AVE & E TULPEHOCKEN ST Streets Department 381.0 08/18/2016 08:00:00 PM 40.042095 POINT (-75.179982 40.042095) -75.179982 None 08/18/2016 10:36:40 AM SR-ST25 Traffic Signal Emergency 1 Business Days 10894221 Closed 08/22/2016 06:30:43 AM None 45.0 POINT (-75.17998167099999 40.042094772)
16 E ROOSEVELT BLVD & RYAN AVE Streets Department 73.0 08/21/2016 08:00:00 PM 40.053993 POINT (-75.048882 40.053993) -75.048882 None 08/19/2016 12:39:50 PM SR-ST25 Traffic Signal Emergency 1 Business Days 10896703 Closed 08/22/2016 06:30:46 AM None 27.0 POINT (-75.04888205100001 40.053992761)
17 2801 ORMES ST Streets Department 127.0 08/24/2016 08:00:00 PM 39.992159 POINT (-75.125898 39.992159) -75.125898 https://d17aqltn7cihbm.cloudfront.net/uploads/... 08/18/2016 11:59:42 AM SR-ST02 Illegal Dumping 5 Business Days 10894554 Closed 08/22/2016 06:30:48 AM None 35.0 POINT (-75.12589782800001 39.992159329)
18 2556 E LEHIGH AVE Streets Department 339.0 08/21/2016 08:00:00 PM 39.979571 POINT (-75.115665 39.979571) -75.115665 None 08/18/2016 12:23:16 PM SR-ST03 Rubbish/Recyclable Material Collection 2 Business Days 10894654 Closed 08/22/2016 06:30:50 AM 19125 14.0 POINT (-75.11566479699999 39.979570733)
19 185 W ATLANTIC ST Streets Department 116.0 08/24/2016 08:00:00 PM 40.004266 POINT (-75.13116 40.004266) -75.131160 None 08/18/2016 01:15:55 PM SR-ST02 Illegal Dumping 5 Business Days 10894823 Closed 08/22/2016 06:30:53 AM None 41.0 POINT (-75.13115956499998 40.00426592199999)
20 358 W INDIANA AVE Streets Department 166.0 08/24/2016 08:00:00 PM 39.996374 POINT (-75.137101 39.996374) -75.137101 https://d17aqltn7cihbm.cloudfront.net/uploads/... 08/18/2016 01:46:09 PM SR-ST02 Illegal Dumping 5 Business Days 10894952 Closed 08/22/2016 06:30:55 AM None 34.0 POINT (-75.13710128599999 39.996374367)
21 N 12TH ST & CALLOWHILL ST Streets Department 294.0 08/24/2016 08:00:00 PM 39.959114 POINT (-75.158311 39.959114) -75.158311 None 08/18/2016 03:58:16 PM SR-ST02 Illegal Dumping 5 Business Days 10895425 Closed 08/22/2016 06:30:58 AM None 12.0 POINT (-75.15831109400001 39.959113891)
22 2642 BRADDOCK ST Streets Department 27.0 08/24/2016 08:00:00 PM 39.986803 POINT (-75.124189 39.986803) -75.124189 https://d17aqltn7cihbm.cloudfront.net/uploads/... 08/18/2016 05:37:01 PM SR-ST02 Illegal Dumping 5 Business Days 10895601 Closed 08/22/2016 06:31:00 AM None 14.0 POINT (-75.12418933900001 39.986802981)
23 2939 N 8TH ST Streets Department 366.0 08/24/2016 08:00:00 PM 39.996795 POINT (-75.144216 39.996795) -75.144216 None 08/18/2016 06:17:04 PM SR-ST02 Illegal Dumping 5 Business Days 10895651 Closed 08/22/2016 06:31:03 AM 19133 34.0 POINT (-75.144216247 39.996795328)
24 2172 E NORRIS ST Streets Department 24.0 06/20/2016 08:00:00 PM 39.978950 POINT (-75.131078 39.97895) -75.131078 https://d17aqltn7cihbm.cloudfront.net/uploads/... 06/14/2016 09:13:38 AM SR-ST02 Illegal Dumping 5 Business Days 10783983 Closed 08/22/2016 06:45:27 AM None 14.0 POINT (-75.13107823199999 39.978950016)
25 210 E LIPPINCOTT ST Streets Department 96.0 06/22/2016 08:00:00 PM 39.997179 POINT (-75.126095 39.997179) -75.126095 https://d17aqltn7cihbm.cloudfront.net/uploads/... 06/16/2016 02:37:14 PM SR-ST02 Illegal Dumping 5 Business Days 10789990 Closed 08/22/2016 06:45:30 AM None 35.0 POINT (-75.12609503100001 39.997179121)
26 4701 WOODLAND AVE Streets Department 347.0 06/19/2016 08:00:00 PM 39.942999 POINT (-75.211093 39.942999) -75.211093 None 06/20/2016 04:29:59 AM SR-ST25 Traffic Signal Emergency None 10792903 Closed 08/22/2016 06:45:32 AM None 44.0 POINT (-75.21109311500003 39.942999207)
27 2343 MOORE ST Streets Department 151.0 11/29/2016 07:00:00 PM 39.930093 POINT (-75.184797 39.930093) -75.184797 https://d17aqltn7cihbm.cloudfront.net/uploads/... 09/19/2016 03:02:11 PM SR-ST01 Street Defect 46 Business Days 10946522 Closed 10/22/2016 07:31:07 AM None 46.0 POINT (-75.18479705 39.930093432)
28 2336 MOORE ST Streets Department 154.0 11/30/2016 07:00:00 PM 39.930072 POINT (-75.184722 39.930072) -75.184722 None 09/21/2016 09:40:46 AM SR-ST01 Street Defect 46 Business Days 10950045 Closed 10/22/2016 07:31:09 AM None 46.0 POINT (-75.18472173500002 39.930072148)
29 2637 EARP ST Streets Department 151.0 12/04/2016 07:00:00 PM 39.936029 POINT (-75.188489 39.936029) -75.188489 None 09/22/2016 12:34:39 PM SR-ST01 Street Defect 46 Business Days 10953184 Closed 10/22/2016 07:31:11 AM None 47.0 POINT (-75.188488679 39.936029205)
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1110589 6919 LINMORE AVE License & Inspections 8.0 09/13/2016 08:00:00 PM 39.919788 POINT (-75.238772 39.919788) -75.238772 None 08/17/2016 09:39:03 AM SR-LI27 Vacant House or Commercial 20 Business Days 10891824 Closed 08/20/2016 08:47:13 PM None 43.0 POINT (-75.23877243299999 39.919788369)
1110590 6919 LINMORE AVE License & Inspections 8.0 09/11/2016 08:00:00 PM 39.919788 POINT (-75.238772 39.919788) -75.238772 None 08/15/2016 09:24:23 AM SR-LI27 Vacant House or Commercial 20 Business Days 10887126 Closed 08/20/2016 08:47:14 PM None 43.0 POINT (-75.23877243299999 39.919788369)
1110591 5861 HAZEL AVE License & Inspections 95.0 09/04/2016 08:00:00 PM 39.953197 POINT (-75.24043 39.953197) -75.240430 None 08/08/2016 01:14:28 PM SR-LI30 Zoning Residential 20 Business Days 10876974 Closed 08/20/2016 08:47:18 PM None 44.0 POINT (-75.240430327 39.953196572)
1110592 1537 S VOGDES ST Community Life Improvement Program 285.0 12/08/2016 07:00:00 PM 39.937538 POINT (-75.223854 39.937538) -75.223854 None 08/05/2016 04:31:02 PM SR-CL02 Vacant Lot Clean-Up 90 Business Days 10874798 Closed 08/20/2016 08:47:20 PM None 44.0 POINT (-75.223853631 39.937537645)
1110593 1328 S CHADWICK ST License & Inspections 257.0 08/30/2016 08:00:00 PM 39.933884 POINT (-75.172028 39.933884) -75.172028 None 08/17/2016 01:12:05 PM SR-LI06 Construction Site Task Force 10 Business Days 10892683 Closed 08/20/2016 08:47:22 PM 19146 47.0 POINT (-75.17202777999999 39.933884327)
1110594 6125 CARPENTER ST License & Inspections 165.0 02/18/2016 07:00:00 PM 39.949032 POINT (-75.246534 39.949032) -75.246534 None 01/22/2016 12:27:08 PM SR-LI16 Fire Residential or Commercial 20 Business Days 10522275 Open 08/20/2016 08:47:23 PM None 44.0 POINT (-75.24653444499999 39.949031618)
1110595 1430 S 56TH ST Philly311 Contact Center 273.0 07/27/2016 08:00:00 PM 39.940268 POINT (-75.228646 39.940268) -75.228646 None 06/30/2016 01:32:43 PM SR-IR01 Information Request None 10813962 Open 08/20/2016 08:47:24 PM None 44.0 POINT (-75.228645574 39.940268127)
1110596 6300 CITY AVE License & Inspections 279.0 08/31/2016 08:00:00 PM 39.989675 POINT (-75.251068 39.989675) -75.251068 None 08/18/2016 02:37:13 PM SR-LI06 Construction Site Task Force 10 Business Days 10895131 Closed 08/20/2016 08:47:51 PM None 26.0 POINT (-75.25106848 39.989674543)
1110597 1432 S 56TH ST License & Inspections 273.0 07/27/2016 08:00:00 PM 39.940228 POINT (-75.228599 39.940228) -75.228599 None 06/30/2016 01:31:20 PM SR-LI27 Vacant House or Commercial 20 Business Days 10813956 Closed 08/20/2016 08:47:58 PM None 44.0 POINT (-75.228598658 39.940227542)
1110598 2145 S 58TH ST Community Life Improvement Program 344.0 11/30/2016 07:00:00 PM 39.932697 POINT (-75.224917 39.932697) -75.224917 https://d17aqltn7cihbm.cloudfront.net/uploads/... 07/28/2016 11:05:14 AM SR-CL02 Vacant Lot Clean-Up 90 Business Days 10859598 Open 08/20/2016 08:48:01 PM None 44.0 POINT (-75.224916713 39.932696859)
1110599 1504 S 17TH ST License & Inspections 257.0 08/31/2016 08:00:00 PM 39.932008 POINT (-75.173 39.932008) -75.173000 None 08/18/2016 12:01:18 PM SR-LI02 Building Construction 10 Business Days 10894561 Closed 08/20/2016 08:48:27 PM 19146 47.0 POINT (-75.17300036100001 39.932008048)
1110600 2755 N 47TH ST Streets Department 355.0 06/22/2016 08:00:00 PM 40.001945 POINT (-75.223458 40.001945) -75.223458 None 06/20/2016 09:52:00 AM SR-ST10 Dead Animal in Street 3 Business Days 10793368 Closed 08/20/2016 08:48:39 PM None 32.0 POINT (-75.223457733 40.001945296)
1110601 1429 S 53RD ST License & Inspections 347.0 09/01/2016 08:00:00 PM 39.940076 POINT (-75.22086 39.940076) -75.220860 None 08/05/2016 09:07:51 AM SR-LI27 Vacant House or Commercial 20 Business Days 10873182 Closed 08/20/2016 08:48:45 PM 19143 44.0 POINT (-75.220860007 39.940076284)
1110602 1415 S CHADWICK ST License & Inspections 257.0 08/30/2016 08:00:00 PM 39.932943 POINT (-75.172203 39.932943) -75.172203 None 08/17/2016 01:13:19 PM SR-LI06 Construction Site Task Force 10 Business Days 10892687 Closed 08/20/2016 08:48:47 PM 19146 47.0 POINT (-75.172203258 39.93294334)
1110603 5707 CHESTER AVE Community Life Improvement Program 285.0 12/05/2016 07:00:00 PM 39.936517 POINT (-75.227432 39.936517) -75.227432 None 08/02/2016 10:09:38 AM SR-CL02 Vacant Lot Clean-Up 90 Business Days 10866858 Closed 08/20/2016 08:48:48 PM None 44.0 POINT (-75.22743167100001 39.936517469)
1110604 5443 HADFIELD ST License & Inspections 345.0 04/06/2016 08:00:00 PM 39.943872 POINT (-75.229648 39.943872) -75.229648 None 03/10/2016 07:48:55 PM SR-LI27 Vacant House or Commercial 20 Business Days 10624380 Open 08/20/2016 08:48:49 PM None 44.0 POINT (-75.229647884 39.943872024)
1110605 5215 W BERKS ST License & Inspections 353.0 07/25/2016 08:00:00 PM 39.986730 POINT (-75.22779 39.98673) -75.227790 None 06/28/2016 12:36:57 PM SR-LI27 Vacant House or Commercial 20 Business Days 10808787 Closed 08/20/2016 08:48:56 PM None 32.0 POINT (-75.22779014299999 39.986730004)
1110606 1745 N BAMBREY ST License & Inspections 187.0 09/01/2016 08:00:00 PM 39.982231 POINT (-75.176454 39.982231) -75.176454 None 08/19/2016 12:04:09 PM SR-LI06 Construction Site Task Force 10 Business Days 10896576 Closed 08/20/2016 08:49:01 PM None 10.0 POINT (-75.176453814 39.98223131)
1110607 2814 CAMBRIDGE ST Parks & Recreation 361.0 09/08/2016 08:00:00 PM 39.973699 POINT (-75.182467 39.973699) -75.182467 None 08/20/2016 09:49:48 PM SR-PR08 Street Trees 15 Business Days 10897823 Open 08/20/2016 09:49:56 PM None 31.0 POINT (-75.182467199 39.973699354)
1110608 N 8TH ST & BROWN ST Streets Department 357.0 06/26/2016 08:00:00 PM 39.965519 POINT (-75.151054 39.965519) -75.151054 None 06/21/2016 08:32:17 AM SR-ST02 Illegal Dumping 5 Business Days 10795362 Closed 08/21/2016 06:30:21 AM 19123 12.0 POINT (-75.15105387600001 39.965518579)
1110609 2000 BRIDGE ST Philly311 Contact Center 230.0 08/22/2016 04:00:10 AM 40.015825 POINT (-75.071432 40.015825) -75.071432 None 08/21/2016 07:30:10 AM SR-MI01 Miscellaneous None 10897847 Open 08/21/2016 07:30:18 AM None 13.0 POINT (-75.07143223 40.015824907)
1110610 4951 UNRUH AVE Community Life Improvement Program 318.0 08/29/2016 08:00:00 PM 40.020947 POINT (-75.043656 40.020947) -75.043656 https://d17aqltn7cihbm.cloudfront.net/uploads/... 08/21/2016 07:47:22 AM SR-CL01 Graffiti Removal None 10897850 Closed 08/21/2016 07:47:49 AM None 36.0 POINT (-75.04365569300001 40.020946558)
1110611 1198 S 4TH ST Parks & Recreation 215.0 08/20/2016 08:00:00 PM 39.933411 POINT (-75.151019 39.933411) -75.151019 None 08/21/2016 11:17:13 AM SR-PR01 Parks and Rec Safety and Maintenance 0 0 10897895 Open 08/21/2016 11:17:18 AM None 48.0 POINT (-75.15101909500002 39.933411423)
1110612 2834 S 8TH ST Streets Department 336.0 07/03/2016 08:00:00 PM 39.913396 POINT (-75.162031 39.913396) -75.162031 None 06/20/2016 06:23:38 PM SR-ST04 Street Light Outage 10 Business Days 10795126 Closed 08/21/2016 12:00:30 PM None 22.0 POINT (-75.162030652 39.913395637)
1110613 2814 CAMBRIDGE ST Parks & Recreation 361.0 09/08/2016 08:00:00 PM 39.973699 POINT (-75.182467 39.973699) -75.182467 None 08/21/2016 01:02:10 PM SR-PR08 Street Trees 15 Business Days 10897951 Open 08/21/2016 01:02:16 PM None 31.0 POINT (-75.182467199 39.973699354)
1110614 4507 SPRUCE ST Parks & Recreation 291.0 09/08/2016 08:00:00 PM 39.952934 POINT (-75.2126 39.952934) -75.212600 None 08/21/2016 02:28:11 PM SR-PR08 Street Trees 15 Business Days 10897976 Open 08/21/2016 02:28:18 PM None 40.0 POINT (-75.212599651 39.952934083)
1110615 1045 LUKENS ST Parks & Recreation 292.0 09/08/2016 08:00:00 PM 40.128054 POINT (-75.000268 40.128054) -75.000268 None 08/21/2016 07:57:20 PM SR-PR08 Street Trees 15 Business Days 10898038 Open 08/21/2016 07:57:25 PM None 7.0 POINT (-75.000267529 40.128053663)
1110616 2814 CAMBRIDGE ST Parks & Recreation 361.0 09/08/2016 08:00:00 PM 39.973699 POINT (-75.182467 39.973699) -75.182467 None 08/21/2016 08:04:56 PM SR-PR08 Street Trees 15 Business Days 10898043 Open 08/21/2016 08:05:03 PM None 31.0 POINT (-75.182467199 39.973699354)
1110617 2017 PIERCE ST License & Inspections 217.0 02/24/2016 07:00:00 PM 39.929778 POINT (-75.178886 39.929778) -75.178886 None 02/04/2016 09:05:52 AM SR-LI03 Building Dangerous 15 Business Days 10564918 Open 08/21/2016 08:45:46 PM 19145 46.0 POINT (-75.17888564899999 39.929777858)
1110622 2100 RIDGE AVE Philly311 Contact Center 177.0 08/22/2016 04:00:11 AM 39.977982 POINT (-75.1698 39.977982) -75.169800 None 08/21/2016 11:57:01 PM SR-MI01 Miscellaneous None 10898075 Open 08/21/2016 11:57:28 PM None 10.0 POINT (-75.16979963499998 39.977982414)

277129 rows × 18 columns


In [59]:
print(philly311_gpd[philly311_gpd['Expected Date/Time'].str.contains('2013', na=False)].shape[0])
print(philly311_gpd[philly311_gpd['Expected Date/Time'].str.contains('2014', na=False)].shape[0])
print(philly311_gpd[philly311_gpd['Expected Date/Time'].str.contains('2015', na=False)].shape[0])
print(philly311_gpd[philly311_gpd['Expected Date/Time'].str.contains('2016', na=False)].shape[0])


0
2720
137971
277129

In [60]:
print(philly311_gpd[philly311_gpd['Requested Date/Time'].str.contains('2013', na=False)].shape[0])
print(philly311_gpd[philly311_gpd['Requested Date/Time'].str.contains('2014', na=False)].shape[0])
print(philly311_gpd[philly311_gpd['Requested Date/Time'].str.contains('2015', na=False)].shape[0])
print(philly311_gpd[philly311_gpd['Requested Date/Time'].str.contains('2016', na=False)].shape[0])


0
25903
569889
514831

In [4]:
philly311_gpd = philly311_gpd.drop('Media URL', 1)
philly311_gpd['Zipcode'] = philly311_gpd['Zipcode'].fillna(0)
philly311_gpd['Service Notice'] = philly311_gpd['Service Notice'].fillna('-')

In [17]:
print(sum(isnan(philly311_gpd['Latitude'])==0))
philly311_gpd.info()


329488
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1110623 entries, 0 to 1110622
Data columns (total 17 columns):
Address                      329983 non-null object
Agency Responsible           1095993 non-null object
Census Tracts 2010 - 2013    329282 non-null float64
Expected Date/Time           419829 non-null object
Latitude                     329488 non-null float64
Location                     329488 non-null object
Longitude                    329488 non-null float64
Requested Date/Time          1110623 non-null object
Service Code                 1081747 non-null object
Service Name                 1110623 non-null object
Service Notice               1110623 non-null object
Service Request ID           1110623 non-null int64
Status                       1110623 non-null object
Updated Date/Time            1110623 non-null object
Zipcode                      1110623 non-null object
Zipcodes                     329279 non-null float64
geometry                     329488 non-null object
dtypes: float64(4), int64(1), object(12)
memory usage: 144.0+ MB

In [18]:
philly311_gpd[isnan(philly311_gpd['Latitude'])==0].shape


Out[18]:
(329488, 17)

In [19]:
philly311_gpd[isnan(philly311_gpd['Latitude'])==0].head().T


Out[19]:
0 1 2 3 4
Address 4500 N LAWRENCE ST 4400 N LAWRENCE ST 435 W CAREY ST 3255 N 11TH ST 6500 W WALNUT PARK DR
Agency Responsible Streets Department Streets Department Streets Department Streets Department Streets Department
Census Tracts 2010 - 2013 65 337 337 196 304
Expected Date/Time 08/21/2016 08:00:00 PM 08/21/2016 08:00:00 PM 08/21/2016 08:00:00 PM 08/24/2016 08:00:00 PM 08/21/2016 08:00:00 PM
Latitude 40.0196 40.0181 40.0084 40.0021 40.0501
Location POINT (-75.133247 40.019631) POINT (-75.133589 40.01808) POINT (-75.135892 40.008402) POINT (-75.147295 40.002129) POINT (-75.120826 40.050084)
Longitude -75.1332 -75.1336 -75.1359 -75.1473 -75.1208
Requested Date/Time 08/18/2016 06:32:14 PM 08/18/2016 06:35:32 PM 08/18/2016 09:43:32 AM 08/18/2016 10:19:40 AM 08/18/2016 12:36:47 PM
Service Code SR-ST03 SR-ST03 SR-ST03 SR-ST02 SR-ST03
Service Name Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection Illegal Dumping Rubbish/Recyclable Material Collection
Service Notice 2 Business Days 2 Business Days 2 Business Days 5 Business Days 2 Business Days
Service Request ID 10895664 10895669 10894010 10894151 10894702
Status Closed Closed Closed Closed Closed
Updated Date/Time 08/22/2016 06:15:23 AM 08/22/2016 06:15:26 AM 08/22/2016 06:15:29 AM 08/22/2016 06:15:32 AM 08/22/2016 06:15:35 AM
Zipcode 19140 19140 0 19140 0
Zipcodes 41 41 41 41 9
geometry POINT (-75.13324674099999 40.019630884) POINT (-75.13358943099999 40.018080355) POINT (-75.135892102 40.008401585) POINT (-75.14729510799999 40.002128888) POINT (-75.12082581 40.050084238)

In [62]:
philly311 = philly311_gpd[isnan(philly311_gpd['Latitude'])==0]

In [63]:
print(philly311[philly311['Expected Date/Time'].str.contains('2013', na=False)].shape[0])
print(philly311[philly311['Expected Date/Time'].str.contains('2014', na=False)].shape[0])
print(philly311[philly311['Expected Date/Time'].str.contains('2015', na=False)].shape[0])
print(philly311[philly311['Expected Date/Time'].str.contains('2016', na=False)].shape[0])


0
2715
137705
155087

In [64]:
print(philly311[philly311['Requested Date/Time'].str.contains('2013', na=False)].shape[0])
print(philly311[philly311['Requested Date/Time'].str.contains('2014', na=False)].shape[0])
print(philly311[philly311['Requested Date/Time'].str.contains('2015', na=False)].shape[0])
print(philly311[philly311['Requested Date/Time'].str.contains('2016', na=False)].shape[0])


0
6103
154287
169098

In [16]:
philly_seg_buffer_gp = street_gpd[['SEG_ID','STCL2_ID','geometry']].copy()
philly_seg_buffer_gp.geometry = philly_seg_buffer_gp.buffer(0.0001)
philly_311_buffer_gpd = philly311[['Agency Responsible','Service Request ID', 'geometry','Service Name']].copy()
philly_311_buffer_gpd.geometry = philly_311_buffer_gpd.buffer(0.0001)
joined = sjoin(philly_311_buffer_gpd, philly_seg_buffer_gp, how='left', op='intersects')


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-16-7f45ab8e82f0> in <module>()
      1 philly_seg_buffer_gp = street_gpd[['SEG_ID','STCL2_ID','geometry', 'SHAPE_LEN']].copy()
      2 philly_seg_buffer_gp.geometry = philly_seg_buffer_gp.buffer(0.0001)
----> 3 philly_311_buffer_gpd = philly311[['Agency Responsible','Service Request ID', 'geometry','Service Name']].copy()
      4 philly_311_buffer_gpd.geometry = philly_311_buffer_gpd.buffer(0.0001)
      5 joined = sjoin(philly_311_buffer_gpd, philly_seg_buffer_gp, how='left', op='intersects')

NameError: name 'philly311' is not defined

In [25]:
join_gb = joined.reset_index().groupby('Service Request ID').count()
print(join_gb[join_gb.SEG_ID==0].shape[0], ' points spatially match no segment')
print(join_gb[join_gb.SEG_ID==1].shape[0], ' points spatially match 1 segment')
print(join_gb[join_gb.SEG_ID>1].shape[0], ' points spatially match multiple segments')


420  points spatially match no segment
202092  points spatially match 1 segment
126976  points spatially match multiple segments

In [26]:
joined.head().T


Out[26]:
0 0 0 0 1
Agency Responsible Streets Department Streets Department Streets Department Streets Department Streets Department
Service Request ID 10895664 10895664 10895664 10895664 10895669
geometry POLYGON ((-75.13314674099999 40.019630884, -75... POLYGON ((-75.13314674099999 40.019630884, -75... POLYGON ((-75.13314674099999 40.019630884, -75... POLYGON ((-75.13314674099999 40.019630884, -75... POLYGON ((-75.13348943099999 40.018080355, -75...
Service Name Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection
index_right 3113 3119 3117 3125 8721
SEG_ID 640566 640606 640553 640552 640725
STCL2_ID 4186 4226 4173 4172 4345

In [4]:
philly311_gpd = philly311_gpd.drop('Media URL', 1)
philly311_gpd['Zipcode'] = philly311_gpd['Zipcode'].fillna(0)
philly311_gpd['Service Notice'] = philly311_gpd['Service Notice'].fillna('-')
philly_311_filtered = philly311_gpd[philly311_gpd['Requested Date/Time'].str.contains('2015|2016', na=False)]

In [5]:
philly_311_filtered = philly311_gpd[philly311_gpd['Requested Date/Time'].str.contains('2015|2016', na=False)]

In [32]:
philly_311_filtered[pd.isnull(philly_311_filtered['Latitude'])==0].to_csv('../philly/311_Requests_2015-16.csv')

In [21]:
print(philly_311_filtered.shape)
print(philly_311_filtered[isnan(philly_311_filtered['Latitude'])==0].shape)


(1084720, 17)
(323385, 17)

In [5]:
philly311_filtered = philly_311_filtered[isnan(philly_311_filtered['Latitude'])==0]
philly311_filtered.head().T


Out[5]:
0 1 2 3 4
Address 4500 N LAWRENCE ST 4400 N LAWRENCE ST 435 W CAREY ST 3255 N 11TH ST 6500 W WALNUT PARK DR
Agency Responsible Streets Department Streets Department Streets Department Streets Department Streets Department
Census Tracts 2010 - 2013 65 337 337 196 304
Expected Date/Time 08/21/2016 08:00:00 PM 08/21/2016 08:00:00 PM 08/21/2016 08:00:00 PM 08/24/2016 08:00:00 PM 08/21/2016 08:00:00 PM
Latitude 40.0196 40.0181 40.0084 40.0021 40.0501
Location POINT (-75.133247 40.019631) POINT (-75.133589 40.01808) POINT (-75.135892 40.008402) POINT (-75.147295 40.002129) POINT (-75.120826 40.050084)
Longitude -75.1332 -75.1336 -75.1359 -75.1473 -75.1208
Requested Date/Time 08/18/2016 06:32:14 PM 08/18/2016 06:35:32 PM 08/18/2016 09:43:32 AM 08/18/2016 10:19:40 AM 08/18/2016 12:36:47 PM
Service Code SR-ST03 SR-ST03 SR-ST03 SR-ST02 SR-ST03
Service Name Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection Illegal Dumping Rubbish/Recyclable Material Collection
Service Notice 2 Business Days 2 Business Days 2 Business Days 5 Business Days 2 Business Days
Service Request ID 10895664 10895669 10894010 10894151 10894702
Status Closed Closed Closed Closed Closed
Updated Date/Time 08/22/2016 06:15:23 AM 08/22/2016 06:15:26 AM 08/22/2016 06:15:29 AM 08/22/2016 06:15:32 AM 08/22/2016 06:15:35 AM
Zipcode 19140 19140 0 19140 0
Zipcodes 41 41 41 41 9
geometry POINT (-75.13324674099999 40.019630884) POINT (-75.13358943099999 40.018080355) POINT (-75.135892102 40.008401585) POINT (-75.14729510799999 40.002128888) POINT (-75.12082581 40.050084238)

In [23]:
philly311_filtered.shape


Out[23]:
(323385, 17)

In [10]:
philly_311_filtered.head().T


Out[10]:
0 1 2 3 4
Address 4500 N LAWRENCE ST 4400 N LAWRENCE ST 435 W CAREY ST 3255 N 11TH ST 6500 W WALNUT PARK DR
Agency Responsible Streets Department Streets Department Streets Department Streets Department Streets Department
Census Tracts 2010 - 2013 65 337 337 196 304
Expected Date/Time 08/21/2016 08:00:00 PM 08/21/2016 08:00:00 PM 08/21/2016 08:00:00 PM 08/24/2016 08:00:00 PM 08/21/2016 08:00:00 PM
Latitude 40.0196 40.0181 40.0084 40.0021 40.0501
Location POINT (-75.133247 40.019631) POINT (-75.133589 40.01808) POINT (-75.135892 40.008402) POINT (-75.147295 40.002129) POINT (-75.120826 40.050084)
Longitude -75.1332 -75.1336 -75.1359 -75.1473 -75.1208
Requested Date/Time 08/18/2016 06:32:14 PM 08/18/2016 06:35:32 PM 08/18/2016 09:43:32 AM 08/18/2016 10:19:40 AM 08/18/2016 12:36:47 PM
Service Code SR-ST03 SR-ST03 SR-ST03 SR-ST02 SR-ST03
Service Name Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection Illegal Dumping Rubbish/Recyclable Material Collection
Service Notice 2 Business Days 2 Business Days 2 Business Days 5 Business Days 2 Business Days
Service Request ID 10895664 10895669 10894010 10894151 10894702
Status Closed Closed Closed Closed Closed
Updated Date/Time 08/22/2016 06:15:23 AM 08/22/2016 06:15:26 AM 08/22/2016 06:15:29 AM 08/22/2016 06:15:32 AM 08/22/2016 06:15:35 AM
Zipcode 19140 19140 0 19140 0
Zipcodes 41 41 41 41 9
geometry POINT (-75.13324674099999 40.019630884) POINT (-75.13358943099999 40.018080355) POINT (-75.135892102 40.008401585) POINT (-75.14729510799999 40.002128888) POINT (-75.12082581 40.050084238)

In [6]:
philly_seg_buffer_gp = street_gpd[['SEG_ID','STCL2_ID','geometry', 'SHAPE_LEN']].copy()
philly_seg_buffer_gp.geometry = philly_seg_buffer_gp.buffer(0.0001)
philly_311_filtered_buffer_gpd = philly311_filtered[['Service Request ID', 'geometry','Service Name', 'Requested Date/Time']].dropna().copy()
philly_311_filtered_buffer_gpd.geometry = philly_311_filtered_buffer_gpd.buffer(0.0001)
joined_filtered = sjoin(philly_311_filtered_buffer_gpd, philly_seg_buffer_gp, how='left', op='intersects')

In [25]:
gb = joined_filtered.reset_index().groupby('SEG_ID').count()

print(gb[gb['Service Request ID']==0].shape[0], ' points spatially match no segment')
print(gb[gb['Service Request ID']==1].shape[0], ' points spatially match 1 segment')
print(gb[gb['Service Request ID']>1].shape[0], ' points spatially match multiple segments')


0  points spatially match no segment
1610  points spatially match 1 segment
37278  points spatially match multiple segments

In [27]:
print(joined_filtered.shape)
joined_filtered.head().T


(649764, 8)
Out[27]:
0 0 0 0 1
Service Request ID 10895664 10895664 10895664 10895664 10895669
geometry POLYGON ((-75.13314674099999 40.019630884, -75... POLYGON ((-75.13314674099999 40.019630884, -75... POLYGON ((-75.13314674099999 40.019630884, -75... POLYGON ((-75.13314674099999 40.019630884, -75... POLYGON ((-75.13348943099999 40.018080355, -75...
Service Name Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection
Requested Date/Time 08/18/2016 06:32:14 PM 08/18/2016 06:32:14 PM 08/18/2016 06:32:14 PM 08/18/2016 06:32:14 PM 08/18/2016 06:35:32 PM
index_right 3113 3119 3117 3125 8721
SEG_ID 640566 640606 640553 640552 640725
STCL2_ID 4186 4226 4173 4172 4345
SHAPE_LEN 262.761 140.018 266.558 274.852 166.097

In [27]:
unique_agencies = set()
for agency in joined['Agency Responsible']:
    unique_agencies.add(agency)
print(len(unique_agencies), unique_agencies)


9 {'Streets Department', 'Parks & Recreation', 'Water Department (PWD)', 'Philly311 Contact Center', 'Fire Department', None, 'License & Inspections', 'Community Life Improvement Program', 'Police Department'}

In [28]:
unique_services = set()
for service in joined['Service Name']:
    unique_services.add(service)
print(len(unique_services), unique_services)


51 {'Building Construction', 'Line Striping', 'Traffic (Other)', 'Zoning Business', 'Stop Sign Repair', 'Inlet Cleaning', 'Parks and Rec Safety and Maintenance', 'Abandoned Bike', 'Newsstand/Outdoor Café', 'Dangerous Sidewalk', 'Zoning Residential', 'Miscellaneous', 'Daycare Residential or Commercial', 'Street Trees', 'Hydrant Request', 'Graffiti Removal', 'Sanitation / Dumpster Violation', 'Manhole Cover', 'Maintenance Residential or Commercial', 'Illegal Dumping', 'Building Dangerous', 'Police Complaint', 'Fire Residential or Commercial', 'Abandoned Vehicle', 'Complaint (Streets)', 'Boarding Room House', 'Smoke Detector', 'Other Dangerous', 'Tree Dangerous', 'Vacant Lot Clean-Up', 'Information Request', 'Street Defect', 'Rubbish/Recyclable Material Collection', 'License Residential', 'Newsstand Outdoor Cafe', 'Complaints against Fire or EMS', 'No Heat Residential', 'Traffic Signal Emergency', 'Emergency Air Conditioning', 'Other (Streets)', 'Dead Animal in Street', 'Construction Site Task Force', 'Alley Light Outage', 'Shoveling', 'Hydrant Knocked Down (No Water)', 'Street Paving', 'Vacant House or Commercial', 'Salting', 'Street Light Outage', 'Infestation Residential', 'No Heat (Residential)'}

In [7]:
unique_services = set()
for service in joined_filtered['Service Name']:
    unique_services.add(service)
print(len(unique_services), unique_services)


51 {'Police Complaint', 'Shoveling', 'No Heat (Residential)', 'Street Trees', 'Stop Sign Repair', 'Infestation Residential', 'Street Paving', 'Traffic (Other)', 'Manhole Cover', 'Line Striping', 'Street Defect', 'Illegal Dumping', 'Vacant Lot Clean-Up', 'Other Dangerous', 'Street Light Outage', 'Other (Streets)', 'No Heat Residential', 'Newsstand/Outdoor Café', 'Dangerous Sidewalk', 'Zoning Residential', 'Information Request', 'Miscellaneous', 'Abandoned Vehicle', 'Hydrant Request', 'Building Construction', 'Complaint (Streets)', 'Newsstand Outdoor Cafe', 'Alley Light Outage', 'Zoning Business', 'Traffic Signal Emergency', 'Hydrant Knocked Down (No Water)', 'Salting', 'Complaints against Fire or EMS', 'Fire Residential or Commercial', 'Emergency Air Conditioning', 'Construction Site Task Force', 'Sanitation / Dumpster Violation', 'Parks and Rec Safety and Maintenance', 'Boarding Room House', 'Maintenance Residential or Commercial', 'Abandoned Bike', 'License Residential', 'Daycare Residential or Commercial', 'Inlet Cleaning', 'Rubbish/Recyclable Material Collection', 'Vacant House or Commercial', 'Building Dangerous', 'Graffiti Removal', 'Dead Animal in Street', 'Tree Dangerous', 'Smoke Detector'}

In [32]:
philly_311_statistic = joined[['SEG_ID', 'Agency Responsible', 'Service Name']].dropna()

In [33]:
print(philly_311_statistic.shape)
philly_311_statistic.head().T


(661603, 3)
Out[33]:
0 0 0 0 1
SEG_ID 640566 640606 640553 640552 640725
Agency Responsible Streets Department Streets Department Streets Department Streets Department Streets Department
Service Name Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection

In [8]:
philly_311_filtered_statistic = joined_filtered[['SEG_ID', 'Service Name', 'Requested Date/Time', 'SHAPE_LEN']]
print(philly_311_filtered_statistic.shape)
philly_311_filtered_statistic.head().T


(649764, 4)
Out[8]:
0 0 0 0 1
SEG_ID 640566 640606 640553 640552 640725
Service Name Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection
Requested Date/Time 08/18/2016 06:32:14 PM 08/18/2016 06:32:14 PM 08/18/2016 06:32:14 PM 08/18/2016 06:32:14 PM 08/18/2016 06:35:32 PM
SHAPE_LEN 262.761 140.018 266.558 274.852 166.097

In [31]:
philly_311_filtered_statistic.drop_duplicates().shape


Out[31]:
(649144, 4)

In [34]:
philly_311_filtered_statistic.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 649764 entries, 0 to 1110622
Data columns (total 4 columns):
SEG_ID                 649350 non-null float64
Service Name           649764 non-null object
Requested Date/Time    649764 non-null object
SHAPE_LEN              649350 non-null float64
dtypes: float64(2), object(2)
memory usage: 24.8+ MB

In [9]:
from pandas.tseries.resample import TimeGrouper

In [10]:
philly_311_filtered_statistic['Requested Date/Time'] = pd.DatetimeIndex(philly_311_filtered_statistic['Requested Date/Time'])
philly_311_filtered_statistic.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 649764 entries, 0 to 1110622
Data columns (total 4 columns):
SEG_ID                 649350 non-null float64
Service Name           649764 non-null object
Requested Date/Time    649764 non-null datetime64[ns]
SHAPE_LEN              649350 non-null float64
dtypes: datetime64[ns](1), float64(2), object(1)
memory usage: 24.8+ MB
//anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':

In [45]:
philly_311_filtered_statistic.head().T


Out[45]:
0 0 0 0 1
SEG_ID 640566 640606 640553 640552 640725
Service Name Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection Rubbish/Recyclable Material Collection
Requested Date/Time 2016-08-18 18:32:14 2016-08-18 18:32:14 2016-08-18 18:32:14 2016-08-18 18:32:14 2016-08-18 18:35:32
SHAPE_LEN 262.761 140.018 266.558 274.852 166.097

In [47]:
philly_311_filtered_statistic['Requested Date/Time'].dt.month


Out[47]:
0           8
0           8
0           8
0           8
1           8
1           8
1           8
1           8
2           8
3           8
4           8
5           8
6           8
7           9
7           9
7           9
7           9
8           8
8           8
8           8
9           8
9           8
9           8
10          8
11          8
11          8
11          8
11          8
12          8
13          8
           ..
1110606     8
1110607     8
1110608     6
1110608     6
1110608     6
1110608     6
1110609     8
1110609     8
1110609     8
1110609     8
1110610     8
1110611     8
1110611     8
1110611     8
1110611     8
1110612     6
1110613     8
1110614     8
1110615     8
1110616     8
1110617     2
1110618     6
1110618     6
1110618     6
1110620    10
1110621    11
1110622     8
1110622     8
1110622     8
1110622     8
Name: Requested Date/Time, dtype: int64

In [34]:
philly_311_statistic.groupby(['SEG_ID', 'Agency Responsible', 'Service Name']).size().reset_index()


Out[34]:
SEG_ID Agency Responsible Service Name 0
0 100006.0 Community Life Improvement Program Vacant Lot Clean-Up 1
1 100006.0 License & Inspections Maintenance Residential or Commercial 2
2 100006.0 Police Department Abandoned Vehicle 1
3 100006.0 Streets Department Illegal Dumping 6
4 100007.0 Streets Department Illegal Dumping 2
5 100007.0 Streets Department Rubbish/Recyclable Material Collection 1
6 100008.0 Community Life Improvement Program Vacant Lot Clean-Up 1
7 100008.0 Streets Department Illegal Dumping 5
8 100008.0 Streets Department Salting 1
9 100008.0 Streets Department Street Light Outage 6
10 100009.0 Police Department Abandoned Vehicle 1
11 100009.0 Streets Department Illegal Dumping 1
12 100009.0 Streets Department Street Defect 2
13 100011.0 Streets Department Illegal Dumping 6
14 100012.0 Community Life Improvement Program Vacant Lot Clean-Up 1
15 100012.0 License & Inspections Vacant House or Commercial 1
16 100012.0 License & Inspections Zoning Business 1
17 100012.0 Streets Department Illegal Dumping 3
18 100012.0 Streets Department Street Light Outage 1
19 100015.0 Community Life Improvement Program Vacant Lot Clean-Up 3
20 100015.0 Fire Department Smoke Detector 1
21 100015.0 Parks & Recreation Street Trees 1
22 100015.0 Streets Department Illegal Dumping 2
23 100015.0 Streets Department Rubbish/Recyclable Material Collection 1
24 100016.0 License & Inspections Maintenance Residential or Commercial 1
25 100016.0 Streets Department Illegal Dumping 5
26 100016.0 Streets Department Stop Sign Repair 1
27 100020.0 Parks & Recreation Parks and Rec Safety and Maintenance 1
28 100020.0 Parks & Recreation Street Trees 2
29 100020.0 Streets Department Illegal Dumping 1
... ... ... ... ...
285023 1180077.0 Streets Department Street Light Outage 1
285024 1180077.0 Streets Department Traffic (Other) 1
285025 1180080.0 Streets Department Street Defect 2
285026 1180080.0 Streets Department Street Light Outage 2
285027 1180081.0 Parks & Recreation Street Trees 2
285028 1180081.0 Police Department Abandoned Vehicle 1
285029 1180081.0 Streets Department Dangerous Sidewalk 1
285030 1180081.0 Streets Department Illegal Dumping 1
285031 1180081.0 Streets Department Salting 2
285032 1180081.0 Streets Department Street Light Outage 2
285033 1180082.0 License & Inspections Construction Site Task Force 1
285034 1180082.0 Parks & Recreation Parks and Rec Safety and Maintenance 1
285035 1180082.0 Police Department Abandoned Vehicle 3
285036 1180082.0 Streets Department Illegal Dumping 1
285037 1180082.0 Streets Department Rubbish/Recyclable Material Collection 1
285038 1180082.0 Streets Department Salting 1
285039 1180082.0 Streets Department Street Light Outage 2
285040 1180083.0 Police Department Abandoned Vehicle 1
285041 1180085.0 License & Inspections Maintenance Residential or Commercial 2
285042 1180094.0 Streets Department Rubbish/Recyclable Material Collection 1
285043 1180094.0 Streets Department Street Light Outage 1
285044 1180097.0 Streets Department Rubbish/Recyclable Material Collection 1
285045 1180098.0 Streets Department Rubbish/Recyclable Material Collection 1
285046 1180100.0 Streets Department Street Light Outage 4
285047 1180101.0 Streets Department Rubbish/Recyclable Material Collection 1
285048 1180101.0 Streets Department Street Light Outage 4
285049 1180102.0 Streets Department Rubbish/Recyclable Material Collection 2
285050 1180102.0 Streets Department Street Light Outage 4
285051 1180103.0 Streets Department Rubbish/Recyclable Material Collection 2
285052 1180104.0 Streets Department Rubbish/Recyclable Material Collection 2

285053 rows × 4 columns


In [35]:
philly_311_statistic.groupby(['Agency Responsible', 'Service Name']).size().reset_index()


Out[35]:
Agency Responsible Service Name 0
0 Community Life Improvement Program Building Dangerous 1
1 Community Life Improvement Program Graffiti Removal 48511
2 Community Life Improvement Program Illegal Dumping 1
3 Community Life Improvement Program License Residential 1
4 Community Life Improvement Program Maintenance Residential or Commercial 1
5 Community Life Improvement Program Rubbish/Recyclable Material Collection 1
6 Community Life Improvement Program Sanitation / Dumpster Violation 1
7 Community Life Improvement Program Street Defect 7
8 Community Life Improvement Program Vacant Lot Clean-Up 23014
9 Fire Department Complaints against Fire or EMS 266
10 Fire Department Smoke Detector 8151
11 License & Inspections Alley Light Outage 2
12 License & Inspections Boarding Room House 1951
13 License & Inspections Building Construction 7749
14 License & Inspections Building Dangerous 9535
15 License & Inspections Complaint (Streets) 1
16 License & Inspections Construction Site Task Force 12169
17 License & Inspections Dangerous Sidewalk 5
18 License & Inspections Daycare Residential or Commercial 170
19 License & Inspections Dead Animal in Street 4
20 License & Inspections Emergency Air Conditioning 2
21 License & Inspections Fire Residential or Commercial 4140
22 License & Inspections Illegal Dumping 70
23 License & Inspections Infestation Residential 2515
24 License & Inspections Information Request 63
25 License & Inspections Inlet Cleaning 6
26 License & Inspections License Residential 3922
27 License & Inspections Maintenance Residential or Commercial 75609
28 License & Inspections No Heat (Residential) 4111
29 License & Inspections No Heat Residential 16
... ... ... ...
51 Philly311 Contact Center Street Light Outage 3
52 Police Department Abandoned Vehicle 47607
53 Police Department Information Request 37
54 Police Department Police Complaint 645
55 Streets Department Abandoned Bike 717
56 Streets Department Alley Light Outage 4717
57 Streets Department Complaint (Streets) 5368
58 Streets Department Dangerous Sidewalk 5376
59 Streets Department Dead Animal in Street 3960
60 Streets Department Illegal Dumping 66562
61 Streets Department Information Request 65
62 Streets Department Line Striping 1684
63 Streets Department Manhole Cover 2393
64 Streets Department Newsstand Outdoor Cafe 251
65 Streets Department Newsstand/Outdoor Café 11
66 Streets Department Other (Streets) 10210
67 Streets Department Rubbish/Recyclable Material Collection 65723
68 Streets Department Salting 29585
69 Streets Department Sanitation / Dumpster Violation 14832
70 Streets Department Shoveling 3604
71 Streets Department Stop Sign Repair 5622
72 Streets Department Street Defect 51286
73 Streets Department Street Light Outage 27539
74 Streets Department Street Paving 2453
75 Streets Department Traffic (Other) 7139
76 Streets Department Traffic Signal Emergency 28414
77 Water Department (PWD) Hydrant Knocked Down (No Water) 97
78 Water Department (PWD) Hydrant Request 1843
79 Water Department (PWD) Information Request 103
80 Water Department (PWD) Inlet Cleaning 8139

81 rows × 3 columns


In [11]:
philly_311_filtered_typemerge = philly_311_filtered_statistic.groupby(['SEG_ID', philly_311_filtered_statistic['Requested Date/Time'].dt.month, 'Service Name']).size().reset_index()

In [12]:
philly_311_filtered_typemerge.columns = ['SEG_ID', 'Month', 'Service Name', 'count']
print(philly_311_filtered_typemerge.shape)
philly_311_filtered_typemerge.head().T


(479157, 4)
Out[12]:
0 1 2 3 4
SEG_ID 100006 100006 100006 100006 100006
Month 2 3 4 5 6
Service Name Illegal Dumping Illegal Dumping Illegal Dumping Maintenance Residential or Commercial Vacant Lot Clean-Up
count 1 3 1 1 1

In [16]:
philly_311_filtered_details = philly_311_filtered_statistic.groupby(['SEG_ID', philly_311_filtered_statistic['Requested Date/Time'].dt.month, 'SHAPE_LEN']).size().reset_index()

In [17]:
philly_311_filtered_details.columns = ['SEG_ID', 'Month', 'SHAPE_LEN', 'monthly_311_request_count']
print(philly_311_filtered_details.shape)
philly_311_filtered_details.head().T


(259881, 4)
Out[17]:
0 1 2 3 4
SEG_ID 100006.000000 100006.000000 100006.000000 100006.000000 100006.000000
Month 2.000000 3.000000 4.000000 5.000000 6.000000
SHAPE_LEN 735.818883 735.818883 735.818883 735.818883 735.818883
monthly_311_request_count 1.000000 3.000000 1.000000 1.000000 1.000000

In [18]:
philly_311_filtered_details['monthly_311_request_count/len'] = philly_311_filtered_details['monthly_311_request_count']/philly_311_filtered_details['SHAPE_LEN']

In [19]:
print(philly_311_filtered_details.shape)
philly_311_filtered_details.head().T


(259881, 5)
Out[19]:
0 1 2 3 4
SEG_ID 100006.000000 100006.000000 100006.000000 100006.000000 100006.000000
Month 2.000000 3.000000 4.000000 5.000000 6.000000
SHAPE_LEN 735.818883 735.818883 735.818883 735.818883 735.818883
monthly_311_request_count 1.000000 3.000000 1.000000 1.000000 1.000000
monthly_311_request_count/len 0.001359 0.004077 0.001359 0.001359 0.001359

In [20]:
for service in unique_services:
    if(service is not None):
        service_data = philly_311_filtered_typemerge[philly_311_filtered_typemerge['Service Name']==service]
        service_count = service + '_count'
        service_count_len = service_count + '/len'
        service_data_count = service_data[['SEG_ID', 'Month', 'count']]
        service_data_count.columns = ['SEG_ID', 'Month', service_count]
        philly_311_filtered_details = philly_311_filtered_details.merge(service_data_count, left_on = ['SEG_ID', 'Month'], right_on = ['SEG_ID', 'Month'], right_index=False, how='left')
        philly_311_filtered_details[service_count_len] = philly_311_filtered_details[service_count]/philly_311_filtered_details['SHAPE_LEN']

In [22]:
philly_311_filtered_details = philly_311_filtered_details.fillna(0)
print(philly_311_filtered_details.shape)
philly_311_filtered_details.head().T


(259881, 107)
Out[22]:
0 1 2 3 4
SEG_ID 100006.000000 100006.000000 100006.000000 100006.000000 100006.000000
Month 2.000000 3.000000 4.000000 5.000000 6.000000
SHAPE_LEN 735.818883 735.818883 735.818883 735.818883 735.818883
monthly_311_request_count 1.000000 3.000000 1.000000 1.000000 1.000000
monthly_311_request_count/len 0.001359 0.004077 0.001359 0.001359 0.001359
Police Complaint_count 0.000000 0.000000 0.000000 0.000000 0.000000
Police Complaint_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Shoveling_count 0.000000 0.000000 0.000000 0.000000 0.000000
Shoveling_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
No Heat (Residential)_count 0.000000 0.000000 0.000000 0.000000 0.000000
No Heat (Residential)_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Street Trees_count 0.000000 0.000000 0.000000 0.000000 0.000000
Street Trees_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Stop Sign Repair_count 0.000000 0.000000 0.000000 0.000000 0.000000
Stop Sign Repair_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Infestation Residential_count 0.000000 0.000000 0.000000 0.000000 0.000000
Infestation Residential_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Street Paving_count 0.000000 0.000000 0.000000 0.000000 0.000000
Street Paving_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Traffic (Other)_count 0.000000 0.000000 0.000000 0.000000 0.000000
Traffic (Other)_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Manhole Cover_count 0.000000 0.000000 0.000000 0.000000 0.000000
Manhole Cover_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Line Striping_count 0.000000 0.000000 0.000000 0.000000 0.000000
Line Striping_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Street Defect_count 0.000000 0.000000 0.000000 0.000000 0.000000
Street Defect_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Illegal Dumping_count 1.000000 3.000000 1.000000 0.000000 0.000000
Illegal Dumping_count/len 0.001359 0.004077 0.001359 0.000000 0.000000
Vacant Lot Clean-Up_count 0.000000 0.000000 0.000000 0.000000 1.000000
... ... ... ... ... ...
Sanitation / Dumpster Violation_count 0.000000 0.000000 0.000000 0.000000 0.000000
Sanitation / Dumpster Violation_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Parks and Rec Safety and Maintenance_count 0.000000 0.000000 0.000000 0.000000 0.000000
Parks and Rec Safety and Maintenance_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Boarding Room House_count 0.000000 0.000000 0.000000 0.000000 0.000000
Boarding Room House_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Maintenance Residential or Commercial_count 0.000000 0.000000 0.000000 1.000000 0.000000
Maintenance Residential or Commercial_count/len 0.000000 0.000000 0.000000 0.001359 0.000000
Abandoned Bike_count 0.000000 0.000000 0.000000 0.000000 0.000000
Abandoned Bike_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
License Residential_count 0.000000 0.000000 0.000000 0.000000 0.000000
License Residential_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Daycare Residential or Commercial_count 0.000000 0.000000 0.000000 0.000000 0.000000
Daycare Residential or Commercial_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Inlet Cleaning_count 0.000000 0.000000 0.000000 0.000000 0.000000
Inlet Cleaning_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Rubbish/Recyclable Material Collection_count 0.000000 0.000000 0.000000 0.000000 0.000000
Rubbish/Recyclable Material Collection_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Vacant House or Commercial_count 0.000000 0.000000 0.000000 0.000000 0.000000
Vacant House or Commercial_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Building Dangerous_count 0.000000 0.000000 0.000000 0.000000 0.000000
Building Dangerous_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Graffiti Removal_count 0.000000 0.000000 0.000000 0.000000 0.000000
Graffiti Removal_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Dead Animal in Street_count 0.000000 0.000000 0.000000 0.000000 0.000000
Dead Animal in Street_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Tree Dangerous_count 0.000000 0.000000 0.000000 0.000000 0.000000
Tree Dangerous_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Smoke Detector_count 0.000000 0.000000 0.000000 0.000000 0.000000
Smoke Detector_count/len 0.000000 0.000000 0.000000 0.000000 0.000000

107 rows × 5 columns


In [23]:
philly_311_filtered_details.drop_duplicates().shape


Out[23]:
(259881, 107)

In [26]:
from statsmodels import robust

In [27]:
for column in philly_311_filtered_details.columns:
    if('_count' in column):
        print(column, robust.mad(philly_311_filtered_details[column]))


monthly_311_request_count 1.48260221851
monthly_311_request_count/len 0.00493050307814
Police Complaint_count 0.0
Police Complaint_count/len 0.0
Shoveling_count 0.0
Shoveling_count/len 0.0
No Heat (Residential)_count 0.0
No Heat (Residential)_count/len 0.0
Street Trees_count 0.0
Street Trees_count/len 0.0
Stop Sign Repair_count 0.0
Stop Sign Repair_count/len 0.0
Infestation Residential_count 0.0
Infestation Residential_count/len 0.0
Street Paving_count 0.0
Street Paving_count/len 0.0
Traffic (Other)_count 0.0
Traffic (Other)_count/len 0.0
Manhole Cover_count 0.0
Manhole Cover_count/len 0.0
Line Striping_count 0.0
Line Striping_count/len 0.0
Street Defect_count 0.0
Street Defect_count/len 0.0
Illegal Dumping_count 0.0
Illegal Dumping_count/len 0.0
Vacant Lot Clean-Up_count 0.0
Vacant Lot Clean-Up_count/len 0.0
Other Dangerous_count 0.0
Other Dangerous_count/len 0.0
Street Light Outage_count 0.0
Street Light Outage_count/len 0.0
Other (Streets)_count 0.0
Other (Streets)_count/len 0.0
No Heat Residential_count 0.0
No Heat Residential_count/len 0.0
Newsstand/Outdoor Café_count 0.0
Newsstand/Outdoor Café_count/len 0.0
Dangerous Sidewalk_count 0.0
Dangerous Sidewalk_count/len 0.0
Zoning Residential_count 0.0
Zoning Residential_count/len 0.0
Information Request_count 0.0
Information Request_count/len 0.0
Miscellaneous_count 0.0
Miscellaneous_count/len 0.0
Abandoned Vehicle_count 0.0
Abandoned Vehicle_count/len 0.0
Hydrant Request_count 0.0
Hydrant Request_count/len 0.0
Building Construction_count 0.0
Building Construction_count/len 0.0
Complaint (Streets)_count 0.0
Complaint (Streets)_count/len 0.0
Newsstand Outdoor Cafe_count 0.0
Newsstand Outdoor Cafe_count/len 0.0
Alley Light Outage_count 0.0
Alley Light Outage_count/len 0.0
Zoning Business_count 0.0
Zoning Business_count/len 0.0
Traffic Signal Emergency_count 0.0
Traffic Signal Emergency_count/len 0.0
Hydrant Knocked Down (No Water)_count 0.0
Hydrant Knocked Down (No Water)_count/len 0.0
Salting_count 0.0
Salting_count/len 0.0
Complaints against Fire or EMS_count 0.0
Complaints against Fire or EMS_count/len 0.0
Fire Residential or Commercial_count 0.0
Fire Residential or Commercial_count/len 0.0
Emergency Air Conditioning_count 0.0
Emergency Air Conditioning_count/len 0.0
Construction Site Task Force_count 0.0
Construction Site Task Force_count/len 0.0
Sanitation / Dumpster Violation_count 0.0
Sanitation / Dumpster Violation_count/len 0.0
Parks and Rec Safety and Maintenance_count 0.0
Parks and Rec Safety and Maintenance_count/len 0.0
Boarding Room House_count 0.0
Boarding Room House_count/len 0.0
Maintenance Residential or Commercial_count 0.0
Maintenance Residential or Commercial_count/len 0.0
Abandoned Bike_count 0.0
Abandoned Bike_count/len 0.0
License Residential_count 0.0
License Residential_count/len 0.0
Daycare Residential or Commercial_count 0.0
Daycare Residential or Commercial_count/len 0.0
Inlet Cleaning_count 0.0
Inlet Cleaning_count/len 0.0
Rubbish/Recyclable Material Collection_count 0.0
Rubbish/Recyclable Material Collection_count/len 0.0
Vacant House or Commercial_count 0.0
Vacant House or Commercial_count/len 0.0
Building Dangerous_count 0.0
Building Dangerous_count/len 0.0
Graffiti Removal_count 0.0
Graffiti Removal_count/len 0.0
Dead Animal in Street_count 0.0
Dead Animal in Street_count/len 0.0
Tree Dangerous_count 0.0
Tree Dangerous_count/len 0.0
Smoke Detector_count 0.0
Smoke Detector_count/len 0.0

In [42]:
philly_311_filtered_details.mean()


Out[42]:
SEG_ID                                              551829.374098
Month                                                    6.079894
SHAPE_LEN                                              371.222712
monthly_311_request_count                                2.498644
monthly_311_request_count/len                            0.009738
Police Complaint_count                                   0.002463
Police Complaint_count/len                               0.000009
Shoveling_count                                          0.013899
Shoveling_count/len                                      0.000055
No Heat (Residential)_count                              0.014753
No Heat (Residential)_count/len                          0.000053
Street Trees_count                                       0.045040
Street Trees_count/len                                   0.000158
Stop Sign Repair_count                                   0.021121
Stop Sign Repair_count/len                               0.000086
Infestation Residential_count                            0.009431
Infestation Residential_count/len                        0.000032
Street Paving_count                                      0.009285
Street Paving_count/len                                  0.000034
Traffic (Other)_count                                    0.026993
Traffic (Other)_count/len                                0.000110
Manhole Cover_count                                      0.008923
Manhole Cover_count/len                                  0.000035
Line Striping_count                                      0.006368
Line Striping_count/len                                  0.000024
Street Defect_count                                      0.194793
Street Defect_count/len                                  0.000770
Illegal Dumping_count                                    0.251419
Illegal Dumping_count/len                                0.001091
Vacant Lot Clean-Up_count                                0.087652
                                                        ...      
Sanitation / Dumpster Violation_count                    0.056014
Sanitation / Dumpster Violation_count/len                0.000223
Parks and Rec Safety and Maintenance_count               0.020059
Parks and Rec Safety and Maintenance_count/len           0.000071
Boarding Room House_count                                0.007353
Boarding Room House_count/len                            0.000028
Maintenance Residential or Commercial_count              0.286320
Maintenance Residential or Commercial_count/len          0.000992
Abandoned Bike_count                                     0.002717
Abandoned Bike_count/len                                 0.000014
License Residential_count                                0.014934
License Residential_count/len                            0.000053
Daycare Residential or Commercial_count                  0.000635
Daycare Residential or Commercial_count/len              0.000002
Inlet Cleaning_count                                     0.030468
Inlet Cleaning_count/len                                 0.000136
Rubbish/Recyclable Material Collection_count             0.248425
Rubbish/Recyclable Material Collection_count/len         0.000982
Vacant House or Commercial_count                         0.076685
Vacant House or Commercial_count/len                     0.000278
Building Dangerous_count                                 0.036055
Building Dangerous_count/len                             0.000139
Graffiti Removal_count                                   0.180991
Graffiti Removal_count/len                               0.000811
Dead Animal in Street_count                              0.014872
Dead Animal in Street_count/len                          0.000054
Tree Dangerous_count                                     0.002809
Tree Dangerous_count/len                                 0.000009
Smoke Detector_count                                     0.030660
Smoke Detector_count/len                                 0.000098
dtype: float64

In [44]:
philly_311_filtered_details.mad()


Out[44]:
SEG_ID                                              169294.574445
Month                                                    2.765901
SHAPE_LEN                                              189.561113
monthly_311_request_count                                1.498016
monthly_311_request_count/len                            0.006895
Police Complaint_count                                   0.004914
Police Complaint_count/len                               0.000018
Shoveling_count                                          0.027498
Shoveling_count/len                                      0.000109
No Heat (Residential)_count                              0.029116
No Heat (Residential)_count/len                          0.000104
Street Trees_count                                       0.086701
Street Trees_count/len                                   0.000304
Stop Sign Repair_count                                   0.041538
Stop Sign Repair_count/len                               0.000169
Infestation Residential_count                            0.018696
Infestation Residential_count/len                        0.000064
Street Paving_count                                      0.018414
Street Paving_count/len                                  0.000068
Traffic (Other)_count                                    0.052659
Traffic (Other)_count/len                                0.000215
Manhole Cover_count                                      0.017703
Manhole Cover_count/len                                  0.000070
Line Striping_count                                      0.012663
Line Striping_count/len                                  0.000048
Street Defect_count                                      0.336872
Street Defect_count/len                                  0.001333
Illegal Dumping_count                                    0.414588
Illegal Dumping_count/len                                0.001801
Vacant Lot Clean-Up_count                                0.164458
                                                        ...      
Sanitation / Dumpster Violation_count                    0.107073
Sanitation / Dumpster Violation_count/len                0.000426
Parks and Rec Safety and Maintenance_count               0.039463
Parks and Rec Safety and Maintenance_count/len           0.000140
Boarding Room House_count                                0.014605
Boarding Room House_count/len                            0.000055
Maintenance Residential or Commercial_count              0.453083
Maintenance Residential or Commercial_count/len          0.001571
Abandoned Bike_count                                     0.005420
Abandoned Bike_count/len                                 0.000029
License Residential_count                                0.029460
License Residential_count/len                            0.000104
Daycare Residential or Commercial_count                  0.001269
Daycare Residential or Commercial_count/len              0.000004
Inlet Cleaning_count                                     0.059361
Inlet Cleaning_count/len                                 0.000265
Rubbish/Recyclable Material Collection_count             0.411170
Rubbish/Recyclable Material Collection_count/len         0.001625
Vacant House or Commercial_count                         0.143834
Vacant House or Commercial_count/len                     0.000522
Building Dangerous_count                                 0.069842
Building Dangerous_count/len                             0.000269
Graffiti Removal_count                                   0.319255
Graffiti Removal_count/len                               0.001432
Dead Animal in Street_count                              0.029350
Dead Animal in Street_count/len                          0.000107
Tree Dangerous_count                                     0.005603
Tree Dangerous_count/len                                 0.000018
Smoke Detector_count                                     0.059701
Smoke Detector_count/len                                 0.000191
dtype: float64

In [54]:
philly_311_filtered_details.std()


Out[54]:
SEG_ID                                              202007.886754
Month                                                    3.239961
SHAPE_LEN                                              311.557862
monthly_311_request_count                                2.172181
monthly_311_request_count/len                            0.012373
Police Complaint_count                                   0.051618
Police Complaint_count/len                               0.000244
Shoveling_count                                          0.155597
Shoveling_count/len                                      0.000752
No Heat (Residential)_count                              0.138143
No Heat (Residential)_count/len                          0.000619
Street Trees_count                                       0.256032
Street Trees_count/len                                   0.001110
Stop Sign Repair_count                                   0.185982
Stop Sign Repair_count/len                               0.001107
Infestation Residential_count                            0.103648
Infestation Residential_count/len                        0.000457
Street Paving_count                                      0.108982
Street Paving_count/len                                  0.000518
Traffic (Other)_count                                    0.178901
Traffic (Other)_count/len                                0.001207
Manhole Cover_count                                      0.105094
Manhole Cover_count/len                                  0.000520
Line Striping_count                                      0.087780
Line Striping_count/len                                  0.000432
Street Defect_count                                      0.618036
Street Defect_count/len                                  0.003442
Illegal Dumping_count                                    0.710043
Illegal Dumping_count/len                                0.004063
Vacant Lot Clean-Up_count                                0.423710
                                                        ...      
Sanitation / Dumpster Violation_count                    0.300362
Sanitation / Dumpster Violation_count/len                0.001528
Parks and Rec Safety and Maintenance_count               0.177224
Parks and Rec Safety and Maintenance_count/len           0.000923
Boarding Room House_count                                0.091398
Boarding Room House_count/len                            0.000453
Maintenance Residential or Commercial_count              0.687932
Maintenance Residential or Commercial_count/len          0.003002
Abandoned Bike_count                                     0.058260
Abandoned Bike_count/len                                 0.000436
License Residential_count                                0.143198
License Residential_count/len                            0.000608
Daycare Residential or Commercial_count                  0.025493
Daycare Residential or Commercial_count/len              0.000104
Inlet Cleaning_count                                     0.208068
Inlet Cleaning_count/len                                 0.001244
Rubbish/Recyclable Material Collection_count             0.668857
Rubbish/Recyclable Material Collection_count/len         0.003465
Vacant House or Commercial_count                         0.340648
Vacant House or Commercial_count/len                     0.001537
Building Dangerous_count                                 0.217325
Building Dangerous_count/len                             0.001093
Graffiti Removal_count                                   0.632787
Graffiti Removal_count/len                               0.003814
Dead Animal in Street_count                              0.136237
Dead Animal in Street_count/len                          0.000678
Tree Dangerous_count                                     0.055758
Tree Dangerous_count/len                                 0.000214
Smoke Detector_count                                     0.221599
Smoke Detector_count/len                                 0.000826
dtype: float64

In [53]:
col_median = philly_311_filtered_details.median()
col_mad = philly_311_filtered_details.mad()
for column in philly_311_filtered_details.columns:
    if('_count' in column and 'len' not in column):
#         col_mad = robust.mad(philly_311_filtered_details[column])
        print(column, col_median[column], col_mad[column])
        outliers = philly_311_filtered_details[philly_311_filtered_details[column]>(col_median[column] + 3*col_mad[column])][[column]]
        print('shape:', outliers.shape, 'min:', min(outliers[column]), 'max:', max(outliers[column]))


monthly_311_request_count 2.0 1.49801579696
shape: (13356, 1) min: 7 max: 79
Police Complaint_count 0.0 0.00491365661968
shape: (616, 1) min: 1.0 max: 3.0
Shoveling_count 0.0 0.0274975241036
shape: (2803, 1) min: 1.0 max: 21.0
No Heat (Residential)_count 0.0 0.029116497819
shape: (3429, 1) min: 1.0 max: 11.0
Street Trees_count 0.0 0.0867005017647
shape: (9749, 1) min: 1.0 max: 10.0
Stop Sign Repair_count 0.0 0.0415382651126
shape: (4332, 1) min: 1.0 max: 9.0
Infestation Residential_count 0.0 0.0186963409589
shape: (2289, 1) min: 1.0 max: 6.0
Street Paving_count 0.0 0.0184142639036
shape: (2180, 1) min: 1.0 max: 8.0
Traffic (Other)_count 0.0 0.0526592396162
shape: (6388, 1) min: 1.0 max: 5.0
Manhole Cover_count 0.0 0.0177027613045
shape: (2095, 1) min: 1.0 max: 5.0
Line Striping_count 0.0 0.0126625945567
shape: (1510, 1) min: 1.0 max: 5.0
Street Defect_count 0.0 0.336871867497
shape: (9058, 1) min: 2.0 max: 20.0
Illegal Dumping_count 0.0 0.414588085471
shape: (11577, 1) min: 2.0 max: 39.0
Vacant Lot Clean-Up_count 0.0 0.164457860911
shape: (16078, 1) min: 1.0 max: 21.0
Other Dangerous_count 0.0 0.00676677497352
shape: (802, 1) min: 1.0 max: 6.0
Street Light Outage_count 0.0 0.188608829341
shape: (21604, 1) min: 1.0 max: 18.0
Other (Streets)_count 0.0 0.0750782700913
shape: (9132, 1) min: 1.0 max: 9.0
No Heat Residential_count 0.0 6.15649817938e-05
shape: (7, 1) min: 1.0 max: 2.0
Newsstand/Outdoor Café_count 0.0 8.46505469986e-05
shape: (11, 1) min: 1.0 max: 1.0
Dangerous Sidewalk_count 0.0 0.0396699820818
shape: (4667, 1) min: 1.0 max: 6.0
Zoning Residential_count 0.0 0.02863615932
shape: (3446, 1) min: 1.0 max: 7.0
Information Request_count 0.0 0.0655910424386
shape: (7494, 1) min: 1.0 max: 25.0
Miscellaneous_count 0.0 0.0418290602945
shape: (4958, 1) min: 1.0 max: 8.0
Abandoned Vehicle_count 0.0 0.312145489476
shape: (34798, 1) min: 1.0 max: 23.0
Hydrant Request_count 0.0 0.0136864664901
shape: (1536, 1) min: 1.0 max: 7.0
Building Construction_count 0.0 0.0567196594276
shape: (6793, 1) min: 1.0 max: 12.0
Complaint (Streets)_count 0.0 0.0394599096807
shape: (4950, 1) min: 1.0 max: 5.0
Newsstand Outdoor Cafe_count 0.0 0.00188395476985
shape: (210, 1) min: 1.0 max: 3.0
Alley Light Outage_count 0.0 0.0348547981559
shape: (4064, 1) min: 1.0 max: 7.0
Zoning Business_count 0.0 0.0462912336985
shape: (5452, 1) min: 1.0 max: 6.0
Traffic Signal Emergency_count 0.0 0.200746189401
shape: (16242, 1) min: 1.0 max: 32.0
Hydrant Knocked Down (No Water)_count 0.0 0.000715458823375
shape: (92, 1) min: 1.0 max: 2.0
Salting_count 0.0 0.216359928942
shape: (12939, 1) min: 1.0 max: 39.0
Complaints against Fire or EMS_count 0.0 0.00191455836923
shape: (231, 1) min: 1.0 max: 5.0
Fire Residential or Commercial_count 0.0 0.0305753920159
shape: (3804, 1) min: 1.0 max: 6.0
Emergency Air Conditioning_count 0.0 1.53915415775e-05
shape: (2, 1) min: 1.0 max: 1.0
Construction Site Task Force_count 0.0 0.088689266981
shape: (10010, 1) min: 1.0 max: 9.0
Sanitation / Dumpster Violation_count 0.0 0.107073421673
shape: (11494, 1) min: 1.0 max: 15.0
Parks and Rec Safety and Maintenance_count 0.0 0.0394633612716
shape: (4243, 1) min: 1.0 max: 20.0
Boarding Room House_count 0.0 0.0146052648366
shape: (1793, 1) min: 1.0 max: 4.0
Maintenance Residential or Commercial_count 0.0 0.453083343125
shape: (12840, 1) min: 2.0 max: 19.0
Abandoned Bike_count 0.0 0.00541995932444
shape: (636, 1) min: 1.0 max: 4.0
License Residential_count 0.0 0.0294604425
shape: (3542, 1) min: 1.0 max: 16.0
Daycare Residential or Commercial_count 0.0 0.00126901551345
shape: (163, 1) min: 1.0 max: 2.0
Inlet Cleaning_count 0.0 0.0593608483536
shape: (6716, 1) min: 1.0 max: 17.0
Rubbish/Recyclable Material Collection_count 0.0 0.411169530741
shape: (12558, 1) min: 2.0 max: 31.0
Vacant House or Commercial_count 0.0 0.143833873954
shape: (16159, 1) min: 1.0 max: 25.0
Building Dangerous_count 0.0 0.0698418639864
shape: (8174, 1) min: 1.0 max: 7.0
Graffiti Removal_count 0.0 0.319254701063
shape: (30675, 1) min: 1.0 max: 31.0
Dead Animal in Street_count 0.0 0.029350089466
shape: (3445, 1) min: 1.0 max: 5.0
Tree Dangerous_count 0.0 0.00560295342328
shape: (694, 1) min: 1.0 max: 4.0
Smoke Detector_count 0.0 0.0597012463269
shape: (6862, 1) min: 1.0 max: 14.0

In [55]:
col_mean = philly_311_filtered_details.mean()
col_std = philly_311_filtered_details.std()
for column in philly_311_filtered_details.columns:
    if('_count' in column and 'len' not in column):
#         col_mad = robust.mad(philly_311_filtered_details[column])
        print(column, col_mean[column], col_std[column])
        outliers = philly_311_filtered_details[philly_311_filtered_details[column]>(col_mean[column] + 3*col_std[column])][[column]]
        print('shape:', outliers.shape, 'min:', min(outliers[column]), 'max:', max(outliers[column]))


monthly_311_request_count 2.49864360996 2.17218073353
shape: (3954, 1) min: 10 max: 79
Police Complaint_count 0.00246266560464 0.0516178122894
shape: (616, 1) min: 1.0 max: 3.0
Shoveling_count 0.0138986690062 0.155596905932
shape: (2803, 1) min: 1.0 max: 21.0
No Heat (Residential)_count 0.0147529061378 0.138143283835
shape: (3429, 1) min: 1.0 max: 11.0
Street Trees_count 0.0450398451599 0.25603241174
shape: (9749, 1) min: 1.0 max: 10.0
Stop Sign Repair_count 0.0211212054748 0.185981972021
shape: (4332, 1) min: 1.0 max: 9.0
Infestation Residential_count 0.00943123968278 0.103648317891
shape: (2289, 1) min: 1.0 max: 6.0
Street Paving_count 0.0092850189125 0.108981658471
shape: (2180, 1) min: 1.0 max: 8.0
Traffic (Other)_count 0.0269931237759 0.178901390131
shape: (6388, 1) min: 1.0 max: 5.0
Manhole Cover_count 0.00892331490182 0.105093954328
shape: (2095, 1) min: 1.0 max: 5.0
Line Striping_count 0.006368299337 0.0877799938629
shape: (1510, 1) min: 1.0 max: 5.0
Street Defect_count 0.194793001412 0.61803606479
shape: (3251, 1) min: 3.0 max: 20.0
Illegal Dumping_count 0.251418918659 0.710043440373
shape: (4136, 1) min: 3.0 max: 39.0
Vacant Lot Clean-Up_count 0.0876516559502 0.423710380892
shape: (4013, 1) min: 2.0 max: 21.0
Other Dangerous_count 0.0033938610364 0.0658531878688
shape: (802, 1) min: 1.0 max: 6.0
Street Light Outage_count 0.102854768144 0.392816303737
shape: (3589, 1) min: 2.0 max: 18.0
Other (Streets)_count 0.0389062686383 0.216610685336
shape: (9132, 1) min: 1.0 max: 9.0
No Heat Residential_count 3.0783320058e-05 0.00620309201298
shape: (7, 1) min: 1.0 max: 2.0
Newsstand/Outdoor Café_count 4.23270650798e-05 0.00650580020935
shape: (11, 1) min: 1.0 max: 1.0
Dangerous Sidewalk_count 0.0201977058731 0.159205041853
shape: (4667, 1) min: 1.0 max: 6.0
Zoning Residential_count 0.0145104874924 0.13243845806
shape: (3446, 1) min: 1.0 max: 7.0
Information Request_count 0.0337693021037 0.231301174236
shape: (7494, 1) min: 1.0 max: 25.0
Miscellaneous_count 0.0213212970552 0.166412160404
shape: (4958, 1) min: 1.0 max: 8.0
Abandoned Vehicle_count 0.180201707705 0.552667053516
shape: (7971, 1) min: 2.0 max: 23.0
Hydrant Request_count 0.00688391994798 0.09879769511
shape: (1536, 1) min: 1.0 max: 7.0
Building Construction_count 0.0291210207749 0.189370571392
shape: (6793, 1) min: 1.0 max: 12.0
Complaint (Streets)_count 0.0201130517429 0.149158393685
shape: (4950, 1) min: 1.0 max: 5.0
Newsstand Outdoor Cafe_count 0.000942739176777 0.0355679651954
shape: (210, 1) min: 1.0 max: 3.0
Alley Light Outage_count 0.0177042569484 0.15094926549
shape: (4064, 1) min: 1.0 max: 7.0
Zoning Business_count 0.0236415898046 0.172563471605
shape: (5452, 1) min: 1.0 max: 6.0
Traffic Signal Emergency_count 0.107064387162 0.581382970666
shape: (5370, 1) min: 2.0 max: 32.0
Hydrant Knocked Down (No Water)_count 0.000357856095675 0.0191160997808
shape: (92, 1) min: 1.0 max: 2.0
Salting_count 0.11384826132 0.733178217205
shape: (3347, 1) min: 3.0 max: 39.0
Complaints against Fire or EMS_count 0.000958130836806 0.0344687147167
shape: (231, 1) min: 1.0 max: 5.0
Fire Residential or Commercial_count 0.0155147933092 0.131639423396
shape: (3804, 1) min: 1.0 max: 6.0
Emergency Air Conditioning_count 7.69583001451e-06 0.00277413056677
shape: (2, 1) min: 1.0 max: 1.0
Construction Site Task Force_count 0.0461211092769 0.252669237978
shape: (10010, 1) min: 1.0 max: 9.0
Sanitation / Dumpster Violation_count 0.0560140987606 0.300361505321
shape: (11494, 1) min: 1.0 max: 15.0
Parks and Rec Safety and Maintenance_count 0.0200591809328 0.177224330955
shape: (4243, 1) min: 1.0 max: 20.0
Boarding Room House_count 0.00735336557886 0.0913983284837
shape: (1793, 1) min: 1.0 max: 4.0
Maintenance Residential or Commercial_count 0.286319507775 0.687931799406
shape: (4056, 1) min: 3.0 max: 19.0
Abandoned Bike_count 0.00271662799512 0.0582596761004
shape: (636, 1) min: 1.0 max: 4.0
License Residential_count 0.0149337581432 0.143198393553
shape: (3542, 1) min: 1.0 max: 16.0
Daycare Residential or Commercial_count 0.000634905976197 0.0254930781071
shape: (163, 1) min: 1.0 max: 2.0
Inlet Cleaning_count 0.0304677910274 0.208067584111
shape: (6716, 1) min: 1.0 max: 17.0
Rubbish/Recyclable Material Collection_count 0.248425240783 0.668856953731
shape: (4098, 1) min: 3.0 max: 31.0
Vacant House or Commercial_count 0.0766850981796 0.340647954989
shape: (2759, 1) min: 2.0 max: 25.0
Building Dangerous_count 0.036054963618 0.217324930479
shape: (8174, 1) min: 1.0 max: 7.0
Graffiti Removal_count 0.180990530281 0.63278681462
shape: (3559, 1) min: 3.0 max: 31.0
Dead Animal in Street_count 0.014872191503 0.13623715532
shape: (3445, 1) min: 1.0 max: 5.0
Tree Dangerous_count 0.00280897795529 0.0557578044906
shape: (694, 1) min: 1.0 max: 4.0
Smoke Detector_count 0.0306601867778 0.221599017343
shape: (6862, 1) min: 1.0 max: 14.0

In [56]:
from scipy import stats

In [72]:
philly_311_filtered_details[philly_311_filtered_details['Police Complaint_count']==0].shape


Out[72]:
(259265, 107)

In [70]:
min(stats.zscore(philly_311_filtered_details).T[3])


Out[70]:
-0.68992716405346677

In [74]:
min(stats.zscore(philly_311_filtered_details['monthly_311_request_count']))


Out[74]:
-0.68992716405346677

In [28]:
philly_311_filtered_details[philly_311_filtered_details.Month==0]


Out[28]:
SEG_ID Month SHAPE_LEN monthly_311_request_count monthly_311_request_count/len Police Complaint_count Police Complaint_count/len Shoveling_count Shoveling_count/len No Heat (Residential)_count ... Building Dangerous_count Building Dangerous_count/len Graffiti Removal_count Graffiti Removal_count/len Dead Animal in Street_count Dead Animal in Street_count/len Tree Dangerous_count Tree Dangerous_count/len Smoke Detector_count Smoke Detector_count/len

0 rows × 107 columns


In [29]:
philly_311_filtered_details[philly_311_filtered_details.monthly_311_request_count==0]


Out[29]:
SEG_ID Month SHAPE_LEN monthly_311_request_count monthly_311_request_count/len Police Complaint_count Police Complaint_count/len Shoveling_count Shoveling_count/len No Heat (Residential)_count ... Building Dangerous_count Building Dangerous_count/len Graffiti Removal_count Graffiti Removal_count/len Dead Animal in Street_count Dead Animal in Street_count/len Tree Dangerous_count Tree Dangerous_count/len Smoke Detector_count Smoke Detector_count/len

0 rows × 107 columns


In [37]:
philly_311_typemerge = philly_311_statistic.groupby(['SEG_ID', 'Service Name']).size().reset_index()
print(philly_311_typemerge.shape)
philly_311_typemerge.head().T


(284729, 3)
Out[37]:
0 1 2 3 4
SEG_ID 100006 100006 100006 100006 100007
Service Name Abandoned Vehicle Illegal Dumping Maintenance Residential or Commercial Vacant Lot Clean-Up Illegal Dumping
0 1 6 2 1 2

In [38]:
philly_311_typemerge.columns = ['SEG_ID', 'Service Name', 'count']

In [44]:
philly_311_details = philly_311_statistic.groupby('SEG_ID').size().reset_index()
philly_311_details.columns = ['SEG_ID', 'philly_311_count']
print(philly_311_details.shape)
philly_311_details.head().T


(38930, 2)
Out[44]:
0 1 2 3 4
SEG_ID 100006.0 100007.0 100008.0 100009.0 100011.0
philly_311_count 10.0 3.0 13.0 4.0 6.0

In [45]:
for service in unique_services:
    if(service is not None):
        service_data = philly_311_typemerge[philly_311_typemerge['Service Name']==service]
        service_count = service + '_count'
        service_data_count = service_data[['SEG_ID', 'count']]
        service_data_count.columns = ['SEG_ID', service_count]
        philly_311_details = philly_311_details.merge(service_data_count, left_on = 'SEG_ID', right_on = 'SEG_ID', right_index=False, how='left')

In [46]:
print(philly_311_details.shape)
philly_311_details.head().T


(38930, 53)
Out[46]:
0 1 2 3 4
SEG_ID 100006.0 100007.0 100008.0 100009.0 100011.0
philly_311_count 10.0 3.0 13.0 4.0 6.0
Building Construction_count NaN NaN NaN NaN NaN
Line Striping_count NaN NaN NaN NaN NaN
Traffic (Other)_count NaN NaN NaN NaN NaN
Zoning Business_count NaN NaN NaN NaN NaN
Stop Sign Repair_count NaN NaN NaN NaN NaN
Inlet Cleaning_count NaN NaN NaN NaN NaN
Parks and Rec Safety and Maintenance_count NaN NaN NaN NaN NaN
Abandoned Bike_count NaN NaN NaN NaN NaN
Newsstand/Outdoor Café_count NaN NaN NaN NaN NaN
Dangerous Sidewalk_count NaN NaN NaN NaN NaN
Zoning Residential_count NaN NaN NaN NaN NaN
Miscellaneous_count NaN NaN NaN NaN NaN
Daycare Residential or Commercial_count NaN NaN NaN NaN NaN
Street Trees_count NaN NaN NaN NaN NaN
Hydrant Request_count NaN NaN NaN NaN NaN
Graffiti Removal_count NaN NaN NaN NaN NaN
Sanitation / Dumpster Violation_count NaN NaN NaN NaN NaN
Manhole Cover_count NaN NaN NaN NaN NaN
Maintenance Residential or Commercial_count 2.0 NaN NaN NaN NaN
Illegal Dumping_count 6.0 2.0 5.0 1.0 6.0
Building Dangerous_count NaN NaN NaN NaN NaN
Police Complaint_count NaN NaN NaN NaN NaN
Fire Residential or Commercial_count NaN NaN NaN NaN NaN
Abandoned Vehicle_count 1.0 NaN NaN 1.0 NaN
Complaint (Streets)_count NaN NaN NaN NaN NaN
Boarding Room House_count NaN NaN NaN NaN NaN
Smoke Detector_count NaN NaN NaN NaN NaN
Other Dangerous_count NaN NaN NaN NaN NaN
Tree Dangerous_count NaN NaN NaN NaN NaN
Vacant Lot Clean-Up_count 1.0 NaN 1.0 NaN NaN
Information Request_count NaN NaN NaN NaN NaN
Street Defect_count NaN NaN NaN 2.0 NaN
Rubbish/Recyclable Material Collection_count NaN 1.0 NaN NaN NaN
License Residential_count NaN NaN NaN NaN NaN
Newsstand Outdoor Cafe_count NaN NaN NaN NaN NaN
Complaints against Fire or EMS_count NaN NaN NaN NaN NaN
No Heat Residential_count NaN NaN NaN NaN NaN
Traffic Signal Emergency_count NaN NaN NaN NaN NaN
Emergency Air Conditioning_count NaN NaN NaN NaN NaN
Other (Streets)_count NaN NaN NaN NaN NaN
Dead Animal in Street_count NaN NaN NaN NaN NaN
Construction Site Task Force_count NaN NaN NaN NaN NaN
Alley Light Outage_count NaN NaN NaN NaN NaN
Shoveling_count NaN NaN NaN NaN NaN
Hydrant Knocked Down (No Water)_count NaN NaN NaN NaN NaN
Street Paving_count NaN NaN NaN NaN NaN
Vacant House or Commercial_count NaN NaN NaN NaN NaN
Salting_count NaN NaN 1.0 NaN NaN
Street Light Outage_count NaN NaN 6.0 NaN NaN
Infestation Residential_count NaN NaN NaN NaN NaN
No Heat (Residential)_count NaN NaN NaN NaN NaN

In [47]:
philly_311_details = philly_311_details.fillna(0)

In [48]:
print(philly_311_details.shape)
philly_311_details.head().T


(38930, 53)
Out[48]:
0 1 2 3 4
SEG_ID 100006.0 100007.0 100008.0 100009.0 100011.0
philly_311_count 10.0 3.0 13.0 4.0 6.0
Building Construction_count 0.0 0.0 0.0 0.0 0.0
Line Striping_count 0.0 0.0 0.0 0.0 0.0
Traffic (Other)_count 0.0 0.0 0.0 0.0 0.0
Zoning Business_count 0.0 0.0 0.0 0.0 0.0
Stop Sign Repair_count 0.0 0.0 0.0 0.0 0.0
Inlet Cleaning_count 0.0 0.0 0.0 0.0 0.0
Parks and Rec Safety and Maintenance_count 0.0 0.0 0.0 0.0 0.0
Abandoned Bike_count 0.0 0.0 0.0 0.0 0.0
Newsstand/Outdoor Café_count 0.0 0.0 0.0 0.0 0.0
Dangerous Sidewalk_count 0.0 0.0 0.0 0.0 0.0
Zoning Residential_count 0.0 0.0 0.0 0.0 0.0
Miscellaneous_count 0.0 0.0 0.0 0.0 0.0
Daycare Residential or Commercial_count 0.0 0.0 0.0 0.0 0.0
Street Trees_count 0.0 0.0 0.0 0.0 0.0
Hydrant Request_count 0.0 0.0 0.0 0.0 0.0
Graffiti Removal_count 0.0 0.0 0.0 0.0 0.0
Sanitation / Dumpster Violation_count 0.0 0.0 0.0 0.0 0.0
Manhole Cover_count 0.0 0.0 0.0 0.0 0.0
Maintenance Residential or Commercial_count 2.0 0.0 0.0 0.0 0.0
Illegal Dumping_count 6.0 2.0 5.0 1.0 6.0
Building Dangerous_count 0.0 0.0 0.0 0.0 0.0
Police Complaint_count 0.0 0.0 0.0 0.0 0.0
Fire Residential or Commercial_count 0.0 0.0 0.0 0.0 0.0
Abandoned Vehicle_count 1.0 0.0 0.0 1.0 0.0
Complaint (Streets)_count 0.0 0.0 0.0 0.0 0.0
Boarding Room House_count 0.0 0.0 0.0 0.0 0.0
Smoke Detector_count 0.0 0.0 0.0 0.0 0.0
Other Dangerous_count 0.0 0.0 0.0 0.0 0.0
Tree Dangerous_count 0.0 0.0 0.0 0.0 0.0
Vacant Lot Clean-Up_count 1.0 0.0 1.0 0.0 0.0
Information Request_count 0.0 0.0 0.0 0.0 0.0
Street Defect_count 0.0 0.0 0.0 2.0 0.0
Rubbish/Recyclable Material Collection_count 0.0 1.0 0.0 0.0 0.0
License Residential_count 0.0 0.0 0.0 0.0 0.0
Newsstand Outdoor Cafe_count 0.0 0.0 0.0 0.0 0.0
Complaints against Fire or EMS_count 0.0 0.0 0.0 0.0 0.0
No Heat Residential_count 0.0 0.0 0.0 0.0 0.0
Traffic Signal Emergency_count 0.0 0.0 0.0 0.0 0.0
Emergency Air Conditioning_count 0.0 0.0 0.0 0.0 0.0
Other (Streets)_count 0.0 0.0 0.0 0.0 0.0
Dead Animal in Street_count 0.0 0.0 0.0 0.0 0.0
Construction Site Task Force_count 0.0 0.0 0.0 0.0 0.0
Alley Light Outage_count 0.0 0.0 0.0 0.0 0.0
Shoveling_count 0.0 0.0 0.0 0.0 0.0
Hydrant Knocked Down (No Water)_count 0.0 0.0 0.0 0.0 0.0
Street Paving_count 0.0 0.0 0.0 0.0 0.0
Vacant House or Commercial_count 0.0 0.0 0.0 0.0 0.0
Salting_count 0.0 0.0 1.0 0.0 0.0
Street Light Outage_count 0.0 0.0 6.0 0.0 0.0
Infestation Residential_count 0.0 0.0 0.0 0.0 0.0
No Heat (Residential)_count 0.0 0.0 0.0 0.0 0.0

In [49]:
street_gpd[['SEG_ID', 'LENGTH']]


Out[49]:
SEG_ID LENGTH
0 420708 449.863074
1 422065 540.083021
2 420702 446.104120
3 420732 447.261056
4 420718 148.216438
5 420696 319.500352
6 420694 124.069018
7 420524 94.837989
8 422066 1028.697065
9 420562 446.277300
10 420568 448.559252
11 422341 223.095781
12 422975 112.226608
13 420515 441.187740
14 420502 101.568770
15 420488 445.023635
16 420525 446.787387
17 420554 449.023627
18 420586 152.766841
19 420587 447.025403
20 420573 149.831052
21 420512 103.098997
22 420503 447.499575
23 420493 101.221702
24 420402 454.642058
25 420418 444.112916
26 420440 454.073330
27 420463 447.944548
28 420478 129.313148
29 421808 253.397888
... ... ...
40992 741907 885.992828
40993 741903 512.037165
40994 741915 678.697781
40995 741908 257.293050
40996 741913 1122.488488
40997 840535 273.010041
40998 840531 247.792254
40999 840533 343.635411
41000 840529 181.356970
41001 741910 498.241151
41002 742463 511.517819
41003 742126 270.046479
41004 840017 577.410515
41005 741970 225.056910
41006 742114 187.142636
41007 742110 246.997136
41008 741820 246.796251
41009 741730 289.560017
41010 741718 182.961166
41011 741720 171.259505
41012 500021 195.506310
41013 422980 282.918303
41014 422981 281.687421
41015 400719 280.132471
41016 400657 288.947004
41017 401184 252.648180
41018 400675 261.884400
41019 400938 579.004831
41020 400824 241.523846
41021 400683 349.594759

41022 rows × 2 columns


In [50]:
philly_311_details = philly_311_details.merge(street_gpd[['SEG_ID', 'LENGTH']], left_on = 'SEG_ID', right_on = 'SEG_ID', right_index=False, how='left')

In [51]:
print(philly_311_details.shape)
philly_311_details.head().T


(38930, 54)
Out[51]:
0 1 2 3 4
SEG_ID 100006.000000 100007.000000 100008.000000 100009.000000 100011.000000
philly_311_count 10.000000 3.000000 13.000000 4.000000 6.000000
Building Construction_count 0.000000 0.000000 0.000000 0.000000 0.000000
Line Striping_count 0.000000 0.000000 0.000000 0.000000 0.000000
Traffic (Other)_count 0.000000 0.000000 0.000000 0.000000 0.000000
Zoning Business_count 0.000000 0.000000 0.000000 0.000000 0.000000
Stop Sign Repair_count 0.000000 0.000000 0.000000 0.000000 0.000000
Inlet Cleaning_count 0.000000 0.000000 0.000000 0.000000 0.000000
Parks and Rec Safety and Maintenance_count 0.000000 0.000000 0.000000 0.000000 0.000000
Abandoned Bike_count 0.000000 0.000000 0.000000 0.000000 0.000000
Newsstand/Outdoor Café_count 0.000000 0.000000 0.000000 0.000000 0.000000
Dangerous Sidewalk_count 0.000000 0.000000 0.000000 0.000000 0.000000
Zoning Residential_count 0.000000 0.000000 0.000000 0.000000 0.000000
Miscellaneous_count 0.000000 0.000000 0.000000 0.000000 0.000000
Daycare Residential or Commercial_count 0.000000 0.000000 0.000000 0.000000 0.000000
Street Trees_count 0.000000 0.000000 0.000000 0.000000 0.000000
Hydrant Request_count 0.000000 0.000000 0.000000 0.000000 0.000000
Graffiti Removal_count 0.000000 0.000000 0.000000 0.000000 0.000000
Sanitation / Dumpster Violation_count 0.000000 0.000000 0.000000 0.000000 0.000000
Manhole Cover_count 0.000000 0.000000 0.000000 0.000000 0.000000
Maintenance Residential or Commercial_count 2.000000 0.000000 0.000000 0.000000 0.000000
Illegal Dumping_count 6.000000 2.000000 5.000000 1.000000 6.000000
Building Dangerous_count 0.000000 0.000000 0.000000 0.000000 0.000000
Police Complaint_count 0.000000 0.000000 0.000000 0.000000 0.000000
Fire Residential or Commercial_count 0.000000 0.000000 0.000000 0.000000 0.000000
Abandoned Vehicle_count 1.000000 0.000000 0.000000 1.000000 0.000000
Complaint (Streets)_count 0.000000 0.000000 0.000000 0.000000 0.000000
Boarding Room House_count 0.000000 0.000000 0.000000 0.000000 0.000000
Smoke Detector_count 0.000000 0.000000 0.000000 0.000000 0.000000
Other Dangerous_count 0.000000 0.000000 0.000000 0.000000 0.000000
Tree Dangerous_count 0.000000 0.000000 0.000000 0.000000 0.000000
Vacant Lot Clean-Up_count 1.000000 0.000000 1.000000 0.000000 0.000000
Information Request_count 0.000000 0.000000 0.000000 0.000000 0.000000
Street Defect_count 0.000000 0.000000 0.000000 2.000000 0.000000
Rubbish/Recyclable Material Collection_count 0.000000 1.000000 0.000000 0.000000 0.000000
License Residential_count 0.000000 0.000000 0.000000 0.000000 0.000000
Newsstand Outdoor Cafe_count 0.000000 0.000000 0.000000 0.000000 0.000000
Complaints against Fire or EMS_count 0.000000 0.000000 0.000000 0.000000 0.000000
No Heat Residential_count 0.000000 0.000000 0.000000 0.000000 0.000000
Traffic Signal Emergency_count 0.000000 0.000000 0.000000 0.000000 0.000000
Emergency Air Conditioning_count 0.000000 0.000000 0.000000 0.000000 0.000000
Other (Streets)_count 0.000000 0.000000 0.000000 0.000000 0.000000
Dead Animal in Street_count 0.000000 0.000000 0.000000 0.000000 0.000000
Construction Site Task Force_count 0.000000 0.000000 0.000000 0.000000 0.000000
Alley Light Outage_count 0.000000 0.000000 0.000000 0.000000 0.000000
Shoveling_count 0.000000 0.000000 0.000000 0.000000 0.000000
Hydrant Knocked Down (No Water)_count 0.000000 0.000000 0.000000 0.000000 0.000000
Street Paving_count 0.000000 0.000000 0.000000 0.000000 0.000000
Vacant House or Commercial_count 0.000000 0.000000 0.000000 0.000000 0.000000
Salting_count 0.000000 0.000000 1.000000 0.000000 0.000000
Street Light Outage_count 0.000000 0.000000 6.000000 0.000000 0.000000
Infestation Residential_count 0.000000 0.000000 0.000000 0.000000 0.000000
No Heat (Residential)_count 0.000000 0.000000 0.000000 0.000000 0.000000
LENGTH 735.818898 735.209046 728.727137 281.855704 571.366785

In [52]:
street_gpd[street_gpd.SEG_ID==100007]


Out[52]:
CLASS FNODE_ LENGTH LPOLY_ L_F_ADD L_HUNDRED L_T_ADD MULTI_REP NEWSEGDATE ONEWAY ... STREETLABE ST_CODE ST_NAME ST_TYPE SUF_DIR TNODE_ UPDATE_ ZIP_LEFT ZIP_RIGHT geometry
12299 5 7085 735.209046 0 8500 8500 8598 0 None B ... HARLEY PL 40570 HARLEY PL None 7083 2006-04-07 19153 19153 LINESTRING (-75.24647412253933 39.892658786204...

1 rows × 31 columns


In [55]:
for service in unique_services:
    if(service is not None):
        service_count = service + '_count'
        service_countlen = service + '_count/len'
        philly_311_details[service_countlen] = philly_311_details[service_count]/philly_311_details.LENGTH

In [56]:
philly_311_details['philly_311_count/len'] = philly_311_details['philly_311_count']/philly_311_details.LENGTH

In [58]:
normalize('philly_311_count', philly_311_details)
normalize('philly_311_count/len', philly_311_details)

In [59]:
for service in unique_services:
    if(service is not None):
        service_count = service + '_count'
        service_countlen = service + '_count/len'
        normalize(service_count, philly_311_details)
        normalize(service_countlen, philly_311_details)

In [60]:
print(philly_311_details.shape)
philly_311_details.head().T


(38930, 210)
Out[60]:
0 1 2 3 4
SEG_ID 100006.0 100007.0 100008.000000 100009.0 100011.0
philly_311_count 10.0 3.0 13.000000 4.0 6.0
Building Construction_count 0.0 0.0 0.000000 0.0 0.0
Line Striping_count 0.0 0.0 0.000000 0.0 0.0
Traffic (Other)_count 0.0 0.0 0.000000 0.0 0.0
Zoning Business_count 0.0 0.0 0.000000 0.0 0.0
Stop Sign Repair_count 0.0 0.0 0.000000 0.0 0.0
Inlet Cleaning_count 0.0 0.0 0.000000 0.0 0.0
Parks and Rec Safety and Maintenance_count 0.0 0.0 0.000000 0.0 0.0
Abandoned Bike_count 0.0 0.0 0.000000 0.0 0.0
Newsstand/Outdoor Café_count 0.0 0.0 0.000000 0.0 0.0
Dangerous Sidewalk_count 0.0 0.0 0.000000 0.0 0.0
Zoning Residential_count 0.0 0.0 0.000000 0.0 0.0
Miscellaneous_count 0.0 0.0 0.000000 0.0 0.0
Daycare Residential or Commercial_count 0.0 0.0 0.000000 0.0 0.0
Street Trees_count 0.0 0.0 0.000000 0.0 0.0
Hydrant Request_count 0.0 0.0 0.000000 0.0 0.0
Graffiti Removal_count 0.0 0.0 0.000000 0.0 0.0
Sanitation / Dumpster Violation_count 0.0 0.0 0.000000 0.0 0.0
Manhole Cover_count 0.0 0.0 0.000000 0.0 0.0
Maintenance Residential or Commercial_count 2.0 0.0 0.000000 0.0 0.0
Illegal Dumping_count 6.0 2.0 5.000000 1.0 6.0
Building Dangerous_count 0.0 0.0 0.000000 0.0 0.0
Police Complaint_count 0.0 0.0 0.000000 0.0 0.0
Fire Residential or Commercial_count 0.0 0.0 0.000000 0.0 0.0
Abandoned Vehicle_count 1.0 0.0 0.000000 1.0 0.0
Complaint (Streets)_count 0.0 0.0 0.000000 0.0 0.0
Boarding Room House_count 0.0 0.0 0.000000 0.0 0.0
Smoke Detector_count 0.0 0.0 0.000000 0.0 0.0
Other Dangerous_count 0.0 0.0 0.000000 0.0 0.0
... ... ... ... ... ...
No Heat Residential_count_norm 0.0 0.0 0.000000 0.0 0.0
No Heat Residential_count/len_norm 0.0 0.0 0.000000 0.0 0.0
Traffic Signal Emergency_count_norm 0.0 0.0 0.000000 0.0 0.0
Traffic Signal Emergency_count/len_norm 0.0 0.0 0.000000 0.0 0.0
Emergency Air Conditioning_count_norm 0.0 0.0 0.000000 0.0 0.0
Emergency Air Conditioning_count/len_norm 0.0 0.0 0.000000 0.0 0.0
Other (Streets)_count_norm 0.0 0.0 0.000000 0.0 0.0
Other (Streets)_count/len_norm 0.0 0.0 0.000000 0.0 0.0
Dead Animal in Street_count_norm 0.0 0.0 0.000000 0.0 0.0
Dead Animal in Street_count/len_norm 0.0 0.0 0.000000 0.0 0.0
Construction Site Task Force_count_norm 0.0 0.0 0.000000 0.0 0.0
Construction Site Task Force_count/len_norm 0.0 0.0 0.000000 0.0 0.0
Alley Light Outage_count_norm 0.0 0.0 0.000000 0.0 0.0
Alley Light Outage_count/len_norm 0.0 0.0 0.000000 0.0 0.0
Shoveling_count_norm 0.0 0.0 0.000000 0.0 0.0
Shoveling_count/len_norm 0.0 0.0 0.000000 0.0 0.0
Hydrant Knocked Down (No Water)_count_norm 0.0 0.0 0.000000 0.0 0.0
Hydrant Knocked Down (No Water)_count/len_norm 0.0 0.0 0.000000 0.0 0.0
Street Paving_count_norm 0.0 0.0 0.000000 0.0 0.0
Street Paving_count/len_norm 0.0 0.0 0.000000 0.0 0.0
Vacant House or Commercial_count_norm 0.0 0.0 0.000000 0.0 0.0
Vacant House or Commercial_count/len_norm 0.0 0.0 0.000000 0.0 0.0
Salting_count_norm 0.0 0.0 0.186652 0.0 0.0
Salting_count/len_norm 0.0 0.0 0.004128 0.0 0.0
Street Light Outage_count_norm 0.0 0.0 0.527507 0.0 0.0
Street Light Outage_count/len_norm 0.0 0.0 0.008452 0.0 0.0
Infestation Residential_count_norm 0.0 0.0 0.000000 0.0 0.0
Infestation Residential_count/len_norm 0.0 0.0 0.000000 0.0 0.0
No Heat (Residential)_count_norm 0.0 0.0 0.000000 0.0 0.0
No Heat (Residential)_count/len_norm 0.0 0.0 0.000000 0.0 0.0

210 rows × 5 columns


In [61]:
philly_311_details.to_csv('../philly/Philly_311_Details.csv')

In [24]:
philly_311_filtered_details.to_csv('../philly/Philly_311_Details_2015-16.csv')

In [75]:
for column in philly_311_filtered_details.columns:
    if('_count' in column):
        col_norm = column + '_norm'
        philly_311_filtered_details[col_norm] = stats.zscore(philly_311_filtered_details[column])

In [76]:
print(philly_311_filtered_details.shape)
philly_311_filtered_details.head().T


(259881, 211)
Out[76]:
0 1 2 3 4
SEG_ID 100006.000000 100006.000000 100006.000000 100006.000000 100006.000000
Month 2.000000 3.000000 4.000000 5.000000 6.000000
SHAPE_LEN 735.818883 735.818883 735.818883 735.818883 735.818883
monthly_311_request_count 1.000000 3.000000 1.000000 1.000000 1.000000
monthly_311_request_count/len 0.001359 0.004077 0.001359 0.001359 0.001359
Police Complaint_count 0.000000 0.000000 0.000000 0.000000 0.000000
Police Complaint_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Shoveling_count 0.000000 0.000000 0.000000 0.000000 0.000000
Shoveling_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
No Heat (Residential)_count 0.000000 0.000000 0.000000 0.000000 0.000000
No Heat (Residential)_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Street Trees_count 0.000000 0.000000 0.000000 0.000000 0.000000
Street Trees_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Stop Sign Repair_count 0.000000 0.000000 0.000000 0.000000 0.000000
Stop Sign Repair_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Infestation Residential_count 0.000000 0.000000 0.000000 0.000000 0.000000
Infestation Residential_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Street Paving_count 0.000000 0.000000 0.000000 0.000000 0.000000
Street Paving_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Traffic (Other)_count 0.000000 0.000000 0.000000 0.000000 0.000000
Traffic (Other)_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Manhole Cover_count 0.000000 0.000000 0.000000 0.000000 0.000000
Manhole Cover_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Line Striping_count 0.000000 0.000000 0.000000 0.000000 0.000000
Line Striping_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Street Defect_count 0.000000 0.000000 0.000000 0.000000 0.000000
Street Defect_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Illegal Dumping_count 1.000000 3.000000 1.000000 0.000000 0.000000
Illegal Dumping_count/len 0.001359 0.004077 0.001359 0.000000 0.000000
Vacant Lot Clean-Up_count 0.000000 0.000000 0.000000 0.000000 1.000000
... ... ... ... ... ...
Sanitation / Dumpster Violation_count_norm -0.186489 -0.186489 -0.186489 -0.186489 -0.186489
Sanitation / Dumpster Violation_count/len_norm -0.145794 -0.145794 -0.145794 -0.145794 -0.145794
Parks and Rec Safety and Maintenance_count_norm -0.113185 -0.113185 -0.113185 -0.113185 -0.113185
Parks and Rec Safety and Maintenance_count/len_norm -0.077007 -0.077007 -0.077007 -0.077007 -0.077007
Boarding Room House_count_norm -0.080454 -0.080454 -0.080454 -0.080454 -0.080454
Boarding Room House_count/len_norm -0.060947 -0.060947 -0.060947 -0.060947 -0.060947
Maintenance Residential or Commercial_count_norm -0.416204 -0.416204 -0.416204 1.037431 -0.416204
Maintenance Residential or Commercial_count/len_norm -0.330482 -0.330482 -0.330482 0.122176 -0.330482
Abandoned Bike_count_norm -0.046630 -0.046630 -0.046630 -0.046630 -0.046630
Abandoned Bike_count/len_norm -0.032966 -0.032966 -0.032966 -0.032966 -0.032966
License Residential_count_norm -0.104287 -0.104287 -0.104287 -0.104287 -0.104287
License Residential_count/len_norm -0.086630 -0.086630 -0.086630 -0.086630 -0.086630
Daycare Residential or Commercial_count_norm -0.024905 -0.024905 -0.024905 -0.024905 -0.024905
Daycare Residential or Commercial_count/len_norm -0.021147 -0.021147 -0.021147 -0.021147 -0.021147
Inlet Cleaning_count_norm -0.146432 -0.146432 -0.146432 -0.146432 -0.146432
Inlet Cleaning_count/len_norm -0.109299 -0.109299 -0.109299 -0.109299 -0.109299
Rubbish/Recyclable Material Collection_count_norm -0.371418 -0.371418 -0.371418 -0.371418 -0.371418
Rubbish/Recyclable Material Collection_count/len_norm -0.283251 -0.283251 -0.283251 -0.283251 -0.283251
Vacant House or Commercial_count_norm -0.225116 -0.225116 -0.225116 -0.225116 -0.225116
Vacant House or Commercial_count/len_norm -0.181217 -0.181217 -0.181217 -0.181217 -0.181217
Building Dangerous_count_norm -0.165904 -0.165904 -0.165904 -0.165904 -0.165904
Building Dangerous_count/len_norm -0.127278 -0.127278 -0.127278 -0.127278 -0.127278
Graffiti Removal_count_norm -0.286022 -0.286022 -0.286022 -0.286022 -0.286022
Graffiti Removal_count/len_norm -0.212658 -0.212658 -0.212658 -0.212658 -0.212658
Dead Animal in Street_count_norm -0.109164 -0.109164 -0.109164 -0.109164 -0.109164
Dead Animal in Street_count/len_norm -0.080124 -0.080124 -0.080124 -0.080124 -0.080124
Tree Dangerous_count_norm -0.050378 -0.050378 -0.050378 -0.050378 -0.050378
Tree Dangerous_count/len_norm -0.041106 -0.041106 -0.041106 -0.041106 -0.041106
Smoke Detector_count_norm -0.138359 -0.138359 -0.138359 -0.138359 -0.138359
Smoke Detector_count/len_norm -0.119088 -0.119088 -0.119088 -0.119088 -0.119088

211 rows × 5 columns


In [78]:
for column in philly_311_filtered_details.columns:
    if('_norm' in column):
        print(column, min(philly_311_filtered_details[column]), max(philly_311_filtered_details[column]))


monthly_311_request_count_norm -0.689927164053 35.2187561537
monthly_311_request_count/len_norm -0.779613092677 42.3705330788
Police Complaint_count_norm -0.0477097000725 58.0718741962
Police Complaint_count/len_norm -0.0365367943396 139.545227568
Shoveling_count_norm -0.0893250136533 134.87505797
Shoveling_count/len_norm -0.0729810036323 120.600319623
No Heat (Residential)_count_norm -0.106794439167 79.5208273908
No Heat (Residential)_count/len_norm -0.0850329274997 100.795963715
Street Trees_count_norm -0.175914961348 38.8817151711
Street Trees_count/len_norm -0.142219561423 63.4074541247
Stop Sign Repair_count_norm -0.11356609397 48.2783141396
Stop Sign Repair_count/len_norm -0.077681619574 253.774281143
Infestation Residential_count_norm -0.0909928691569 57.7971780718
Infestation Residential_count/len_norm -0.0708922696913 189.874606044
Street Paving_count_norm -0.0851981600099 73.321790722
Street Paving_count/len_norm -0.066479650637 150.539561166
Traffic (Other)_count_norm -0.150882984698 27.7975282387
Traffic (Other)_count/len_norm -0.0912494097477 290.894189486
Manhole Cover_count_norm -0.0849081388838 47.491659436
Manhole Cover_count/len_norm -0.0681539855817 103.206987297
Line Striping_count_norm -0.0725485535954 56.8881482951
Line Striping_count/len_norm -0.0561436307958 91.1827138265
Street Defect_count_norm -0.315181244728 32.0454520884
Street Defect_count/len_norm -0.223820981313 99.1461195226
Illegal Dumping_count_norm -0.354090169818 54.572232386
Illegal Dumping_count/len_norm -0.268451477023 60.3373109526
Vacant Lot Clean-Up_count_norm -0.206867304984 49.3553840589
Vacant Lot Clean-Up_count/len_norm -0.170259413745 82.6539386603
Other Dangerous_count_norm -0.0515368758278 91.0604007228
Other Dangerous_count/len_norm -0.0410178757012 207.438566183
Street Light Outage_count_norm -0.261839860143 45.5611936038
Street Light Outage_count/len_norm -0.1709886269 155.18371743
Other (Streets)_count_norm -0.179614147068 41.3696626197
Other (Streets)_count/len_norm -0.134652026566 75.9071345802
No Heat Residential_count_norm -0.00496258627467 322.415508326
No Heat Residential_count/len_norm -0.00442989470239 339.493509286
Newsstand/Outdoor Café_count_norm -0.00650606307501 153.702782846
Newsstand/Outdoor Café_count/len_norm -0.00406602418767 309.999280543
Dangerous Sidewalk_count_norm -0.126866238014 37.5604549295
Dangerous Sidewalk_count/len_norm -0.0966154770161 62.9263730743
Zoning Residential_count_norm -0.109564212862 52.7452754633
Zoning Residential_count/len_norm -0.088698798232 68.6964349438
Information Request_count_norm -0.145997387113 107.938400289
Information Request_count/len_norm -0.119044302357 101.856299531
Miscellaneous_count_norm -0.12812367813 47.9453787167
Miscellaneous_count/len_norm -0.0974914036087 153.389412489
Abandoned Vehicle_count_norm -0.32605897757 41.2903972682
Abandoned Vehicle_count/len_norm -0.244258676943 118.163122367
Hydrant Request_count_norm -0.0696770626552 70.7823145748
Hydrant Request_count/len_norm -0.0577214944687 76.1783915521
Building Construction_count_norm -0.153778259149 63.2141621731
Building Construction_count/len_norm -0.108211905281 218.007219637
Complaint (Streets)_count_norm -0.134843839108 33.3866328697
Complaint (Streets)_count/len_norm -0.101644524342 75.9938876334
Newsstand Outdoor Cafe_count_norm -0.0265053394365 84.3192185555
Newsstand Outdoor Cafe_count/len_norm -0.0208176928129 159.427640962
Alley Light Outage_count_norm -0.117286367398 46.2559996837
Alley Light Outage_count/len_norm -0.0854695099406 181.317233055
Zoning Business_count_norm -0.137002547933 34.6328794437
Zoning Business_count/len_norm -0.105893773368 133.619629989
Traffic Signal Emergency_count_norm -0.184155020962 54.8571227279
Traffic Signal Emergency_count/len_norm -0.131789112463 99.4042726687
Hydrant Knocked Down (No Water)_count_norm -0.0187201776659 104.605333414
Hydrant Knocked Down (No Water)_count/len_norm -0.015961216061 172.197705606
Salting_count_norm -0.155280773061 53.0378912543
Salting_count/len_norm -0.129431112944 117.384971593
Complaints against Fire or EMS_count_norm -0.0277971687686 145.031560597
Complaints against Fire or EMS_count/len_norm -0.022241283162 236.882622388
Fire Residential or Commercial_count_norm -0.11785848615 45.4612802627
Fire Residential or Commercial_count/len_norm -0.0908852685176 95.3223871801
Emergency Air Conditioning_count_norm -0.00277414657882 360.471219378
Emergency Air Conditioning_count/len_norm -0.00275720150833 398.050783935
Construction Site Task Force_count_norm -0.182535865391 35.4372229452
Construction Site Task Force_count/len_norm -0.139693250622 88.8054084638
Sanitation / Dumpster Violation_count_norm -0.186489298853 49.7534284127
Sanitation / Dumpster Violation_count/len_norm -0.145794420518 73.9825456726
Parks and Rec Safety and Maintenance_count_norm -0.11318547187 112.738353431
Parks and Rec Safety and Maintenance_count/len_norm -0.0770067773422 186.964623433
Boarding Room House_count_norm -0.0804542035772 43.6841065078
Boarding Room House_count/len_norm -0.0609471522109 159.553224
Maintenance Residential or Commercial_count_norm -0.416204133738 27.2028659131
Maintenance Residential or Commercial_count/len_norm -0.33048177417 76.9865384404
Abandoned Bike_count_norm -0.0466297343833 68.6116252304
Abandoned Bike_count/len_norm -0.0329661655272 218.308354334
License Residential_count_norm -0.104287391112 111.629024599
License Residential_count/len_norm -0.0866297439132 103.895879792
Daycare Residential or Commercial_count_norm -0.024905081884 78.4279141313
Daycare Residential or Commercial_count/len_norm -0.0211468545545 121.085527003
Inlet Cleaning_count_norm -0.146432467011 81.5579463291
Inlet Cleaning_count/len_norm -0.109299054031 142.655896164
Rubbish/Recyclable Material Collection_count_norm -0.371418309039 45.9763986193
Rubbish/Recyclable Material Collection_count/len_norm -0.28325084876 79.8759066076
Vacant House or Commercial_count_norm -0.225115825872 73.1645750058
Vacant House or Commercial_count/len_norm -0.181217234627 59.3781243153
Building Dangerous_count_norm -0.165903805454 32.0439924652
Building Dangerous_count/len_norm -0.127278239611 88.6309655539
Graffiti Removal_count_norm -0.286021886549 48.7037151414
Graffiti Removal_count/len_norm -0.212657672673 139.783174099
Dead Animal in Street_count_norm -0.109164200336 36.5916139982
Dead Animal in Street_count/len_norm -0.080123656397 134.208952019
Tree Dangerous_count_norm -0.0503782992411 71.6885958656
Tree Dangerous_count/len_norm -0.0411060326281 157.072526058
Smoke Detector_count_norm -0.138359123314 63.0389378853
Smoke Detector_count/len_norm -0.119087816845 74.5557075665

In [82]:
for column in philly_311_filtered_details.columns:
    if('_count' in column):
        zscore_nonzero = stats.zscore(philly_311_filtered_details[philly_311_filtered_details[column]>0][column])
        print(column, min(zscore_nonzero), max(zscore_nonzero))


monthly_311_request_count -0.689927164053 35.2187561537
monthly_311_request_count/len -0.779613092677 42.3705330788
Police Complaint_count -0.179364382667 9.02800726089
Police Complaint_count/len -1.05513089828 9.13275478528
Shoveling_count -0.371975372009 25.4042166612
Shoveling_count/len -0.947297113637 16.5405448416
No Heat (Residential)_count -0.256128713431 21.4294356904
No Heat (Residential)_count/len -1.00273225035 16.0074091349
Street Trees_count -0.334397270157 14.6657821168
Street Trees_count/len -1.02481487056 16.6877814458
Stop Sign Repair_count -0.379115313581 10.9766498614
Stop Sign Repair_count/len -0.723466532879 40.090368779
Infestation Residential_count -0.245304232852 17.0849855511
Infestation Residential_count/len -1.04400604562 25.89997719
Street Paving_count -0.238419975602 15.3765535338
Street Paving_count/len -1.0259313961 18.910073485
Traffic (Other)_count -0.276760076888 11.0019855126
Traffic (Other)_count/len -0.690869864194 55.0411816463
Manhole Cover_count -0.271863079292 9.89872890493
Manhole Cover_count/len -1.11155146176 13.0065018324
Line Striping_count -0.264466033854 10.7519122039
Line Striping_count/len -1.0613128437 9.16172565428
Street Defect_count -0.432997403288 18.2805347201
Street Defect_count/len -0.726030067635 43.5929918352
Illegal Dumping_count -0.398101741377 34.5748809805
Illegal Dumping_count/len -0.776248027367 30.4305339284
Vacant Lot Clean-Up_count -0.412918779025 19.4017600127
Vacant Lot Clean-Up_count/len -0.891611218403 26.6041948007
Other Dangerous_count -0.223313890363 10.9702948641
Other Dangerous_count/len -0.98274733096 15.9663227625
Street Light Outage_count -0.352689121573 24.9168052799
Street Light Outage_count/len -0.704647966384 53.6969878016
Other (Streets)_count -0.274582569867 20.2156158205
Other (Streets)_count/len -0.992249210352 19.1018537383
No Heat Residential_count -0.408248290464 2.44948974278
No Heat Residential_count/len -0.819936625974 1.74353061612
Newsstand/Outdoor Café_count nan nan
Newsstand/Outdoor Café_count/len -0.729879347468 1.78296677
Dangerous Sidewalk_count -0.3032142567 11.8540102795
Dangerous Sidewalk_count/len -0.991539890586 11.0403561095
Zoning Residential_count -0.251045201764 15.7200643111
Zoning Residential_count/len -1.11798256102 11.105809008
Information Request_count -0.236449988055 32.9358971661
Information Request_count/len -0.950794736914 22.9816690753
Miscellaneous_count -0.247133999506 14.4647572301
Miscellaneous_count/len -0.95692242021 28.660944895
Abandoned Vehicle_count -0.409676114241 25.6544813954
Abandoned Vehicle_count/len -0.827792727306 54.4390512642
Hydrant Request_count -0.299257859911 10.6017715351
Hydrant Request_count/len -1.07031251783 7.70719398418
Building Construction_count -0.282414524025 26.9470828679
Building Construction_count/len -0.862290444537 46.0759108452
Complaint (Streets)_count -0.205375661542 14.4749062826
Complaint (Streets)_count/len -1.03998870486 14.2763268942
Newsstand Outdoor Cafe_count -0.367607311047 4.04368042152
Newsstand Outdoor Cafe_count/len -0.91822278622 5.57820033749
Alley Light Outage_count -0.298934065533 13.2750105414
Alley Light Outage_count/len -0.873459337144 29.9352109464
Zoning Business_count -0.302450787885 11.6120123303
Zoning Business_count/len -1.01069306406 26.9943233391
Traffic Signal Emergency_count -0.437469985045 18.5805763291
Traffic Signal Emergency_count/len -0.600541765821 28.324005926
Hydrant Knocked Down (No Water)_count -0.104828483672 9.53939201417
Hydrant Knocked Down (No Water)_count/len -1.45970240845 4.51535486109
Salting_count -0.532956843312 15.2073895426
Salting_count/len -0.686870665176 31.0881950086
Complaints against Fire or EMS_count -0.185873463243 9.35563098321
Complaints against Fire or EMS_count/len -0.917562830112 9.48061098326
Fire Residential or Commercial_count -0.216209435292 17.8202092457
Fire Residential or Commercial_count/len -1.0457518013 16.197695002
Emergency Air Conditioning_count nan nan
Emergency Air Conditioning_count/len -1.0 1.0
Construction Site Task Force_count -0.373781193938 14.7741935078
Construction Site Task Force_count/len -0.965572127472 23.3809868874
Sanitation / Dumpster Violation_count -0.374345595647 19.2920585405
Sanitation / Dumpster Violation_count/len -0.908651302935 20.2589699908
Parks and Rec Safety and Maintenance_count -0.345077491452 28.3344195481
Parks and Rec Safety and Maintenance_count/len -0.731552868476 29.0592926647
Boarding Room House_count -0.228885136308 10.2047856111
Boarding Room House_count/len -0.993298650287 18.35997407
Maintenance Residential or Commercial_count -0.420877888292 19.9775075185
Maintenance Residential or Commercial_count/len -0.911582610524 45.2015057225
Abandoned Bike_count -0.277158720172 7.27739610966
Abandoned Bike_count/len -0.816515301425 13.579640126
License Residential_count -0.16910742886 26.3343665987
License Residential_count/len -1.0108615941 16.861869389
Daycare Residential or Commercial_count -0.111455642515 8.97217922246
Daycare Residential or Commercial_count/len -1.37772740098 4.08206748073
Inlet Cleaning_count -0.315818090142 27.9175834858
Inlet Cleaning_count/len -0.89260234252 30.0384093149
Rubbish/Recyclable Material Collection_count -0.470508779765 31.5674575012
Rubbish/Recyclable Material Collection_count/len -0.84188335565 41.5775715908
Vacant House or Commercial_count -0.351810043066 35.8385251457
Vacant House or Commercial_count/len -0.920862200213 19.8824217263
Building Dangerous_count -0.305827466116 12.2351443133
Building Dangerous_count/len -0.957757149328 21.2221127408
Graffiti Removal_count -0.464467226767 25.6602174155
Graffiti Removal_count/len -0.750524036235 58.349466663
Dead Animal in Street_count -0.3065758768 9.75203265249
Dead Animal in Street_count/len -0.930120253613 20.4342357966
Tree Dangerous_count -0.21052991573 11.9651168773
Tree Dangerous_count/len -1.21262970204 12.057104392
Smoke Detector_count -0.217920361094 17.3587557074
Smoke Detector_count/len -0.984220940936 16.5067592569
monthly_311_request_count_norm -0.686662202655 30.1718206539
monthly_311_request_count/len_norm -0.634281887245 29.6022629217
Police Complaint_count_norm -0.179364382667 9.02800726089
Police Complaint_count/len_norm -1.05513089828 9.13275478528
Shoveling_count_norm -0.371975372009 25.4042166612
Shoveling_count/len_norm -0.947297113637 16.5405448416
No Heat (Residential)_count_norm -0.256128713431 21.4294356904
No Heat (Residential)_count/len_norm -1.00273225035 16.0074091349
Street Trees_count_norm -0.334397270157 14.6657821168
Street Trees_count/len_norm -1.00843309163 16.6875892758
Stop Sign Repair_count_norm -0.379115313581 10.9766498614
Stop Sign Repair_count/len_norm -0.723466532879 40.090368779
Infestation Residential_count_norm -0.245304232852 17.0849855511
Infestation Residential_count/len_norm -1.04400604562 25.89997719
Street Paving_count_norm -0.238419975602 15.3765535338
Street Paving_count/len_norm -1.0259313961 18.910073485
Traffic (Other)_count_norm -0.276760076888 11.0019855126
Traffic (Other)_count/len_norm -0.690869864194 55.0411816463
Manhole Cover_count_norm -0.271863079292 9.89872890493
Manhole Cover_count/len_norm -1.11155146176 13.0065018324
Line Striping_count_norm -0.264466033854 10.7519122039
Line Striping_count/len_norm -1.0613128437 9.16172565428
Street Defect_count_norm -0.432997403288 18.2805347201
Street Defect_count/len_norm -0.645008523538 43.4142380396
Illegal Dumping_count_norm -0.398101741377 34.5748809805
Illegal Dumping_count/len_norm -0.658577887584 30.2839920611
Vacant Lot Clean-Up_count_norm -0.412918779025 19.4017600127
Vacant Lot Clean-Up_count/len_norm -0.854276488857 26.5991094837
Other Dangerous_count_norm -0.223313890363 10.9702948641
Other Dangerous_count/len_norm -0.98274733096 15.9663227625
Street Light Outage_count_norm -0.352689121573 24.9168052799
Street Light Outage_count/len_norm -0.660529961606 53.6557444596
Other (Streets)_count_norm -0.274582569867 20.2156158205
Other (Streets)_count/len_norm -0.971685757172 19.1011592028
No Heat Residential_count_norm -0.408248290464 2.44948974278
No Heat Residential_count/len_norm -0.819936625974 1.74353061612
Newsstand/Outdoor Café_count_norm 1.0 1.0
Newsstand/Outdoor Café_count/len_norm -0.729879347468 1.78296677
Dangerous Sidewalk_count_norm -0.3032142567 11.8540102795
Dangerous Sidewalk_count/len_norm -0.991539890586 11.0403561095
Zoning Residential_count_norm -0.251045201764 15.7200643111
Zoning Residential_count/len_norm -1.11798256102 11.105809008
Information Request_count_norm -0.236449988055 32.9358971661
Information Request_count/len_norm -0.940997423865 22.9808218047
Miscellaneous_count_norm -0.247133999506 14.4647572301
Miscellaneous_count/len_norm -0.95692242021 28.660944895
Abandoned Vehicle_count_norm -0.409676114241 25.6544813954
Abandoned Vehicle_count/len_norm -0.739536955282 54.4009752643
Hydrant Request_count_norm -0.299257859911 10.6017715351
Hydrant Request_count/len_norm -1.07031251783 7.70719398418
Building Construction_count_norm -0.282414524025 26.9470828679
Building Construction_count/len_norm -0.862290444537 46.0759108452
Complaint (Streets)_count_norm -0.205375661542 14.4749062826
Complaint (Streets)_count/len_norm -1.03998870486 14.2763268942
Newsstand Outdoor Cafe_count_norm -0.367607311047 4.04368042152
Newsstand Outdoor Cafe_count/len_norm -0.91822278622 5.57820033749
Alley Light Outage_count_norm -0.298934065533 13.2750105414
Alley Light Outage_count/len_norm -0.873459337144 29.9352109464
Zoning Business_count_norm -0.302450787885 11.6120123303
Zoning Business_count/len_norm -1.01069306406 26.9943233391
Traffic Signal Emergency_count_norm -0.437469985045 18.5805763291
Traffic Signal Emergency_count/len_norm -0.576748568145 28.2686126944
Hydrant Knocked Down (No Water)_count_norm -0.104828483672 9.53939201417
Hydrant Knocked Down (No Water)_count/len_norm -1.45970240845 4.51535486109
Salting_count_norm -0.532956843312 15.2073895426
Salting_count/len_norm -0.668367788821 31.0818987293
Complaints against Fire or EMS_count_norm -0.185873463243 9.35563098321
Complaints against Fire or EMS_count/len_norm -0.917562830112 9.48061098326
Fire Residential or Commercial_count_norm -0.216209435292 17.8202092457
Fire Residential or Commercial_count/len_norm -1.0457518013 16.197695002
Emergency Air Conditioning_count_norm nan nan
Emergency Air Conditioning_count/len_norm -1.0 1.0
Construction Site Task Force_count_norm -0.373781193938 14.7741935078
Construction Site Task Force_count/len_norm -0.942947850427 23.3802307412
Sanitation / Dumpster Violation_count_norm -0.374345595647 19.2920585405
Sanitation / Dumpster Violation_count/len_norm -0.885551883495 20.2587373274
Parks and Rec Safety and Maintenance_count_norm -0.345077491452 28.3344195481
Parks and Rec Safety and Maintenance_count/len_norm -0.731552868476 29.0592926647
Boarding Room House_count_norm -0.228885136308 10.2047856111
Boarding Room House_count/len_norm -0.993298650287 18.35997407
Maintenance Residential or Commercial_count_norm -0.420877888292 19.9775075185
Maintenance Residential or Commercial_count/len_norm -0.755452575262 45.0891170217
Abandoned Bike_count_norm -0.277158720172 7.27739610966
Abandoned Bike_count/len_norm -0.816515301425 13.579640126
License Residential_count_norm -0.16910742886 26.3343665987
License Residential_count/len_norm -1.0108615941 16.861869389
Daycare Residential or Commercial_count_norm -0.111455642515 8.97217922246
Daycare Residential or Commercial_count/len_norm -1.37772740098 4.08206748073
Inlet Cleaning_count_norm -0.315818090142 27.9175834858
Inlet Cleaning_count/len_norm -0.89260234252 30.0384093149
Rubbish/Recyclable Material Collection_count_norm -0.470508779765 31.5674575012
Rubbish/Recyclable Material Collection_count/len_norm -0.725772449361 41.4797479607
Vacant House or Commercial_count_norm -0.351810043066 35.8385251457
Vacant House or Commercial_count/len_norm -0.920862200213 19.8824217263
Building Dangerous_count_norm -0.305827466116 12.2351443133
Building Dangerous_count/len_norm -0.957757149328 21.2221127408
Graffiti Removal_count_norm -0.464467226767 25.6602174155
Graffiti Removal_count/len_norm -0.675614755487 58.2044814528
Dead Animal in Street_count_norm -0.3065758768 9.75203265249
Dead Animal in Street_count/len_norm -0.930120253613 20.4342357966
Tree Dangerous_count_norm -0.21052991573 11.9651168773
Tree Dangerous_count/len_norm -1.21262970204 12.057104392
Smoke Detector_count_norm -0.217920361094 17.3587557074
Smoke Detector_count/len_norm -0.984220940936 16.5067592569

In [85]:
stats.zscore(philly_311_filtered_details[philly_311_filtered_details['Police Complaint_count']>0]['Police Complaint_count'])


Out[85]:
array([-0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438,  4.42432144,  4.42432144,
       -0.17936438,  4.42432144, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
        4.42432144, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
        4.42432144, -0.17936438,  4.42432144, -0.17936438,  4.42432144,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438,  4.42432144, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438,  4.42432144, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438,  9.02800726, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438,  9.02800726,  9.02800726,  4.42432144, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438,  4.42432144,  4.42432144,
        4.42432144, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438,  4.42432144, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438,  4.42432144, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438,  4.42432144,  4.42432144,
        4.42432144, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438, -0.17936438, -0.17936438, -0.17936438, -0.17936438,
       -0.17936438])

In [86]:
stats.zscore(philly_311_filtered_details['Police Complaint_count'])


Out[86]:
array([-0.0477097, -0.0477097, -0.0477097, ..., -0.0477097, -0.0477097,
       -0.0477097])

In [88]:
philly_311_filtered_details[philly_311_filtered_details['Police Complaint_count']>0]['Police Complaint_count'].shape


Out[88]:
(616,)

In [89]:
philly_311_filtered_details[philly_311_filtered_details['Police Complaint_count']==0]['Police Complaint_count'].shape


Out[89]:
(259265,)

In [91]:
for column in philly_311_filtered_details.columns:
    if('_count' in column and 'len' not in column):
        print(column, philly_311_filtered_details[philly_311_filtered_details[column]>0][column].shape[0], philly_311_filtered_details[philly_311_filtered_details[column]==0][column].shape[0])


monthly_311_request_count 259881 0
Police Complaint_count 616 259265
Shoveling_count 2803 257078
No Heat (Residential)_count 3429 256452
Street Trees_count 9749 250132
Stop Sign Repair_count 4332 255549
Infestation Residential_count 2289 257592
Street Paving_count 2180 257701
Traffic (Other)_count 6388 253493
Manhole Cover_count 2095 257786
Line Striping_count 1510 258371
Street Defect_count 35164 224717
Illegal Dumping_count 45610 214271
Vacant Lot Clean-Up_count 16078 243803
Other Dangerous_count 802 259079
Street Light Outage_count 21604 238277
Other (Streets)_count 9132 250749
No Heat Residential_count 7 259874
Newsstand/Outdoor Café_count 11 259870
Dangerous Sidewalk_count 4667 255214
Zoning Residential_count 3446 256435
Information Request_count 7494 252387
Miscellaneous_count 4958 254923
Abandoned Vehicle_count 34798 225083
Hydrant Request_count 1536 258345
Building Construction_count 6793 253088
Complaint (Streets)_count 4950 254931
Newsstand Outdoor Cafe_count 210 259671
Alley Light Outage_count 4064 255817
Zoning Business_count 5452 254429
Traffic Signal Emergency_count 16242 243639
Hydrant Knocked Down (No Water)_count 92 259789
Salting_count 12939 246942
Complaints against Fire or EMS_count 231 259650
Fire Residential or Commercial_count 3804 256077
Emergency Air Conditioning_count 2 259879
Construction Site Task Force_count 10010 249871
Sanitation / Dumpster Violation_count 11494 248387
Parks and Rec Safety and Maintenance_count 4243 255638
Boarding Room House_count 1793 258088
Maintenance Residential or Commercial_count 54258 205623
Abandoned Bike_count 636 259245
License Residential_count 3542 256339
Daycare Residential or Commercial_count 163 259718
Inlet Cleaning_count 6716 253165
Rubbish/Recyclable Material Collection_count 44816 215065
Vacant House or Commercial_count 16159 243722
Building Dangerous_count 8174 251707
Graffiti Removal_count 30675 229206
Dead Animal in Street_count 3445 256436
Tree Dangerous_count 694 259187
Smoke Detector_count 6862 253019
monthly_311_request_count_norm 88781 0
Police Complaint_count_norm 616 0
Shoveling_count_norm 2803 0
No Heat (Residential)_count_norm 3429 0
Street Trees_count_norm 9749 0
Stop Sign Repair_count_norm 4332 0
Infestation Residential_count_norm 2289 0
Street Paving_count_norm 2180 0
Traffic (Other)_count_norm 6388 0
Manhole Cover_count_norm 2095 0
Line Striping_count_norm 1510 0
Street Defect_count_norm 35164 0
Illegal Dumping_count_norm 45610 0
Vacant Lot Clean-Up_count_norm 16078 0
Other Dangerous_count_norm 802 0
Street Light Outage_count_norm 21604 0
Other (Streets)_count_norm 9132 0
No Heat Residential_count_norm 7 0
Newsstand/Outdoor Café_count_norm 11 0
Dangerous Sidewalk_count_norm 4667 0
Zoning Residential_count_norm 3446 0
Information Request_count_norm 7494 0
Miscellaneous_count_norm 4958 0
Abandoned Vehicle_count_norm 34798 0
Hydrant Request_count_norm 1536 0
Building Construction_count_norm 6793 0
Complaint (Streets)_count_norm 4950 0
Newsstand Outdoor Cafe_count_norm 210 0
Alley Light Outage_count_norm 4064 0
Zoning Business_count_norm 5452 0
Traffic Signal Emergency_count_norm 16242 0
Hydrant Knocked Down (No Water)_count_norm 92 0
Salting_count_norm 12939 0
Complaints against Fire or EMS_count_norm 231 0
Fire Residential or Commercial_count_norm 3804 0
Emergency Air Conditioning_count_norm 2 0
Construction Site Task Force_count_norm 10010 0
Sanitation / Dumpster Violation_count_norm 11494 0
Parks and Rec Safety and Maintenance_count_norm 4243 0
Boarding Room House_count_norm 1793 0
Maintenance Residential or Commercial_count_norm 54258 0
Abandoned Bike_count_norm 636 0
License Residential_count_norm 3542 0
Daycare Residential or Commercial_count_norm 163 0
Inlet Cleaning_count_norm 6716 0
Rubbish/Recyclable Material Collection_count_norm 44816 0
Vacant House or Commercial_count_norm 16159 0
Building Dangerous_count_norm 8174 0
Graffiti Removal_count_norm 30675 0
Dead Animal in Street_count_norm 3445 0
Tree Dangerous_count_norm 694 0
Smoke Detector_count_norm 6862 0

In [97]:
for column in philly_311_filtered_details.columns:
    if('_count' in column and 'len' not in column):
        zscore_nonzero = stats.zscore(np.log2(philly_311_filtered_details[column]+1))
        print(column, min(zscore_nonzero), max(zscore_nonzero))


monthly_311_request_count -0.923544502178 6.95645249993
Police Complaint_count -0.048410620452 39.9211347329
Shoveling_count -0.100325364848 36.0384429999
No Heat (Residential)_count -0.113403351964 28.8604463223
Street Trees_count -0.19109924165 15.7693184311
Stop Sign Repair_count -0.125273588941 21.8256784877
Infestation Residential_count -0.0932051619362 28.4661263482
Street Paving_count -0.0901284920828 32.1872963468
Traffic (Other)_count -0.156207925832 15.4208571784
Manhole Cover_count -0.088533644599 26.7422642289
Line Striping_count -0.0752102460994 31.7138040298
Street Defect_count -0.371526403533 9.57104298809
Illegal Dumping_count -0.432383878588 10.4171143647
Vacant Lot Clean-Up_count -0.242856028912 14.3106934957
Other Dangerous_count -0.0545739426539 47.1821755956
Street Light Outage_count -0.289900877533 12.906451024
Other (Streets)_count -0.187409715684 16.5470916086
No Heat Residential_count -0.00509980189539 276.939810921
Newsstand/Outdoor Café_count -0.00650606307501 153.702782846
Dangerous Sidewalk_count -0.132544947542 19.2609831298
Zoning Residential_count -0.114053021506 24.4307315757
Information Request_count -0.167099164029 24.9612386929
Miscellaneous_count -0.136542308117 21.2392837197
Abandoned Vehicle_count -0.373682794142 10.5139648433
Hydrant Request_count -0.0750135599273 35.0021250365
Building Construction_count -0.160809552785 21.2617308424
Complaint (Streets)_count -0.137964052341 18.0206397415
Newsstand Outdoor Cafe_count -0.0277574312344 62.8952751448
Alley Light Outage_count -0.123381109976 21.9627393382
Zoning Business_count -0.143404004184 17.8019245715
Traffic Signal Emergency_count -0.238136464498 14.4230757117
Hydrant Knocked Down (No Water)_count -0.0187843373125 83.5509350833
Salting_count -0.206672999106 14.495028202
Complaints against Fire or EMS_count -0.0293542041274 82.1291396886
Fire Residential or Commercial_count -0.120636858185 22.2644147996
Emergency Air Conditioning_count -0.00277414657882 360.471219378
Construction Site Task Force_count -0.194358325198 14.9526925579
Sanitation / Dumpster Violation_count -0.206633749717 16.2392988914
Parks and Rec Safety and Maintenance_count -0.124639597162 29.8182598729
Boarding Room House_count -0.0824287925195 26.6721727311
Maintenance Residential or Commercial_count -0.484087703852 7.96781381276
Abandoned Bike_count -0.0486017411687 43.4772164044
License Residential_count -0.11535513076 32.9382462194
Daycare Residential or Commercial_count -0.0250009381577 62.7022459669
Inlet Cleaning_count -0.158265603853 23.181405547
Rubbish/Recyclable Material Collection_count -0.429717918698 9.77679119544
Vacant House or Commercial_count -0.24858280355 16.4939678986
Building Dangerous_count -0.1759191436 15.3825501254
Graffiti Removal_count -0.341613907045 11.2042338611
Dead Animal in Street_count -0.113686548925 20.6575843987
Tree Dangerous_count -0.0513015677762 43.277690625
Smoke Detector_count -0.159063069875 21.7891784268
monthly_311_request_count_norm -1.0162417246 4.88532472294
Police Complaint_count_norm -0.0486975016795 27.4349396718
Shoveling_count_norm -0.103273791874 21.2952377654
No Heat (Residential)_count_norm -0.115094805465 17.1344757164
Street Trees_count_norm -0.195204902167 10.7093223124
Stop Sign Repair_count_norm -0.128697015149 14.6742961735
Infestation Residential_count_norm -0.0940445508694 17.7392349192
Street Paving_count_norm -0.091597645764 19.4345283591
Traffic (Other)_count_norm -0.158029193845 10.7165723628
Manhole Cover_count_norm -0.0898133696128 17.6297810545
Line Striping_count_norm -0.0762148554853 20.4327439822
Street Defect_count_norm -0.381574648119 7.42413035658
Illegal Dumping_count_norm -0.443617258135 7.97325860526
Vacant Lot Clean-Up_count_norm -0.249870519774 10.4478270266
Other Dangerous_count_norm -0.0554763226156 28.4136244587
Street Light Outage_count_norm -0.296182635419 8.87731732627
Other (Streets)_count_norm -0.18977415368 10.7248039471
No Heat Residential_count_norm -0.00518439951586 214.395859573
Newsstand/Outdoor Café_count_norm -0.00650606307501 153.702782846
Dangerous Sidewalk_count_norm -0.134510086318 12.9065972524
Zoning Residential_count_norm -0.11548464135 15.3844705987
Information Request_count_norm -0.170653048318 15.0436403969
Miscellaneous_count_norm -0.138695561235 13.6105316665
Abandoned Vehicle_count_norm -0.382791635076 7.70082663486
Hydrant Request_count_norm -0.0766853546068 21.894644656
Building Construction_count_norm -0.162949164853 13.0336557191
Complaint (Streets)_count_norm -0.139004194951 12.0647582798
Newsstand Outdoor Cafe_count_norm -0.0283528231896 44.806925838
Alley Light Outage_count_norm -0.125353607076 14.2699552774
Zoning Business_count_norm -0.145552627922 12.0547873848
Traffic Signal Emergency_count_norm -0.245507616006 11.6247859043
Hydrant Knocked Down (No Water)_count_norm -0.0188154819595 62.1147206228
Salting_count_norm -0.211889202115 12.8073387023
Complaints against Fire or EMS_count_norm -0.0297781377442 48.3328334663
Fire Residential or Commercial_count_norm -0.121590929329 14.199993126
Emergency Air Conditioning_count_norm -0.00277414657882 360.471219378
Construction Site Task Force_count_norm -0.198092780211 10.2306114991
Sanitation / Dumpster Violation_count_norm -0.211769198962 11.0285721011
Parks and Rec Safety and Maintenance_count_norm -0.127598491104 17.7630905763
Boarding Room House_count_norm -0.0831736556451 17.9393860177
Maintenance Residential or Commercial_count_norm -0.497043093724 6.02190517024
Abandoned Bike_count_norm -0.0493902893526 28.7174300014
License Residential_count_norm -0.117051607177 18.5587929949
Daycare Residential or Commercial_count_norm -0.0250469536871 47.1145786846
Inlet Cleaning_count_norm -0.161434246313 14.2057243379
Rubbish/Recyclable Material Collection_count_norm -0.440766488436 7.36148421904
Vacant House or Commercial_count_norm -0.253785183768 10.7940410355
Building Dangerous_count_norm -0.178852957592 10.5246987018
Graffiti Removal_count_norm -0.350978707966 8.74543300621
Dead Animal in Street_count_norm -0.115358591037 14.0727422025
Tree Dangerous_count_norm -0.0516802550769 27.7155021965
Smoke Detector_count_norm -0.163017715867 13.8282352565

In [98]:
for column in philly_311_filtered_details.columns:
    if('_count' in column):
        normalize(column, philly_311_filtered_details)

In [100]:
for column in philly_311_filtered_details.columns:
    if('_norm_norm' in column):
        philly_311_filtered_details = philly_311_filtered_details.drop(column,axis=1)

In [101]:
print(philly_311_filtered_details.shape)
philly_311_filtered_details.head().T


(259881, 211)
Out[101]:
0 1 2 3 4
SEG_ID 100006.000000 100006.000000 100006.000000 100006.000000 100006.000000
Month 2.000000 3.000000 4.000000 5.000000 6.000000
SHAPE_LEN 735.818883 735.818883 735.818883 735.818883 735.818883
monthly_311_request_count 1.000000 3.000000 1.000000 1.000000 1.000000
monthly_311_request_count/len 0.001359 0.004077 0.001359 0.001359 0.001359
Police Complaint_count 0.000000 0.000000 0.000000 0.000000 0.000000
Police Complaint_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Shoveling_count 0.000000 0.000000 0.000000 0.000000 0.000000
Shoveling_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
No Heat (Residential)_count 0.000000 0.000000 0.000000 0.000000 0.000000
No Heat (Residential)_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Street Trees_count 0.000000 0.000000 0.000000 0.000000 0.000000
Street Trees_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Stop Sign Repair_count 0.000000 0.000000 0.000000 0.000000 0.000000
Stop Sign Repair_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Infestation Residential_count 0.000000 0.000000 0.000000 0.000000 0.000000
Infestation Residential_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Street Paving_count 0.000000 0.000000 0.000000 0.000000 0.000000
Street Paving_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Traffic (Other)_count 0.000000 0.000000 0.000000 0.000000 0.000000
Traffic (Other)_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Manhole Cover_count 0.000000 0.000000 0.000000 0.000000 0.000000
Manhole Cover_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Line Striping_count 0.000000 0.000000 0.000000 0.000000 0.000000
Line Striping_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Street Defect_count 0.000000 0.000000 0.000000 0.000000 0.000000
Street Defect_count/len 0.000000 0.000000 0.000000 0.000000 0.000000
Illegal Dumping_count 1.000000 3.000000 1.000000 0.000000 0.000000
Illegal Dumping_count/len 0.001359 0.004077 0.001359 0.000000 0.000000
Vacant Lot Clean-Up_count 0.000000 0.000000 0.000000 0.000000 1.000000
... ... ... ... ... ...
Sanitation / Dumpster Violation_count_norm 0.000000 0.000000 0.000000 0.000000 0.000000
Sanitation / Dumpster Violation_count/len_norm 0.000000 0.000000 0.000000 0.000000 0.000000
Parks and Rec Safety and Maintenance_count_norm 0.000000 0.000000 0.000000 0.000000 0.000000
Parks and Rec Safety and Maintenance_count/len_norm 0.000000 0.000000 0.000000 0.000000 0.000000
Boarding Room House_count_norm 0.000000 0.000000 0.000000 0.000000 0.000000
Boarding Room House_count/len_norm 0.000000 0.000000 0.000000 0.000000 0.000000
Maintenance Residential or Commercial_count_norm 0.000000 0.000000 0.000000 0.231378 0.000000
Maintenance Residential or Commercial_count/len_norm 0.000000 0.000000 0.000000 0.006506 0.000000
Abandoned Bike_count_norm 0.000000 0.000000 0.000000 0.000000 0.000000
Abandoned Bike_count/len_norm 0.000000 0.000000 0.000000 0.000000 0.000000
License Residential_count_norm 0.000000 0.000000 0.000000 0.000000 0.000000
License Residential_count/len_norm 0.000000 0.000000 0.000000 0.000000 0.000000
Daycare Residential or Commercial_count_norm 0.000000 0.000000 0.000000 0.000000 0.000000
Daycare Residential or Commercial_count/len_norm 0.000000 0.000000 0.000000 0.000000 0.000000
Inlet Cleaning_count_norm 0.000000 0.000000 0.000000 0.000000 0.000000
Inlet Cleaning_count/len_norm 0.000000 0.000000 0.000000 0.000000 0.000000
Rubbish/Recyclable Material Collection_count_norm 0.000000 0.000000 0.000000 0.000000 0.000000
Rubbish/Recyclable Material Collection_count/len_norm 0.000000 0.000000 0.000000 0.000000 0.000000
Vacant House or Commercial_count_norm 0.000000 0.000000 0.000000 0.000000 0.000000
Vacant House or Commercial_count/len_norm 0.000000 0.000000 0.000000 0.000000 0.000000
Building Dangerous_count_norm 0.000000 0.000000 0.000000 0.000000 0.000000
Building Dangerous_count/len_norm 0.000000 0.000000 0.000000 0.000000 0.000000
Graffiti Removal_count_norm 0.000000 0.000000 0.000000 0.000000 0.000000
Graffiti Removal_count/len_norm 0.000000 0.000000 0.000000 0.000000 0.000000
Dead Animal in Street_count_norm 0.000000 0.000000 0.000000 0.000000 0.000000
Dead Animal in Street_count/len_norm 0.000000 0.000000 0.000000 0.000000 0.000000
Tree Dangerous_count_norm 0.000000 0.000000 0.000000 0.000000 0.000000
Tree Dangerous_count/len_norm 0.000000 0.000000 0.000000 0.000000 0.000000
Smoke Detector_count_norm 0.000000 0.000000 0.000000 0.000000 0.000000
Smoke Detector_count/len_norm 0.000000 0.000000 0.000000 0.000000 0.000000

211 rows × 5 columns


In [124]:
for column in philly_311_filtered_details.columns:
    if('Sign' in column):
        print(column)


Stop Sign Repair_count
Stop Sign Repair_count/len
Traffic Signal Emergency_count
Traffic Signal Emergency_count/len
Stop Sign Repair_count_norm
Stop Sign Repair_count/len_norm
Traffic Signal Emergency_count_norm
Traffic Signal Emergency_count/len_norm

In [103]:
for column in philly_311_filtered_details.columns:
    if('_norm' in column):
        print(column, philly_311_filtered_details[column].mean(), philly_311_filtered_details[column].median())


monthly_311_request_count_norm 0.25684188870433294 0.2507087200189668
monthly_311_request_count/len_norm 0.022484177587232104 0.014774571629217578
Police Complaint_count_norm 0.0012111876686117507 0.0
Police Complaint_count/len_norm 0.0002653119882305808 0.0
Shoveling_count_norm 0.00277611466543034 0.0
Shoveling_count/len_norm 0.0006285981300833229 0.0
No Heat (Residential)_count_norm 0.003913989795570009 0.0
No Heat (Residential)_count/len_norm 0.0008658065386161155 0.0
Street Trees_count_norm 0.011973323353346095 0.0
Street Trees_count/len_norm 0.002306862547113694 0.0
Stop Sign Repair_count_norm 0.005706977469752205 0.0
Stop Sign Repair_count/len_norm 0.0003448937779402623 0.0
Infestation Residential_count_norm 0.0032635624507890057 0.0
Infestation Residential_count/len_norm 0.0003879514406374133 0.0
Street Paving_count_norm 0.0027923073954205196 0.0
Street Paving_count/len_norm 0.0004566454129026689 0.0
Traffic (Other)_count_norm 0.010028071705865779 0.0
Traffic (Other)_count/len_norm 0.00036361386180594534 0.0
Manhole Cover_count_norm 0.003299702268129981 0.0
Manhole Cover_count/len_norm 0.000674929988478828 0.0
Line Striping_count_norm 0.002365919416267349 0.0
Line Striping_count/len_norm 0.000625009686712782 0.0
Street Defect_count_norm 0.03736724270150047 0.0
Street Defect_count/len_norm 0.002598504423035878 0.0
Illegal Dumping_count_norm 0.039852891709118335 0.0
Illegal Dumping_count/len_norm 0.0049159242276973405 0.0
Vacant Lot Clean-Up_count_norm 0.016687065138417972 0.0
Vacant Lot Clean-Up_count/len_norm 0.0022115546501957836 0.0
Other Dangerous_count_norm 0.0011553280695098319 0.0
Other Dangerous_count/len_norm 0.00020287693218577679 0.0
Street Light Outage_count_norm 0.021968259083796255 0.0
Street Light Outage_count/len_norm 0.0012759383806447938 0.0
Other (Streets)_count_norm 0.011199002112561926 0.0
Other (Streets)_count/len_norm 0.001838343968752919 0.0
No Heat Residential_count_norm 1.8414499410994825e-05 0.0
No Heat Residential_count/len_norm 1.306448748990345e-05 0.0
Newsstand/Outdoor Café_count_norm 4.232706507978652e-05 0.0
Newsstand/Outdoor Café_count/len_norm 1.3140028267990387e-05 0.0
Dangerous Sidewalk_count_norm 0.006834493807090921 0.0
Dangerous Sidewalk_count/len_norm 0.001567262409360834 0.0
Zoning Residential_count_norm 0.004646731408643151 0.0
Zoning Residential_count/len_norm 0.0013107023216245089 0.0
Information Request_count_norm 0.006649829566153646 0.0
Information Request_count/len_norm 0.0012284935474887607 0.0
Miscellaneous_count_norm 0.006387697389557205 0.0
Miscellaneous_count/len_norm 0.0006748046187454325 0.0
Abandoned Vehicle_count_norm 0.034321720043375074 0.0
Abandoned Vehicle_count/len_norm 0.0023659370190422004 0.0
Hydrant Request_count_norm 0.0021385313320560306 0.0
Hydrant Request_count/len_norm 0.0007680103042972108 0.0
Building Construction_count_norm 0.0075065585042168636 0.0
Building Construction_count/len_norm 0.0005475486111158329 0.0
Complaint (Streets)_count_norm 0.007597723586424906 0.0
Complaint (Streets)_count/len_norm 0.0013680521111033214 0.0
Newsstand Outdoor Cafe_count_norm 0.0004411330811316418 0.0
Newsstand Outdoor Cafe_count/len_norm 0.00013217833098126526 0.0
Alley Light Outage_count_norm 0.005586364081679968 0.0
Alley Light Outage_count/len_norm 0.000499824653006062 0.0
Zoning Business_count_norm 0.007991160684494618 0.0
Zoning Business_count/len_norm 0.0008339077736951383 0.0
Traffic Signal Emergency_count_norm 0.016242617706945166 0.0
Traffic Signal Emergency_count/len_norm 0.0015016383744905048 0.0
Hydrant Knocked Down (No Water)_count_norm 0.00022477444513066617 0.0
Hydrant Knocked Down (No Water)_count/len_norm 9.309711527794624e-05 0.0
Salting_count_norm 0.0140577608182034 0.0
Salting_count/len_norm 0.0012901012872440407 0.0
Complaints against Fire or EMS_count_norm 0.0003572875151012169 0.0
Complaints against Fire or EMS_count/len_norm 9.51675990055193e-05 0.0
Fire Residential or Commercial_count_norm 0.00538917041736114 0.0
Fire Residential or Commercial_count/len_norm 0.0009771315600753218 0.0
Emergency Air Conditioning_count_norm 7.695830014506639e-06 0.0
Emergency Air Conditioning_count/len_norm 6.927363882248013e-06 0.0
Construction Site Task Force_count_norm 0.012831430137717462 0.0
Construction Site Task Force_count/len_norm 0.0016552599041455762 0.0
Sanitation / Dumpster Violation_count_norm 0.012564428799885135 0.0
Sanitation / Dumpster Violation_count/len_norm 0.002065244926036989 0.0
Parks and Rec Safety and Maintenance_count_norm 0.00416257608206416 0.0
Parks and Rec Safety and Maintenance_count/len_norm 0.0004437139475274355 0.0
Boarding Room House_count_norm 0.003080920209056384 0.0
Boarding Room House_count/len_norm 0.00039402577409075455 0.0
Maintenance Residential or Commercial_count_norm 0.05727559684652339 0.0
Maintenance Residential or Commercial_count/len_norm 0.004729767258142824 0.0
Abandoned Bike_count_norm 0.001116618670007509 0.0
Abandoned Bike_count/len_norm 0.0001570415785726755 0.0
License Residential_count_norm 0.0034899413694246084 0.0
License Residential_count/len_norm 0.0008561963191240621 0.0
Daycare Residential or Commercial_count_norm 0.0003985658448482354 0.0
Daycare Residential or Commercial_count/len_norm 0.00017527762233645957 0.0
Inlet Cleaning_count_norm 0.006780969741606498 0.0
Inlet Cleaning_count/len_norm 0.0008270412624120675 0.0
Rubbish/Recyclable Material Collection_count_norm 0.042102340172558754 0.0
Rubbish/Recyclable Material Collection_count/len_norm 0.003978565972688632 0.0
Vacant House or Commercial_count_norm 0.014847367523203809 0.0
Vacant House or Commercial_count/len_norm 0.0031660933632440195 0.0
Building Dangerous_count_norm 0.011306969892617197 0.0
Building Dangerous_count/len_norm 0.0014959916813386779 0.0
Graffiti Removal_count_norm 0.029587598408089555 0.0
Graffiti Removal_count/len_norm 0.0018788699070412353 0.0
Dead Animal in Street_count_norm 0.005473259157405738 0.0
Dead Animal in Street_count/len_norm 0.0006208096593470852 0.0
Tree Dangerous_count_norm 0.0011840009467091763 0.0
Tree Dangerous_count/len_norm 0.00026532453745931576 0.0
Smoke Detector_count_norm 0.007247189707624379 0.0
Smoke Detector_count/len_norm 0.0016377102606168752 0.0

In [62]:
norm_count = 0
for column in philly_311_details.columns:
    if('_norm' in column):
        print(column)
        norm_count +=1
print(norm_count)


philly_311_count_norm
philly_311_count/len_norm
Building Construction_count_norm
Building Construction_count/len_norm
Line Striping_count_norm
Line Striping_count/len_norm
Traffic (Other)_count_norm
Traffic (Other)_count/len_norm
Zoning Business_count_norm
Zoning Business_count/len_norm
Stop Sign Repair_count_norm
Stop Sign Repair_count/len_norm
Inlet Cleaning_count_norm
Inlet Cleaning_count/len_norm
Parks and Rec Safety and Maintenance_count_norm
Parks and Rec Safety and Maintenance_count/len_norm
Abandoned Bike_count_norm
Abandoned Bike_count/len_norm
Newsstand/Outdoor Café_count_norm
Newsstand/Outdoor Café_count/len_norm
Dangerous Sidewalk_count_norm
Dangerous Sidewalk_count/len_norm
Zoning Residential_count_norm
Zoning Residential_count/len_norm
Miscellaneous_count_norm
Miscellaneous_count/len_norm
Daycare Residential or Commercial_count_norm
Daycare Residential or Commercial_count/len_norm
Street Trees_count_norm
Street Trees_count/len_norm
Hydrant Request_count_norm
Hydrant Request_count/len_norm
Graffiti Removal_count_norm
Graffiti Removal_count/len_norm
Sanitation / Dumpster Violation_count_norm
Sanitation / Dumpster Violation_count/len_norm
Manhole Cover_count_norm
Manhole Cover_count/len_norm
Maintenance Residential or Commercial_count_norm
Maintenance Residential or Commercial_count/len_norm
Illegal Dumping_count_norm
Illegal Dumping_count/len_norm
Building Dangerous_count_norm
Building Dangerous_count/len_norm
Police Complaint_count_norm
Police Complaint_count/len_norm
Fire Residential or Commercial_count_norm
Fire Residential or Commercial_count/len_norm
Abandoned Vehicle_count_norm
Abandoned Vehicle_count/len_norm
Complaint (Streets)_count_norm
Complaint (Streets)_count/len_norm
Boarding Room House_count_norm
Boarding Room House_count/len_norm
Smoke Detector_count_norm
Smoke Detector_count/len_norm
Other Dangerous_count_norm
Other Dangerous_count/len_norm
Tree Dangerous_count_norm
Tree Dangerous_count/len_norm
Vacant Lot Clean-Up_count_norm
Vacant Lot Clean-Up_count/len_norm
Information Request_count_norm
Information Request_count/len_norm
Street Defect_count_norm
Street Defect_count/len_norm
Rubbish/Recyclable Material Collection_count_norm
Rubbish/Recyclable Material Collection_count/len_norm
License Residential_count_norm
License Residential_count/len_norm
Newsstand Outdoor Cafe_count_norm
Newsstand Outdoor Cafe_count/len_norm
Complaints against Fire or EMS_count_norm
Complaints against Fire or EMS_count/len_norm
No Heat Residential_count_norm
No Heat Residential_count/len_norm
Traffic Signal Emergency_count_norm
Traffic Signal Emergency_count/len_norm
Emergency Air Conditioning_count_norm
Emergency Air Conditioning_count/len_norm
Other (Streets)_count_norm
Other (Streets)_count/len_norm
Dead Animal in Street_count_norm
Dead Animal in Street_count/len_norm
Construction Site Task Force_count_norm
Construction Site Task Force_count/len_norm
Alley Light Outage_count_norm
Alley Light Outage_count/len_norm
Shoveling_count_norm
Shoveling_count/len_norm
Hydrant Knocked Down (No Water)_count_norm
Hydrant Knocked Down (No Water)_count/len_norm
Street Paving_count_norm
Street Paving_count/len_norm
Vacant House or Commercial_count_norm
Vacant House or Commercial_count/len_norm
Salting_count_norm
Salting_count/len_norm
Street Light Outage_count_norm
Street Light Outage_count/len_norm
Infestation Residential_count_norm
Infestation Residential_count/len_norm
No Heat (Residential)_count_norm
No Heat (Residential)_count/len_norm
104

In [63]:
def gradient_color(percent):
    min_color = np.array([255,255,255])
    max_color = np.array([178,34,34])
    return '#%02x%02x%02x' % tuple([int(k) for k in min_color+(max_color-min_color)*percent])

def write_var(col_name, var_name, df,f):
    df['color']=df[col_name].apply(gradient_color)
    f.write('var %s = %s;\n' % (var_name, df.to_json()))

with open('../visualization/Volumes of Philly 311 Requests.js','w+') as f:
    for column in philly_311_details.columns:
        if('_norm' in column):
            write_var(column, column +'_var', philly_311_details,f)

In [104]:
print(philly_311_filtered_details.shape)
print(philly_311_filtered_details.drop_duplicates().shape)


(259881, 211)
(259881, 211)

In [105]:
philly_311_filtered_details.to_csv('../philly/Philly_311_Details_2015-16.csv')

In [3]:
philly_311_details = pd.read_csv('../philly/Philly_311_Details.csv')
philly_311_details.shape


Out[3]:
(38930, 211)

In [4]:
philly_311_details.drop_duplicates().shape


Out[4]:
(38930, 211)

In [2]:
street_gpd = gpd.read_file('../philly/GeoJSON_data/Street_Centerline_qgis3.geojson')
print(street_gpd.shape)
street_gpd.head().T


(41022, 31)
Out[2]:
0 1 2 3 4
CLASS 3 3 3 5 4
FNODE_ 2 2 1 6 5
LENGTH 449.863 540.083 446.104 447.261 148.216
LPOLY_ 0 0 0 0 0
L_F_ADD 1500 400 1600 1600 350
L_HUNDRED 1500 400 1600 1600 300
L_T_ADD 1598 498 1698 1698 398
MULTI_REP 0 0 0 0 0
NEWSEGDATE None None None None None
ONEWAY FT TF FT TF TF
PRE_DIR None N None None N
RESPONSIBL FAM FAM FAM None None
RPOLY_ 0 0 0 0 0
R_F_ADD 1501 401 1601 1601 351
R_HUNDRED 1500 400 1600 1600 300
R_T_ADD 1599 499 1699 1699 399
SEG_ID 420708 422065 420702 420732 420718
SHAPE_LEN 449.863 540.083 446.104 447.261 148.216
STCL2_ 1 2 3 4 5
STCL2_ID 85205 86540 85199 85229 85215
STNAME CALLOWHILL ST N 15TH ST CALLOWHILL ST CARLTON ST N 17TH ST
STREETLABE CALLOWHILL ST N 15TH ST CALLOWHILL ST CARLTON ST N 17TH ST
ST_CODE 20880 88070 20880 21440 88110
ST_NAME CALLOWHILL 15TH CALLOWHILL CARLTON 17TH
ST_TYPE ST ST ST ST ST
SUF_DIR None None None None None
TNODE_ 1 3 4 5 4
UPDATE_ 1997-02-10 1997-02-10 1997-02-10 1997-02-10 1997-06-27
ZIP_LEFT 19130 19130 19130 19103 19103
ZIP_RIGHT 19130 19130 19130 19103 19103
geometry LINESTRING (-75.16371257852462 39.959816984389... LINESTRING (-75.16371257852462 39.959816984389... LINESTRING (-75.16529687686355 39.960013687935... LINESTRING (-75.16537539004391 39.959601782423... LINESTRING (-75.16694872703492 39.959805718080...

In [110]:
collision_gpd = gpd.read_file('../philly/GeoJSON_data/GIS_HEALTH.Collisions_crash_2011_2014PUBV.geojson')
print(collision_gpd.shape)


(43488, 54)

In [4]:
collision_gpd.head().T


Out[4]:
0 1 2 3 4
AUTOMOBILE_COUNT 0 1 1 1 0
BELTED_DEATH_COUNT 0 0 0 0 0
BELTED_MAJ_INJ_COUNT 0 0 0 0 0
BICYCLE_COUNT 0 1 1 0 0
BICYCLE_DEATH_COUNT 0 0 0 0 0
BICYCLE_MAJ_INJ_COUNT 0 0 0 0 0
BUS_COUNT 0 0 0 0 0
COLLISION_TYPE 1 4 4 8 8
COMM_VEH_COUNT 1 0 0 0 0
CRASH_MONTH 8 8 8 8 8
CRASH_YEAR 2014 2014 2014 2014 2014
CRN 2014087880 2014087758 2014096425 2014088141 2014091439
DAY_OF_WEEK 6 6 7 7 7
FATAL_COUNT 0 0 0 0 0
HEAVY_TRUCK_COUNT 1 0 0 0 0
HOUR_OF_DAY 15 14 19 19 19
ILLUMINATION 1 1 1 1 1
INJURY_COUNT 5 1 1 1 1
INTERSECTION 0 0 1 0 0
INTERSECT_TYPE 0 0 1 0 0
LATITUDE 40.085 39.9904 40.0293 39.9535 40.0162
LENGTH 439.229 778.344 170.622 194.532 396.315
LOCATION_TYPE 0 0 0 0 0
LONGITUDE -75.038 -75.1028 -75.0548 -75.2405 -75.0906
MAJ_INJ_COUNT 0 0 0 0 0
MAX_SEVERITY_LEVEL 4 4 3 4 8
MCYCLE_DEATH_COUNT 0 0 0 0 0
MCYCLE_MAJ_INJ_COUNT 0 0 0 0 0
MIN_INJ_COUNT 5 1 0 1 0
MOD_INJ_COUNT 0 0 1 0 0
MOTORCYCLE_COUNT 0 0 0 0 0
OBJECTID 4001 4002 4003 4004 4005
PED_COUNT 0 0 0 1 1
PED_DEATH_COUNT 0 0 0 0 0
PED_MAJ_INJ_COUNT 0 0 0 0 0
PERSON_COUNT 7 3 2 2 2
RELATION_TO_ROAD 1 1 1 1 1
ROAD_CONDITION 1 0 0 0 0
SCH_BUS_IND N N N N N
SCH_ZONE_IND N N U U N
SEG_ID 960283 541001 760684 300613 640755
SMALL_TRUCK_COUNT 0 0 0 0 0
SUV_COUNT 2 0 0 0 1
TCD_TYPE 0 0 0 0 0
TIME_OF_DAY 1500 1450 1919 1910 1905
UNBELTED_OCC_COUNT 0 0 0 0 0
UNB_DEATH_COUNT 0 0 0 0 0
UNB_MAJ_INJ_COUNT 0 0 0 0 0
UNK_INJ_DEG_COUNT 0 0 0 0 1
UNK_INJ_PER_COUNT 0 0 0 0 0
VAN_COUNT 0 0 0 0 0
VEHICLE_COUNT 3 1 1 1 1
WEATHER 2 1 1 1 1
geometry POINT (-75.03795178519658 40.08497541304385) POINT (-75.10275539598592 39.990404578965) POINT (-75.05481628915624 40.02929723040301) POINT (-75.24050521636346 39.95347871767484) POINT (-75.09056881169107 40.01618353851767)

In [47]:
collision_gpd.groupby(['CRASH_YEAR', 'CRASH_MONTH']).sum()


Out[47]:
AUTOMOBILE_COUNT BELTED_DEATH_COUNT BELTED_MAJ_INJ_COUNT BICYCLE_COUNT BICYCLE_DEATH_COUNT BICYCLE_MAJ_INJ_COUNT BUS_COUNT COLLISION_TYPE COMM_VEH_COUNT CRN ... SUV_COUNT TCD_TYPE UNBELTED_OCC_COUNT UNB_DEATH_COUNT UNB_MAJ_INJ_COUNT UNK_INJ_DEG_COUNT UNK_INJ_PER_COUNT VAN_COUNT VEHICLE_COUNT WEATHER
CRASH_YEAR CRASH_MONTH
2011 1 790 0 2 3 0 0 22 3076 44 1367495703371 ... 169 749 103 2 1 264 102 48 1282 1427
10 1149 1 2 53 0 1 25 4394 46 1999071846869 ... 223 1156 217 3 8 399 235 87 1816 1456
11 1036 0 1 37 1 0 24 3897 45 1777940339108 ... 211 1183 277 0 2 371 217 94 1654 1201
12 1110 1 2 27 0 0 27 4389 63 1931386765695 ... 206 1251 152 4 2 357 213 80 1767 1341
2 855 0 6 10 0 2 23 3167 45 1449956436090 ... 177 750 125 4 2 288 122 59 1384 1172
3 985 0 1 20 0 2 20 3642 53 1695306387606 ... 236 995 129 0 0 329 131 74 1606 1032
4 1105 0 0 31 0 0 20 4168 40 1922565157714 ... 255 1075 169 1 0 405 192 89 1814 1270
5 1187 0 2 51 0 2 27 4460 54 2029162368316 ... 214 1214 197 0 7 423 172 101 1902 1285
6 1009 0 0 84 0 4 34 3956 56 1884380358546 ... 242 1076 199 0 2 352 168 98 1741 1121
7 1037 0 5 66 0 0 33 3675 65 1771768116370 ... 201 1054 256 1 8 372 197 80 1673 970
8 1035 0 6 74 1 2 20 4094 51 1878362850573 ... 221 1092 264 0 3 364 201 83 1709 1251
9 993 0 2 52 0 3 26 3813 53 1747655310845 ... 210 1048 250 0 1 334 212 75 1634 1206
2012 1 1020 4 7 15 0 0 18 3764 54 1760515032423 ... 211 1018 255 3 7 315 242 87 1660 1367
10 1163 3 2 42 1 0 24 4212 56 1998044231386 ... 240 1158 135 0 4 360 162 80 1903 1395
11 961 1 1 20 0 0 32 3507 45 1698381279543 ... 212 957 136 1 8 301 151 61 1598 1037
12 1032 1 4 27 0 0 23 4163 51 1839777951991 ... 227 1055 119 2 2 339 183 70 1696 1535
2 936 0 3 22 0 0 19 3485 35 1649864792851 ... 220 971 151 0 1 300 93 77 1562 1168
3 1062 1 4 54 1 0 24 4216 66 1951678281469 ... 253 1152 136 2 3 382 128 88 1868 1216
4 1059 0 5 42 0 1 12 4219 47 1977852708925 ... 268 1154 164 1 6 343 152 105 1894 1227
5 1170 2 4 56 1 1 23 4466 50 2120714086555 ... 280 1115 172 1 0 394 158 97 2012 1382
6 1037 0 4 66 0 0 22 4262 49 1991957502864 ... 299 1069 188 0 1 390 129 119 1864 1205
7 986 0 4 70 0 3 15 3646 37 1798819296841 ... 227 1027 137 0 2 320 130 108 1704 1034
8 1024 0 5 30 0 1 22 4111 54 1869239995372 ... 200 1029 152 4 2 322 135 84 1655 1147
9 1081 0 1 44 1 0 22 3825 51 1871259469188 ... 209 1060 140 1 4 361 153 72 1785 1213
2013 1 899 0 3 10 0 1 18 3568 39 1616453105334 ... 218 891 126 0 6 284 129 70 1499 1257
10 991 0 2 46 0 3 37 3694 54 1775571834452 ... 260 904 96 0 0 332 138 76 1727 1136
11 1001 1 3 32 0 1 23 4048 55 1880472971569 ... 289 1045 124 0 2 352 99 90 1809 1223
12 886 0 1 16 0 0 16 3734 54 1669514817772 ... 247 969 115 1 0 271 119 69 1578 1554
2 854 0 1 8 0 0 19 3383 43 1531915607237 ... 177 896 108 0 3 272 119 71 1454 1289
3 1038 0 3 15 0 0 18 4130 50 1890247667462 ... 287 1049 151 0 1 360 98 80 1840 1256
4 1070 0 2 48 0 1 17 4347 45 1984868500024 ... 252 1088 152 0 2 370 125 92 1870 1172
5 1138 1 5 59 0 3 19 4627 51 2117743637438 ... 268 1139 134 1 3 354 119 127 1955 1361
6 1152 0 1 70 0 2 18 4421 50 2061386235225 ... 253 1153 178 1 0 346 106 101 1954 1387
7 1048 0 4 71 0 1 23 3755 54 1809760878123 ... 229 997 126 1 1 351 122 76 1726 1088
8 932 0 3 82 0 2 16 3889 40 1805745319046 ... 231 991 153 0 1 365 120 88 1663 1058
9 1007 2 7 81 0 1 27 4413 60 1994991006644 ... 279 1150 125 1 2 385 134 94 1814 1123
2014 1 775 0 2 7 0 0 20 3451 63 1560869568345 ... 257 832 88 0 1 230 87 84 1493 1618
10 999 0 7 65 0 2 21 4497 70 2064492934243 ... 384 1231 157 2 1 379 138 126 1961 1310
11 1009 0 12 33 0 0 11 4201 52 1929564568815 ... 326 1087 143 3 0 360 109 122 1797 1298
12 970 0 4 36 0 1 19 4171 44 1887647014226 ... 362 994 109 2 0 310 138 100 1783 1419
2 596 2 0 9 0 0 27 2639 65 1228561495801 ... 176 640 74 0 2 249 76 67 1145 1220
3 881 0 4 13 0 0 19 3777 50 1740134421473 ... 278 905 133 1 4 307 99 105 1719 1247
4 944 0 3 44 1 2 24 4008 59 1836815049715 ... 300 965 156 1 4 343 125 112 1742 1127
5 936 0 2 48 1 2 16 3928 49 1796545967202 ... 311 924 149 1 4 362 111 108 1705 1137
6 934 0 3 81 0 2 20 3866 41 1836844430272 ... 348 1047 119 0 2 351 100 111 1777 1107
7 907 0 4 68 0 1 9 3777 46 1790529422735 ... 324 1034 112 1 3 324 136 103 1717 1052
8 936 1 5 74 0 2 11 3835 50 1836857351951 ... 305 1072 109 0 1 307 162 101 1702 1104
9 990 2 5 73 1 1 15 4048 49 1895289839787 ... 328 1115 127 2 3 371 155 104 1773 1098

48 rows × 46 columns


In [94]:
collision_gpd['CRASH_YEAR'].isin(['2013', '2014'])


Out[94]:
0         True
1         True
2         True
3         True
4         True
5         True
6         True
7         True
8         True
9         True
10        True
11        True
12        True
13        True
14        True
15        True
16        True
17        True
18        True
19        True
20        True
21        True
22        True
23        True
24        True
25        True
26        True
27        True
28        True
29        True
         ...  
43458    False
43459    False
43460    False
43461    False
43462    False
43463    False
43464    False
43465    False
43466    False
43467    False
43468    False
43469    False
43470    False
43471    False
43472    False
43473    False
43474    False
43475    False
43476    False
43477    False
43478    False
43479    False
43480    False
43481    False
43482    False
43483    False
43484    False
43485    False
43486    False
43487    False
Name: CRASH_YEAR, dtype: bool

In [95]:
collision_gpd[collision_gpd['CRASH_YEAR'].isin(['2013', '2014'])].groupby(collision_gpd.CRASH_YEAR).size()


Out[95]:
CRASH_YEAR
2013    10997
2014    10627
dtype: int64

In [111]:
collision_filtered_gpd = collision_gpd[collision_gpd['CRASH_YEAR'].isin(['2013', '2014'])]
print(collision_filtered_gpd.shape)
collision_filtered_gpd.head().T


(21624, 54)
Out[111]:
0 1 2 3 4
AUTOMOBILE_COUNT 0 1 1 1 0
BELTED_DEATH_COUNT 0 0 0 0 0
BELTED_MAJ_INJ_COUNT 0 0 0 0 0
BICYCLE_COUNT 0 1 1 0 0
BICYCLE_DEATH_COUNT 0 0 0 0 0
BICYCLE_MAJ_INJ_COUNT 0 0 0 0 0
BUS_COUNT 0 0 0 0 0
COLLISION_TYPE 1 4 4 8 8
COMM_VEH_COUNT 1 0 0 0 0
CRASH_MONTH 8 8 8 8 8
CRASH_YEAR 2014 2014 2014 2014 2014
CRN 2014087880 2014087758 2014096425 2014088141 2014091439
DAY_OF_WEEK 6 6 7 7 7
FATAL_COUNT 0 0 0 0 0
HEAVY_TRUCK_COUNT 1 0 0 0 0
HOUR_OF_DAY 15 14 19 19 19
ILLUMINATION 1 1 1 1 1
INJURY_COUNT 5 1 1 1 1
INTERSECTION 0 0 1 0 0
INTERSECT_TYPE 0 0 1 0 0
LATITUDE 40.085 39.9904 40.0293 39.9535 40.0162
LENGTH 439.229 778.344 170.622 194.532 396.315
LOCATION_TYPE 0 0 0 0 0
LONGITUDE -75.038 -75.1028 -75.0548 -75.2405 -75.0906
MAJ_INJ_COUNT 0 0 0 0 0
MAX_SEVERITY_LEVEL 4 4 3 4 8
MCYCLE_DEATH_COUNT 0 0 0 0 0
MCYCLE_MAJ_INJ_COUNT 0 0 0 0 0
MIN_INJ_COUNT 5 1 0 1 0
MOD_INJ_COUNT 0 0 1 0 0
MOTORCYCLE_COUNT 0 0 0 0 0
OBJECTID 4001 4002 4003 4004 4005
PED_COUNT 0 0 0 1 1
PED_DEATH_COUNT 0 0 0 0 0
PED_MAJ_INJ_COUNT 0 0 0 0 0
PERSON_COUNT 7 3 2 2 2
RELATION_TO_ROAD 1 1 1 1 1
ROAD_CONDITION 1 0 0 0 0
SCH_BUS_IND N N N N N
SCH_ZONE_IND N N U U N
SEG_ID 960283 541001 760684 300613 640755
SMALL_TRUCK_COUNT 0 0 0 0 0
SUV_COUNT 2 0 0 0 1
TCD_TYPE 0 0 0 0 0
TIME_OF_DAY 1500 1450 1919 1910 1905
UNBELTED_OCC_COUNT 0 0 0 0 0
UNB_DEATH_COUNT 0 0 0 0 0
UNB_MAJ_INJ_COUNT 0 0 0 0 0
UNK_INJ_DEG_COUNT 0 0 0 0 1
UNK_INJ_PER_COUNT 0 0 0 0 0
VAN_COUNT 0 0 0 0 0
VEHICLE_COUNT 3 1 1 1 1
WEATHER 2 1 1 1 1
geometry POINT (-75.03795178519658 40.08497541304385) POINT (-75.10275539598592 39.990404578965) POINT (-75.05481628915624 40.02929723040301) POINT (-75.24050521636346 39.95347871767484) POINT (-75.09056881169107 40.01618353851767)

In [14]:
collision_gpd[collision_gpd.SEG_ID==960283][['AUTOMOBILE_COUNT', 'CRASH_MONTH', 'CRASH_YEAR']]


Out[14]:
AUTOMOBILE_COUNT CRASH_MONTH CRASH_YEAR
0 0 8 2014
4447 1 10 2014
5149 1 1 2014
12654 2 10 2013
42203 2 8 2011

In [29]:
collision_gpd[['SEG_ID']].drop_duplicates().shape


Out[29]:
(13206, 1)

In [6]:
collision_filtered_gpd[['SEG_ID', 'CRASH_YEAR']].drop_duplicates().shape


Out[6]:
(11697, 2)

In [6]:
collision_col_count = collision_gpd.groupby('SEG_ID')['AUTOMOBILE_COUNT'].sum().reset_index()
print(collision_col_count.shape)
collision_col_count.head().T


(13205, 2)
Out[6]:
0 1 2 3 4
SEG_ID 100002.0 100003.0 100006.0 100007.0 100008.0
AUTOMOBILE_COUNT 1.0 20.0 2.0 0.0 1.0

In [7]:
collision_gpd.groupby(['SEG_ID', 'CRASH_YEAR'])['AUTOMOBILE_COUNT'].sum().reset_index()


Out[7]:
SEG_ID CRASH_YEAR AUTOMOBILE_COUNT
0 100002.0 2013 1
1 100003.0 2012 7
2 100003.0 2013 6
3 100003.0 2014 7
4 100006.0 2012 2
5 100007.0 2013 0
6 100008.0 2014 1
7 100023.0 2011 0
8 100024.0 2013 1
9 100040.0 2011 10
10 100040.0 2012 15
11 100040.0 2013 4
12 100040.0 2014 7
13 100044.0 2011 3
14 100045.0 2011 1
15 100045.0 2012 4
16 100045.0 2013 2
17 100045.0 2014 1
18 100047.0 2014 1
19 100048.0 2013 0
20 100057.0 2011 0
21 100063.0 2011 1
22 100066.0 2011 1
23 100066.0 2012 0
24 100066.0 2013 3
25 100066.0 2014 1
26 100070.0 2012 1
27 100070.0 2013 6
28 100075.0 2013 2
29 100098.0 2013 1
... ... ... ...
23463 1160159.0 2011 2
23464 1160159.0 2013 1
23465 1160159.0 2014 1
23466 1160179.0 2011 2
23467 1160179.0 2012 4
23468 1160179.0 2013 0
23469 1160179.0 2014 1
23470 1160180.0 2012 1
23471 1160180.0 2014 1
23472 1160196.0 2011 1
23473 1160196.0 2014 1
23474 1160212.0 2011 2
23475 1160212.0 2012 7
23476 1160212.0 2013 1
23477 1160214.0 2012 4
23478 1160214.0 2013 4
23479 1160214.0 2014 1
23480 1160215.0 2013 1
23481 1160215.0 2014 1
23482 1180024.0 2012 1
23483 1180024.0 2013 2
23484 1180041.0 2011 0
23485 1180041.0 2014 1
23486 1180042.0 2012 0
23487 1180058.0 2011 0
23488 1180058.0 2012 0
23489 1180058.0 2013 1
23490 1180073.0 2014 2
23491 1180077.0 2012 1
23492 1180092.0 2014 1

23493 rows × 3 columns


In [7]:
collision_col_count[collision_col_count.SEG_ID==960283]


Out[7]:
SEG_ID AUTOMOBILE_COUNT
12466 960283.0 6

In [30]:
collision_agg_count = collision_gpd[['SEG_ID', 'LENGTH']].drop_duplicates()
for column in collision_gpd.columns:
    if('_COUNT' in column):
        collision_col_count = collision_gpd.groupby('SEG_ID')[column].sum().reset_index()
        collision_agg_count = collision_agg_count.merge(collision_col_count, left_on = 'SEG_ID', right_on = 'SEG_ID', right_index=False, how='left')
print(collision_agg_count.shape)
collision_agg_count.head().T


(13206, 32)
Out[30]:
0 1 2 3 4
SEG_ID 960283.000000 541001.000000 760684.000000 300613.000000 640755.000000
LENGTH 439.229039 778.344098 170.621503 194.531579 396.314637
AUTOMOBILE_COUNT 6.000000 9.000000 3.000000 1.000000 0.000000
BELTED_DEATH_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
BELTED_MAJ_INJ_COUNT 0.000000 2.000000 0.000000 0.000000 0.000000
BICYCLE_COUNT 0.000000 1.000000 1.000000 0.000000 0.000000
BICYCLE_DEATH_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
BICYCLE_MAJ_INJ_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
BUS_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
COMM_VEH_COUNT 1.000000 0.000000 0.000000 0.000000 0.000000
FATAL_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
HEAVY_TRUCK_COUNT 1.000000 0.000000 0.000000 0.000000 0.000000
INJURY_COUNT 9.000000 8.000000 2.000000 1.000000 1.000000
MAJ_INJ_COUNT 0.000000 2.000000 0.000000 0.000000 0.000000
MCYCLE_DEATH_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
MCYCLE_MAJ_INJ_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
MIN_INJ_COUNT 9.000000 4.000000 0.000000 1.000000 0.000000
MOD_INJ_COUNT 0.000000 2.000000 1.000000 0.000000 0.000000
MOTORCYCLE_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
PED_COUNT 0.000000 1.000000 0.000000 1.000000 1.000000
PED_DEATH_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
PED_MAJ_INJ_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
PERSON_COUNT 17.000000 16.000000 3.000000 2.000000 2.000000
SMALL_TRUCK_COUNT 0.000000 1.000000 0.000000 0.000000 0.000000
SUV_COUNT 4.000000 0.000000 0.000000 0.000000 1.000000
UNBELTED_OCC_COUNT 0.000000 1.000000 0.000000 0.000000 0.000000
UNB_DEATH_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
UNB_MAJ_INJ_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
UNK_INJ_DEG_COUNT 0.000000 0.000000 1.000000 0.000000 1.000000
UNK_INJ_PER_COUNT 0.000000 1.000000 0.000000 0.000000 0.000000
VAN_COUNT 0.000000 1.000000 0.000000 0.000000 0.000000
VEHICLE_COUNT 11.000000 11.000000 3.000000 1.000000 1.000000

In [112]:
collision_filtered_agg_count = collision_filtered_gpd[['SEG_ID', 'LENGTH', 'CRASH_MONTH']].drop_duplicates()
for column in collision_filtered_gpd.columns:
    if('_COUNT' in column):
        collision_col_count = collision_filtered_gpd.groupby(['SEG_ID', 'CRASH_MONTH'])[column].sum().reset_index()
        collision_filtered_agg_count = collision_filtered_agg_count.merge(collision_col_count, left_on = ['SEG_ID', 'CRASH_MONTH'], right_on = ['SEG_ID', 'CRASH_MONTH'], right_index=False, how='left')
print(collision_filtered_agg_count.shape)
collision_filtered_agg_count.head().T


(17599, 33)
Out[112]:
0 1 2 3 4
SEG_ID 960283 541001 760684 300613 640755
LENGTH 439.229 778.344 170.622 194.532 396.315
CRASH_MONTH 8 8 8 8 8
AUTOMOBILE_COUNT 0 1 1 1 0
BELTED_DEATH_COUNT 0 0 0 0 0
BELTED_MAJ_INJ_COUNT 0 0 0 0 0
BICYCLE_COUNT 0 1 1 0 0
BICYCLE_DEATH_COUNT 0 0 0 0 0
BICYCLE_MAJ_INJ_COUNT 0 0 0 0 0
BUS_COUNT 0 0 0 0 0
COMM_VEH_COUNT 1 0 0 0 0
FATAL_COUNT 0 0 0 0 0
HEAVY_TRUCK_COUNT 1 0 0 0 0
INJURY_COUNT 5 1 1 1 1
MAJ_INJ_COUNT 0 0 0 0 0
MCYCLE_DEATH_COUNT 0 0 0 0 0
MCYCLE_MAJ_INJ_COUNT 0 0 0 0 0
MIN_INJ_COUNT 5 1 0 1 0
MOD_INJ_COUNT 0 0 1 0 0
MOTORCYCLE_COUNT 0 0 0 0 0
PED_COUNT 0 0 0 1 1
PED_DEATH_COUNT 0 0 0 0 0
PED_MAJ_INJ_COUNT 0 0 0 0 0
PERSON_COUNT 7 3 2 2 2
SMALL_TRUCK_COUNT 0 0 0 0 0
SUV_COUNT 2 0 0 0 1
UNBELTED_OCC_COUNT 0 0 0 0 0
UNB_DEATH_COUNT 0 0 0 0 0
UNB_MAJ_INJ_COUNT 0 0 0 0 0
UNK_INJ_DEG_COUNT 0 0 0 0 1
UNK_INJ_PER_COUNT 0 0 0 0 0
VAN_COUNT 0 0 0 0 0
VEHICLE_COUNT 3 1 1 1 1

In [113]:
collision_filtered_agg_count[collision_filtered_agg_count['SEG_ID']==960283]


Out[113]:
SEG_ID LENGTH CRASH_MONTH AUTOMOBILE_COUNT BELTED_DEATH_COUNT BELTED_MAJ_INJ_COUNT BICYCLE_COUNT BICYCLE_DEATH_COUNT BICYCLE_MAJ_INJ_COUNT BUS_COUNT ... PERSON_COUNT SMALL_TRUCK_COUNT SUV_COUNT UNBELTED_OCC_COUNT UNB_DEATH_COUNT UNB_MAJ_INJ_COUNT UNK_INJ_DEG_COUNT UNK_INJ_PER_COUNT VAN_COUNT VEHICLE_COUNT
0 960283.0 439.229039 8 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 7.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0
4125 960283.0 439.229039 10 3.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 5.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 5.0
4720 960283.0 439.229039 1 1.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0

3 rows × 33 columns


In [13]:
collision_agg_count.AUTOMOBILE_COUNT + collision_agg_count.BICYCLE_COUNT


Out[13]:
0          6.0
1         10.0
2          4.0
3          1.0
4          0.0
5         10.0
6         11.0
7          4.0
8         14.0
9          3.0
10        25.0
11         2.0
12         3.0
13         1.0
14         1.0
15         0.0
16        17.0
17        93.0
18         4.0
19         0.0
20        30.0
21        11.0
22        27.0
23         8.0
24       192.0
25         3.0
26         2.0
27         3.0
28        88.0
29         6.0
         ...  
43458      2.0
43459      5.0
43460      2.0
43461      2.0
43462     62.0
43463      0.0
43464      1.0
43465      1.0
43466      7.0
43467      2.0
43468      2.0
43469      7.0
43470      3.0
43471      9.0
43472      3.0
43473     11.0
43474      4.0
43475     21.0
43476      4.0
43477      9.0
43478      1.0
43479      8.0
43480      4.0
43481      1.0
43482     16.0
43483      6.0
43484      2.0
43485      1.0
43486      4.0
43487      6.0
dtype: float64

In [15]:
range(2, collision_agg_count.shape[0])


Out[15]:
range(2, 43488)

In [115]:
col_list= list(collision_filtered_agg_count)
col_list.remove('SEG_ID')
col_list.remove('LENGTH')
col_list.remove('CRASH_MONTH')
col_list


Out[115]:
['AUTOMOBILE_COUNT',
 'BELTED_DEATH_COUNT',
 'BELTED_MAJ_INJ_COUNT',
 'BICYCLE_COUNT',
 'BICYCLE_DEATH_COUNT',
 'BICYCLE_MAJ_INJ_COUNT',
 'BUS_COUNT',
 'COMM_VEH_COUNT',
 'FATAL_COUNT',
 'HEAVY_TRUCK_COUNT',
 'INJURY_COUNT',
 'MAJ_INJ_COUNT',
 'MCYCLE_DEATH_COUNT',
 'MCYCLE_MAJ_INJ_COUNT',
 'MIN_INJ_COUNT',
 'MOD_INJ_COUNT',
 'MOTORCYCLE_COUNT',
 'PED_COUNT',
 'PED_DEATH_COUNT',
 'PED_MAJ_INJ_COUNT',
 'PERSON_COUNT',
 'SMALL_TRUCK_COUNT',
 'SUV_COUNT',
 'UNBELTED_OCC_COUNT',
 'UNB_DEATH_COUNT',
 'UNB_MAJ_INJ_COUNT',
 'UNK_INJ_DEG_COUNT',
 'UNK_INJ_PER_COUNT',
 'VAN_COUNT',
 'VEHICLE_COUNT']

In [32]:
collision_agg_count['TOTAL_COLLISION_COUNT'] = collision_agg_count[col_list].sum(axis=1)

In [33]:
print(collision_agg_count.shape)
collision_agg_count.head().T


(13206, 33)
Out[33]:
0 1 2 3 4
SEG_ID 960283.000000 541001.000000 760684.000000 300613.000000 640755.000000
LENGTH 439.229039 778.344098 170.621503 194.531579 396.314637
AUTOMOBILE_COUNT 6.000000 9.000000 3.000000 1.000000 0.000000
BELTED_DEATH_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
BELTED_MAJ_INJ_COUNT 0.000000 2.000000 0.000000 0.000000 0.000000
BICYCLE_COUNT 0.000000 1.000000 1.000000 0.000000 0.000000
BICYCLE_DEATH_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
BICYCLE_MAJ_INJ_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
BUS_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
COMM_VEH_COUNT 1.000000 0.000000 0.000000 0.000000 0.000000
FATAL_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
HEAVY_TRUCK_COUNT 1.000000 0.000000 0.000000 0.000000 0.000000
INJURY_COUNT 9.000000 8.000000 2.000000 1.000000 1.000000
MAJ_INJ_COUNT 0.000000 2.000000 0.000000 0.000000 0.000000
MCYCLE_DEATH_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
MCYCLE_MAJ_INJ_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
MIN_INJ_COUNT 9.000000 4.000000 0.000000 1.000000 0.000000
MOD_INJ_COUNT 0.000000 2.000000 1.000000 0.000000 0.000000
MOTORCYCLE_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
PED_COUNT 0.000000 1.000000 0.000000 1.000000 1.000000
PED_DEATH_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
PED_MAJ_INJ_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
PERSON_COUNT 17.000000 16.000000 3.000000 2.000000 2.000000
SMALL_TRUCK_COUNT 0.000000 1.000000 0.000000 0.000000 0.000000
SUV_COUNT 4.000000 0.000000 0.000000 0.000000 1.000000
UNBELTED_OCC_COUNT 0.000000 1.000000 0.000000 0.000000 0.000000
UNB_DEATH_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
UNB_MAJ_INJ_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
UNK_INJ_DEG_COUNT 0.000000 0.000000 1.000000 0.000000 1.000000
UNK_INJ_PER_COUNT 0.000000 1.000000 0.000000 0.000000 0.000000
VAN_COUNT 0.000000 1.000000 0.000000 0.000000 0.000000
VEHICLE_COUNT 11.000000 11.000000 3.000000 1.000000 1.000000
TOTAL_COLLISION_COUNT 58.000000 60.000000 14.000000 7.000000 7.000000

In [116]:
collision_filtered_agg_count['TOTAL_COLLISION_COUNT'] = collision_filtered_agg_count[col_list].sum(axis=1)
print(collision_filtered_agg_count.shape)
collision_filtered_agg_count.head().T


(17599, 34)
Out[116]:
0 1 2 3 4
SEG_ID 960283 541001 760684 300613 640755
LENGTH 439.229 778.344 170.622 194.532 396.315
CRASH_MONTH 8 8 8 8 8
AUTOMOBILE_COUNT 0 1 1 1 0
BELTED_DEATH_COUNT 0 0 0 0 0
BELTED_MAJ_INJ_COUNT 0 0 0 0 0
BICYCLE_COUNT 0 1 1 0 0
BICYCLE_DEATH_COUNT 0 0 0 0 0
BICYCLE_MAJ_INJ_COUNT 0 0 0 0 0
BUS_COUNT 0 0 0 0 0
COMM_VEH_COUNT 1 0 0 0 0
FATAL_COUNT 0 0 0 0 0
HEAVY_TRUCK_COUNT 1 0 0 0 0
INJURY_COUNT 5 1 1 1 1
MAJ_INJ_COUNT 0 0 0 0 0
MCYCLE_DEATH_COUNT 0 0 0 0 0
MCYCLE_MAJ_INJ_COUNT 0 0 0 0 0
MIN_INJ_COUNT 5 1 0 1 0
MOD_INJ_COUNT 0 0 1 0 0
MOTORCYCLE_COUNT 0 0 0 0 0
PED_COUNT 0 0 0 1 1
PED_DEATH_COUNT 0 0 0 0 0
PED_MAJ_INJ_COUNT 0 0 0 0 0
PERSON_COUNT 7 3 2 2 2
SMALL_TRUCK_COUNT 0 0 0 0 0
SUV_COUNT 2 0 0 0 1
UNBELTED_OCC_COUNT 0 0 0 0 0
UNB_DEATH_COUNT 0 0 0 0 0
UNB_MAJ_INJ_COUNT 0 0 0 0 0
UNK_INJ_DEG_COUNT 0 0 0 0 1
UNK_INJ_PER_COUNT 0 0 0 0 0
VAN_COUNT 0 0 0 0 0
VEHICLE_COUNT 3 1 1 1 1
TOTAL_COLLISION_COUNT 24 8 7 7 7

In [18]:
collision_filtered_agg_count[collision_filtered_agg_count['SEG_ID']==960283].T


Out[18]:
0 4125 4720
SEG_ID 960283 960283 960283
LENGTH 439.229 439.229 439.229
CRASH_MONTH 8 10 1
AUTOMOBILE_COUNT 0 3 1
BELTED_DEATH_COUNT 0 0 0
BELTED_MAJ_INJ_COUNT 0 0 0
BICYCLE_COUNT 0 0 0
BICYCLE_DEATH_COUNT 0 0 0
BICYCLE_MAJ_INJ_COUNT 0 0 0
BUS_COUNT 0 0 0
COMM_VEH_COUNT 1 0 0
FATAL_COUNT 0 0 0
HEAVY_TRUCK_COUNT 1 0 0
INJURY_COUNT 5 2 0
MAJ_INJ_COUNT 0 0 0
MCYCLE_DEATH_COUNT 0 0 0
MCYCLE_MAJ_INJ_COUNT 0 0 0
MIN_INJ_COUNT 5 2 0
MOD_INJ_COUNT 0 0 0
MOTORCYCLE_COUNT 0 0 0
PED_COUNT 0 0 0
PED_DEATH_COUNT 0 0 0
PED_MAJ_INJ_COUNT 0 0 0
PERSON_COUNT 7 5 2
SMALL_TRUCK_COUNT 0 0 0
SUV_COUNT 2 2 0
UNBELTED_OCC_COUNT 0 0 0
UNB_DEATH_COUNT 0 0 0
UNB_MAJ_INJ_COUNT 0 0 0
UNK_INJ_DEG_COUNT 0 0 0
UNK_INJ_PER_COUNT 0 0 0
VAN_COUNT 0 0 0
VEHICLE_COUNT 3 5 1
TOTAL_COLLISION_COUNT 24 19 4

In [117]:
for column in collision_filtered_agg_count.columns:
    if('_COUNT' in column):
        col_len = column + '/LEN'
        collision_filtered_agg_count[col_len] = collision_filtered_agg_count[column]/collision_filtered_agg_count.LENGTH
        normalize(column, collision_filtered_agg_count)
        normalize(col_len, collision_filtered_agg_count)

In [118]:
print(collision_filtered_agg_count.shape)
print(collision_filtered_agg_count.drop_duplicates().shape)
collision_filtered_agg_count[collision_filtered_agg_count['SEG_ID']==960283].T


(17599, 127)
(17599, 127)
Out[118]:
0 4125 4720
SEG_ID 960283 960283 960283
LENGTH 439.229 439.229 439.229
CRASH_MONTH 8 10 1
AUTOMOBILE_COUNT 0 3 1
BELTED_DEATH_COUNT 0 0 0
BELTED_MAJ_INJ_COUNT 0 0 0
BICYCLE_COUNT 0 0 0
BICYCLE_DEATH_COUNT 0 0 0
BICYCLE_MAJ_INJ_COUNT 0 0 0
BUS_COUNT 0 0 0
COMM_VEH_COUNT 1 0 0
FATAL_COUNT 0 0 0
HEAVY_TRUCK_COUNT 1 0 0
INJURY_COUNT 5 2 0
MAJ_INJ_COUNT 0 0 0
MCYCLE_DEATH_COUNT 0 0 0
MCYCLE_MAJ_INJ_COUNT 0 0 0
MIN_INJ_COUNT 5 2 0
MOD_INJ_COUNT 0 0 0
MOTORCYCLE_COUNT 0 0 0
PED_COUNT 0 0 0
PED_DEATH_COUNT 0 0 0
PED_MAJ_INJ_COUNT 0 0 0
PERSON_COUNT 7 5 2
SMALL_TRUCK_COUNT 0 0 0
SUV_COUNT 2 2 0
UNBELTED_OCC_COUNT 0 0 0
UNB_DEATH_COUNT 0 0 0
UNB_MAJ_INJ_COUNT 0 0 0
UNK_INJ_DEG_COUNT 0 0 0
... ... ... ...
SMALL_TRUCK_COUNT/LEN 0 0 0
SMALL_TRUCK_COUNT_norm 0 0 0
SMALL_TRUCK_COUNT/LEN_norm 0 0 0
SUV_COUNT/LEN 0.00455343 0.00455343 0
SUV_COUNT_norm 0.41629 0.41629 0
SUV_COUNT/LEN_norm 0.0389145 0.0389145 0
UNBELTED_OCC_COUNT/LEN 0 0 0
UNBELTED_OCC_COUNT_norm 0 0 0
UNBELTED_OCC_COUNT/LEN_norm 0 0 0
UNB_DEATH_COUNT/LEN 0 0 0
UNB_DEATH_COUNT_norm 0 0 0
UNB_DEATH_COUNT/LEN_norm 0 0 0
UNB_MAJ_INJ_COUNT/LEN 0 0 0
UNB_MAJ_INJ_COUNT_norm 0 0 0
UNB_MAJ_INJ_COUNT/LEN_norm 0 0 0
UNK_INJ_DEG_COUNT/LEN 0 0 0
UNK_INJ_DEG_COUNT_norm 0 0 0
UNK_INJ_DEG_COUNT/LEN_norm 0 0 0
UNK_INJ_PER_COUNT/LEN 0 0 0
UNK_INJ_PER_COUNT_norm 0 0 0
UNK_INJ_PER_COUNT/LEN_norm 0 0 0
VAN_COUNT/LEN 0 0 0
VAN_COUNT_norm 0 0 0
VAN_COUNT/LEN_norm 0 0 0
VEHICLE_COUNT/LEN 0.00683015 0.0113836 0.00227672
VEHICLE_COUNT_norm 0.35085 0.453467 0.175425
VEHICLE_COUNT/LEN_norm 0.0291109 0.0484087 0.00972568
TOTAL_COLLISION_COUNT/LEN 0.0546412 0.0432576 0.00910687
TOTAL_COLLISION_COUNT_norm 0.591442 0.550441 0.295721
TOTAL_COLLISION_COUNT/LEN_norm 0.066306 0.0527801 0.0112989

127 rows × 3 columns


In [184]:
[col for col in collision_filtered_agg_count.columns if 'LEN_norm' in col]


Out[184]:
['AUTOMOBILE_COUNT/LEN_norm',
 'BELTED_DEATH_COUNT/LEN_norm',
 'BELTED_MAJ_INJ_COUNT/LEN_norm',
 'BICYCLE_COUNT/LEN_norm',
 'BICYCLE_DEATH_COUNT/LEN_norm',
 'BICYCLE_MAJ_INJ_COUNT/LEN_norm',
 'BUS_COUNT/LEN_norm',
 'COMM_VEH_COUNT/LEN_norm',
 'FATAL_COUNT/LEN_norm',
 'HEAVY_TRUCK_COUNT/LEN_norm',
 'INJURY_COUNT/LEN_norm',
 'MAJ_INJ_COUNT/LEN_norm',
 'MCYCLE_DEATH_COUNT/LEN_norm',
 'MCYCLE_MAJ_INJ_COUNT/LEN_norm',
 'MIN_INJ_COUNT/LEN_norm',
 'MOD_INJ_COUNT/LEN_norm',
 'MOTORCYCLE_COUNT/LEN_norm',
 'PED_COUNT/LEN_norm',
 'PED_DEATH_COUNT/LEN_norm',
 'PED_MAJ_INJ_COUNT/LEN_norm',
 'PERSON_COUNT/LEN_norm',
 'SMALL_TRUCK_COUNT/LEN_norm',
 'SUV_COUNT/LEN_norm',
 'UNBELTED_OCC_COUNT/LEN_norm',
 'UNB_DEATH_COUNT/LEN_norm',
 'UNB_MAJ_INJ_COUNT/LEN_norm',
 'UNK_INJ_DEG_COUNT/LEN_norm',
 'UNK_INJ_PER_COUNT/LEN_norm',
 'VAN_COUNT/LEN_norm',
 'VEHICLE_COUNT/LEN_norm',
 'TOTAL_COLLISION_COUNT/LEN_norm']

In [186]:
collision_filtered_agg_count[['AUTOMOBILE_COUNT', 'BICYCLE_COUNT', 'BUS_COUNT', 'COMM_VEH_COUNT', 'HEAVY_TRUCK_COUNT', 'MOTORCYCLE_COUNT', 'SMALL_TRUCK_COUNT', 'SUV_COUNT', 'VAN_COUNT', 'VEHICLE_COUNT']].T


Out[186]:
0 1 2 3 4 5 6 7 8 9 ... 17589 17590 17591 17592 17593 17594 17595 17596 17597 17598
AUTOMOBILE_COUNT 0.0 1.0 1.0 1.0 0.0 1.0 2.0 2.0 1.0 0.0 ... 0.0 1.0 2.0 1.0 0.0 0.0 0.0 1.0 2.0 0.0
BICYCLE_COUNT 0.0 1.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
BUS_COUNT 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
COMM_VEH_COUNT 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
HEAVY_TRUCK_COUNT 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
MOTORCYCLE_COUNT 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
SMALL_TRUCK_COUNT 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
SUV_COUNT 2.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 2.0 ... 1.0 1.0 0.0 0.0 0.0 0.0 2.0 1.0 0.0 0.0
VAN_COUNT 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 ... 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0
VEHICLE_COUNT 3.0 1.0 1.0 1.0 1.0 2.0 3.0 2.0 2.0 2.0 ... 2.0 2.0 2.0 2.0 1.0 1.0 2.0 3.0 2.0 1.0

10 rows × 17599 columns


In [188]:
collision_filtered_agg_count[['PED_COUNT', 'PED_DEATH_COUNT', 'PED_MAJ_INJ_COUNT', 'PERSON_COUNT']].T


Out[188]:
0 1 2 3 4 5 6 7 8 9 ... 17589 17590 17591 17592 17593 17594 17595 17596 17597 17598
PED_COUNT 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0
PED_DEATH_COUNT 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
PED_MAJ_INJ_COUNT 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
PERSON_COUNT 7.0 3.0 2.0 2.0 2.0 3.0 6.0 4.0 2.0 2.0 ... 2.0 5.0 2.0 2.0 1.0 2.0 2.0 2.0 2.0 2.0

4 rows × 17599 columns


In [23]:
def normalize(col_name, df):
    col_name_norm = col_name + '_norm'
    df[col_name_norm] = df[col_name]+1
    df[col_name_norm] = np.log2(df[col_name_norm])
    df[col_name_norm] = df[col_name_norm]/df[col_name_norm].max()

In [34]:
for column in collision_agg_count.columns:
    if('_COUNT' in column):
        col_len = column + '/LEN'
        collision_agg_count[col_len] = collision_agg_count[column]/collision_agg_count.LENGTH
        normalize(column, collision_agg_count)
        normalize(col_len, collision_agg_count)

In [35]:
print(collision_agg_count.shape)
print(collision_agg_count.drop_duplicates().shape)


(13206, 126)
(13206, 126)

In [119]:
print(collision_filtered_agg_count.shape)
print(collision_filtered_agg_count.drop_duplicates().shape)
print(collision_filtered_agg_count.dropna().shape)


(17599, 127)
(17599, 127)
(17587, 127)

In [122]:
print(set(collision_filtered_agg_count.columns)-set(collision_filtered_agg_count.dropna().columns))


set()

In [120]:
collision_filtered_agg_count[pd.isnull(collision_filtered_agg_count.CRASH_MONTH)==1].shape


Out[120]:
(0, 127)

In [121]:
collision_filtered_agg_count.head().T


Out[121]:
0 1 2 3 4
SEG_ID 960283 541001 760684 300613 640755
LENGTH 439.229 778.344 170.622 194.532 396.315
CRASH_MONTH 8 8 8 8 8
AUTOMOBILE_COUNT 0 1 1 1 0
BELTED_DEATH_COUNT 0 0 0 0 0
BELTED_MAJ_INJ_COUNT 0 0 0 0 0
BICYCLE_COUNT 0 1 1 0 0
BICYCLE_DEATH_COUNT 0 0 0 0 0
BICYCLE_MAJ_INJ_COUNT 0 0 0 0 0
BUS_COUNT 0 0 0 0 0
COMM_VEH_COUNT 1 0 0 0 0
FATAL_COUNT 0 0 0 0 0
HEAVY_TRUCK_COUNT 1 0 0 0 0
INJURY_COUNT 5 1 1 1 1
MAJ_INJ_COUNT 0 0 0 0 0
MCYCLE_DEATH_COUNT 0 0 0 0 0
MCYCLE_MAJ_INJ_COUNT 0 0 0 0 0
MIN_INJ_COUNT 5 1 0 1 0
MOD_INJ_COUNT 0 0 1 0 0
MOTORCYCLE_COUNT 0 0 0 0 0
PED_COUNT 0 0 0 1 1
PED_DEATH_COUNT 0 0 0 0 0
PED_MAJ_INJ_COUNT 0 0 0 0 0
PERSON_COUNT 7 3 2 2 2
SMALL_TRUCK_COUNT 0 0 0 0 0
SUV_COUNT 2 0 0 0 1
UNBELTED_OCC_COUNT 0 0 0 0 0
UNB_DEATH_COUNT 0 0 0 0 0
UNB_MAJ_INJ_COUNT 0 0 0 0 0
UNK_INJ_DEG_COUNT 0 0 0 0 1
... ... ... ... ... ...
SMALL_TRUCK_COUNT/LEN 0 0 0 0 0
SMALL_TRUCK_COUNT_norm 0 0 0 0 0
SMALL_TRUCK_COUNT/LEN_norm 0 0 0 0 0
SUV_COUNT/LEN 0.00455343 0 0 0 0.00252325
SUV_COUNT_norm 0.41629 0 0 0 0.26265
SUV_COUNT/LEN_norm 0.0389145 0 0 0 0.021586
UNBELTED_OCC_COUNT/LEN 0 0 0 0 0
UNBELTED_OCC_COUNT_norm 0 0 0 0 0
UNBELTED_OCC_COUNT/LEN_norm 0 0 0 0 0
UNB_DEATH_COUNT/LEN 0 0 0 0 0
UNB_DEATH_COUNT_norm 0 0 0 0 0
UNB_DEATH_COUNT/LEN_norm 0 0 0 0 0
UNB_MAJ_INJ_COUNT/LEN 0 0 0 0 0
UNB_MAJ_INJ_COUNT_norm 0 0 0 0 0
UNB_MAJ_INJ_COUNT/LEN_norm 0 0 0 0 0
UNK_INJ_DEG_COUNT/LEN 0 0 0 0 0.00252325
UNK_INJ_DEG_COUNT_norm 0 0 0 0 0.224244
UNK_INJ_DEG_COUNT/LEN_norm 0 0 0 0 0.0287781
UNK_INJ_PER_COUNT/LEN 0 0 0 0 0
UNK_INJ_PER_COUNT_norm 0 0 0 0 0
UNK_INJ_PER_COUNT/LEN_norm 0 0 0 0 0
VAN_COUNT/LEN 0 0 0 0 0
VAN_COUNT_norm 0 0 0 0 0
VAN_COUNT/LEN_norm 0 0 0 0 0
VEHICLE_COUNT/LEN 0.00683015 0.00128478 0.00586093 0.00514055 0.00252325
VEHICLE_COUNT_norm 0.35085 0.175425 0.175425 0.175425 0.175425
VEHICLE_COUNT/LEN_norm 0.0291109 0.00549104 0.024992 0.0219281 0.0107775
TOTAL_COLLISION_COUNT/LEN 0.0546412 0.0102782 0.0410265 0.0359839 0.0176627
TOTAL_COLLISION_COUNT_norm 0.591442 0.403722 0.38208 0.38208 0.38208
TOTAL_COLLISION_COUNT/LEN_norm 0.066306 0.0127448 0.0501118 0.04406 0.0218216

127 rows × 5 columns


In [36]:
collision_agg_count.head().T


Out[36]:
0 1 2 3 4
SEG_ID 960283.000000 541001.000000 760684.000000 300613.000000 640755.000000
LENGTH 439.229039 778.344098 170.621503 194.531579 396.314637
AUTOMOBILE_COUNT 6.000000 9.000000 3.000000 1.000000 0.000000
BELTED_DEATH_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
BELTED_MAJ_INJ_COUNT 0.000000 2.000000 0.000000 0.000000 0.000000
BICYCLE_COUNT 0.000000 1.000000 1.000000 0.000000 0.000000
BICYCLE_DEATH_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
BICYCLE_MAJ_INJ_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
BUS_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
COMM_VEH_COUNT 1.000000 0.000000 0.000000 0.000000 0.000000
FATAL_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
HEAVY_TRUCK_COUNT 1.000000 0.000000 0.000000 0.000000 0.000000
INJURY_COUNT 9.000000 8.000000 2.000000 1.000000 1.000000
MAJ_INJ_COUNT 0.000000 2.000000 0.000000 0.000000 0.000000
MCYCLE_DEATH_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
MCYCLE_MAJ_INJ_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
MIN_INJ_COUNT 9.000000 4.000000 0.000000 1.000000 0.000000
MOD_INJ_COUNT 0.000000 2.000000 1.000000 0.000000 0.000000
MOTORCYCLE_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
PED_COUNT 0.000000 1.000000 0.000000 1.000000 1.000000
PED_DEATH_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
PED_MAJ_INJ_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
PERSON_COUNT 17.000000 16.000000 3.000000 2.000000 2.000000
SMALL_TRUCK_COUNT 0.000000 1.000000 0.000000 0.000000 0.000000
SUV_COUNT 4.000000 0.000000 0.000000 0.000000 1.000000
UNBELTED_OCC_COUNT 0.000000 1.000000 0.000000 0.000000 0.000000
UNB_DEATH_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
UNB_MAJ_INJ_COUNT 0.000000 0.000000 0.000000 0.000000 0.000000
UNK_INJ_DEG_COUNT 0.000000 0.000000 1.000000 0.000000 1.000000
UNK_INJ_PER_COUNT 0.000000 1.000000 0.000000 0.000000 0.000000
... ... ... ... ... ...
SMALL_TRUCK_COUNT/LEN 0.000000 0.001285 0.000000 0.000000 0.000000
SMALL_TRUCK_COUNT_norm 0.000000 0.173765 0.000000 0.000000 0.000000
SMALL_TRUCK_COUNT/LEN_norm 0.000000 0.010017 0.000000 0.000000 0.000000
SUV_COUNT/LEN 0.009107 0.000000 0.000000 0.000000 0.002523
SUV_COUNT_norm 0.330128 0.000000 0.000000 0.000000 0.142178
SUV_COUNT/LEN_norm 0.032025 0.000000 0.000000 0.000000 0.008902
UNBELTED_OCC_COUNT/LEN 0.000000 0.001285 0.000000 0.000000 0.000000
UNBELTED_OCC_COUNT_norm 0.000000 0.182088 0.000000 0.000000 0.000000
UNBELTED_OCC_COUNT/LEN_norm 0.000000 0.007687 0.000000 0.000000 0.000000
UNB_DEATH_COUNT/LEN 0.000000 0.000000 0.000000 0.000000 0.000000
UNB_DEATH_COUNT_norm 0.000000 0.000000 0.000000 0.000000 0.000000
UNB_DEATH_COUNT/LEN_norm 0.000000 0.000000 0.000000 0.000000 0.000000
UNB_MAJ_INJ_COUNT/LEN 0.000000 0.000000 0.000000 0.000000 0.000000
UNB_MAJ_INJ_COUNT_norm 0.000000 0.000000 0.000000 0.000000 0.000000
UNB_MAJ_INJ_COUNT/LEN_norm 0.000000 0.000000 0.000000 0.000000 0.000000
UNK_INJ_DEG_COUNT/LEN 0.000000 0.000000 0.005861 0.000000 0.002523
UNK_INJ_DEG_COUNT_norm 0.000000 0.000000 0.160053 0.000000 0.160053
UNK_INJ_DEG_COUNT/LEN_norm 0.000000 0.000000 0.015892 0.000000 0.006853
UNK_INJ_PER_COUNT/LEN 0.000000 0.001285 0.000000 0.000000 0.000000
UNK_INJ_PER_COUNT_norm 0.000000 0.187902 0.000000 0.000000 0.000000
UNK_INJ_PER_COUNT/LEN_norm 0.000000 0.006881 0.000000 0.000000 0.000000
VAN_COUNT/LEN 0.000000 0.001285 0.000000 0.000000 0.000000
VAN_COUNT_norm 0.000000 0.227670 0.000000 0.000000 0.000000
VAN_COUNT/LEN_norm 0.000000 0.018292 0.000000 0.000000 0.000000
VEHICLE_COUNT/LEN 0.025044 0.014133 0.017583 0.005141 0.002523
VEHICLE_COUNT_norm 0.389064 0.389064 0.217053 0.108527 0.108527
VEHICLE_COUNT/LEN_norm 0.025339 0.014376 0.017856 0.005253 0.002582
TOTAL_COLLISION_COUNT/LEN 0.132050 0.077087 0.082053 0.035984 0.017663
TOTAL_COLLISION_COUNT_norm 0.520493 0.524748 0.345679 0.265438 0.265438
TOTAL_COLLISION_COUNT/LEN_norm 0.055549 0.033259 0.035319 0.015833 0.007842

126 rows × 5 columns


In [38]:
collision_agg_count.dropna().to_csv('../philly/Philly_Collision_Details.csv')

In [123]:
collision_filtered_agg_count.dropna().to_csv('../philly/Philly_Collision_Details_2013-14.csv')

In [48]:
parking_vio = pd.read_csv('../philly/Parking_Violations.csv')

In [49]:
print(parking_vio.shape)
parking_vio.head().T


(6863434, 11)
Out[49]:
0 1 2 3 4
Issue Date and Time 01/01/2012 12:00:00 AM 01/01/2012 12:00:00 AM 01/01/2012 12:00:00 AM 01/01/2012 12:01:00 AM 01/01/2012 12:07:00 AM
State PA PA PA TX PA
Plate ID 1048932 679582 1082165 1720567 1172258
Division 0 9 5 2 63
Location 1000 VINE 400 S 25TH 7016 HENRY AVE 300 DEVEREAU 450 N 6TH
Location Standardized 1000 VINE ST, 19107 400 S 25TH ST, 19146 7016 HENRY AVE, 19128 NaN 450 N 6TH ST, 19123
Coordinates (39.9568850255,-75.1563742867) (39.9473721746,-75.1814961216) (40.0483515634,-75.2185249487) NaN (39.9598882869,-75.1484695504)
Violation Description PARKING PROHBITED CC SCHOOL ZONE SIDEWALK BLOCKING DRIVEWAY PARKING PROHBITED CC
Fine $51.00 $36.00 $51.00 $51.00 $51.00
Issuing Agency POLICE POLICE POLICE POLICE POLICE
Violation Location (39.9568850255, -75.1563742867) (39.9473721746, -75.1814961216) (40.0483515634, -75.2185249487) NaN (39.9598882869, -75.1484695504)

In [52]:
parking_vio = parking_vio.dropna(subset=['Coordinates'])
print(parking_vio.shape)
parking_vio.head().T


(6522895, 11)
Out[52]:
0 1 2 4 5
Issue Date and Time 01/01/2012 12:00:00 AM 01/01/2012 12:00:00 AM 01/01/2012 12:00:00 AM 01/01/2012 12:07:00 AM 01/01/2012 12:07:00 AM
State PA PA PA PA PA
Plate ID 1048932 679582 1082165 1172258 978500
Division 0 9 5 63 16
Location 1000 VINE 400 S 25TH 7016 HENRY AVE 450 N 6TH 1000 N 42ND
Location Standardized 1000 VINE ST, 19107 400 S 25TH ST, 19146 7016 HENRY AVE, 19128 450 N 6TH ST, 19123 1000 N 42ND ST, 19104
Coordinates (39.9568850255,-75.1563742867) (39.9473721746,-75.1814961216) (40.0483515634,-75.2185249487) (39.9598882869,-75.1484695504) (39.9722105836,-75.2086555318)
Violation Description PARKING PROHBITED CC SCHOOL ZONE SIDEWALK PARKING PROHBITED CC DOUBLE PARKED
Fine $51.00 $36.00 $51.00 $51.00 $51.00
Issuing Agency POLICE POLICE POLICE POLICE POLICE
Violation Location (39.9568850255, -75.1563742867) (39.9473721746, -75.1814961216) (40.0483515634, -75.2185249487) (39.9598882869, -75.1484695504) (39.9722105836, -75.2086555318)

In [55]:
parking_vio['Violation Location'][0]


Out[55]:
'(39.9568850255, -75.1563742867)'

In [60]:
parking_vio['Coordinates'][0].replace('(', '').replace(')', '').split(',')[0]


Out[60]:
'39.9568850255'

In [57]:
parking_vio.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 6522895 entries, 0 to 6863433
Data columns (total 11 columns):
Issue Date and Time      object
State                    object
Plate ID                 int64
Division                 float64
Location                 object
Location Standardized    object
Coordinates              object
Violation Description    object
Fine                     object
Issuing Agency           object
Violation Location       object
dtypes: float64(1), int64(1), object(9)
memory usage: 597.2+ MB

In [66]:
points = [Point(float(row['Coordinates'].replace('(', '').replace(')', '').split(',')[1]), float(row['Coordinates'].replace('(', '').replace(')', '').split(',')[0])) for key, row in parking_vio.iterrows()]
geo_df = GeoDataFrame(parking_vio,geometry=points)
geo_df.to_file('../philly/Parking_Violations.geojson', driver='GeoJSON')


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-66-1df58c4ba068> in <module>()
      1 points = [Point(float(row['Coordinates'].replace('(', '').replace(')', '').split(',')[1]), float(row['Coordinates'].replace('(', '').replace(')', '').split(',')[0])) for key, row in parking_vio.iterrows()]
      2 geo_df = GeoDataFrame(parking_vio,geometry=points)
----> 3 geo_df.to_file('../philly/Parking_Violations.geojson', driver='GeoJSON')

//anaconda/lib/python3.5/site-packages/geopandas/geodataframe.py in to_file(self, filename, driver, schema, **kwargs)
    341         """
    342         from geopandas.io.file import to_file
--> 343         to_file(self, filename, driver, schema, **kwargs)
    344 
    345     def to_crs(self, crs=None, epsg=None, inplace=False):

//anaconda/lib/python3.5/site-packages/geopandas/io/file.py in to_file(df, filename, driver, schema, **kwargs)
     59     filename = os.path.abspath(os.path.expanduser(filename))
     60     with fiona.open(filename, 'w', driver=driver, crs=df.crs,
---> 61                     schema=schema, **kwargs) as c:
     62         for feature in df.iterfeatures():
     63             c.write(feature)

//anaconda/lib/python3.5/site-packages/fiona/__init__.py in open(path, mode, driver, schema, crs, encoding, layer, vfs, enabled_drivers, crs_wkt)
    176         c = Collection(path, mode, crs=crs, driver=driver, schema=this_schema,
    177                        encoding=encoding, layer=layer, vsi=vsi, archive=archive,
--> 178                        enabled_drivers=enabled_drivers, crs_wkt=crs_wkt)
    179     else:
    180         raise ValueError(

//anaconda/lib/python3.5/site-packages/fiona/collection.py in __init__(self, path, mode, driver, schema, crs, encoding, layer, vsi, archive, enabled_drivers, crs_wkt, **kwargs)
    153             self.encoding = encoding
    154             self.session = WritingSession()
--> 155             self.session.start(self, **kwargs)
    156             self.encoding = encoding or self.session.get_fileencoding().lower()
    157 

fiona/ogrext.pyx in fiona.ogrext.WritingSession.start (fiona/ogrext.c:15539)()

ValueError: Null layer

In [ ]:
parking_vio_gpd = gpd.read_file('../philly/Parking_Violations.geojson')
print(parking_vio_gpd.shape)
parking_vio.head().T

In [67]:
geo_df.shape


Out[67]:
(6522895, 12)

In [68]:
geo_df.head().T


Out[68]:
0 1 2 4 5
Issue Date and Time 01/01/2012 12:00:00 AM 01/01/2012 12:00:00 AM 01/01/2012 12:00:00 AM 01/01/2012 12:07:00 AM 01/01/2012 12:07:00 AM
State PA PA PA PA PA
Plate ID 1048932 679582 1082165 1172258 978500
Division 0 9 5 63 16
Location 1000 VINE 400 S 25TH 7016 HENRY AVE 450 N 6TH 1000 N 42ND
Location Standardized 1000 VINE ST, 19107 400 S 25TH ST, 19146 7016 HENRY AVE, 19128 450 N 6TH ST, 19123 1000 N 42ND ST, 19104
Coordinates (39.9568850255,-75.1563742867) (39.9473721746,-75.1814961216) (40.0483515634,-75.2185249487) (39.9598882869,-75.1484695504) (39.9722105836,-75.2086555318)
Violation Description PARKING PROHBITED CC SCHOOL ZONE SIDEWALK PARKING PROHBITED CC DOUBLE PARKED
Fine $51.00 $36.00 $51.00 $51.00 $51.00
Issuing Agency POLICE POLICE POLICE POLICE POLICE
Violation Location (39.9568850255, -75.1563742867) (39.9473721746, -75.1814961216) (40.0483515634, -75.2185249487) (39.9598882869, -75.1484695504) (39.9722105836, -75.2086555318)
geometry POINT (-75.1563742867 39.9568850255) POINT (-75.18149612160001 39.9473721746) POINT (-75.21852494869999 40.0483515634) POINT (-75.14846955039999 39.9598882869) POINT (-75.20865553180001 39.9722105836)

In [69]:
geo_df[['geometry']].dropna().shape


Out[69]:
(6522895, 1)

In [ ]:
geo_df.to_file('../philly/GeoJSON_data/Parking_Violations.geojson', driver='GeoJSON')

In [ ]:
philly_seg_buffer_gp = street_gpd[['SEG_ID','geometry']].copy()
philly_seg_buffer_gp.geometry = philly_seg_buffer_gp.buffer(0.0001)
park_vio_buffer_gp = geo_df[['Plate ID','Coordinates','geometry']].dropna().copy()
park_vio_buffer_gp.geometry = park_vio_buffer_gp.buffer(0.0001)
joined = sjoin(park_vio_buffer_gp, philly_seg_buffer_gp, how='left', op='intersects')
gb = joined.reset_index().groupby(['Plate ID', 'Coordinates']).count()
    
print(gb[gb.SEG_ID==0].shape[0], 'points spatially match no segment')
print(gb[gb.SEG_ID==1].shape[0], 'points spatially match 1 segment')
print(gb[gb.SEG_ID>1].shape[0], 'points spatially match multiple segments')


Exception ignored in: <bound method Collection.__del__ of <open Collection '/Users/kiranjavkar/Documents/Cycling Safety/philly/Parking_Violations.geojson:OgrGeoJSON', mode 'w' at 0x3e6aac438>>
Traceback (most recent call last):
  File "//anaconda/lib/python3.5/site-packages/fiona/collection.py", line 416, in __del__
    self.__exit__(None, None, None)
  File "//anaconda/lib/python3.5/site-packages/fiona/collection.py", line 411, in __exit__
    self.close()
  File "//anaconda/lib/python3.5/site-packages/fiona/collection.py", line 395, in close
    self.flush()
  File "//anaconda/lib/python3.5/site-packages/fiona/collection.py", line 384, in flush
    if self.session is not None and self.session.get_length() > 0:
  File "fiona/ogrext.pyx", line 495, in fiona.ogrext.Session.get_length (fiona/ogrext.c:9385)
ValueError: Null layer

In [127]:
bike_network = pd.read_csv('../philly/Bike_Network.csv')
print(bike_network.shape)
bike_network.head().T


(4065, 7)
Out[127]:
0 1 2 3 4
OBJECTID 1 2 3 4 5
SEG_ID 420562 420568 420554 421801 421802
STREETNAME SPRING GARDEN ST SPRING GARDEN ST SPRING GARDEN ST N 21ST ST N 21ST ST
ST_CODE 73800 73800 73800 88190 88190
ONEWAY TF TF TF TF TF
CLASS 2 2 2 4 4
TYPE Conventional Conventional Conventional Sharrow Sharrow

In [126]:
street_gpd = gpd.read_file('../philly/GeoJSON_data/Street_Centerline_qgis3.geojson')

In [128]:
bike_lane = bike_network.groupby('SEG_ID').size().reset_index()
bike_lane.columns = ['SEG_ID', 'BIKELANE']
print(bike_lane.shape)
bike_lane.head().T


(4065, 2)
Out[128]:
0 1 2 3 4
SEG_ID 100002 100003 100004 100045 100066
BIKELANE 1 1 1 1 1

In [129]:
street_gpd = street_gpd.merge(bike_lane, left_on='SEG_ID', right_on='SEG_ID', right_index=False, how='left')

In [130]:
street_gpd.BIKELANE = street_gpd.BIKELANE.fillna(0)
print(street_gpd.shape)
street_gpd.head().T


(41022, 32)
Out[130]:
0 1 2 3 4
CLASS 3 3 3 5 4
FNODE_ 2 2 1 6 5
LENGTH 449.863 540.083 446.104 447.261 148.216
LPOLY_ 0 0 0 0 0
L_F_ADD 1500 400 1600 1600 350
L_HUNDRED 1500 400 1600 1600 300
L_T_ADD 1598 498 1698 1698 398
MULTI_REP 0 0 0 0 0
NEWSEGDATE None None None None None
ONEWAY FT TF FT TF TF
PRE_DIR None N None None N
RESPONSIBL FAM FAM FAM None None
RPOLY_ 0 0 0 0 0
R_F_ADD 1501 401 1601 1601 351
R_HUNDRED 1500 400 1600 1600 300
R_T_ADD 1599 499 1699 1699 399
SEG_ID 420708 422065 420702 420732 420718
SHAPE_LEN 449.863 540.083 446.104 447.261 148.216
STCL2_ 1 2 3 4 5
STCL2_ID 85205 86540 85199 85229 85215
STNAME CALLOWHILL ST N 15TH ST CALLOWHILL ST CARLTON ST N 17TH ST
STREETLABE CALLOWHILL ST N 15TH ST CALLOWHILL ST CARLTON ST N 17TH ST
ST_CODE 20880 88070 20880 21440 88110
ST_NAME CALLOWHILL 15TH CALLOWHILL CARLTON 17TH
ST_TYPE ST ST ST ST ST
SUF_DIR None None None None None
TNODE_ 1 3 4 5 4
UPDATE_ 1997-02-10 1997-02-10 1997-02-10 1997-02-10 1997-06-27
ZIP_LEFT 19130 19130 19130 19103 19103
ZIP_RIGHT 19130 19130 19130 19103 19103
geometry LINESTRING (-75.16371257852462 39.959816984389... LINESTRING (-75.16371257852462 39.959816984389... LINESTRING (-75.16529687686355 39.960013687935... LINESTRING (-75.16537539004391 39.959601782423... LINESTRING (-75.16694872703492 39.959805718080...
BIKELANE 0 0 0 0 0

In [149]:
unique(street_gpd.BIKELANE)


Out[149]:
array([ 0.,  1.])

In [132]:
street_gpd[street_gpd.BIKELANE==1].shape


Out[132]:
(4061, 32)

In [131]:
set(bike_lane.SEG_ID).difference(set(street_gpd.SEG_ID))


Out[131]:
{422130, 422142, 500805, 500807}

In [28]:
street_gpd[street_gpd.SEG_ID==500807]


Out[28]:
CLASS FNODE_ LENGTH LPOLY_ L_F_ADD L_HUNDRED L_T_ADD MULTI_REP NEWSEGDATE ONEWAY ... ST_CODE ST_NAME ST_TYPE SUF_DIR TNODE_ UPDATE_ ZIP_LEFT ZIP_RIGHT geometry BIKELANE

0 rows × 32 columns


In [133]:
set(bike_network.ST_CODE).difference(set(street_gpd.ST_CODE))


Out[133]:
{36585}

In [134]:
bike_network[bike_network.SEG_ID==422130].ST_CODE.iloc[0]


Out[134]:
88550

In [59]:
bike_network.loc[bike_network['SEG_ID']==422130, 'ST_CODE'].iloc[0]


Out[59]:
88550

In [63]:
remaining_segids = set(bike_lane.SEG_ID).difference(set(street_gpd.SEG_ID))
for segid in remaining_segids:
    print(segid)
    print(street_gpd[street_gpd.ST_CODE==bike_network[bike_network.SEG_ID==segid].ST_CODE.iloc[0]].SEG_ID)
    print()


422130
28908    420969
28909    420955
28914    420907
29007    420928
39237    423108
Name: SEG_ID, dtype: int64

500805
39239    500804
39986    500802
Name: SEG_ID, dtype: int64

422142
28523    421124
28995    420876
29005    420699
29324    421122
29325    420762
29838    422653
33314    422518
33319    422517
33321    422522
33422    422494
33514    420314
33616    420413
33622    420360
33623    420395
33717    421103
33818    422840
33819    422839
33836    422652
33929    422655
33933    420633
33935    422654
39153    423440
39154    423441
39285    423405
Name: SEG_ID, dtype: int64

500807
Series([], Name: SEG_ID, dtype: int64)


In [61]:
street_gpd[street_gpd.ST_CODE==bike_network[bike_network.SEG_ID==422130].ST_CODE.iloc[0]].SEG_ID


Out[61]:
28908    420969
28909    420955
28914    420907
29007    420928
39237    423108
Name: SEG_ID, dtype: int64

In [32]:
unique(bike_network.TYPE)


Out[32]:
array(['Buffered', 'Buffered w Conventional',
       'Contraflow w Conventional, same', 'Conventional',
       'Conventional w Sharrows', 'Sharrow'], dtype=object)

In [33]:
unique(bike_network.ONEWAY)


Out[33]:
array(['B', 'FT', 'TF'], dtype=object)

In [147]:
bike_lane_type = bike_network.groupby(['SEG_ID','TYPE']).size().reset_index()
bike_lane_type.columns = ['SEG_ID', 'TYPE', 'BIKELANE']
print(bike_lane_type.shape)
bike_lane_type.head().T


(4065, 3)
Out[147]:
0 1 2 3 4
SEG_ID 100002 100003 100004 100045 100066
TYPE Conventional Conventional Conventional Conventional Conventional
BIKELANE 1 1 1 1 1

In [145]:
unique(bike_lane.BIKELANE)


Out[145]:
array([1])

In [148]:
unique(bike_lane_type.BIKELANE)


Out[148]:
array([1])

In [67]:
len(street_gpd.SEG_ID)


Out[67]:
41022

In [68]:
import geopandas as gpd
from geopandas.tools import sjoin
import pandas as pd
from IPython.display import display
from pandas import DataFrame
from geopandas import GeoDataFrame
from shapely.geometry import Point

In [151]:
for column in collision_filtered_agg_count.columns:
    if('norm' not in column and 'LEN' not in column):
        print(column)


SEG_ID
CRASH_MONTH
AUTOMOBILE_COUNT
BELTED_DEATH_COUNT
BELTED_MAJ_INJ_COUNT
BICYCLE_COUNT
BICYCLE_DEATH_COUNT
BICYCLE_MAJ_INJ_COUNT
BUS_COUNT
COMM_VEH_COUNT
FATAL_COUNT
HEAVY_TRUCK_COUNT
INJURY_COUNT
MAJ_INJ_COUNT
MCYCLE_DEATH_COUNT
MCYCLE_MAJ_INJ_COUNT
MIN_INJ_COUNT
MOD_INJ_COUNT
MOTORCYCLE_COUNT
PED_COUNT
PED_DEATH_COUNT
PED_MAJ_INJ_COUNT
PERSON_COUNT
SMALL_TRUCK_COUNT
SUV_COUNT
UNBELTED_OCC_COUNT
UNB_DEATH_COUNT
UNB_MAJ_INJ_COUNT
UNK_INJ_DEG_COUNT
UNK_INJ_PER_COUNT
VAN_COUNT
VEHICLE_COUNT
TOTAL_COLLISION_COUNT

In [153]:
street_gpd[['SEG_ID', 'BIKELANE']].shape


Out[153]:
(41022, 2)

In [154]:
unique(street_gpd[['SEG_ID', 'BIKELANE']].BIKELANE)


Out[154]:
array([ 0.,  1.])

In [157]:
sum(street_gpd.BIKELANE>=1)


Out[157]:
4061

In [158]:
street_gpd[['SEG_ID', 'BIKELANE']].to_csv('../philly/Philly_Bikelanes.csv')

In [159]:
street_gpd.head().T


Out[159]:
0 1 2 3 4
CLASS 3 3 3 5 4
FNODE_ 2 2 1 6 5
LENGTH 449.863 540.083 446.104 447.261 148.216
LPOLY_ 0 0 0 0 0
L_F_ADD 1500 400 1600 1600 350
L_HUNDRED 1500 400 1600 1600 300
L_T_ADD 1598 498 1698 1698 398
MULTI_REP 0 0 0 0 0
NEWSEGDATE None None None None None
ONEWAY FT TF FT TF TF
PRE_DIR None N None None N
RESPONSIBL FAM FAM FAM None None
RPOLY_ 0 0 0 0 0
R_F_ADD 1501 401 1601 1601 351
R_HUNDRED 1500 400 1600 1600 300
R_T_ADD 1599 499 1699 1699 399
SEG_ID 420708 422065 420702 420732 420718
SHAPE_LEN 449.863 540.083 446.104 447.261 148.216
STCL2_ 1 2 3 4 5
STCL2_ID 85205 86540 85199 85229 85215
STNAME CALLOWHILL ST N 15TH ST CALLOWHILL ST CARLTON ST N 17TH ST
STREETLABE CALLOWHILL ST N 15TH ST CALLOWHILL ST CARLTON ST N 17TH ST
ST_CODE 20880 88070 20880 21440 88110
ST_NAME CALLOWHILL 15TH CALLOWHILL CARLTON 17TH
ST_TYPE ST ST ST ST ST
SUF_DIR None None None None None
TNODE_ 1 3 4 5 4
UPDATE_ 1997-02-10 1997-02-10 1997-02-10 1997-02-10 1997-06-27
ZIP_LEFT 19130 19130 19130 19103 19103
ZIP_RIGHT 19130 19130 19130 19103 19103
geometry LINESTRING (-75.16371257852462 39.959816984389... LINESTRING (-75.16371257852462 39.959816984389... LINESTRING (-75.16529687686355 39.960013687935... LINESTRING (-75.16537539004391 39.959601782423... LINESTRING (-75.16694872703492 39.959805718080...
BIKELANE 0 0 0 0 0

In [174]:
street_gpd[['OXFORD' in stname for stname in street_gpd.STNAME]].ST_NAME


Out[174]:
3093     OXFORD
3175     OXFORD
3176     OXFORD
3178     OXFORD
3257     OXFORD
3270     OXFORD
3271     OXFORD
3275     OXFORD
3277     OXFORD
3493     OXFORD
3494     OXFORD
3967     OXFORD
4706     OXFORD
4707     OXFORD
4708     OXFORD
4709     OXFORD
4805     OXFORD
4807     OXFORD
4814     OXFORD
4816     OXFORD
4817     OXFORD
4819     OXFORD
4834     OXFORD
4835     OXFORD
4836     OXFORD
4838     OXFORD
4840     OXFORD
4841     OXFORD
4842     OXFORD
4844     OXFORD
          ...  
31134    OXFORD
31499    OXFORD
31507    OXFORD
31645    OXFORD
31646    OXFORD
31647    OXFORD
32282    OXFORD
32284    OXFORD
33067    OXFORD
33073    OXFORD
33074    OXFORD
33160    OXFORD
33859    OXFORD
33860    OXFORD
35346    OXFORD
35347    OXFORD
35791    OXFORD
35861    OXFORD
35862    OXFORD
35866    OXFORD
35868    OXFORD
36660    OXFORD
37047    OXFORD
37110    OXFORD
37113    OXFORD
37118    OXFORD
38468    OXFORD
40531    OXFORD
40534    OXFORD
40618    OXFORD
Name: ST_NAME, dtype: object

In [ ]: