This notebook plays around with the 311 data from the Western Pennsylvania Regional Data Center.
I have taken the liberty of downloading the 311 data into the data/ directory.
In [ ]:
# use the %ls magic to list the files in the current directory.
%ls
In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
In [3]:
three11s = pd.read_csv("data/pgh-311.csv", parse_dates=['CREATED_ON'])
In [4]:
three11s.dtypes
Out[4]:
In [5]:
three11s.head()
Out[5]:
In [6]:
three11s.loc[0]
Out[6]:
In [7]:
# Plot the number of 311 requests per month
month_counts = three11s.groupby(three11s.CREATED_ON.dt.month)
y = month_counts.size()
x = month_counts.CREATED_ON.first()
axes = pd.Series(y.values, index=x).plot(figsize=(15,5))
plt.ylim(0)
plt.xlabel('Month')
plt.ylabel('Complaints')
Out[7]:
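One caveat: grouping on CREATED_ON.dt.month folds the same calendar month from different years into a single bucket. If the data spans more than one year, grouping by a monthly period keeps the years apart. A minimal sketch, assuming the three11s frame from above:
In [ ]:
# Group by calendar month (year and month) rather than month number,
# so e.g. January of different years stays separate.
per_month = three11s.groupby(three11s.CREATED_ON.dt.to_period('M')).size()
per_month.plot(figsize=(15,5))
plt.ylim(0)
plt.xlabel('Month')
plt.ylabel('Complaints')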
In [8]:
grouped_by_type = three11s.groupby(three11s.REQUEST_TYPE)
size = grouped_by_type.size()
size
#len(size)
#size[size > 200]
Out[8]:
There are too many request types (268) to make sense of at a glance. We need some higher-level categories to make this more comprehensible. Fortunately, there is an Issue and Category codebook that we can use to map the low-level request types onto higher-level categories.
In [9]:
codebook = pd.read_csv('data/codebook.csv')
codebook.head()
Out[9]:
In [10]:
merged_data = pd.merge(three11s,
                       codebook[['Category', 'Issue']],
                       how='left',
                       left_on="REQUEST_TYPE",
                       right_on="Issue")
In [11]:
merged_data.head()
Out[11]:
In [12]:
grouped_by_type = merged_data.groupby(merged_data.Category)
size = grouped_by_type.size()
size
Out[12]:
That is a more manageable list of categories for data visualization. Let's take a look at the distribution of requests per category in the dataset.
In [13]:
size.plot(kind='barh', figsize=(8,6))
Out[13]:
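By default the bars appear in the index's alphabetical order; sorting by count first makes the ranking easier to read. For example:
In [ ]:
# Sort by count so the longest bar ends up at the top
size.sort_values().plot(kind='barh', figsize=(8,6))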
In [15]:
merged_data.groupby(merged_data.NEIGHBORHOOD).size().sort_values(ascending=False)
Out[15]:
The same counts in graph form:
In [16]:
merged_data.groupby(merged_data.NEIGHBORHOOD).size().sort_values(
    ascending=True).plot(kind="barh", figsize=(5,20))
Out[16]:
So we can see from the graph above that Brookline, followed by the South Side Slopes, Carrick, and the South Side Flats, makes the most 311 requests. It would be interesting to get some neighborhood population data and compute the number of requests per capita.
I bet those data are available; maybe YOU could create that graph!
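Here is one way that per-capita calculation could look. The file name and column names below are hypothetical; adapt them to whatever population data you actually find:
In [ ]:
# A sketch of requests per capita. 'data/neighborhood-populations.csv'
# and its 'Neighborhood'/'Population' columns are hypothetical.
pop = pd.read_csv('data/neighborhood-populations.csv', index_col='Neighborhood')
requests = merged_data.groupby(merged_data.NEIGHBORHOOD).size()
per_capita = (requests / pop['Population']).dropna()
per_capita.sort_values().plot(kind='barh', figsize=(5,20))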
Jupyter Notebooks have a very powerful widget framework that allows you to easily add interactive components to live notebooks.
In [17]:
# create a function that generates a chart of requests per neighborhood
def issues_by_neighborhood(neighborhood):
    """Generate a plot of issue categories for one neighborhood."""
    subset = merged_data[merged_data['NEIGHBORHOOD'] == neighborhood]
    size = subset.groupby('Category').size()
    size.plot(kind='barh', figsize=(8,6))
In [18]:
issues_by_neighborhood('Greenfield')
In [19]:
issues_by_neighborhood('Brookline')
In [20]:
issues_by_neighborhood('Garfield')
In [ ]:
from ipywidgets import interact
@interact(hood=sorted(three11s.NEIGHBORHOOD.dropna().unique()))
def issues_by_neighborhood(hood):
    """Generate a plot of issue categories for the selected neighborhood."""
    subset = merged_data[merged_data['NEIGHBORHOOD'] == hood]
    size = subset.groupby('Category').size()
    size.plot(kind='barh', figsize=(8,6))
In [ ]: