In [1]:
import json
import requests
%matplotlib inline
In [2]:
url = "https://data.seattle.gov/resource/pu5n-trf4.json"
params = {'$where': 'event_clearance_description = "NOISE DISTURBANCE"', '$limit': 50000}
response = requests.get(url, params=params)
print(response.url)
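A single request returns at most the `$limit` rows we ask for. If the filter ever matched more than 50,000 records, Socrata's SoQL `$offset` parameter can page through the rest; a minimal sketch (the loop structure and page size are assumptions, not part of the original pull):

import pandas as pd
import requests

url = "https://data.seattle.gov/resource/pu5n-trf4.json"
frames = []
offset, page_size = 0, 50000
while True:
    # ask for one page at a time until the API returns an empty list
    params = {'$where': 'event_clearance_description = "NOISE DISTURBANCE"',
              '$limit': page_size, '$offset': offset}
    page = requests.get(url, params=params).json()
    if not page:
        break
    frames.append(pd.DataFrame(page))
    offset += page_size
noise_all = pd.concat(frames, ignore_index=True)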
In [4]:
import pandas as pd
noise_data_r = pd.read_json(response.text)  # parse the response already in hand rather than re-fetching the URL
#noise_data = pd.read_csv("noise_report.csv",sep='\t', encoding='utf-8')
In [5]:
noise_data_r.shape
Out[5]:
In [6]:
#noise_data.to_csv("noise_report.csv",sep='\t', encoding='utf-8')
In [86]:
noise_data_r.head(20)
Out[86]:
In [8]:
noise_data_r.info()
In [63]:
# at_scene_time has missing values. Keep the original column and add
# forward-filled (ffill) and backward-filled (bfill) variants so all three
# strategies can be compared side by side.
noise_data = noise_data_r.copy()
noise_data['ff_at_scene_time'] = noise_data_r['at_scene_time'].ffill()
noise_data['bf_at_scene_time'] = noise_data_r['at_scene_time'].bfill()
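For intuition, here is what the two fills do on a toy Series (illustrative only; not part of the analysis):

import numpy as np
import pandas as pd

s = pd.Series([1.0, np.nan, np.nan, 4.0])
print(s.ffill().tolist())  # [1.0, 1.0, 1.0, 4.0] -- carry the last known value forward
print(s.bfill().tolist())  # [1.0, 4.0, 4.0, 4.0] -- pull the next known value backward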
In [64]:
noise_data.info()
In [80]:
# Derive hour, date, season, and year columns.
# Seasons: spring = Mar-May, summer = Jun-Aug, autumn = Sep-Nov, winter = Dec-Feb.
def getSeason(d):
    mo = d.month
    if 3 <= mo <= 5:
        return 'spring'
    elif 6 <= mo <= 8:
        return 'summer'
    elif 9 <= mo <= 11:
        return 'autumn'
    else:
        return 'winter'

testrow = noise_data.iloc[1, :]
print(getSeason(testrow.at_scene_time), testrow.at_scene_time.hour, testrow.at_scene_time.date())
noise_data['fhour'] = noise_data.apply(lambda row: row['ff_at_scene_time'].hour, axis=1)
noise_data['fdate'] = noise_data.apply(lambda row: row['ff_at_scene_time'].date(), axis=1)
noise_data['fseason'] = noise_data.apply(lambda row: getSeason(row['ff_at_scene_time']), axis=1)
noise_data['fyear'] = noise_data.apply(lambda row: row['ff_at_scene_time'].year, axis=1)
noise_data['bhour'] = noise_data.apply(lambda row: row['bf_at_scene_time'].hour, axis=1)
noise_data['bdate'] = noise_data.apply(lambda row: row['bf_at_scene_time'].date(), axis=1)
noise_data['bseason'] = noise_data.apply(lambda row: getSeason(row['bf_at_scene_time']), axis=1)
noise_data['byear'] = noise_data.apply(lambda row: row['bf_at_scene_time'].year, axis=1)
noise_data['hour'] = noise_data.apply(lambda row: row['at_scene_time'].hour, axis=1)
noise_data['date'] = noise_data.apply(lambda row: row['at_scene_time'].date(), axis=1)
# the unfilled season must come from at_scene_time, not the backfilled column
noise_data['season'] = noise_data.apply(lambda row: getSeason(row['at_scene_time']), axis=1)
noise_data['year'] = noise_data.apply(lambda row: row['at_scene_time'].year, axis=1)
print(noise_data['year'].is_monotonic)
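These row-wise apply calls work but are slow on ~50,000 rows. The same columns can be built with pandas' vectorized .dt accessor; a sketch of an equivalent alternative (assuming all three columns are datetime64):

for prefix, col in [('f', 'ff_at_scene_time'), ('b', 'bf_at_scene_time'), ('', 'at_scene_time')]:
    noise_data[prefix + 'hour'] = noise_data[col].dt.hour
    noise_data[prefix + 'date'] = noise_data[col].dt.date
    noise_data[prefix + 'year'] = noise_data[col].dt.year
    # month -> season; a NaT month is NaN, compares False everywhere, and falls
    # through to 'winter', matching getSeason's behaviour on missing timestamps
    noise_data[prefix + 'season'] = noise_data[col].dt.month.map(
        lambda mo: 'spring' if 3 <= mo <= 5 else
                   'summer' if 6 <= mo <= 8 else
                   'autumn' if 9 <= mo <= 11 else 'winter')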
In [81]:
import matplotlib.pyplot as plt
import numpy as np
hour_bin = np.linspace(0,24,25)
f, (ax1,ax2,ax3) = plt.subplots(1,3, sharey=True,figsize = (15,5))
ax1.hist(noise_data['hour'].dropna(),bins = hour_bin)
ax1.set_xticks([0,3,7,11,15,19,23])
ax1.set_xlim(0,24)
ax1.set_xlabel('Hours')
ax1.set_ylabel('Numbers of noise complaints')
ax1.set_title('No_fill')
ax2.hist(noise_data['fhour'].dropna(), bins=hour_bin)  # leading rows can still be NaT after ffill
ax2.set_xticks([0,3,7,11,15,19,23])
ax2.set_xlim(0,24)
ax2.set_xlabel('Hours')
ax2.set_title('Forward_fill')
f.text(
    0.95, 0.05,
    'Data source: 911 Seattle City police report\n$\\copyright$ ZhouYu',
    ha='right', va='bottom',
    size=10,
    color='gray', alpha=0.5)
ax3.hist(noise_data['bhour'].dropna(), bins=hour_bin)  # trailing rows can still be NaT after bfill
ax3.set_xticks([0,3,7,11,15,19,23])
ax3.set_xlim(0,24)
ax3.set_xlabel('Hours')
#ax2.set_ylabel('Numbers of noise complaints')
ax3.set_title('Backward_fill')
plt.show()
In [74]:
ini_datetime = noise_data['at_scene_time'].min()
end_datetime = noise_data['at_scene_time'].max()
print(ini_datetime, end_datetime)
st_datetime = pd.to_datetime('2011/1/1')
ed_datetime = pd.to_datetime('2016/12/31 23:59:59')
print(st_datetime, ed_datetime)
season_analysis_ff = noise_data.loc[(noise_data['ff_at_scene_time'] >= st_datetime) & (noise_data['ff_at_scene_time'] <= ed_datetime)]
seasonal_noise_ff = season_analysis_ff['fseason'].value_counts().to_dict()
season_analysis_bf = noise_data.loc[(noise_data['bf_at_scene_time'] >= st_datetime) & (noise_data['bf_at_scene_time'] <= ed_datetime)]
seasonal_noise_bf = season_analysis_bf['bseason'].value_counts().to_dict()
noise_s = noise_data.dropna(subset=['at_scene_time'], axis=0)
# build the mask from noise_s itself so the boolean index aligns after dropna
season_analysis = noise_s.loc[(noise_s['at_scene_time'] >= st_datetime) & (noise_s['at_scene_time'] <= ed_datetime)]
seasonal_noise = season_analysis['season'].value_counts().to_dict()
print(seasonal_noise_ff)
print(seasonal_noise_bf)
print(seasonal_noise)
# print(noise_data['season'].value_counts())
labels = list(seasonal_noise.keys())
freq = np.array(list(seasonal_noise.values()), dtype=float)
freq = freq / freq.sum()
labels_ff = list(seasonal_noise_ff.keys())
freq_ff = np.array(list(seasonal_noise_ff.values()), dtype=float)
freq_ff = freq_ff / freq_ff.sum()
labels_bf = list(seasonal_noise_bf.keys())
freq_bf = np.array(list(seasonal_noise_bf.values()), dtype=float)
freq_bf = freq_bf / freq_bf.sum()
explode = (0,0,0.1,0)
fig1,(ax1,ax2,ax3) = plt.subplots(1,3,figsize = (15,5))
ax1.pie(freq,explode = explode,labels = labels,autopct = '%1.1f%%',shadow = True, startangle = 90)
ax1.axis('equal')
ax1.set_title('Original')
ax2.pie(freq_ff, explode=explode, labels=labels_ff, autopct='%1.1f%%', shadow=True, startangle=90)
ax2.axis('equal')
ax2.set_title('Forward Fill')
fig1.text(
    0.95, 0.05,
    'Data source: 911 Seattle City police report\n$\\copyright$ ZhouYu',
    ha='right', va='bottom',
    size=10,
    color='gray', alpha=0.5)
ax3.pie(freq_bf, explode=explode, labels=labels_bf, autopct='%1.1f%%', shadow=True, startangle=90)
ax3.axis('equal')
ax3.set_title('Backward Fill')
plt.show()
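One fragility above: each pie takes its labels from its own dict, so slice order and colours can differ between panels. A steadier variant (a sketch over the same data) normalizes and reindexes onto a fixed season order so all three panels line up:

order = ['spring', 'summer', 'autumn', 'winter']
freq = season_analysis['season'].value_counts(normalize=True).reindex(order)
freq_ff = season_analysis_ff['fseason'].value_counts(normalize=True).reindex(order)
freq_bf = season_analysis_bf['bseason'].value_counts(normalize=True).reindex(order)
# pass labels=order (and the same explode tuple) to all three ax.pie calls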
In [163]:
yearly_noise= (season_analysis['year'].value_counts()).to_dict()
years = sorted(yearly_noise.keys())
noise_byyear = [yearly_noise[x] for x in years]
yearly_noise_ff = (season_analysis_ff['fyear'].value_counts()).to_dict()
years_ff = sorted(yearly_noise_ff.keys())
noise_byyear_ff = [yearly_noise_ff[x] for x in years_ff]
yearly_noise_bf = (season_analysis_bf['byear'].value_counts()).to_dict()
years_bf = sorted(yearly_noise_bf.keys())
noise_byyear_bf = [yearly_noise_bf[x] for x in years_bf]
print(years)
fig, (ax1,ax2,ax3) = plt.subplots(1,3,sharey = True,figsize = (15,5))
ax1.plot(years,noise_byyear,'-o')
ax1.set_xticks(years)
ax1.set_xticklabels([str(y) for y in years], rotation=45)
ax1.set_title('Original')
ax1.set_ylabel('number of noise complaints')
ax2.plot(years_ff,noise_byyear_ff,'-o')
ax2.set_xticks(years_ff)
ax2.set_xticklabels([str(y) for y in years_ff], rotation=45)
ax2.set_title('Forward Fill')
fig.text(
    0.95, 0.05,
    'Data source: 911 Seattle City police report\n$\\copyright$ ZhouYu',
    ha='right', va='bottom',
    size=10,
    color='gray', alpha=0.5)
ax3.plot(years_bf,noise_byyear_bf,'-o')
ax3.set_xticks(years_bf)
ax3.set_xticklabels([str(y) for y in years_bf], rotation=45)
ax3.set_title('Backward Fill')
plt.show()
In [122]:
# keep only complaints with a clearance timestamp; .copy() avoids
# SettingWithCopyWarning when adding columns to the slice
clear_noise = season_analysis.dropna(subset=['event_clearance_date'], axis=0).copy()
clear_noise['event_clearance_datetime'] = pd.to_datetime(clear_noise['event_clearance_date'])
# elapsed minutes from arriving at the scene to clearing the event;
# total_seconds() is safe across day boundaries, unlike timedelta.seconds
clear_noise['process_time'] = (clear_noise['event_clearance_datetime'] - clear_noise['at_scene_time']).dt.total_seconds() / 60
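Clearance can occasionally be stamped before the at-scene time (data-entry noise), which shows up here as negative minutes. A quick guard, sketched (whether to drop or clip such rows is a judgment call):

negative = clear_noise['process_time'] < 0
print(negative.sum(), 'records with negative process time')
clear_noise = clear_noise[~negative]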
In [137]:
plt.hist(clear_noise['process_time'].dropna(), bins='auto')
plt.xlabel('Process time / minutes')
plt.ylabel('Number of noise complaints')
plt.title('Process time')
# place the credit in figure coordinates, matching the other plots
plt.gcf().text(
    0.95, 0.05,
    'Data source: 911 Seattle City police report\n$\\copyright$ ZhouYu',
    ha='right', va='bottom',
    size=10,
    color='gray', alpha=0.5)
plt.show()
In [155]:
clear_noise_s = noise_data_r.dropna(subset=['event_clearance_date'], axis=0).copy()
clear_noise_s['event_clearance_datetime'] = pd.to_datetime(clear_noise_s['event_clearance_date'])
print(noise_data_r.shape)  # shape before dropping rows without a clearance date
print(clear_noise_s.shape)
In [156]:
clear_noise_s['hour'] = clear_noise_s.apply(lambda row: row['event_clearance_datetime'].hour, axis=1)
clear_noise_s['date'] = clear_noise_s.apply(lambda row: row['event_clearance_datetime'].date(), axis=1)
clear_noise_s['season'] = clear_noise_s.apply(lambda row: getSeason(row['event_clearance_datetime']), axis=1)
clear_noise_s['year'] = clear_noise_s.apply(lambda row: row['event_clearance_datetime'].year, axis=1)
clc_season_analysis = clear_noise_s.loc[(clear_noise_s['event_clearance_datetime'] >= st_datetime) & (clear_noise_s['event_clearance_datetime'] <= ed_datetime)]
In [160]:
seasonal_noise_c = clc_season_analysis['season'].value_counts().to_dict()
labels = list(seasonal_noise_c.keys())
freq = np.array(list(seasonal_noise_c.values()), dtype=float)
freq = freq / freq.sum()
fig, ax = plt.subplots()
ax.pie(freq,explode = explode,labels = labels,autopct = '%1.1f%%',shadow = True, startangle = 90)
ax.axis('equal')
ax.set_title('Clearance date')
fig.text(
    0.95, 0.05,
    'Data source: 911 Seattle City police report\n$\\copyright$ ZhouYu',
    ha='right', va='bottom',
    size=10,
    color='gray', alpha=0.5)
plt.show()
In [164]:
yearly_noise= (clc_season_analysis['year'].value_counts()).to_dict()
years = sorted(yearly_noise.keys())
noise_byyear = [yearly_noise[x] for x in years]
fig, ax1= plt.subplots()
ax1.plot(years,noise_byyear,'-o')
ax1.set_xticks(years)
ax1.set_xticklabels([str(y) for y in years], rotation=45)
fig.text(
    0.95, 0.05,
    'Data source: 911 Seattle City police report\n$\\copyright$ ZhouYu',
    ha='right', va='bottom',
    size=10,
    color='gray', alpha=0.5)
ax1.set_title('Clearance date')
ax1.set_ylabel('Numbers of noise complaints')
plt.show()
In [102]:
testrow
Out[102]:
In [11]:
from mpl_toolkits.basemap import Basemap
coords_lon = noise_data['longitude'].min(), noise_data['longitude'].max()
coords_lat = noise_data['latitude'].min(), noise_data['latitude'].max()
w = coords_lon[1]-coords_lon[0]
h = coords_lat[1]-coords_lat[0]
m = Basemap(llcrnrlon=coords_lon[0] - 0.01 * w, llcrnrlat=coords_lat[0] - 0.01 * h,
            urcrnrlon=coords_lon[1] + 0.01 * w, urcrnrlat=coords_lat[1] + 0.01 * h,
            projection='tmerc', ellps='WGS84', lat_ts=0,
            lat_0=np.mean(coords_lat), lon_0=np.mean(coords_lon), resolution='i',
            suppress_ticks=True)
m.readshapefile('neighborhoods/WGS84/Neighborhoods','seattle',color = 'none',zorder = 2,drawbounds = True)
m.drawcoastlines()
#m.drawstates()
#m.drawrivers()
#m.plot(41,-87, marker = 'o')
Out[11]:
In [12]:
# set up a map dataframe
from lxml import etree
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.colors import Normalize
from matplotlib.collections import PatchCollection
from mpl_toolkits.basemap import Basemap
from shapely.geometry import Point, Polygon, MultiPoint, MultiPolygon
from shapely.prepared import prep
from pysal.esda.mapclassify import Natural_Breaks as nb
from descartes import PolygonPatch
import fiona
from itertools import chain
df_map = pd.DataFrame({
    'poly': [Polygon(xy) for xy in m.seattle],
    'hood_name': [hood['S_HOOD'] for hood in m.seattle_info]})
df_map['area_m'] = df_map['poly'].map(lambda x: x.area)  # projected units are metres, so this is m^2
df_map['area_km'] = df_map['area_m'] / 1e6  # 1 km^2 = 1e6 m^2
# Create Point objects in map coordinates from dataframe lon and lat values
map_points = pd.Series(
    [Point(m(mapped_x, mapped_y)) for mapped_x, mapped_y in zip(noise_data['longitude'], noise_data['latitude'])])
incident_points = MultiPoint(list(map_points.values))
hood_polygon = prep(MultiPolygon(list(df_map['poly'].values)))
# keep only the points that fall within the Seattle neighborhood boundaries
seattle_points = list(filter(hood_polygon.contains, incident_points.geoms))
In [15]:
from matplotlib.colors import LinearSegmentedColormap
def colorbar_index(ncolors, cmap, labels=None, **kwargs):
    """
    Convenience function to avoid off-by-one errors:
    takes a standard colour ramp, discretizes it, then draws a
    colour bar with correctly aligned labels.
    """
    cmap = cmap_discretize(cmap, ncolors)
    mappable = cm.ScalarMappable(cmap=cmap)
    mappable.set_array([])
    mappable.set_clim(-0.5, ncolors + 0.5)
    colorbar = plt.colorbar(mappable, **kwargs)
    colorbar.set_ticks(np.linspace(0, ncolors, ncolors))
    colorbar.set_ticklabels(list(range(ncolors)))
    if labels:
        colorbar.set_ticklabels(labels)
    return colorbar

def cmap_discretize(cmap, N):
    """
    Return a discrete colormap from the continuous colormap cmap.

    cmap: colormap instance, e.g. cm.jet.
    N: number of colors.

    Example:
        x = resize(arange(100), (5, 100))
        djet = cmap_discretize(cm.jet, 5)
        imshow(x, cmap=djet)
    """
    if isinstance(cmap, str):
        cmap = plt.get_cmap(cmap)
    colors_i = np.concatenate((np.linspace(0, 1., N), (0., 0., 0., 0.)))
    colors_rgba = cmap(colors_i)
    indices = np.linspace(0, 1., N + 1)
    cdict = {}
    for ki, key in enumerate(('red', 'green', 'blue')):
        cdict[key] = [(indices[i], colors_rgba[i - 1, ki], colors_rgba[i, ki]) for i in range(N + 1)]
    return LinearSegmentedColormap(cmap.name + "_%d" % N, cdict, 1024)
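A quick standalone check of cmap_discretize (illustrative only; the choropleth below gets its discretized ramp via colorbar_index instead):

reds5 = cmap_discretize(plt.get_cmap('Reds'), 5)
print(reds5.name)  # 'Reds_5', a 5-step version of the continuous 'Reds' ramp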
In [16]:
# draw ward patches from polygons
df_map['patches'] = df_map['poly'].map(lambda x: PolygonPatch(
    x,
    fc='#555555',
    ec='#787878', lw=.25, alpha=.9,
    zorder=4))
plt.clf()
fig = plt.figure()
ax = fig.add_subplot(111, facecolor='w', frame_on=False)
# no need to pass points through m() again: map_points were already projected above
dev = m.scatter(
    [geom.x for geom in seattle_points],
    [geom.y for geom in seattle_points],
    5, marker='o', lw=.25,
    facecolor='#33ccff', edgecolor='w',
    alpha=0.9, antialiased=True,
    label='Noise Complaint Location', zorder=3)
# plot boroughs by adding the PatchCollection to the axes instance
ax.add_collection(PatchCollection(df_map['patches'].values, match_original=True))
# copyright and source data info
smallprint = ax.text(
    1.03, 0,
    'Total points: %s\nData from Seattle city 911 reports\n$\\copyright$ Zhou' % len(seattle_points),
    ha='right', va='bottom',
    size=4,
    color='#555555',
    transform=ax.transAxes)
# Draw a map scale
m.drawmapscale(
    coords_lon[0] + 0.08, coords_lat[0] + 0.015,
    coords_lon[0], coords_lat[0],
    10.,
    barstyle='fancy', labelstyle='simple',
    fillcolor1='w', fillcolor2='#555555',
    fontcolor='#555555',
    zorder=5)
plt.title("Noise complaint locations, Seattle")
plt.tight_layout()
# this will set the image width to 722px at 100dpi
fig.set_size_inches(7.22, 5.25)
plt.show()
In [17]:
df_map['count'] = df_map['poly'].map(lambda x: len(list(filter(prep(x).contains, seattle_points))))
df_map['density_m'] = df_map['count'] / df_map['area_m']
df_map['density_km'] = df_map['count'] / df_map['area_km']
# it's easier to work with NaN values when classifying
df_map.replace(to_replace={'density_m': {0: np.nan}, 'density_km': {0: np.nan}}, inplace=True)
In [18]:
breaks = nb(
    df_map[df_map['density_km'].notnull()].density_km.values,
    initial=300,
    k=5)
# the notnull method lets us match indices when joining
jb = pd.DataFrame({'jenks_bins': breaks.yb}, index=df_map[df_map['density_km'].notnull()].index)
df_map = df_map.join(jb)
df_map.jenks_bins.fillna(-1, inplace=True)
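Natural_Breaks returns the upper bound of each class in .bins and a per-observation class index in .yb; a toy illustration (assuming the same old pysal.esda.mapclassify API imported above as nb):

toy = np.array([1, 2, 3, 10, 11, 12, 100, 110, 120])
toy_breaks = nb(toy, k=3)
print(toy_breaks.bins)  # class upper bounds, here expected to be roughly [3, 12, 120]
print(toy_breaks.yb)    # bin index (0..k-1) assigned to each value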
In [19]:
jenks_labels = ["<= %0.1f incidents/km$^2$"%b for b in breaks.bins]
jenks_labels.insert(0, 'No complaint (%s neighborhoods)' % len(df_map[df_map['density_km'].isnull()]))
In [20]:
plt.clf()
fig = plt.figure()
ax = fig.add_subplot(111, facecolor='w', frame_on=False)
# use a red colour ramp - we'll be converting it to a map using cmap()
cmap = plt.get_cmap('Reds')
# draw wards with grey outlines
df_map['patches'] = df_map['poly'].map(lambda x: PolygonPatch(x, ec='#555555', lw=.2, alpha=1., zorder=4))
pc = PatchCollection(df_map['patches'], match_original=True)
# impose our colour map onto the patch collection
norm = Normalize()
pc.set_facecolor(cmap(norm(df_map['jenks_bins'].values)))
ax.add_collection(pc)
# Add a colour bar
cb = colorbar_index(ncolors=len(jenks_labels), cmap=cmap, shrink=0.5, labels=jenks_labels)
#cb.ax.tick_params(labelsize=6)
# Bin method, copyright and source data info
smallprint = ax.text(
    1.03, 0,
    '911 Seattle City police report\n$\\copyright$ Zhou',
    ha='right', va='bottom',
    size=4,
    color='#555555',
    transform=ax.transAxes)
# Draw a map scale
m.drawmapscale(
    coords_lon[0] + 0.08, coords_lat[0] + 0.015,
    coords_lon[0], coords_lat[0],
    10.,
    barstyle='fancy', labelstyle='simple',
    fillcolor1='w', fillcolor2='#555555',
    fontcolor='#555555',
    zorder=5)
plt.title('Density of Noise Complaints')
plt.tight_layout()
# this will set the image width to 722px at 100dpi
fig.set_size_inches(7.22, 5.25)
plt.show()
In [20]:
np.sum(df_map['count'])
Out[20]: