In [9]:
import pandas as pd
%matplotlib inline
In [2]:
# Load the community-area table (crime counts, crime rates and socioeconomic
# indicators) from a CSV expected in the current working directory.
communities = pd.read_csv(filepath_or_buffer='community_crime_rates_socio.csv')
In [57]:
# Display the loaded table. The rendered output elides the middle rows and
# reports the full size (77 rows x 13 columns) underneath.
communities
Out[57]:
Unnamed: 0
Crime Count
population
Crime Rate
Community Area Number
COMMUNITY AREA NAME
PERCENT OF HOUSING CROWDED
PERCENT HOUSEHOLDS BELOW POVERTY
PERCENT AGED 16+ UNEMPLOYED
PERCENT AGED 25+ WITHOUT HIGH SCHOOL DIPLOMA
PERCENT AGED UNDER 18 OR OVER 64
PER CAPITA INCOME
HARDSHIP INDEX
0
0
3525
54991
64.101398
1
Rogers Park
7.7
23.6
8.7
18.2
27.5
23939
39
1
1
3063
71942
42.575964
2
West Ridge
7.8
17.2
8.8
20.8
38.5
23040
46
2
2
3592
56362
63.730883
3
Uptown
3.8
24.0
8.9
11.8
22.2
35787
20
3
3
1751
39493
44.336971
4
Lincoln Square
3.4
10.9
8.2
13.4
25.5
37524
17
4
4
1379
31867
43.273606
5
North Center
0.3
7.5
5.2
4.5
26.2
57123
6
5
5
5511
94368
58.399034
6
Lake View
1.1
11.4
4.7
2.6
17.0
60058
5
6
6
3729
64116
58.160210
7
Lincoln Park
0.8
12.3
5.1
3.6
21.5
71551
2
7
7
8944
80484
111.127678
8
Near North Side
1.9
12.9
7.0
2.5
22.6
88669
1
8
8
254
11187
22.704925
9
Edison Park
1.1
3.3
6.5
7.4
35.3
40959
8
9
9
1259
37023
34.005888
10
Norwood Park
2.0
5.4
9.0
11.5
39.5
32875
21
10
10
1144
25448
44.954417
11
Jefferson Park
2.7
8.6
12.4
13.4
35.5
27751
25
11
11
444
18508
23.989626
12
Forest Glen
1.1
7.5
6.8
4.9
40.5
44164
11
12
12
830
17931
46.288551
13
North Park
3.9
13.2
9.9
14.4
39.0
26576
33
13
13
2508
51542
48.659346
14
Albany Park
11.3
19.2
10.0
32.9
32.0
21323
53
14
14
3399
64124
53.006675
15
Portage Park
4.1
11.6
12.6
19.3
34.0
24336
35
15
15
2967
53359
55.604490
16
Irving Park
6.3
13.1
10.0
22.4
31.6
27249
34
16
16
1636
41932
39.015549
17
Dunning
5.2
10.6
10.0
16.2
33.6
26282
28
17
17
575
13426
42.827350
18
Montclaire
8.1
15.3
13.8
23.5
38.6
22014
50
18
18
4775
78743
60.640311
19
Belmont Cragin
10.8
18.7
14.6
37.3
37.3
15461
70
19
19
1707
25010
68.252699
20
Hermosa
6.9
20.5
13.1
41.6
36.4
15089
71
20
20
2230
39262
56.797922
21
Avondale
6.0
15.3
9.2
24.7
31.0
20039
42
21
21
4747
73595
64.501665
22
Logan Square
3.2
16.8
8.2
14.8
26.2
31908
23
22
22
8021
56323
142.410738
23
Humboldt park
14.8
33.9
17.3
35.4
38.0
13781
85
23
23
6975
81432
85.654288
24
West Town
2.3
14.7
6.6
12.9
21.7
43198
10
24
24
17050
98514
173.071848
25
Austin
6.3
28.6
22.6
24.4
37.9
15957
73
25
25
5804
18001
322.426532
26
West Garfield Park
9.4
41.7
25.8
24.5
43.6
10934
92
26
26
5279
20567
256.673312
27
East Garfield Park
8.2
42.4
19.6
21.3
43.2
12961
83
27
27
7804
54881
142.198575
28
Near West Side
3.8
20.6
10.7
9.6
22.2
44689
15
28
28
8053
35912
224.242593
29
North Lawndale
7.4
43.1
21.2
27.6
42.7
12034
87
29
29
4404
79288
55.544345
30
South Lawndale
15.2
30.7
15.8
54.8
33.8
10402
96
...
...
...
...
...
...
...
...
...
...
...
...
...
...
47
47
1461
13812
105.777585
48
Calumet Heights
2.1
11.5
20.0
11.0
44.0
28887
38
48
48
6555
44619
146.910509
49
Roseland
2.5
19.8
20.3
16.9
41.2
17949
52
49
49
1037
7325
141.569966
50
Pullman
1.5
21.6
22.8
13.1
38.6
20588
51
50
50
1904
15109
126.017605
51
South Deering
4.0
29.2
16.3
21.0
39.5
14685
65
51
51
1304
23042
56.592310
52
East Side
6.8
19.2
12.1
31.9
42.8
17104
64
52
52
3957
29651
133.452497
53
West Pullman
3.3
25.9
19.4
20.5
42.1
16563
62
53
53
1175
6482
181.271213
54
Riverdale
5.8
56.5
34.6
27.5
51.5
8201
98
54
54
507
9426
53.787397
55
Hegewisch
3.3
17.1
9.6
19.2
42.9
22677
44
55
55
1800
34513
52.154261
56
Garfield Ridge
2.6
8.8
11.3
19.3
38.1
26353
32
56
56
986
13393
73.620548
57
Archer Heights
8.5
14.1
16.5
35.9
39.2
16134
67
57
57
2715
45368
59.843943
58
Brighton Park
14.4
23.6
13.9
45.1
39.3
13089
84
58
58
1066
15612
68.280810
59
McKinley Park
7.2
18.7
13.4
32.9
35.6
16954
61
59
59
1590
31977
49.723239
60
Bridgeport
4.5
18.9
13.7
22.2
31.3
22694
43
60
60
4848
44377
109.245780
61
New City
11.9
29.0
23.0
41.5
38.9
12765
91
61
61
1033
18109
57.043459
62
West Elsdon
11.1
15.6
16.7
37.0
37.7
15754
69
62
62
2335
39894
58.530105
63
Gage Park
15.8
23.4
18.2
51.5
38.8
12171
93
63
63
926
23139
40.019016
64
Clearing
2.7
8.9
9.5
18.8
37.6
25113
29
64
64
2029
33355
60.830460
65
West Lawn
5.8
14.9
9.6
33.6
39.6
16907
56
65
65
6095
55628
109.567124
66
Chicago Lawn
7.6
27.9
17.1
31.2
40.6
13231
80
66
66
7213
35505
203.154485
67
West Englewood
4.8
34.4
35.9
26.3
40.7
11317
89
67
67
7006
30654
228.550923
68
Englewood
3.8
46.6
28.0
28.5
42.5
11888
94
68
68
6669
32602
204.558003
69
Greater Grand Crossing
3.6
29.6
23.0
16.5
41.0
17285
66
69
69
2283
41081
55.573136
70
Ashburn
4.0
10.4
11.7
17.7
36.9
23482
37
70
70
7748
48743
158.956158
71
Auburn Gresham
4.0
27.6
28.3
18.5
41.9
15528
74
71
71
977
20034
48.767096
72
Beverly
0.9
5.1
8.0
3.7
40.5
39523
12
72
72
3114
26493
117.540482
73
Washington Height
1.1
16.9
20.8
13.7
42.6
19713
48
73
73
609
19093
31.896507
74
Mount Greenwood
1.0
3.4
8.7
4.3
36.8
34381
16
74
74
2055
22544
91.155075
75
Morgan Park
0.8
13.2
15.0
10.8
40.3
27149
30
75
75
1621
12756
127.077454
76
O'Hare
3.6
15.4
7.1
10.9
30.3
25828
24
76
76
2213
56521
39.153589
77
Edgewater
4.1
18.2
9.2
9.7
23.8
33385
19
77 rows × 13 columns
In [7]:
# List the column labels. Note that the income label is stored with a
# trailing space ('PER CAPITA INCOME '), so selecting that column by name
# must include the space.
communities.columns
Out[7]:
Index([u'Unnamed: 0', u'Crime Count', u'population', u'Crime Rate',
u'Community Area Number', u'COMMUNITY AREA NAME',
u'PERCENT OF HOUSING CROWDED', u'PERCENT HOUSEHOLDS BELOW POVERTY',
u'PERCENT AGED 16+ UNEMPLOYED',
u'PERCENT AGED 25+ WITHOUT HIGH SCHOOL DIPLOMA',
u'PERCENT AGED UNDER 18 OR OVER 64', u'PER CAPITA INCOME ',
u'HARDSHIP INDEX'],
dtype='object')
In [59]:
# Discard the 'Unnamed: 0' column, which mirrors the row index in the
# displayed rows. The frame is rebound rather than mutated in place, and
# errors='ignore' makes the drop a no-op when the column is already gone,
# so this cell can be re-run (or run out of order) without a KeyError.
communities = communities.drop(['Unnamed: 0'], axis=1, errors='ignore')
In [75]:
# Scatter each community's crime rate against the share of residents aged
# 25+ without a high school diploma. The returned Axes is the cell output.
communities.plot.scatter(
    x='PERCENT AGED 25+ WITHOUT HIGH SCHOOL DIPLOMA',
    y='Crime Rate'
)
Out[75]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fc5f3e4fc10>
In [19]:
import statsmodels.api as sm
In [60]:
# Response: the crime rate. In the displayed rows it equals
# 1000 * 'Crime Count' / 'population', so the raw count is kept out of the
# predictors; the area name is a text label and is excluded as well.
# Assumes the 'Unnamed: 0' column has already been dropped from
# `communities`; otherwise it would be carried into X as a regressor.
# NOTE(review): 'Community Area Number' looks like an identifier but stays in
# X as a numeric predictor -- confirm that this is intended.
non_predictors = ['Crime Rate', 'COMMUNITY AREA NAME', 'Crime Count']
y = communities['Crime Rate']
X = communities.drop(non_predictors, axis=1)
In [61]:
# Add an intercept column to the predictors (reported as `const` in the
# summary table), then fit ordinary least squares of y on X.
X = sm.add_constant(X)
model = sm.OLS(endog=y, exog=X)
results = model.fit()

# Last expression of the cell: render the regression summary.
results.summary()
Out[61]:
OLS Regression Results
Dep. Variable: Crime Rate R-squared: 0.743
Model: OLS Adj. R-squared: 0.708
Method: Least Squares F-statistic: 21.47
Date: Thu, 25 Feb 2016 Prob (F-statistic): 1.55e-16
Time: 12:31:14 Log-Likelihood: -381.61
No. Observations: 77 AIC: 783.2
Df Residuals: 67 BIC: 806.7
Df Model: 9
Covariance Type: nonrobust
coef std err t P>|t| [95.0% Conf. Int.]
const 83.6523 63.630 1.315 0.193 -43.354 210.659
population -0.0001 0.000 -0.509 0.612 -0.001 0.000
Community Area Number 0.1615 0.230 0.701 0.486 -0.298 0.621
PERCENT OF HOUSING CROWDED 2.1276 2.839 0.750 0.456 -3.538 7.794
PERCENT HOUSEHOLDS BELOW POVERTY 1.1320 1.107 1.023 0.310 -1.077 3.341
PERCENT AGED 16+ UNEMPLOYED 1.6460 1.574 1.046 0.299 -1.496 4.788
PERCENT AGED 25+ WITHOUT HIGH SCHOOL DIPLOMA -5.5273 1.334 -4.142 0.000 -8.191 -2.864
PERCENT AGED UNDER 18 OR OVER 64 -2.8399 1.327 -2.141 0.036 -5.488 -0.192
PER CAPITA INCOME 0.0009 0.001 1.474 0.145 -0.000 0.002
HARDSHIP INDEX 3.0019 1.068 2.810 0.006 0.870 5.134
Omnibus: 12.939 Durbin-Watson: 1.788
Prob(Omnibus): 0.002 Jarque-Bera (JB): 30.224
Skew: 0.435 Prob(JB): 2.73e-07
Kurtosis: 5.944 Cond. No. 7.38e+05
In [76]:
# Residuals of the fit (observed crime rate minus fitted value), indexed by
# row. The fitted results object already exposes them, so there is no need
# to recompute the difference by hand.
results.resid
Out[76]:
0 -30.548340
1 -17.860678
2 -27.746741
3 -6.912807
4 -25.744970
5 -44.667030
6 -33.001879
7 0.118674
8 2.004039
9 10.857009
10 1.313185
11 -12.518830
12 -9.725766
13 1.054443
14 8.231496
15 17.906104
16 -4.780052
17 -29.994605
18 3.323975
19 31.423562
20 9.583057
21 5.131727
22 -0.489824
23 39.306076
24 23.368762
25 100.219960
26 52.933863
27 42.121330
28 42.588384
29 -15.052890
...
47 11.021966
48 37.655931
49 -4.262890
50 -12.664496
51 1.419393
52 8.277694
53 -46.961619
54 -4.565217
55 24.415173
56 15.127637
57 -11.015180
58 2.858249
59 -31.368946
60 -19.744856
61 -13.071495
62 -15.867696
63 18.173843
64 40.443706
65 -18.244945
66 -11.719264
67 15.979260
68 28.511758
69 -3.709392
70 -31.535948
71 -1.481777
72 4.299939
73 -32.743896
74 16.173232
75 46.651569
76 -60.695775
dtype: float64
In [ ]:
Content source: computationforpolicy/lecture-examples
Similar notebooks: