In [9]:
import pandas as pd
%matplotlib inline

In [2]:
# Load the per-community crime and socio-economic table from a CSV in the
# working directory. The rendered frame reports 77 rows x 13 columns, one of
# which is an 'Unnamed: 0' column that a later cell drops.
communities = pd.read_csv('community_crime_rates_socio.csv')

In [57]:
# Show the loaded frame for a visual sanity check (the notebook elides the
# middle rows of the rendered table).
communities


Out[57]:
Unnamed: 0 Crime Count population Crime Rate Community Area Number COMMUNITY AREA NAME PERCENT OF HOUSING CROWDED PERCENT HOUSEHOLDS BELOW POVERTY PERCENT AGED 16+ UNEMPLOYED PERCENT AGED 25+ WITHOUT HIGH SCHOOL DIPLOMA PERCENT AGED UNDER 18 OR OVER 64 PER CAPITA INCOME HARDSHIP INDEX
0 0 3525 54991 64.101398 1 Rogers Park 7.7 23.6 8.7 18.2 27.5 23939 39
1 1 3063 71942 42.575964 2 West Ridge 7.8 17.2 8.8 20.8 38.5 23040 46
2 2 3592 56362 63.730883 3 Uptown 3.8 24.0 8.9 11.8 22.2 35787 20
3 3 1751 39493 44.336971 4 Lincoln Square 3.4 10.9 8.2 13.4 25.5 37524 17
4 4 1379 31867 43.273606 5 North Center 0.3 7.5 5.2 4.5 26.2 57123 6
5 5 5511 94368 58.399034 6 Lake View 1.1 11.4 4.7 2.6 17.0 60058 5
6 6 3729 64116 58.160210 7 Lincoln Park 0.8 12.3 5.1 3.6 21.5 71551 2
7 7 8944 80484 111.127678 8 Near North Side 1.9 12.9 7.0 2.5 22.6 88669 1
8 8 254 11187 22.704925 9 Edison Park 1.1 3.3 6.5 7.4 35.3 40959 8
9 9 1259 37023 34.005888 10 Norwood Park 2.0 5.4 9.0 11.5 39.5 32875 21
10 10 1144 25448 44.954417 11 Jefferson Park 2.7 8.6 12.4 13.4 35.5 27751 25
11 11 444 18508 23.989626 12 Forest Glen 1.1 7.5 6.8 4.9 40.5 44164 11
12 12 830 17931 46.288551 13 North Park 3.9 13.2 9.9 14.4 39.0 26576 33
13 13 2508 51542 48.659346 14 Albany Park 11.3 19.2 10.0 32.9 32.0 21323 53
14 14 3399 64124 53.006675 15 Portage Park 4.1 11.6 12.6 19.3 34.0 24336 35
15 15 2967 53359 55.604490 16 Irving Park 6.3 13.1 10.0 22.4 31.6 27249 34
16 16 1636 41932 39.015549 17 Dunning 5.2 10.6 10.0 16.2 33.6 26282 28
17 17 575 13426 42.827350 18 Montclaire 8.1 15.3 13.8 23.5 38.6 22014 50
18 18 4775 78743 60.640311 19 Belmont Cragin 10.8 18.7 14.6 37.3 37.3 15461 70
19 19 1707 25010 68.252699 20 Hermosa 6.9 20.5 13.1 41.6 36.4 15089 71
20 20 2230 39262 56.797922 21 Avondale 6.0 15.3 9.2 24.7 31.0 20039 42
21 21 4747 73595 64.501665 22 Logan Square 3.2 16.8 8.2 14.8 26.2 31908 23
22 22 8021 56323 142.410738 23 Humboldt park 14.8 33.9 17.3 35.4 38.0 13781 85
23 23 6975 81432 85.654288 24 West Town 2.3 14.7 6.6 12.9 21.7 43198 10
24 24 17050 98514 173.071848 25 Austin 6.3 28.6 22.6 24.4 37.9 15957 73
25 25 5804 18001 322.426532 26 West Garfield Park 9.4 41.7 25.8 24.5 43.6 10934 92
26 26 5279 20567 256.673312 27 East Garfield Park 8.2 42.4 19.6 21.3 43.2 12961 83
27 27 7804 54881 142.198575 28 Near West Side 3.8 20.6 10.7 9.6 22.2 44689 15
28 28 8053 35912 224.242593 29 North Lawndale 7.4 43.1 21.2 27.6 42.7 12034 87
29 29 4404 79288 55.544345 30 South Lawndale 15.2 30.7 15.8 54.8 33.8 10402 96
... ... ... ... ... ... ... ... ... ... ... ... ... ...
47 47 1461 13812 105.777585 48 Calumet Heights 2.1 11.5 20.0 11.0 44.0 28887 38
48 48 6555 44619 146.910509 49 Roseland 2.5 19.8 20.3 16.9 41.2 17949 52
49 49 1037 7325 141.569966 50 Pullman 1.5 21.6 22.8 13.1 38.6 20588 51
50 50 1904 15109 126.017605 51 South Deering 4.0 29.2 16.3 21.0 39.5 14685 65
51 51 1304 23042 56.592310 52 East Side 6.8 19.2 12.1 31.9 42.8 17104 64
52 52 3957 29651 133.452497 53 West Pullman 3.3 25.9 19.4 20.5 42.1 16563 62
53 53 1175 6482 181.271213 54 Riverdale 5.8 56.5 34.6 27.5 51.5 8201 98
54 54 507 9426 53.787397 55 Hegewisch 3.3 17.1 9.6 19.2 42.9 22677 44
55 55 1800 34513 52.154261 56 Garfield Ridge 2.6 8.8 11.3 19.3 38.1 26353 32
56 56 986 13393 73.620548 57 Archer Heights 8.5 14.1 16.5 35.9 39.2 16134 67
57 57 2715 45368 59.843943 58 Brighton Park 14.4 23.6 13.9 45.1 39.3 13089 84
58 58 1066 15612 68.280810 59 McKinley Park 7.2 18.7 13.4 32.9 35.6 16954 61
59 59 1590 31977 49.723239 60 Bridgeport 4.5 18.9 13.7 22.2 31.3 22694 43
60 60 4848 44377 109.245780 61 New City 11.9 29.0 23.0 41.5 38.9 12765 91
61 61 1033 18109 57.043459 62 West Elsdon 11.1 15.6 16.7 37.0 37.7 15754 69
62 62 2335 39894 58.530105 63 Gage Park 15.8 23.4 18.2 51.5 38.8 12171 93
63 63 926 23139 40.019016 64 Clearing 2.7 8.9 9.5 18.8 37.6 25113 29
64 64 2029 33355 60.830460 65 West Lawn 5.8 14.9 9.6 33.6 39.6 16907 56
65 65 6095 55628 109.567124 66 Chicago Lawn 7.6 27.9 17.1 31.2 40.6 13231 80
66 66 7213 35505 203.154485 67 West Englewood 4.8 34.4 35.9 26.3 40.7 11317 89
67 67 7006 30654 228.550923 68 Englewood 3.8 46.6 28.0 28.5 42.5 11888 94
68 68 6669 32602 204.558003 69 Greater Grand Crossing 3.6 29.6 23.0 16.5 41.0 17285 66
69 69 2283 41081 55.573136 70 Ashburn 4.0 10.4 11.7 17.7 36.9 23482 37
70 70 7748 48743 158.956158 71 Auburn Gresham 4.0 27.6 28.3 18.5 41.9 15528 74
71 71 977 20034 48.767096 72 Beverly 0.9 5.1 8.0 3.7 40.5 39523 12
72 72 3114 26493 117.540482 73 Washington Height 1.1 16.9 20.8 13.7 42.6 19713 48
73 73 609 19093 31.896507 74 Mount Greenwood 1.0 3.4 8.7 4.3 36.8 34381 16
74 74 2055 22544 91.155075 75 Morgan Park 0.8 13.2 15.0 10.8 40.3 27149 30
75 75 1621 12756 127.077454 76 O'Hare 3.6 15.4 7.1 10.9 30.3 25828 24
76 76 2213 56521 39.153589 77 Edgewater 4.1 18.2 9.2 9.7 23.8 33385 19

77 rows × 13 columns


In [7]:
# List the exact column labels. Gotcha: the output shows 'PER CAPITA INCOME '
# with a trailing space, so that label must be typed with the space when
# selecting the column by name.
communities.columns


Out[7]:
Index([u'Unnamed: 0', u'Crime Count', u'population', u'Crime Rate',
       u'Community Area Number', u'COMMUNITY AREA NAME',
       u'PERCENT OF HOUSING CROWDED', u'PERCENT HOUSEHOLDS BELOW POVERTY',
       u'PERCENT AGED 16+ UNEMPLOYED',
       u'PERCENT AGED 25+ WITHOUT HIGH SCHOOL DIPLOMA',
       u'PERCENT AGED UNDER 18 OR OVER 64', u'PER CAPITA INCOME ',
       u'HARDSHIP INDEX'],
      dtype='object')

In [59]:
# Discard the 'Unnamed: 0' column (its values mirror the row labels in the
# displayed frame, so it appears to be a saved row index).
# Rebinding the name instead of mutating with inplace=True, together with
# errors='ignore', keeps this cell idempotent: re-running it once the column
# is already gone is a no-op rather than a KeyError.
communities = communities.drop(['Unnamed: 0'], axis=1, errors='ignore')

In [75]:
# Scatter of crime rate (vertical axis) against the share of adults aged 25+
# without a high school diploma (horizontal axis), one point per community.
communities.plot(
    x='PERCENT AGED 25+ WITHOUT HIGH SCHOOL DIPLOMA',
    y='Crime Rate',
    kind='scatter',
)


Out[75]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fc5f3e4fc10>

In [19]:
import statsmodels.api as sm

In [60]:
# Response variable: the per-community crime rate.
y = communities['Crime Rate']

# Predictors: every remaining column except the response itself, the raw
# crime count, and the text community name.
# NOTE(review): 'Community Area Number' stays in X; it looks like an
# identifier rather than a measurement -- confirm it is meant to be a
# regressor. 'HARDSHIP INDEX' is presumably a composite of the other
# socio-economic columns (verify), which would make the predictors collinear.
X = communities.drop(
    labels=['Crime Count', 'COMMUNITY AREA NAME', 'Crime Rate'],
    axis='columns',
)

In [61]:
# Ordinary least squares regression of the crime rate on the predictors.
# add_constant supplies the intercept column that the summary reports as
# 'const'; OLS does not add one on its own.
X = sm.add_constant(X)
model = sm.OLS(endog=y, exog=X)
results = model.fit()

# Last expression of the cell, so the summary table is rendered as output.
results.summary()


Out[61]:
OLS Regression Results
Dep. Variable: Crime Rate R-squared: 0.743
Model: OLS Adj. R-squared: 0.708
Method: Least Squares F-statistic: 21.47
Date: Thu, 25 Feb 2016 Prob (F-statistic): 1.55e-16
Time: 12:31:14 Log-Likelihood: -381.61
No. Observations: 77 AIC: 783.2
Df Residuals: 67 BIC: 806.7
Df Model: 9
Covariance Type: nonrobust
coef std err t P>|t| [95.0% Conf. Int.]
const 83.6523 63.630 1.315 0.193 -43.354 210.659
population -0.0001 0.000 -0.509 0.612 -0.001 0.000
Community Area Number 0.1615 0.230 0.701 0.486 -0.298 0.621
PERCENT OF HOUSING CROWDED 2.1276 2.839 0.750 0.456 -3.538 7.794
PERCENT HOUSEHOLDS BELOW POVERTY 1.1320 1.107 1.023 0.310 -1.077 3.341
PERCENT AGED 16+ UNEMPLOYED 1.6460 1.574 1.046 0.299 -1.496 4.788
PERCENT AGED 25+ WITHOUT HIGH SCHOOL DIPLOMA -5.5273 1.334 -4.142 0.000 -8.191 -2.864
PERCENT AGED UNDER 18 OR OVER 64 -2.8399 1.327 -2.141 0.036 -5.488 -0.192
PER CAPITA INCOME 0.0009 0.001 1.474 0.145 -0.000 0.002
HARDSHIP INDEX 3.0019 1.068 2.810 0.006 0.870 5.134
Omnibus: 12.939 Durbin-Watson: 1.788
Prob(Omnibus): 0.002 Jarque-Bera (JB): 30.224
Skew: 0.435 Prob(JB): 2.73e-07
Kurtosis: 5.944 Cond. No. 7.38e+05

In [76]:
# Per-community residuals (observed crime rate minus the model's fitted
# value), taken from the fitted results object instead of recomputed by hand.
results.resid


Out[76]:
0     -30.548340
1     -17.860678
2     -27.746741
3      -6.912807
4     -25.744970
5     -44.667030
6     -33.001879
7       0.118674
8       2.004039
9      10.857009
10      1.313185
11    -12.518830
12     -9.725766
13      1.054443
14      8.231496
15     17.906104
16     -4.780052
17    -29.994605
18      3.323975
19     31.423562
20      9.583057
21      5.131727
22     -0.489824
23     39.306076
24     23.368762
25    100.219960
26     52.933863
27     42.121330
28     42.588384
29    -15.052890
         ...    
47     11.021966
48     37.655931
49     -4.262890
50    -12.664496
51      1.419393
52      8.277694
53    -46.961619
54     -4.565217
55     24.415173
56     15.127637
57    -11.015180
58      2.858249
59    -31.368946
60    -19.744856
61    -13.071495
62    -15.867696
63     18.173843
64     40.443706
65    -18.244945
66    -11.719264
67     15.979260
68     28.511758
69     -3.709392
70    -31.535948
71     -1.481777
72      4.299939
73    -32.743896
74     16.173232
75     46.651569
76    -60.695775
dtype: float64

In [ ]: