Using the data from the 2013_NYC_CD_MedianIncome_Recycle.xlsx file, create a predictor using the weights from the model. This time, use the built in attributes in your model rather than hard-coding them into your algorithm
In [4]:
import pandas as pd
import statsmodels.formula.api as smf
In [5]:
df = pd.read_excel("2013_NYC_CD_MedianIncome_Recycle.xlsx")
In [11]:
df
Out[11]:
CD_Name
MdHHIncE
RecycleRate
0
Battery Park City, Greenwich Village & Soho
119596
0.286771
1
Battery Park City, Greenwich Village & Soho
119596
0.264074
2
Chinatown & Lower East Side
40919
0.156485
3
Chelsea, Clinton & Midtown Business Distric
92583
0.235125
4
Chelsea, Clinton & Midtown Business Distric
92583
0.246725
5
Murray Hill, Gramercy & Stuyvesant Town
101769
0.222046
6
Upper West Side & West Side
96009
0.256809
7
Upper East Side
104602
0.253719
8
Hamilton Heights, Manhattanville & West Harlem
41736
0.155888
9
Central Harlem
36468
0.133018
10
East Harlem
30335
0.140438
11
Washington Heights, Inwood & Marble Hill
37685
0.149605
12
Hunts Point, Longwood & Melrose
21318
0.104569
13
Hunts Point, Longwood & Melrose
21318
0.103643
14
Belmont, Crotona Park East & East Tremont
22343
0.119219
15
Concourse, Highbridge & Mount Eden
25745
0.103573
16
Morris Heights, Fordham South & Mount Hope
24517
0.119646
17
Belmont, Crotona Park East & East Tremont
22343
0.110713
18
Bedford Park, Fordham North & Norwood
30541
0.136455
19
Riverdale, Fieldston & Kingsbridge
56877
0.221890
20
Castle Hill, Clason Point & Parkchester
34779
0.105807
21
Co-op City, Pelham Bay & Schuylerville
54685
0.214509
22
Pelham Parkway, Morris Park & Laconia
43503
0.163576
23
Wakefield, Williamsbridge & Woodlawn
43541
0.182580
24
Greenpoint & Williamsburg
50778
0.141621
25
Brooklyn Heights & Fort Greene
73290
0.237205
26
Bedford-Stuyvesant
36528
0.125818
27
Bushwick
38274
0.132463
28
East New York & Starrett City
33700
0.114030
29
Park Slope, Carroll Gardens & Red Hook
93969
0.302798
30
Sunset Park & Windsor Terrace
43351
0.197697
31
Crown Heights North & Prospect Heights
41075
0.156241
32
Crown Heights South, Prospect Lefferts & Wingate
41095
0.115119
33
Bay Ridge & Dyker Heights
57006
0.220855
34
Bensonhurst & Bath Beach
48252
0.183393
35
Borough Park, Kensington & Ocean Parkway
38215
0.156080
36
Brighton Beach & Coney Island
30159
0.134260
37
Flatbush & Midwood
41681
0.145995
38
Sheepshead Bay, Gerritsen Beach & Homecrest
49392
0.193802
39
Brownsville & Ocean Hill
27772
0.091464
40
East Flatbush, Farragut & Rugby
45954
0.134002
41
Canarsie & Flatlands
63106
0.174876
42
Astoria & Long Island City
50716
0.215254
43
Sunnyside & Woodside
54136
0.198388
44
Jackson Heights & North Corona
47555
0.137919
45
Elmhurst & South Corona
45661
0.130604
46
Ridgewood, Glendale & Middle Village
54924
0.214185
47
Forest Hills & Rego Park
64372
0.210247
48
Flushing, Murray Hill & Whitestone
51251
0.192124
49
Briarwood, Fresh Meadows & Hillcrest
59124
0.194293
50
Richmond Hill & Woodhaven
58578
0.187987
51
Howard Beach & Ozone Park
60828
0.183898
52
Bayside, Douglaston & Little Neck
74960
0.253064
53
Jamaica, Hollis & St. Albans
51251
0.157345
54
Queens Village, Cambria Heights & Rosedale
76002
0.196679
55
Far Rockaway, Breezy Point & Broad Channel
46944
0.123351
56
Port Richmond, Stapleton & Mariner's Harbor
57975
0.196748
57
New Springville & South Beach
71925
0.211485
58
Tottenville, Great Kills & Annadale
84670
0.210379
In [6]:
df.head
Out[6]:
<bound method NDFrame.head of CD_Name MdHHIncE RecycleRate
0 Battery Park City, Greenwich Village & Soho 119596 0.286771
1 Battery Park City, Greenwich Village & Soho 119596 0.264074
2 Chinatown & Lower East Side 40919 0.156485
3 Chelsea, Clinton & Midtown Business Distric 92583 0.235125
4 Chelsea, Clinton & Midtown Business Distric 92583 0.246725
5 Murray Hill, Gramercy & Stuyvesant Town 101769 0.222046
6 Upper West Side & West Side 96009 0.256809
7 Upper East Side 104602 0.253719
8 Hamilton Heights, Manhattanville & West Harlem 41736 0.155888
9 Central Harlem 36468 0.133018
10 East Harlem 30335 0.140438
11 Washington Heights, Inwood & Marble Hill 37685 0.149605
12 Hunts Point, Longwood & Melrose 21318 0.104569
13 Hunts Point, Longwood & Melrose 21318 0.103643
14 Belmont, Crotona Park East & East Tremont 22343 0.119219
15 Concourse, Highbridge & Mount Eden 25745 0.103573
16 Morris Heights, Fordham South & Mount Hope 24517 0.119646
17 Belmont, Crotona Park East & East Tremont 22343 0.110713
18 Bedford Park, Fordham North & Norwood 30541 0.136455
19 Riverdale, Fieldston & Kingsbridge 56877 0.221890
20 Castle Hill, Clason Point & Parkchester 34779 0.105807
21 Co-op City, Pelham Bay & Schuylerville 54685 0.214509
22 Pelham Parkway, Morris Park & Laconia 43503 0.163576
23 Wakefield, Williamsbridge & Woodlawn 43541 0.182580
24 Greenpoint & Williamsburg 50778 0.141621
25 Brooklyn Heights & Fort Greene 73290 0.237205
26 Bedford-Stuyvesant 36528 0.125818
27 Bushwick 38274 0.132463
28 East New York & Starrett City 33700 0.114030
29 Park Slope, Carroll Gardens & Red Hook 93969 0.302798
30 Sunset Park & Windsor Terrace 43351 0.197697
31 Crown Heights North & Prospect Heights 41075 0.156241
32 Crown Heights South, Prospect Lefferts & Wingate 41095 0.115119
33 Bay Ridge & Dyker Heights 57006 0.220855
34 Bensonhurst & Bath Beach 48252 0.183393
35 Borough Park, Kensington & Ocean Parkway 38215 0.156080
36 Brighton Beach & Coney Island 30159 0.134260
37 Flatbush & Midwood 41681 0.145995
38 Sheepshead Bay, Gerritsen Beach & Homecrest 49392 0.193802
39 Brownsville & Ocean Hill 27772 0.091464
40 East Flatbush, Farragut & Rugby 45954 0.134002
41 Canarsie & Flatlands 63106 0.174876
42 Astoria & Long Island City 50716 0.215254
43 Sunnyside & Woodside 54136 0.198388
44 Jackson Heights & North Corona 47555 0.137919
45 Elmhurst & South Corona 45661 0.130604
46 Ridgewood, Glendale & Middle Village 54924 0.214185
47 Forest Hills & Rego Park 64372 0.210247
48 Flushing, Murray Hill & Whitestone 51251 0.192124
49 Briarwood, Fresh Meadows & Hillcrest 59124 0.194293
50 Richmond Hill & Woodhaven 58578 0.187987
51 Howard Beach & Ozone Park 60828 0.183898
52 Bayside, Douglaston & Little Neck 74960 0.253064
53 Jamaica, Hollis & St. Albans 51251 0.157345
54 Queens Village, Cambria Heights & Rosedale 76002 0.196679
55 Far Rockaway, Breezy Point & Broad Channel 46944 0.123351
56 Port Richmond, Stapleton & Mariner's Harbor 57975 0.196748
57 New Springville & South Beach 71925 0.211485
58 Tottenville, Great Kills & Annadale 84670 0.210379>
In [7]:
lm = smf.ols(formula="RecycleRate~MdHHIncE",data=df).fit()
In [8]:
lm.params
Out[8]:
Intercept 0.074804
MdHHIncE 0.000002
dtype: float64
In [9]:
intercept, slope = lm.params
def recycle_rate_estimator(median_income):
return (0.074804 + (0.000002* float(median_income)))
In [10]:
recycle_rate_estimator(119596)
Out[10]:
0.313996
In [ ]:
Content source: ledeprogram/algorithms
Similar notebooks: