In [9]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [7]:
df = pd.read_excel('data/2013_NYC_CD_MedianIncome_Recycle.xlsx')

In [4]:
print(df.head)


<bound method NDFrame.head of                                              CD_Name  MdHHIncE  RecycleRate
0        Battery Park City, Greenwich Village & Soho    119596     0.286771
1        Battery Park City, Greenwich Village & Soho    119596     0.264074
2                        Chinatown & Lower East Side     40919     0.156485
3        Chelsea, Clinton & Midtown Business Distric     92583     0.235125
4        Chelsea, Clinton & Midtown Business Distric     92583     0.246725
5            Murray Hill, Gramercy & Stuyvesant Town    101769     0.222046
6                        Upper West Side & West Side     96009     0.256809
7                                    Upper East Side    104602     0.253719
8     Hamilton Heights, Manhattanville & West Harlem     41736     0.155888
9                                     Central Harlem     36468     0.133018
10                                       East Harlem     30335     0.140438
11          Washington Heights, Inwood & Marble Hill     37685     0.149605
12                   Hunts Point, Longwood & Melrose     21318     0.104569
13                   Hunts Point, Longwood & Melrose     21318     0.103643
14         Belmont, Crotona Park East & East Tremont     22343     0.119219
15                Concourse, Highbridge & Mount Eden     25745     0.103573
16        Morris Heights, Fordham South & Mount Hope     24517     0.119646
17         Belmont, Crotona Park East & East Tremont     22343     0.110713
18             Bedford Park, Fordham North & Norwood     30541     0.136455
19                Riverdale, Fieldston & Kingsbridge     56877     0.221890
20           Castle Hill, Clason Point & Parkchester     34779     0.105807
21            Co-op City, Pelham Bay & Schuylerville     54685     0.214509
22             Pelham Parkway, Morris Park & Laconia     43503     0.163576
23              Wakefield, Williamsbridge & Woodlawn     43541     0.182580
24                         Greenpoint & Williamsburg     50778     0.141621
25                    Brooklyn Heights & Fort Greene     73290     0.237205
26                                Bedford-Stuyvesant     36528     0.125818
27                                          Bushwick     38274     0.132463
28                     East New York & Starrett City     33700     0.114030
29            Park Slope, Carroll Gardens & Red Hook     93969     0.302798
30                     Sunset Park & Windsor Terrace     43351     0.197697
31            Crown Heights North & Prospect Heights     41075     0.156241
32  Crown Heights South, Prospect Lefferts & Wingate     41095     0.115119
33                         Bay Ridge & Dyker Heights     57006     0.220855
34                          Bensonhurst & Bath Beach     48252     0.183393
35          Borough Park, Kensington & Ocean Parkway     38215     0.156080
36                     Brighton Beach & Coney Island     30159     0.134260
37                                Flatbush & Midwood     41681     0.145995
38       Sheepshead Bay, Gerritsen Beach & Homecrest     49392     0.193802
39                          Brownsville & Ocean Hill     27772     0.091464
40                   East Flatbush, Farragut & Rugby     45954     0.134002
41                              Canarsie & Flatlands     63106     0.174876
42                        Astoria & Long Island City     50716     0.215254
43                              Sunnyside & Woodside     54136     0.198388
44                    Jackson Heights & North Corona     47555     0.137919
45                           Elmhurst & South Corona     45661     0.130604
46              Ridgewood, Glendale & Middle Village     54924     0.214185
47                          Forest Hills & Rego Park     64372     0.210247
48                Flushing, Murray Hill & Whitestone     51251     0.192124
49              Briarwood, Fresh Meadows & Hillcrest     59124     0.194293
50                         Richmond Hill & Woodhaven     58578     0.187987
51                         Howard Beach & Ozone Park     60828     0.183898
52                 Bayside, Douglaston & Little Neck     74960     0.253064
53                      Jamaica, Hollis & St. Albans     51251     0.157345
54        Queens Village, Cambria Heights & Rosedale     76002     0.196679
55        Far Rockaway, Breezy Point & Broad Channel     46944     0.123351
56       Port Richmond, Stapleton & Mariner's Harbor     57975     0.196748
57                     New Springville & South Beach     71925     0.211485
58               Tottenville, Great Kills & Annadale     84670     0.210379>

In [5]:
df.corr()


Out[5]:
MdHHIncE RecycleRate
MdHHIncE 1.000000 0.884783
RecycleRate 0.884783 1.000000

In [6]:
df.columns


Out[6]:
Index(['CD_Name', 'MdHHIncE', 'RecycleRate'], dtype='object')

Findings: There is a strong positive 88.47% correlation between recycling rate and median household income. As household income increases so does the recycling rate.


In [15]:
# df.plot(kind='scatter', x='MdHHIncE', y='RecycleRate', alpha=0.2, xlim=(0, 34000),ylim = (0, .35))
ax = df.plot(kind='scatter', x='MdHHIncE', y='RecycleRate', alpha=0.2)
ax.set_title('Recycle Rate vs. Median Household Income ')
ax.set_xlabel('Median Household Income ($ USD)')
ax.set_ylabel('Recycling Rate')


Out[15]:
<matplotlib.text.Text at 0x114431358>

In [ ]: