Assignment 2

Using the 2013_NYC_CD_MedianIncome_Recycle.xlsx file, calculate the correlation between the recycling rate and the median income. Discuss your findings in your PR.


In [3]:
import pandas as pd
%matplotlib inline

In [4]:
df = pd.read_excel("2013_NYC_CD_MedianIncome_Recycle.xlsx")

In [5]:
df.head()


Out[5]:
CD_Name MdHHIncE RecycleRate
0 Battery Park City, Greenwich Village & Soho 119596 0.286771
1 Battery Park City, Greenwich Village & Soho 119596 0.264074
2 Chinatown & Lower East Side 40919 0.156485
3 Chelsea, Clinton & Midtown Business Distric 92583 0.235125
4 Chelsea, Clinton & Midtown Business Distric 92583 0.246725

In [6]:
df


Out[6]:
CD_Name MdHHIncE RecycleRate
0 Battery Park City, Greenwich Village & Soho 119596 0.286771
1 Battery Park City, Greenwich Village & Soho 119596 0.264074
2 Chinatown & Lower East Side 40919 0.156485
3 Chelsea, Clinton & Midtown Business Distric 92583 0.235125
4 Chelsea, Clinton & Midtown Business Distric 92583 0.246725
5 Murray Hill, Gramercy & Stuyvesant Town 101769 0.222046
6 Upper West Side & West Side 96009 0.256809
7 Upper East Side 104602 0.253719
8 Hamilton Heights, Manhattanville & West Harlem 41736 0.155888
9 Central Harlem 36468 0.133018
10 East Harlem 30335 0.140438
11 Washington Heights, Inwood & Marble Hill 37685 0.149605
12 Hunts Point, Longwood & Melrose 21318 0.104569
13 Hunts Point, Longwood & Melrose 21318 0.103643
14 Belmont, Crotona Park East & East Tremont 22343 0.119219
15 Concourse, Highbridge & Mount Eden 25745 0.103573
16 Morris Heights, Fordham South & Mount Hope 24517 0.119646
17 Belmont, Crotona Park East & East Tremont 22343 0.110713
18 Bedford Park, Fordham North & Norwood 30541 0.136455
19 Riverdale, Fieldston & Kingsbridge 56877 0.221890
20 Castle Hill, Clason Point & Parkchester 34779 0.105807
21 Co-op City, Pelham Bay & Schuylerville 54685 0.214509
22 Pelham Parkway, Morris Park & Laconia 43503 0.163576
23 Wakefield, Williamsbridge & Woodlawn 43541 0.182580
24 Greenpoint & Williamsburg 50778 0.141621
25 Brooklyn Heights & Fort Greene 73290 0.237205
26 Bedford-Stuyvesant 36528 0.125818
27 Bushwick 38274 0.132463
28 East New York & Starrett City 33700 0.114030
29 Park Slope, Carroll Gardens & Red Hook 93969 0.302798
30 Sunset Park & Windsor Terrace 43351 0.197697
31 Crown Heights North & Prospect Heights 41075 0.156241
32 Crown Heights South, Prospect Lefferts & Wingate 41095 0.115119
33 Bay Ridge & Dyker Heights 57006 0.220855
34 Bensonhurst & Bath Beach 48252 0.183393
35 Borough Park, Kensington & Ocean Parkway 38215 0.156080
36 Brighton Beach & Coney Island 30159 0.134260
37 Flatbush & Midwood 41681 0.145995
38 Sheepshead Bay, Gerritsen Beach & Homecrest 49392 0.193802
39 Brownsville & Ocean Hill 27772 0.091464
40 East Flatbush, Farragut & Rugby 45954 0.134002
41 Canarsie & Flatlands 63106 0.174876
42 Astoria & Long Island City 50716 0.215254
43 Sunnyside & Woodside 54136 0.198388
44 Jackson Heights & North Corona 47555 0.137919
45 Elmhurst & South Corona 45661 0.130604
46 Ridgewood, Glendale & Middle Village 54924 0.214185
47 Forest Hills & Rego Park 64372 0.210247
48 Flushing, Murray Hill & Whitestone 51251 0.192124
49 Briarwood, Fresh Meadows & Hillcrest 59124 0.194293
50 Richmond Hill & Woodhaven 58578 0.187987
51 Howard Beach & Ozone Park 60828 0.183898
52 Bayside, Douglaston & Little Neck 74960 0.253064
53 Jamaica, Hollis & St. Albans 51251 0.157345
54 Queens Village, Cambria Heights & Rosedale 76002 0.196679
55 Far Rockaway, Breezy Point & Broad Channel 46944 0.123351
56 Port Richmond, Stapleton & Mariner's Harbor 57975 0.196748
57 New Springville & South Beach 71925 0.211485
58 Tottenville, Great Kills & Annadale 84670 0.210379

In [7]:
df['MdHHIncE'].mean()


Out[7]:
53895.932203389828

In [8]:
df['MdHHIncE'].median()


Out[8]:
48252.0

In [9]:
df['MdHHIncE'].mode()


Out[9]:
0     21318
1     22343
2     51251
3     92583
4    119596
dtype: int64

In [10]:
df['MdHHIncE'].describe()


Out[10]:
count        59.000000
mean      53895.932203
std       24371.741796
min       21318.000000
25%       37950.000000
50%       48252.000000
75%       61967.000000
max      119596.000000
Name: MdHHIncE, dtype: float64

In [17]:
df['MdHHIncE'].max() - df['MdHHIncE'].min()


Out[17]:
98278

In [11]:
df['MdHHIncE'].quantile(q=0.25)


Out[11]:
37950.0

In [12]:
df['MdHHIncE'].quantile(q=0.5)


Out[12]:
48252.0

In [13]:
df['MdHHIncE'].quantile(q=0.75)


Out[13]:
61967.0

In [35]:
m_iqr = df['MdHHIncE'].quantile(q=0.75) - df['MdHHIncE'].quantile(q=0.25)
m_iqr


Out[35]:
24017.0

In [34]:
df['MdHHIncE'].quantile(q=0.75) + (iqr*1.5)


Out[34]:
61967.118987365975

In [19]:
df['MdHHIncE'].quantile(q=0.25) - (iqr*1.5)


Out[19]:
1924.5

In [20]:
df['MdHHIncE'].std()


Out[20]:
24371.741795935275

In [22]:
df['RecycleRate'].describe()


Out[22]:
count    59.000000
mean      0.175569
std       0.051499
min       0.091464
25%       0.133510
50%       0.174876
75%       0.212835
max       0.302798
Name: RecycleRate, dtype: float64

In [23]:
df['RecycleRate'].median()


Out[23]:
0.17487626679236387

In [24]:
df['RecycleRate'].max() - df['RecycleRate'].min()


Out[24]:
0.21133426219762536

In [25]:
df['RecycleRate'].quantile(q=0.25)


Out[25]:
0.13351008438749995

In [26]:
df['RecycleRate'].quantile(q=0.5)


Out[26]:
0.17487626679236387

In [27]:
df['RecycleRate'].quantile(q=0.75)


Out[27]:
0.21283499503973174

In [36]:
r_iqr = df['RecycleRate'].quantile(q=0.75) - df['RecycleRate'].quantile(q=0.25)
r_iqr


Out[36]:
0.079324910652231795

In [29]:
df['RecycleRate'].quantile(q=0.25) + (iqr*1.5)


Out[29]:
0.25249745036584764

In [30]:
df['RecycleRate'].quantile(q=0.25) - (iqr*1.5)


Out[30]:
0.014522718409152258

In [31]:
df['RecycleRate'].std()


Out[31]:
0.051499302971834054

In [32]:
df.plot(kind='scatter', y='RecycleRate', x='MdHHIncE')


Out[32]:
<matplotlib.axes._subplots.AxesSubplot at 0x10f070f60>

In [33]:
df.corr()


Out[33]:
MdHHIncE RecycleRate
MdHHIncE 1.000000 0.884783
RecycleRate 0.884783 1.000000

In [ ]: