notebook.community

Edit and run



In [1]:

    
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline



In [3]:

    
# Load the 2013 NYC workbook; the resulting frame has the columns
# CD_Name, MdHHIncE and RecycleRate.
df = pd.read_excel(io="2013_NYC_CD_MedianIncome_Recycle.xlsx")



In [4]:

    
# Preview the first five rows to confirm the columns and values loaded as expected.
df.head(n=5)









    Out[4]:






  
    
      
      CD_Name
      MdHHIncE
      RecycleRate
    
  
  
    
      0
      Battery Park City, Greenwich Village & Soho
      119596
      0.286771
    
    
      1
      Battery Park City, Greenwich Village & Soho
      119596
      0.264074
    
    
      2
      Chinatown & Lower East Side
      40919
      0.156485
    
    
      3
      Chelsea, Clinton & Midtown Business Distric
      92583
      0.235125
    
    
      4
      Chelsea, Clinton & Midtown Business Distric
      92583
      0.246725



In [5]:

    
# Summary statistics (count, mean, std, min, quartiles, max) for the MdHHIncE column.
df.loc[:, 'MdHHIncE'].describe()









    Out[5]:





count        59.000000
mean      53895.932203
std       24371.741796
min       21318.000000
25%       37950.000000
50%       48252.000000
75%       61967.000000
max      119596.000000
Name: MdHHIncE, dtype: float64



In [12]:

    
# Median (middle value) of the MdHHIncE column.
df.loc[:, 'MdHHIncE'].median()









    Out[12]:





48252.0



In [14]:

    
# Most frequent MdHHIncE value(s). mode() returns a Series because several
# values can tie for the highest frequency.
df.loc[:, 'MdHHIncE'].mode()









    Out[14]:





0     21318
1     22343
2     51251
3     92583
4    119596
dtype: int64



In [23]:

    
# Spread of the MdHHIncE column: largest value minus smallest value.
# Stored as `income_range` rather than `range` so that Python's built-in
# range() is not shadowed for every later cell in this kernel session.
income_range = df['MdHHIncE'].max() - df['MdHHIncE'].min()
income_range









    Out[23]:





98278



In [15]:

    
# Third quartile (75th percentile) of the MdHHIncE column.
df.loc[:, 'MdHHIncE'].quantile(0.75)









    Out[15]:





61967.0



In [17]:

    
# First quartile (25th percentile) of the MdHHIncE column.
df.loc[:, 'MdHHIncE'].quantile(0.25)









    Out[17]:





37950.0



In [16]:

    
# Second quartile (50th percentile) of the MdHHIncE column; this is the same
# value that .median() reports.
df.loc[:, 'MdHHIncE'].quantile(0.5)









    Out[16]:





48252.0



In [19]:

    
# Interquartile range (IQR) of the MdHHIncE column: third quartile minus
# first quartile, i.e. the width of the middle 50% of the values.
upper_quartile = df['MdHHIncE'].quantile(q=0.75)
lower_quartile = df['MdHHIncE'].quantile(q=0.25)
Inter_quartile_Range = upper_quartile - lower_quartile
Inter_quartile_Range









    Out[19]:





24017.0



In [21]:

    
# 1.5 x IQR. Note this is a distance, not a list of outliers: under the usual
# 1.5*IQR rule a value would be flagged when it lies more than this far below
# the first quartile or above the third quartile.
Identification_of_outlier = Inter_quartile_Range * 1.5
Identification_of_outlier









    Out[21]:





36025.5



In [7]:

    
# Summary statistics (count, mean, std, min, quartiles, max) for the RecycleRate column.
df.loc[:, 'RecycleRate'].describe()









    Out[7]:





count    59.000000
mean      0.175569
std       0.051499
min       0.091464
25%       0.133510
50%       0.174876
75%       0.212835
max       0.302798
Name: RecycleRate, dtype: float64



In [10]:

    
# Scatter plot of RecycleRate against MdHHIncE, one point per row of df.
# Labels are set on the Axes returned by pandas instead of through pyplot's
# implicit "current axes" state.
ax = df.plot.scatter(x='MdHHIncE', y='RecycleRate')
ax.set_xlabel('Median Income')
ax.set_ylabel('Recycling Rate')









    Out[10]:





<matplotlib.text.Text at 0x11163dc88>



In [11]:

    
# Pairwise Pearson correlation between the two numeric columns.
# The columns are selected explicitly because df also holds the text column
# CD_Name: recent pandas releases no longer drop non-numeric columns silently
# in DataFrame.corr() and raise instead, so a bare df.corr() breaks on re-run.
df[['MdHHIncE', 'RecycleRate']].corr()









    Out[11]:






  
    
      
      MdHHIncE
      RecycleRate
    
  
  
    
      MdHHIncE
      1.000000
      0.884783
    
    
      RecycleRate
      0.884783
      1.000000



In [ ]:

	CD_Name	MdHHIncE	RecycleRate
0	Battery Park City, Greenwich Village & Soho	119596	0.286771
1	Battery Park City, Greenwich Village & Soho	119596	0.264074
2	Chinatown & Lower East Side	40919	0.156485
3	Chelsea, Clinton & Midtown Business Distric	92583	0.235125
4	Chelsea, Clinton & Midtown Business Distric	92583	0.246725