In [1]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
In [3]:
# Load the workbook into a DataFrame; every later cell reads from `df`.
# The path is relative, so the file must sit in the notebook's working directory.
df = pd.read_excel(io="2013_NYC_CD_MedianIncome_Recycle.xlsx")
In [4]:
# Preview the first five rows to sanity-check the columns that were loaded.
df.head(n=5)
Out[4]:
In [5]:
# Summary statistics for the median-income column (the one plotted below as 'Median Income').
df.loc[:, 'MdHHIncE'].describe()
Out[5]:
In [12]:
# Median of the median-income column; missing values are skipped (the pandas default).
df['MdHHIncE'].median(skipna=True)
Out[12]:
In [14]:
# Most frequent value(s) of the median-income column. mode() returns a Series,
# so ties show up as several rows; missing values are ignored (the pandas default).
df['MdHHIncE'].mode(dropna=True)
Out[14]:
In [23]:
# Spread of the median-income column: largest value minus smallest value.
# Stored as `income_range` rather than `range` so the built-in range() is not
# shadowed for the rest of the kernel session (a later `range(n)` call would
# otherwise fail with "object is not callable").
income_range = df['MdHHIncE'].max() - df['MdHHIncE'].min()
income_range
Out[23]:
In [15]:
# Third quartile (75th percentile) of the median-income column.
df['MdHHIncE'].quantile(0.75)
Out[15]:
In [17]:
# First quartile (25th percentile) of the median-income column.
df['MdHHIncE'].quantile(0.25)
Out[17]:
In [16]:
# Second quartile (50th percentile); this is the same quantity as the median.
df['MdHHIncE'].quantile(0.5)
Out[16]:
In [19]:
# Interquartile range of the median-income column: 75th percentile minus 25th percentile.
Inter_quartile_Range = (
    df['MdHHIncE'].quantile(0.75)
    - df['MdHHIncE'].quantile(0.25)
)
Inter_quartile_Range
Out[19]:
In [21]:
# 1.5 x IQR, computed from the Inter_quartile_Range defined in an earlier cell.
# Presumably used as the usual outlier fence distance (values more than this far
# below Q1 or above Q3 are flagged); the fences themselves are not computed here.
Identification_of_outlier = Inter_quartile_Range * 1.5
Identification_of_outlier
Out[21]:
In [7]:
# Summary statistics for the recycling-rate column (the one plotted below as 'Recycling Rate').
df.loc[:, 'RecycleRate'].describe()
Out[7]:
In [10]:
# Scatter plot of recycling rate (y) against median income (x), one marker per row of df.
df.plot.scatter(x='MdHHIncE', y='RecycleRate')
# Replace the raw column names on the axes with readable labels.
plt.xlabel(xlabel='Median Income')
plt.ylabel(ylabel='Recycling Rate')
Out[10]:
In [11]:
# Pairwise Pearson correlation between the numeric columns of df.
# Non-numeric columns are dropped explicitly first: from pandas 2.0 onward
# DataFrame.corr() no longer discards them silently and raises if the sheet
# contains any text column (e.g. a name/label column). select_dtypes works the
# same way on older and newer pandas versions.
df.select_dtypes(include='number').corr()
Out[11]:
In [ ]: