In [1]:
import os

import pg8000
from pandas import DataFrame
# Connection settings for the training PostgreSQL instance. Each value can be
# overridden through a TRAINING_DB_* environment variable, so credentials do
# not have to be edited into the notebook. The defaults are the values that
# were previously hard-coded, so with no variables set this connects exactly
# as before.
conn = pg8000.connect(
    host=os.environ.get("TRAINING_DB_HOST", "training.c1erymiua9dx.us-east-1.rds.amazonaws.com"),
    # Environment values are strings; convert so the port is still passed as an int.
    port=int(os.environ.get("TRAINING_DB_PORT", "5432")),
    database=os.environ.get("TRAINING_DB_NAME", "training"),
    user=os.environ.get("TRAINING_DB_USER", "dot_student"),
    password=os.environ.get("TRAINING_DB_PASSWORD", "qgis"),
)
In [3]:
# Roll back any transaction currently open on the connection before querying.
# NOTE(review): presumably added to clear a failed/aborted transaction left by
# an earlier run of the notebook — confirm it is still needed.
conn.rollback()
In [4]:
# Create a cursor on the open connection for issuing SQL statements.
cursor = conn.cursor()
In [5]:
database=cursor.execute("SELECT * FROM noise_311")
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
df = pd.read_sql("SELECT * FROM noise_311", conn)
In [6]:
# Preview the first rows of the loaded table (DataFrame.head defaults to 5 rows).
df.head()
Out[6]:
In [7]:
# Normalise the column labels to plain text.
# The previous version dropped the first character of str(label) and then
# removed single quotes. That only produces the right name when the labels are
# bytes (str(b'col') == "b'col'"); with ordinary str labels it cut off a real
# first letter, and every re-run of the cell cut off another one.
# Decoding bytes explicitly gives the same names for bytes labels, leaves text
# labels untouched, and makes the cell safe to run more than once.
df = df.rename(
    columns=lambda label: label.decode("utf-8") if isinstance(label, bytes) else str(label)
)
In [9]:
# How long each complaint stayed open: 'closed_date' minus 'created_date',
# stored element-wise in a new 'duration' column.
df['duration'] = df['closed_date'].sub(df['created_date'])
In [10]:
# Spot-check the first few computed durations.
df['duration'].head()
Out[10]:
In [11]:
# Average time taken to deal with a complaint (mean of the 'duration' column).
df['duration'].mean()
Out[11]:
In [12]:
# Largest value in the 'duration' column.
df['duration'].max()
Out[12]:
In [13]:
# Smallest value in the 'duration' column.
df['duration'].min()
Out[13]:
In [14]:
# Median of the 'duration' column.
df['duration'].median()
Out[14]:
In [15]:
# Most frequent value(s) in the 'duration' column; Series.mode returns a
# Series because several values can tie.
df['duration'].mode()
Out[15]:
In [17]:
# Range of the 'duration' column: largest value minus smallest value.
df['duration'].pipe(lambda durations: durations.max() - durations.min())
Out[17]:
In [19]:
# Quartiles of the 'duration' column — first quartile (0.25 quantile).
df['duration'].quantile(q=0.25)
Out[19]:
In [20]:
# Second quartile (0.50 quantile, i.e. the median).
df['duration'].quantile(q=0.50)
Out[20]:
In [21]:
# Third quartile (0.75 quantile).
df['duration'].quantile(q=0.75)
Out[21]:
In [22]:
# Interquartile range of 'duration': third quartile minus first quartile.
df['duration'].pipe(
    lambda durations: durations.quantile(q=0.75) - durations.quantile(q=0.25)
)
Out[22]:
In [24]:
# 1.5 x the interquartile range: the distance beyond the first/third quartile
# past which a value is conventionally treated as an outlier.
df['duration'].pipe(
    lambda durations: 1.5 * (durations.quantile(q=0.75) - durations.quantile(q=0.25))
)
Out[24]:
In [25]:
# Standard deviation of the 'duration' column (pandas' default is the sample
# standard deviation, ddof=1).
df['duration'].std()
Out[25]:
In [ ]: