In [1]:
import pg8000
from pandas import DataFrame
conn = pg8000.connect(host="training.c1erymiua9dx.us-east-1.rds.amazonaws.com",port=5432,database="training",user="dot_student",password="qgis")

In [3]:
conn.rollback()

In [4]:
cursor = conn.cursor()

In [5]:
database=cursor.execute("SELECT * FROM noise_311")
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
df = pd.read_sql("SELECT * FROM noise_311", conn)

In [6]:
df.head()


Out[6]:
b'unique_key' b'created_date' b'closed_date' b'agency' b'agency_name' b'complaint_type' b'descriptor' b'location_type' b'incident_zip' b'incident_address' ... b'bridge_highway_name' b'bridge_highway_direction' b'road_ramp' b'bridge_highway_segment' b'garage_lot_name' b'ferry_direction' b'ferry_terminal_name' b'latitude' b'longitude' b'location'
0 28792167 2014-08-31 23:59:00 2014-09-01 03:52:00 NYPD New York City Police Department Noise - Street/Sidewalk Loud Music/Party Street/Sidewalk 11222 200 KINGSLAND AVENUE ... None None None None None None None 40.723888 -73.941349 (40.723888303549415, -73.94134888943505)
1 28789088 2014-08-31 23:56:00 2014-09-01 06:17:00 NYPD New York City Police Department Noise - Vehicle Car/Truck Music Street/Sidewalk 11234 FLATLANDS AVENUE ... None None None None None None None 40.619489 -73.938051 (40.61948901090983, -73.93805104516916)
2 28791854 2014-08-31 23:54:00 2014-09-01 01:29:00 NYPD New York City Police Department Noise - Commercial Loud Music/Party Club/Bar/Restaurant 10002 161 LUDLOW STREET ... None None None None None None None 40.721410 -73.987694 (40.72141034382407, -73.98769444021134)
3 28789936 2014-08-31 23:52:00 2014-09-01 02:53:00 NYPD New York City Police Department Noise - Street/Sidewalk Loud Music/Party Street/Sidewalk 10033 624 WEST 182 STREET ... None None None None None None None 40.850167 -73.933972 (40.85016671877659, -73.93397220795968)
4 28789931 2014-08-31 23:47:00 2014-09-01 01:06:00 NYPD New York City Police Department Noise - Street/Sidewalk Loud Music/Party Street/Sidewalk 11217 525 DEAN STREET ... None None None None None None None 40.681208 -73.972775 (40.68120794066068, -73.97277535440028)

5 rows × 53 columns


In [7]:
df=df.rename(columns = lambda x : str(x)[1:])
df.columns = [c.replace('\'', '') for c in df.columns]

In [9]:
df['duration']=df['closed_date']-df['created_date']

In [10]:
df['duration'].head()


Out[10]:
0   03:53:00
1   06:21:00
2   01:35:00
3   03:01:00
4   01:19:00
Name: duration, dtype: timedelta64[ns]

In [11]:
## AVERAGE TIME TAKEN TO DEAL WITH A COMPLAINT : 
df['duration'].mean()


Out[11]:
Timedelta('0 days 07:44:22.532500')

In [12]:
df['duration'].max()


Out[12]:
Timedelta('48 days 05:45:00')

In [13]:
df['duration'].min()


Out[13]:
Timedelta('0 days 00:01:00')

In [14]:
df['duration'].median()


Out[14]:
Timedelta('0 days 02:02:00')

In [15]:
df['duration'].mode()


Out[15]:
0   00:21:00
dtype: timedelta64[ns]

In [17]:
#RANGE OF THE DATA FRAME
df['duration'].max()-df['duration'].min()


Out[17]:
Timedelta('48 days 05:44:00')

In [19]:
#QUARTILES OF THE DATA
df['duration'].quantile(q=0.25)


Out[19]:
Timedelta('0 days 00:54:00')

In [20]:
df['duration'].quantile(q=0.50)


Out[20]:
Timedelta('0 days 02:02:00')

In [21]:
df['duration'].quantile(q=0.75)


Out[21]:
Timedelta('0 days 04:15:00')

In [22]:
#THE INTER QUARTILE RANGE
df['duration'].quantile(q=0.75) - df['duration'].quantile(q=0.25)


Out[22]:
Timedelta('0 days 03:21:00')

In [24]:
#Calculating the Range of the Outliers
1.5 * (df['duration'].quantile(q=0.75) - df['duration'].quantile(q=0.25))


Out[24]:
Timedelta('0 days 05:01:30')

In [25]:
#Standard Deviation
df['duration'].std()


Out[25]:
Timedelta('1 days 12:06:09.012799')

In [ ]: