In [12]:
import pandas as pd
import pg8000

In [13]:
conn = pg8000.connect(database= 'training', port = 5432, host = 'training.c1erymiua9dx.us-east-1.rds.amazonaws.com', user = 'dot_student', password = 'qgis')

In [14]:
import matplotlib
import numpy
%matplotlib inline

In [19]:
cursor=conn.cursor()
statement="SELECT * FROM information_schema.columns WHERE table_name= 'noise_311'"
cursor.execute(statement)
column_names=[]
for row in cursor.fetchall():
    column_names.append(row[3]) #item four have all the column names
print(column_names)
conn.rollback()


['unique_key', 'created_date', 'closed_date', 'agency', 'agency_name', 'complaint_type', 'descriptor', 'location_type', 'incident_zip', 'incident_address', 'street_name', 'cross_street_1', 'cross_street_2', 'intersection_street_1', 'intersection_street_2', 'address_type', 'city', 'landmark', 'facility_type', 'status', 'due_date', 'resolution_description', 'resolution_action_updated_date', 'community_board', 'borough', 'x_coordinate', 'y_coordinate', 'park_facility_name', 'park_borough', 'school_name', 'school_number', 'school_region', 'school_code', 'school_phone_number', 'school_address', 'school_city', 'school_state', 'school_zip', 'school_not_found', 'school_or_citywide_complaint', 'vehicle_type', 'taxi_company_borough', 'taxi_pick_up_location', 'bridge_highway_name', 'bridge_highway_direction', 'road_ramp', 'bridge_highway_segment', 'garage_lot_name', 'ferry_direction', 'ferry_terminal_name', 'latitude', 'longitude', 'location']

In [20]:
statement="SELECT unique_key,created_date,closed_date,complaint_type FROM noise_311"
cursor.execute(statement)
select_data = []
for row in cursor:
    select_data.append(row)

In [21]:
conn.rollback()

In [23]:
df=pd.DataFrame(select_data)
df.columns = ['created date', 'closed date', 'agency', 'complaint type']#assign name to each column
df.head()


Out[23]:
created date closed date agency complaint type
0 28792167 2014-08-31 23:59:00 2014-09-01 03:52:00 Noise - Street/Sidewalk
1 28789088 2014-08-31 23:56:00 2014-09-01 06:17:00 Noise - Vehicle
2 28791854 2014-08-31 23:54:00 2014-09-01 01:29:00 Noise - Commercial
3 28789936 2014-08-31 23:52:00 2014-09-01 02:53:00 Noise - Street/Sidewalk
4 28789931 2014-08-31 23:47:00 2014-09-01 01:06:00 Noise - Street/Sidewalk

In [24]:
df.groupby(by='complaint type').count()


Out[24]:
created date closed date agency
complaint type
Collection Truck Noise 31 31 31
Noise 2825 2825 2825
Noise - Commercial 9557 9557 9557
Noise - Helicopter 311 311 311
Noise - House of Worship 358 358 358
Noise - Park 2380 2380 2380
Noise - Street/Sidewalk 16337 16337 16337
Noise - Vehicle 5816 5816 5816

In [8]:
df['open_time'] = df[b'closed_date'] - df[b'created_date']

In [9]:
df['open_time'].describe()


Out[9]:
count                     37615
mean     0 days 07:44:22.532500
std      1 days 12:06:09.012799
min             0 days 00:01:00
25%             0 days 00:54:00
50%             0 days 02:02:00
75%             0 days 04:15:00
max            48 days 05:45:00
Name: open_time, dtype: object

In [10]:
interquartile = df['open_time'].quantile(q=0.75)- df['open_time'].quantile(q=0.25)
interquartile


Out[10]:
Timedelta('0 days 03:21:00')

In [ ]: