In [1]:
import pg8000
import pandas as pd
import matplotlib
import numpy
%matplotlib inline
conn = pg8000.connect(host="training.c1erymiua9dx.us-east-1.rds.amazonaws.com", port=5432, database="training", user="dot_student", password="qgis")

In [2]:
cursor = conn.cursor()

In [3]:
conn.rollback()

In [4]:
cursor.execute("SELECT * FROM noise_311")
results = []
for item in cursor.fetchall():
    results.append(item)

In [11]:
cursor.execute("select column_name from information_schema.columns where table_name='noise_311'")
columns_names = []
for item in cursor.fetchall():
    print(item)
    columns_names.append(item)


['unique_key']
['created_date']
['closed_date']
['agency']
['agency_name']
['complaint_type']
['descriptor']
['location_type']
['incident_zip']
['incident_address']
['street_name']
['cross_street_1']
['cross_street_2']
['intersection_street_1']
['intersection_street_2']
['address_type']
['city']
['landmark']
['facility_type']
['status']
['due_date']
['resolution_description']
['resolution_action_updated_date']
['community_board']
['borough']
['x_coordinate']
['y_coordinate']
['park_facility_name']
['park_borough']
['school_name']
['school_number']
['school_region']
['school_code']
['school_phone_number']
['school_address']
['school_city']
['school_state']
['school_zip']
['school_not_found']
['school_or_citywide_complaint']
['vehicle_type']
['taxi_company_borough']
['taxi_pick_up_location']
['bridge_highway_name']
['bridge_highway_direction']
['road_ramp']
['bridge_highway_segment']
['garage_lot_name']
['ferry_direction']
['ferry_terminal_name']
['latitude']
['longitude']
['location']

In [24]:
df= pd.DataFrame(results)

In [25]:
df.columns = ['unique_key', 'created_date', 'closed_date', 'agency', 'agency_name', 'complaint_type','descriptor','location_type',
'incident_zip', 
'incident_address',
'street_name', 
'cross_street_1',
'cross_street_2', 
'intersection_street_1',
'intersection_street_2',
'address_type',
'city',
'landmark',
'facility_type',
'status',
'due_date', 
'resolution_description',
'resolution_action_updated_date',
'community_board',
'borough',
'x_coordinate',
'y_coordinate',
'park_facility_name',
'park_borough',
'school_name',
'school_number',
'school_region',
'school_code',
'school_phone_number',
'school_address',
'school_city',
'school_state',
'school_zip',
'school_not_found',
'school_or_citywide_complaint',
'vehicle_type',
'taxi_company_borough',
'taxi_pick_up_location',
'bridge_highway_name',
'bridge_highway_direction',
'road_ramp',
'bridge_highway_segment',
'garage_lot_name',
'ferry_direction',
'ferry_terminal_name',
'latitude',
'longitude',
'location']

In [26]:
df.head()


Out[26]:
unique_key created_date closed_date agency agency_name complaint_type descriptor location_type incident_zip incident_address ... bridge_highway_name bridge_highway_direction road_ramp bridge_highway_segment garage_lot_name ferry_direction ferry_terminal_name latitude longitude location
0 28792167 2014-08-31 23:59:00 2014-09-01 03:52:00 NYPD New York City Police Department Noise - Street/Sidewalk Loud Music/Party Street/Sidewalk 11222 200 KINGSLAND AVENUE ... None None None None None None None 40.7238883 -73.94134889 (40.723888303549415, -73.94134888943505)
1 28789088 2014-08-31 23:56:00 2014-09-01 06:17:00 NYPD New York City Police Department Noise - Vehicle Car/Truck Music Street/Sidewalk 11234 FLATLANDS AVENUE ... None None None None None None None 40.61948901 -73.93805105 (40.61948901090983, -73.93805104516916)
2 28791854 2014-08-31 23:54:00 2014-09-01 01:29:00 NYPD New York City Police Department Noise - Commercial Loud Music/Party Club/Bar/Restaurant 10002 161 LUDLOW STREET ... None None None None None None None 40.72141034 -73.98769444 (40.72141034382407, -73.98769444021134)
3 28789936 2014-08-31 23:52:00 2014-09-01 02:53:00 NYPD New York City Police Department Noise - Street/Sidewalk Loud Music/Party Street/Sidewalk 10033 624 WEST 182 STREET ... None None None None None None None 40.85016672 -73.93397221 (40.85016671877659, -73.93397220795968)
4 28789931 2014-08-31 23:47:00 2014-09-01 01:06:00 NYPD New York City Police Department Noise - Street/Sidewalk Loud Music/Party Street/Sidewalk 11217 525 DEAN STREET ... None None None None None None None 40.68120794 -73.97277535 (40.68120794066068, -73.97277535440028)

5 rows × 53 columns


In [27]:
df.columns


Out[27]:
Index(['unique_key', 'created_date', 'closed_date', 'agency', 'agency_name',
       'complaint_type', 'descriptor', 'location_type', 'incident_zip',
       'incident_address', 'street_name', 'cross_street_1', 'cross_street_2',
       'intersection_street_1', 'intersection_street_2', 'address_type',
       'city', 'landmark', 'facility_type', 'status', 'due_date',
       'resolution_description', 'resolution_action_updated_date',
       'community_board', 'borough', 'x_coordinate', 'y_coordinate',
       'park_facility_name', 'park_borough', 'school_name', 'school_number',
       'school_region', 'school_code', 'school_phone_number', 'school_address',
       'school_city', 'school_state', 'school_zip', 'school_not_found',
       'school_or_citywide_complaint', 'vehicle_type', 'taxi_company_borough',
       'taxi_pick_up_location', 'bridge_highway_name',
       'bridge_highway_direction', 'road_ramp', 'bridge_highway_segment',
       'garage_lot_name', 'ferry_direction', 'ferry_terminal_name', 'latitude',
       'longitude', 'location'],
      dtype='object')

In [28]:
df['new_created_date'] = pd.to_datetime(df['created_date'], 
                                format='%m/%d/%y %H:%M:%S')

In [29]:
df['new_closed_date'] = pd.to_datetime(df['closed_date'], 
                                format='%m/%d/%y %H:%M:%S')

In [30]:
df['time_open'] = df['new_closed_date'] - df['new_created_date']

In [31]:
df.head()


Out[31]:
unique_key created_date closed_date agency agency_name complaint_type descriptor location_type incident_zip incident_address ... bridge_highway_segment garage_lot_name ferry_direction ferry_terminal_name latitude longitude location new_created_date new_closed_date time_open
0 28792167 2014-08-31 23:59:00 2014-09-01 03:52:00 NYPD New York City Police Department Noise - Street/Sidewalk Loud Music/Party Street/Sidewalk 11222 200 KINGSLAND AVENUE ... None None None None 40.7238883 -73.94134889 (40.723888303549415, -73.94134888943505) 2014-08-31 23:59:00 2014-09-01 03:52:00 03:53:00
1 28789088 2014-08-31 23:56:00 2014-09-01 06:17:00 NYPD New York City Police Department Noise - Vehicle Car/Truck Music Street/Sidewalk 11234 FLATLANDS AVENUE ... None None None None 40.61948901 -73.93805105 (40.61948901090983, -73.93805104516916) 2014-08-31 23:56:00 2014-09-01 06:17:00 06:21:00
2 28791854 2014-08-31 23:54:00 2014-09-01 01:29:00 NYPD New York City Police Department Noise - Commercial Loud Music/Party Club/Bar/Restaurant 10002 161 LUDLOW STREET ... None None None None 40.72141034 -73.98769444 (40.72141034382407, -73.98769444021134) 2014-08-31 23:54:00 2014-09-01 01:29:00 01:35:00
3 28789936 2014-08-31 23:52:00 2014-09-01 02:53:00 NYPD New York City Police Department Noise - Street/Sidewalk Loud Music/Party Street/Sidewalk 10033 624 WEST 182 STREET ... None None None None 40.85016672 -73.93397221 (40.85016671877659, -73.93397220795968) 2014-08-31 23:52:00 2014-09-01 02:53:00 03:01:00
4 28789931 2014-08-31 23:47:00 2014-09-01 01:06:00 NYPD New York City Police Department Noise - Street/Sidewalk Loud Music/Party Street/Sidewalk 11217 525 DEAN STREET ... None None None None 40.68120794 -73.97277535 (40.68120794066068, -73.97277535440028) 2014-08-31 23:47:00 2014-09-01 01:06:00 01:19:00

5 rows × 56 columns


In [32]:
df['time_open'].value_counts()


Out[32]:
0 days 00:21:00     222
0 days 00:10:00     221
0 days 00:36:00     212
0 days 00:33:00     211
0 days 00:31:00     210
0 days 00:27:00     209
0 days 00:25:00     207
0 days 00:28:00     206
0 days 00:23:00     204
0 days 00:26:00     202
0 days 00:32:00     201
0 days 00:30:00     200
0 days 00:38:00     199
0 days 00:44:00     199
0 days 00:42:00     196
0 days 00:17:00     196
0 days 00:48:00     195
0 days 00:34:00     195
0 days 00:49:00     195
0 days 00:41:00     193
0 days 00:22:00     192
0 days 00:11:00     190
0 days 00:40:00     190
0 days 00:24:00     189
0 days 00:20:00     188
0 days 00:14:00     188
0 days 00:53:00     188
0 days 00:39:00     187
0 days 00:37:00     186
0 days 00:35:00     185
                   ... 
2 days 10:31:00       1
2 days 02:26:00       1
3 days 12:39:00       1
0 days 19:11:00       1
1 days 16:44:00       1
12 days 14:25:00      1
2 days 01:27:00       1
2 days 16:03:00       1
0 days 15:09:00       1
2 days 20:54:00       1
0 days 23:09:00       1
10 days 10:52:00      1
1 days 01:58:00       1
4 days 00:40:00       1
3 days 19:55:00       1
10 days 15:37:00      1
1 days 21:14:00       1
14 days 17:01:00      1
1 days 04:58:00       1
7 days 04:12:00       1
2 days 01:59:00       1
1 days 19:53:00       1
3 days 12:02:00       1
1 days 19:59:00       1
3 days 06:12:00       1
1 days 15:57:00       1
12 days 15:18:00      1
14 days 01:31:00      1
3 days 21:17:00       1
1 days 05:38:00       1
Name: time_open, dtype: int64

In [33]:
time_open = pd.DataFrame(df['time_open'][df['time_open'] > pd.Timedelta(minutes=0)].dropna())

In [34]:
time_open


Out[34]:
time_open
0 03:53:00
1 06:21:00
2 01:35:00
3 03:01:00
4 01:19:00
5 03:06:00
6 01:04:00
7 04:18:00
8 04:48:00
9 03:25:00
10 04:52:00
11 02:36:00
12 03:54:00
13 05:13:00
14 03:06:00
15 01:00:00
16 03:27:00
17 13:32:00
18 01:45:00
19 02:48:00
20 05:54:00
21 00:07:00
22 00:33:00
23 02:51:00
24 05:27:00
25 00:21:00
26 02:12:00
27 02:46:00
28 02:56:00
29 05:13:00
... ...
37585 03:02:00
37586 04:14:00
37587 01:51:00
37588 04:24:00
37589 00:42:00
37590 06:16:00
37591 01:51:00
37592 00:39:00
37593 04:47:00
37594 00:47:00
37595 00:50:00
37596 01:58:00
37597 01:28:00
37598 01:57:00
37599 02:07:00
37600 02:21:00
37601 03:05:00
37602 02:56:00
37603 00:55:00
37604 01:14:00
37605 01:35:00
37606 14:02:00
37607 03:12:00
37608 01:42:00
37609 01:12:00
37610 10:24:00
37611 02:24:00
37612 04:38:00
37613 02:14:00
37614 07:38:00

37615 rows × 1 columns


In [35]:
time_open.median()


Out[35]:
time_open   02:02:00
dtype: timedelta64[ns]

In [36]:
time_open.mean()


Out[36]:
time_open   07:44:22.532500
dtype: timedelta64[ns]

In [37]:
time_open.mode()


Out[37]:
time_open
0 00:21:00

In [38]:
time_open['time_open'].quantile(q=0.25)


Out[38]:
Timedelta('0 days 00:54:00')

In [39]:
time_open['time_open'].quantile(q=0.5)


Out[39]:
Timedelta('0 days 02:02:00')

In [40]:
time_open['time_open'].quantile(q=0.75)


Out[40]:
Timedelta('0 days 04:15:00')

In [41]:
iqr = (time_open['time_open'].quantile(q=0.75)) - (time_open['time_open'].quantile(q=0.25))

In [42]:
iqr


Out[42]:
Timedelta('0 days 03:21:00')

In [43]:
ual = (iqr * 1.5) + time_open.median()

In [44]:
ual


Out[44]:
time_open   07:03:30
dtype: timedelta64[ns]

In [45]:
lal = (iqr * 1.5) - time_open.median()

In [46]:
lal


Out[46]:
time_open   02:59:30
dtype: timedelta64[ns]

In [47]:
numpy.std(time_open)


Out[47]:
time_open   1 days 12:06:07.285166
dtype: timedelta64[ns]

In [48]:
time_open.max()


Out[48]:
time_open   48 days 05:45:00
dtype: timedelta64[ns]

In [49]:
time_open.min()


Out[49]:
time_open   00:01:00
dtype: timedelta64[ns]

In [50]:
data_range = time_open.max() - time_open.min()

In [51]:
data_range


Out[51]:
time_open   48 days 05:44:00
dtype: timedelta64[ns]

In [52]:
(time_open.astype('timedelta64[m]')).hist(bins=100, figsize=(10,5))


Out[52]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x112e38f60>]], dtype=object)