In [13]:
import pandas as pd
import pg8000
conn = pg8000.connect(user='dot_student', host='training.c1erymiua9dx.us-east-1.rds.amazonaws.com', port=5432, database='training', password='qgis')
import dateutil.parser

In [14]:
conn.rollback()

In [15]:
cursor = conn.cursor()
print(type(cursor))


<class 'pg8000.core.Cursor'>

In [16]:
cursor.execute("select * from noise_311")

In [17]:
df = pd.read_sql("select * from noise_311", conn)

In [18]:
df.head()


Out[18]:
b'unique_key' b'created_date' b'closed_date' b'agency' b'agency_name' b'complaint_type' b'descriptor' b'location_type' b'incident_zip' b'incident_address' ... b'bridge_highway_name' b'bridge_highway_direction' b'road_ramp' b'bridge_highway_segment' b'garage_lot_name' b'ferry_direction' b'ferry_terminal_name' b'latitude' b'longitude' b'location'
0 28792167 2014-08-31 23:59:00 2014-09-01 03:52:00 NYPD New York City Police Department Noise - Street/Sidewalk Loud Music/Party Street/Sidewalk 11222 200 KINGSLAND AVENUE ... None None None None None None None 40.723888 -73.941349 (40.723888303549415, -73.94134888943505)
1 28789088 2014-08-31 23:56:00 2014-09-01 06:17:00 NYPD New York City Police Department Noise - Vehicle Car/Truck Music Street/Sidewalk 11234 FLATLANDS AVENUE ... None None None None None None None 40.619489 -73.938051 (40.61948901090983, -73.93805104516916)
2 28791854 2014-08-31 23:54:00 2014-09-01 01:29:00 NYPD New York City Police Department Noise - Commercial Loud Music/Party Club/Bar/Restaurant 10002 161 LUDLOW STREET ... None None None None None None None 40.721410 -73.987694 (40.72141034382407, -73.98769444021134)
3 28789936 2014-08-31 23:52:00 2014-09-01 02:53:00 NYPD New York City Police Department Noise - Street/Sidewalk Loud Music/Party Street/Sidewalk 10033 624 WEST 182 STREET ... None None None None None None None 40.850167 -73.933972 (40.85016671877659, -73.93397220795968)
4 28789931 2014-08-31 23:47:00 2014-09-01 01:06:00 NYPD New York City Police Department Noise - Street/Sidewalk Loud Music/Party Street/Sidewalk 11217 525 DEAN STREET ... None None None None None None None 40.681208 -73.972775 (40.68120794066068, -73.97277535440028)

5 rows × 53 columns


In [20]:
df.columns =[                    b'unique_key',                   'opened',
                          'closed',                         b'agency',
                          b'agency_name',                 b'complaint_type',
                           b'descriptor',                  b'location_type',
                         b'incident_zip',               b'incident_address',
                          b'street_name',                 b'cross_street_1',
                       b'cross_street_2',          b'intersection_street_1',
                b'intersection_street_2',                   b'address_type',
                                 b'city',                       b'landmark',
                        b'facility_type',                         b'status',
                             b'due_date',         b'resolution_description',
       b'resolution_action_updated_date',                b'community_board',
                              b'borough',                   b'x_coordinate',
                         b'y_coordinate',             b'park_facility_name',
                         b'park_borough',                    b'school_name',
                        b'school_number',                  b'school_region',
                          b'school_code',            b'school_phone_number',
                       b'school_address',                    b'school_city',
                         b'school_state',                     b'school_zip',
                     b'school_not_found',   b'school_or_citywide_complaint',
                         b'vehicle_type',           b'taxi_company_borough',
                b'taxi_pick_up_location',            b'bridge_highway_name',
             b'bridge_highway_direction',                      b'road_ramp',
               b'bridge_highway_segment',                b'garage_lot_name',
                      b'ferry_direction',            b'ferry_terminal_name',
                             b'latitude',                      b'longitude',
                             b'location']

In [21]:
df['open_time'] = (df['closed']-df['opened']).astype('timedelta64[h]')

In [28]:
df['open_time'].describe()


Out[28]:
count    37615.000000
mean         7.261146
std         36.100546
min          0.000000
25%          0.000000
50%          2.000000
75%          4.000000
max       1157.000000
Name: open_time, dtype: float64

In [30]:
#The data is cleaner and less standard deviation. No NaN values, no NaT values.

In [ ]: