In [13]:
import pandas as pd
import pg8000
conn = pg8000.connect(user='dot_student', host='training.c1erymiua9dx.us-east-1.rds.amazonaws.com', port=5432, database='training', password='qgis')
import dateutil.parser
In [14]:
conn.rollback()
In [15]:
cursor = conn.cursor()
print(type(cursor))
In [16]:
cursor.execute("select * from noise_311")
In [17]:
df = pd.read_sql("select * from noise_311", conn)
In [18]:
df.head()
Out[18]:
In [20]:
df.columns =[ b'unique_key', 'opened',
'closed', b'agency',
b'agency_name', b'complaint_type',
b'descriptor', b'location_type',
b'incident_zip', b'incident_address',
b'street_name', b'cross_street_1',
b'cross_street_2', b'intersection_street_1',
b'intersection_street_2', b'address_type',
b'city', b'landmark',
b'facility_type', b'status',
b'due_date', b'resolution_description',
b'resolution_action_updated_date', b'community_board',
b'borough', b'x_coordinate',
b'y_coordinate', b'park_facility_name',
b'park_borough', b'school_name',
b'school_number', b'school_region',
b'school_code', b'school_phone_number',
b'school_address', b'school_city',
b'school_state', b'school_zip',
b'school_not_found', b'school_or_citywide_complaint',
b'vehicle_type', b'taxi_company_borough',
b'taxi_pick_up_location', b'bridge_highway_name',
b'bridge_highway_direction', b'road_ramp',
b'bridge_highway_segment', b'garage_lot_name',
b'ferry_direction', b'ferry_terminal_name',
b'latitude', b'longitude',
b'location']
In [21]:
df['open_time'] = (df['closed']-df['opened']).astype('timedelta64[h]')
In [28]:
df['open_time'].describe()
Out[28]:
In [30]:
#The data is cleaner and less standard deviation. No NaN values, no NaT values.
In [ ]: