In [54]:
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
%matplotlib inline
from datetime import timedelta
import pg8000
conn = pg8000.connect(host='training.c1erymiua9dx.us-east-1.rds.amazonaws.com', database="training", port=5432, user='dot_student', password='qgis')
In [55]:
cursor = conn.cursor()
statement = "SELECT * FROM information_schema.columns WHERE table_name= 'noise_311'"
cursor.execute(statement)
column_names = []
for row in cursor.fetchall():
column_names.append(row[3])
print(column_names)
In [56]:
statement = "SELECT created_date, closed_date, agency, complaint_type FROM noise_311"
cursor.execute(statement)
complaint_data = []
for row in cursor:
complaint_data.append(row)
In [77]:
df = pd.DataFrame(complaint_data)
df.columns = ['created date', 'closed date', 'agency', 'complaint type']
df.head(3)
Out[77]:
In [60]:
pd.value_counts(df['complaint type'].values, sort=True)
Out[60]:
In [62]:
date1 = df['closed date']
date2 = df['created date']
timeopen = date1 - date2
df1 = pd.DataFrame(timeopen)
df1.columns = ['Time Taken']
df1.describe()
Out[62]:
In [63]:
timeopen.astype('timedelta64[h]')
a = (timeopen / pd.Timedelta(hours=1)).dropna()
plottable_dates = pd.DataFrame(a)
plottable_dates.describe()
Out[63]:
In [78]:
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
n, bins, patches = ax.hist(plottable_dates, bins=100, range=(0, 25), histtype='bar')
for patch in patches:
patch.set_facecolor('darkgrey')
plt.title('Time Taken to Respond to Noise Complaints')
plt.xlabel('Time (in hours)')
Out[78]:
Time taken to respond to noise complaints is, overall, shorter, and there are no questionable negative times.