Perform a basic statistical analysis of the time DOT 311 (table is called dot_311) complaints are open (subtract closed date from created date)
Host: training.c1erymiua9dx.us-east-1.rds.amazonaws.com Port: 5432 Database: training Username: dot_student Password: qgis
In [1]:
!pip install pg8000
In [2]:
import pg8000
conn = pg8000.connect(host='training.c1erymiua9dx.us-east-1.rds.amazonaws.com', port= 5432, database= 'training', user= 'dot_student', password= 'qgis')
In [3]:
cursor = conn.cursor()
In [4]:
cursor.execute("select column_name from information_schema.columns where table_name='dot_311'")
column_name=[]
for item in cursor.fetchall():
column_name.append(item)
# print(item)
In [5]:
cursor.execute("select * from dot_311")
training=[]
for item in cursor.fetchall():
#print(item)
training.append(item)
In [6]:
import pandas as pd
In [7]:
df = pd.DataFrame(training)
df.columns = ['gid', 'unique_key', 'agency', 'agency nam', 'complaint', 'descriptor', 'location t', 'incident z', 'incident a', 'street nam', 'cross stre', 'cross st_1', 'intersecti', 'intersec_1', 'address ty', 'city', 'landmark', 'facility t', 'status', 'due date', 'resolution', 'resoluti_1', 'community', 'borough', 'x coordina', 'y coordina', 'park facil', 'park borou', 'school nam', 'school num', 'school reg', 'school cod', 'school pho', 'school add', 'school cit', 'school sta', 'school zip', 'school not', 'school or', 'vehicle ty', 'taxi compa', 'taxi pick', 'bridge hig', 'bridge h_1', 'road ramp', 'bridge h_2', 'garage lot', 'ferry dire', 'ferry term', 'latitude', 'longitude', 'location', 'geom', 'created_date', 'closed_date']
In [8]:
df.describe()
Out[8]:
In [139]:
df['closed_date_clean'] = pd.to_datetime(df['closed_date'],
format='%m/%d/%y %H:%M:%S')
In [140]:
df['created_date_clean'] = pd.to_datetime(df['created_date'],
format='%m/%d/%y %H:%M:%S')
In [141]:
df['open_complaints_time'] = df['closed_date_clean'] - df['created_date_clean']
In [142]:
df.columns
Out[142]:
In [152]:
df.head()
Out[152]:
In [153]:
df['open_complaints_time'].mode()
Out[153]:
In [154]:
df['open_complaints_time'].median()
Out[154]:
In [157]:
#How to find corresponding complaint?
#df[df['open_complaints_time'] == '21:48:15']
Out[157]:
In [158]:
df['open_complaints_time'].mean()
Out[158]:
In [161]:
df['open_complaints_time'].max()
Out[161]:
In [163]:
df['open_complaints_time'].min()
#WHAT WHY
Out[163]:
In [164]:
df['open_complaints_time'].describe()
Out[164]:
In [165]:
df['open_complaints_time'].quantile(q=0.25)
Out[165]:
In [166]:
df['open_complaints_time'].quantile(q=0.75)
Out[166]:
In [ ]: