In [1]:
import pg8000
import datetime
import pandas as pd
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
In [2]:
conn = pg8000.connect(database= 'training', port = 5432, host = 'training.c1erymiua9dx.us-east-1.rds.amazonaws.com', user = 'dot_student', password = 'qgis')
In [34]:
conn.rollback()
In [3]:
cursor = conn.cursor()
In [4]:
cursor.execute("""SELECT column_name FROM information_schema.columns WHERE table_name='noise_311'""")
cursor.fetchall()
Out[4]:
In [5]:
cursor.execute("""SELECT created_date, closed_date
FROM noise_311
LIMIT 1""")
cursor.fetchone()
Out[5]:
In [6]:
cursor.execute("""SELECT created_date, closed_date
FROM noise_311""")
results = cursor.fetchall()
In [7]:
timelen = []
for created, closed in results:
if created and closed:
dif = closed - created
timelen.append(dif.days * 1440 + dif.seconds/60)
How many total results:
In [8]:
len(results)
Out[8]:
How many results had both a created and closed time. They all do!
In [9]:
len(timelen)
Out[9]:
And all of these are a few seconds long!
In [10]:
num = 0
for time in timelen:
if time <= 0:
num += 1
print(num)
In [13]:
df = pd.DataFrame({'timelen': timelen})
In [14]:
df.hist(bins = 28, range = (-120, 1560))
Out[14]:
In [15]:
df['timelen'].mean()
Out[15]:
In [16]:
df['timelen'].median()
Out[16]:
In [17]:
df['timelen'].mode()
Out[17]:
In [19]:
max(df['timelen']) - min(df['timelen'])
Out[19]:
In [20]:
df['timelen'].std()
Out[20]:
In [21]:
df['timelen'].quantile(q=0.75) - df['timelen'].quantile(q=0.25)
Out[21]:
In [ ]: