In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')
import dateutil.parser

In [2]:
import pg8000
conn = pg8000.connect(host='training.c1erymiua9dx.us-east-1.rds.amazonaws.com', database="training", port=5432, user='dot_student', password='qgis')
cursor = conn.cursor()

In [3]:
# Getting the column names
statement = "select * from INFORMATION_SCHEMA.COLUMNS where table_name = 'dot_311'"
cursor.execute(statement)
columns = []
for row in cursor.fetchall():
    print(row)
    columns.append(row[3])
print(*columns)


['training', 'public', 'dot_311', 'gid', 1, "nextval('dot_311_gid_seq'::regclass)", 'NO', 'integer', None, None, 32, 2, 0, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'int4', None, None, None, None, '1', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'unique_key', 2, None, 'YES', 'numeric', None, None, 10, 10, 0, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'numeric', None, None, None, None, '2', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'agency', 5, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '5', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'agency nam', 6, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '6', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'complaint', 7, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '7', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'descriptor', 8, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '8', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'location t', 9, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '9', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'incident z', 10, None, 'YES', 'numeric', None, None, 10, 10, 0, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'numeric', None, None, None, None, '10', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'incident a', 11, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '11', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'street nam', 12, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '12', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'cross stre', 13, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '13', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'cross st_1', 14, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '14', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'intersecti', 15, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '15', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'intersec_1', 16, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '16', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'address ty', 17, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '17', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'city', 18, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '18', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'landmark', 19, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '19', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'facility t', 20, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '20', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'status', 21, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '21', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'due date', 22, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '22', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'resolution', 23, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '23', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'resoluti_1', 24, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '24', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'community', 25, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '25', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'borough', 26, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '26', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'x coordina', 27, None, 'YES', 'numeric', None, None, 10, 10, 0, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'numeric', None, None, None, None, '27', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'y coordina', 28, None, 'YES', 'numeric', None, None, 10, 10, 0, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'numeric', None, None, None, None, '28', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'park facil', 29, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '29', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'park borou', 30, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '30', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'school nam', 31, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '31', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'school num', 32, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '32', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'school reg', 33, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '33', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'school cod', 34, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '34', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'school pho', 35, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '35', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'school add', 36, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '36', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'school cit', 37, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '37', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'school sta', 38, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '38', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'school zip', 39, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '39', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'school not', 40, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '40', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'school or', 41, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '41', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'vehicle ty', 42, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '42', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'taxi compa', 43, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '43', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'taxi pick', 44, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '44', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'bridge hig', 45, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '45', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'bridge h_1', 46, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '46', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'road ramp', 47, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '47', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'bridge h_2', 48, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '48', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'garage lot', 49, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '49', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'ferry dire', 50, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '50', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'ferry term', 51, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '51', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'latitude', 52, None, 'YES', 'numeric', None, None, None, 10, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'numeric', None, None, None, None, '52', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'longitude', 53, None, 'YES', 'numeric', None, None, None, 10, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'numeric', None, None, None, None, '53', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'location', 54, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '54', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'geom', 55, None, 'YES', 'USER-DEFINED', None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'public', 'geometry', None, None, None, None, '55', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'created_date', 56, None, 'YES', 'timestamp without time zone', None, None, None, None, None, 6, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'timestamp', None, None, None, None, '56', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'closed_date', 57, None, 'YES', 'timestamp without time zone', None, None, None, None, None, 6, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'timestamp', None, None, None, None, '57', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
gid unique_key agency agency nam complaint descriptor location t incident z incident a street nam cross stre cross st_1 intersecti intersec_1 address ty city landmark facility t status due date resolution resoluti_1 community borough x coordina y coordina park facil park borou school nam school num school reg school cod school pho school add school cit school sta school zip school not school or vehicle ty taxi compa taxi pick bridge hig bridge h_1 road ramp bridge h_2 garage lot ferry dire ferry term latitude longitude location geom created_date closed_date

In [4]:
statement = "SELECT unique_key, complaint, created_date, closed_date FROM dot_311"
cursor.execute(statement)
complaints = []
for row in cursor:
    complaints.append(row)

In [5]:
import pandas as pd
df = pd.DataFrame(complaints)
df.columns = ['unique_key', 'complaint', 'created_date', 'closed_date']
df_original = df.copy()

In [ ]:


In [6]:
df['time'] = df['closed_date']-df['created_date']
df.head(3)


Out[6]:
unique_key complaint created_date closed_date time
0 32570549 Traffic Signal Condition 2016-02-01 00:12:00 2016-02-01 01:15:00 01:03:00
1 32572958 Traffic Signal Condition 2016-02-01 00:14:00 2016-02-01 00:54:00 00:40:00
2 32573576 Traffic Signal Condition 2016-02-01 00:15:00 2016-02-01 01:30:00 01:15:00

In [7]:
df['time'].describe()


Out[7]:
count                       5650
mean      6 days 15:05:05.589380
std      14 days 12:05:38.260805
min           -19 days +09:29:00
25%              0 days 01:14:00
50%              0 days 21:48:15
75%       4 days 00:30:48.500000
max             89 days 18:54:00
Name: time, dtype: object

In [8]:
df['time'].value_counts().sort_values(ascending=False).head(10)


Out[8]:
0 days 00:00:00      521
-3 days +00:00:00    184
-6 days +00:00:00    112
-2 days +00:00:00     27
-2 days +23:59:00     26
0 days 00:20:00       23
0 days 01:32:00       10
0 days 01:26:00       10
0 days 02:02:00        9
0 days 01:34:00        9
Name: time, dtype: int64

In [9]:
df = df[df['time'] > pd.Timedelta(hours=0)].dropna()
df['time'].describe()


Out[9]:
count                       4547
mean      8 days 16:15:03.514405
std      15 days 10:48:08.619990
min              0 days 00:00:01
25%              0 days 12:41:58
50%              1 days 03:33:17
75%       6 days 00:30:23.500000
max             89 days 18:54:00
Name: time, dtype: object

In [10]:
df['time'].value_counts().sort_values(ascending=False).head(10)


Out[10]:
00:20:00    23
01:32:00    10
01:26:00    10
02:02:00     9
00:25:00     9
01:34:00     9
01:27:00     9
00:40:00     9
00:45:00     8
01:42:00     8
Name: time, dtype: int64

In [11]:
df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 4547 entries, 0 to 5850
Data columns (total 5 columns):
unique_key      4547 non-null object
complaint       4547 non-null object
created_date    4547 non-null datetime64[ns]
closed_date     4547 non-null datetime64[ns]
time            4547 non-null timedelta64[ns]
dtypes: datetime64[ns](2), object(2), timedelta64[ns](1)
memory usage: 213.1+ KB

In [12]:
df['time'].astype('timedelta64[h]').head(5)


Out[12]:
0    1.0
1    0.0
2    1.0
3    2.0
4    0.0
Name: time, dtype: float64

In [14]:
df['time']=(df['time'])/(pd.Timedelta(hours=1))
df['time'].head(10)


Out[14]:
0     1.050000
1     0.666667
2     1.250000
3     2.300000
4     0.683333
5     1.650000
6     1.133333
7     0.633333
8     0.166667
9    21.962222
Name: time, dtype: float64

In [19]:
df['time'].describe()


Out[19]:
count    4547.000000
mean      208.250976
std       370.802394
min         0.000278
25%        12.699444
50%        27.554722
75%       144.506528
max      2154.900000
Name: time, dtype: float64

In [26]:
df['time'].hist(bins=100)
plt.xlabel('Time (hours)')
plt.ylabel('# of complaints')


Out[26]:
<matplotlib.text.Text at 0x1896a0f0>

Additional Analysis


In [28]:
# Frequency Table

graph01 = df['complaint'].value_counts().head(3).sort_values(ascending=True).plot(kind='barh')
print("Top 3 popular type of complaint\n")
print(df['complaint'].value_counts().head(3).sort_values(ascending=False))
graph01


Top 3 popular type of complaint

Street Condition            1707
Street Light Condition       983
Traffic Signal Condition     807
Name: complaint, dtype: int64
Out[28]:
<matplotlib.axes._subplots.AxesSubplot at 0x18a7d518>

In [29]:
df.index = df['created_date']
df.head(2)


Out[29]:
unique_key complaint created_date closed_date time
created_date
2016-02-01 00:12:00 32570549 Traffic Signal Condition 2016-02-01 00:12:00 2016-02-01 01:15:00 1.050000
2016-02-01 00:14:00 32572958 Traffic Signal Condition 2016-02-01 00:14:00 2016-02-01 00:54:00 0.666667

In [30]:
df.groupby(df.index.hour).count().plot(y='unique_key',kind='bar')


Out[30]:
<matplotlib.axes._subplots.AxesSubplot at 0x18af2128>

In [ ]: