In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')
import dateutil.parser
In [2]:
import pg8000
conn = pg8000.connect(host='training.c1erymiua9dx.us-east-1.rds.amazonaws.com', database="training", port=5432, user='dot_student', password='qgis')
cursor = conn.cursor()
In [3]:
# Getting the column names
statement = "select * from INFORMATION_SCHEMA.COLUMNS where table_name = 'dot_311'"
cursor.execute(statement)
columns = []
for row in cursor.fetchall():
print(row)
columns.append(row[3])
print(*columns)
['training', 'public', 'dot_311', 'gid', 1, "nextval('dot_311_gid_seq'::regclass)", 'NO', 'integer', None, None, 32, 2, 0, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'int4', None, None, None, None, '1', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'unique_key', 2, None, 'YES', 'numeric', None, None, 10, 10, 0, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'numeric', None, None, None, None, '2', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'agency', 5, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '5', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'agency nam', 6, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '6', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'complaint', 7, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '7', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'descriptor', 8, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '8', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'location t', 9, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '9', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'incident z', 10, None, 'YES', 'numeric', None, None, 10, 10, 0, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'numeric', None, None, None, None, '10', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'incident a', 11, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '11', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'street nam', 12, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '12', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'cross stre', 13, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '13', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'cross st_1', 14, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '14', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'intersecti', 15, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '15', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'intersec_1', 16, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '16', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'address ty', 17, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '17', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'city', 18, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '18', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'landmark', 19, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '19', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'facility t', 20, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '20', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'status', 21, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '21', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'due date', 22, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '22', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'resolution', 23, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '23', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'resoluti_1', 24, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '24', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'community', 25, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '25', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'borough', 26, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '26', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'x coordina', 27, None, 'YES', 'numeric', None, None, 10, 10, 0, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'numeric', None, None, None, None, '27', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'y coordina', 28, None, 'YES', 'numeric', None, None, 10, 10, 0, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'numeric', None, None, None, None, '28', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'park facil', 29, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '29', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'park borou', 30, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '30', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'school nam', 31, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '31', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'school num', 32, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '32', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'school reg', 33, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '33', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'school cod', 34, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '34', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'school pho', 35, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '35', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'school add', 36, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '36', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'school cit', 37, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '37', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'school sta', 38, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '38', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'school zip', 39, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '39', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'school not', 40, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '40', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'school or', 41, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '41', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'vehicle ty', 42, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '42', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'taxi compa', 43, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '43', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'taxi pick', 44, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '44', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'bridge hig', 45, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '45', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'bridge h_1', 46, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '46', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'road ramp', 47, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '47', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'bridge h_2', 48, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '48', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'garage lot', 49, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '49', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'ferry dire', 50, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '50', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'ferry term', 51, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '51', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'latitude', 52, None, 'YES', 'numeric', None, None, None, 10, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'numeric', None, None, None, None, '52', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'longitude', 53, None, 'YES', 'numeric', None, None, None, 10, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'numeric', None, None, None, None, '53', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'location', 54, None, 'YES', 'character varying', 254, 1016, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'varchar', None, None, None, None, '54', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'geom', 55, None, 'YES', 'USER-DEFINED', None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'training', 'public', 'geometry', None, None, None, None, '55', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'created_date', 56, None, 'YES', 'timestamp without time zone', None, None, None, None, None, 6, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'timestamp', None, None, None, None, '56', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
['training', 'public', 'dot_311', 'closed_date', 57, None, 'YES', 'timestamp without time zone', None, None, None, None, None, 6, None, None, None, None, None, None, None, None, None, None, None, 'training', 'pg_catalog', 'timestamp', None, None, None, None, '57', 'NO', 'NO', None, None, None, None, None, None, 'NEVER', None, 'YES']
gid unique_key agency agency nam complaint descriptor location t incident z incident a street nam cross stre cross st_1 intersecti intersec_1 address ty city landmark facility t status due date resolution resoluti_1 community borough x coordina y coordina park facil park borou school nam school num school reg school cod school pho school add school cit school sta school zip school not school or vehicle ty taxi compa taxi pick bridge hig bridge h_1 road ramp bridge h_2 garage lot ferry dire ferry term latitude longitude location geom created_date closed_date
In [4]:
statement = "SELECT unique_key, complaint, created_date, closed_date FROM dot_311"
cursor.execute(statement)
complaints = []
for row in cursor:
complaints.append(row)
In [5]:
import pandas as pd
df = pd.DataFrame(complaints)
df.columns = ['unique_key', 'complaint', 'created_date', 'closed_date']
df_original = df.copy()
In [ ]:
In [6]:
df['time'] = df['closed_date']-df['created_date']
df.head(3)
Out[6]:
unique_key
complaint
created_date
closed_date
time
0
32570549
Traffic Signal Condition
2016-02-01 00:12:00
2016-02-01 01:15:00
01:03:00
1
32572958
Traffic Signal Condition
2016-02-01 00:14:00
2016-02-01 00:54:00
00:40:00
2
32573576
Traffic Signal Condition
2016-02-01 00:15:00
2016-02-01 01:30:00
01:15:00
In [7]:
df['time'].describe()
Out[7]:
count 5650
mean 6 days 15:05:05.589380
std 14 days 12:05:38.260805
min -19 days +09:29:00
25% 0 days 01:14:00
50% 0 days 21:48:15
75% 4 days 00:30:48.500000
max 89 days 18:54:00
Name: time, dtype: object
In [8]:
df['time'].value_counts().sort_values(ascending=False).head(10)
Out[8]:
0 days 00:00:00 521
-3 days +00:00:00 184
-6 days +00:00:00 112
-2 days +00:00:00 27
-2 days +23:59:00 26
0 days 00:20:00 23
0 days 01:32:00 10
0 days 01:26:00 10
0 days 02:02:00 9
0 days 01:34:00 9
Name: time, dtype: int64
In [9]:
df = df[df['time'] > pd.Timedelta(hours=0)].dropna()
df['time'].describe()
Out[9]:
count 4547
mean 8 days 16:15:03.514405
std 15 days 10:48:08.619990
min 0 days 00:00:01
25% 0 days 12:41:58
50% 1 days 03:33:17
75% 6 days 00:30:23.500000
max 89 days 18:54:00
Name: time, dtype: object
In [10]:
df['time'].value_counts().sort_values(ascending=False).head(10)
Out[10]:
00:20:00 23
01:32:00 10
01:26:00 10
02:02:00 9
00:25:00 9
01:34:00 9
01:27:00 9
00:40:00 9
00:45:00 8
01:42:00 8
Name: time, dtype: int64
In [11]:
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 4547 entries, 0 to 5850
Data columns (total 5 columns):
unique_key 4547 non-null object
complaint 4547 non-null object
created_date 4547 non-null datetime64[ns]
closed_date 4547 non-null datetime64[ns]
time 4547 non-null timedelta64[ns]
dtypes: datetime64[ns](2), object(2), timedelta64[ns](1)
memory usage: 213.1+ KB
In [12]:
df['time'].astype('timedelta64[h]').head(5)
Out[12]:
0 1.0
1 0.0
2 1.0
3 2.0
4 0.0
Name: time, dtype: float64
In [14]:
df['time']=(df['time'])/(pd.Timedelta(hours=1))
df['time'].head(10)
Out[14]:
0 1.050000
1 0.666667
2 1.250000
3 2.300000
4 0.683333
5 1.650000
6 1.133333
7 0.633333
8 0.166667
9 21.962222
Name: time, dtype: float64
In [19]:
df['time'].describe()
Out[19]:
count 4547.000000
mean 208.250976
std 370.802394
min 0.000278
25% 12.699444
50% 27.554722
75% 144.506528
max 2154.900000
Name: time, dtype: float64
In [26]:
df['time'].hist(bins=100)
plt.xlabel('Time (hours)')
plt.ylabel('# of complaints')
Out[26]:
<matplotlib.text.Text at 0x1896a0f0>
In [28]:
# Frequency Table
graph01 = df['complaint'].value_counts().head(3).sort_values(ascending=True).plot(kind='barh')
print("Top 3 popular type of complaint\n")
print(df['complaint'].value_counts().head(3).sort_values(ascending=False))
graph01
Top 3 popular type of complaint
Street Condition 1707
Street Light Condition 983
Traffic Signal Condition 807
Name: complaint, dtype: int64
Out[28]:
<matplotlib.axes._subplots.AxesSubplot at 0x18a7d518>
In [29]:
df.index = df['created_date']
df.head(2)
Out[29]:
unique_key
complaint
created_date
closed_date
time
created_date
2016-02-01 00:12:00
32570549
Traffic Signal Condition
2016-02-01 00:12:00
2016-02-01 01:15:00
1.050000
2016-02-01 00:14:00
32572958
Traffic Signal Condition
2016-02-01 00:14:00
2016-02-01 00:54:00
0.666667
In [30]:
df.groupby(df.index.hour).count().plot(y='unique_key',kind='bar')
Out[30]:
<matplotlib.axes._subplots.AxesSubplot at 0x18af2128>
In [ ]:
Content source: ledeprogram/algorithms
Similar notebooks: