In [53]:
import pandas as pd
import pg8000
import dateutil.parser
%matplotlib inline
In [38]:
conn = pg8000.connect(host="training.c1erymiua9dx.us-east-1.rds.amazonaws.com", user='dot_student', password='qgis', database='training')
cursor = conn.cursor()
In [39]:
cursor.execute("select * from dot_311")
data = []
for row in cursor.fetchall():
data.append(row)
In [40]:
#column name
statement = "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS WHERE table_name = 'dot_311'"
cursor.execute(statement)
columns = []
for row in cursor.fetchall():
columns.append(row[0])
print(columns)
['gid', 'unique_key', 'agency', 'agency nam', 'complaint', 'descriptor', 'location t', 'incident z', 'incident a', 'street nam', 'cross stre', 'cross st_1', 'intersecti', 'intersec_1', 'address ty', 'city', 'landmark', 'facility t', 'status', 'due date', 'resolution', 'resoluti_1', 'community', 'borough', 'x coordina', 'y coordina', 'park facil', 'park borou', 'school nam', 'school num', 'school reg', 'school cod', 'school pho', 'school add', 'school cit', 'school sta', 'school zip', 'school not', 'school or', 'vehicle ty', 'taxi compa', 'taxi pick', 'bridge hig', 'bridge h_1', 'road ramp', 'bridge h_2', 'garage lot', 'ferry dire', 'ferry term', 'latitude', 'longitude', 'location', 'geom', 'created_date', 'closed_date']
In [41]:
df=pd.DataFrame(data=data, columns=columns)
In [42]:
df.head()
Out[42]:
gid
unique_key
agency
agency nam
complaint
descriptor
location t
incident z
incident a
street nam
...
bridge h_2
garage lot
ferry dire
ferry term
latitude
longitude
location
geom
created_date
closed_date
0
2
32570549
DOT
Department of Transportation
Traffic Signal Condition
Controller
None
10301
None
None
...
None
None
None
None
40.613803753157917
-74.113373738864198
(40.61380375315792, -74.1133737388642)
0101000020E61000006829ED83418752C0D7FC121F914E...
2016-02-01 00:12:00
2016-02-01 01:15:00
1
3
32572958
DOT
Department of Transportation
Traffic Signal Condition
Controller
None
10301
None
None
...
None
None
None
None
40.626545297464929
-74.091582334480577
(40.62654529746493, -74.09158233448058)
0101000020E610000011DF267CDC8552C08809E5A23250...
2016-02-01 00:14:00
2016-02-01 00:54:00
2
4
32573576
DOT
Department of Transportation
Traffic Signal Condition
Controller
None
10310
None
None
...
None
None
None
None
40.629249554787492
-74.123220752604595
(40.62924955478749, -74.1232207526046)
0101000020E610000004A84BD9E28752C0DC6AD93F8B50...
2016-02-01 00:15:00
2016-02-01 01:30:00
3
5
32572389
DOT
Department of Transportation
Traffic Signal Condition
Controller
None
None
None
None
...
None
None
None
None
None
None
None
0101000020E610000004A84BD9E28752C0DC6AD93F8B50...
2016-02-01 00:17:00
2016-02-01 02:35:00
4
6
32570548
DOT
Department of Transportation
Traffic Signal Condition
Controller
None
None
None
None
...
None
None
None
None
None
None
None
0101000020E610000004A84BD9E28752C0DC6AD93F8B50...
2016-02-01 00:19:00
2016-02-01 01:00:00
5 rows × 55 columns
In [43]:
new_df=df[['unique_key','agency nam','complaint','created_date','closed_date','status','due date','borough']]
In [44]:
new_df.columns
Out[44]:
Index(['unique_key', 'agency nam', 'complaint', 'created_date', 'closed_date',
'status', 'due date', 'borough'],
dtype='object')
In [46]:
new_df['new_created_date'] = pd.to_datetime(new_df['created_date'],format='%m/%d/%y %H:%M:%S')
/usr/local/lib/python3.5/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
if __name__ == '__main__':
In [47]:
new_df['new_closed_date'] = pd.to_datetime(new_df['closed_date'],format='%m/%d/%y %H:%M:%S')
/usr/local/lib/python3.5/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
if __name__ == '__main__':
In [48]:
new_df['time']=new_df['closed_date']-new_df['created_date']
/usr/local/lib/python3.5/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
if __name__ == '__main__':
In [49]:
# clean the dirty data
def time_to_day(td):
try: #not sure all the NaN data types, use try
return td.days
except:
return 0 #turn all the null number into 0
new_df['cleantime']=new_df['time'].apply(time_to_day)
/usr/local/lib/python3.5/site-packages/ipykernel/__main__.py:7: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
In [50]:
new_df['cleantime'].describe()
Out[50]:
count 5851.000000
mean 6.021022
std 14.192718
min -19.000000
25% 0.000000
50% 0.000000
75% 3.000000
max 89.000000
Name: cleantime, dtype: float64
In [73]:
df1 = new_df[new_df['cleantime']>0] #remove the negative cleantime
In [74]:
#standard deviation: average distance of each data point from mean
df1
Out[74]:
unique_key
agency nam
complaint
created_date
closed_date
status
due date
borough
new_created_date
new_closed_date
time
cleantime
11
32570682
Department of Transportation
Street Light Condition
2016-02-01 00:44:00
2016-02-08 12:29:00
Closed
None
BROOKLYN
2016-02-01 00:44:00
2016-02-08 12:29:00
7 days 11:45:00
7
12
32572906
Department of Transportation
Traffic Signal Condition
2016-02-01 00:50:00
2016-02-10 09:10:00
Closed
None
BROOKLYN
2016-02-01 00:50:00
2016-02-10 09:10:00
9 days 08:20:00
9
16
32570317
Department of Transportation
Street Condition
2016-02-01 01:18:49
2016-02-03 16:59:00
Closed
03/02/2016 01:18:49 AM
BROOKLYN
2016-02-01 01:18:49
2016-02-03 16:59:00
2 days 15:40:11
2
17
32581124
Department of Transportation
Street Light Condition
2016-02-01 01:20:00
2016-02-05 12:04:00
Closed
None
QUEENS
2016-02-01 01:20:00
2016-02-05 12:04:00
4 days 10:44:00
4
18
32579277
Department of Transportation
Street Light Condition
2016-02-01 01:36:00
2016-02-04 10:37:00
Closed
None
BRONX
2016-02-01 01:36:00
2016-02-04 10:37:00
3 days 09:01:00
3
19
32580720
Department of Transportation
Street Light Condition
2016-02-01 01:36:00
2016-02-02 13:09:00
Closed
None
STATEN ISLAND
2016-02-01 01:36:00
2016-02-02 13:09:00
1 days 11:33:00
1
21
32580285
Department of Transportation
Street Light Condition
2016-02-01 02:07:00
2016-02-08 15:25:00
Closed
None
QUEENS
2016-02-01 02:07:00
2016-02-08 15:25:00
7 days 13:18:00
7
22
32575419
Department of Transportation
Sidewalk Condition
2016-02-01 02:32:08
2016-02-04 12:06:14
Closed
04/01/2016 02:32:08 AM
BROOKLYN
2016-02-01 02:32:08
2016-02-04 12:06:14
3 days 09:34:06
3
24
32581170
Department of Transportation
Street Light Condition
2016-02-01 03:21:00
2016-02-02 14:57:00
Closed
None
BROOKLYN
2016-02-01 03:21:00
2016-02-02 14:57:00
1 days 11:36:00
1
27
32577440
Department of Transportation
Street Light Condition
2016-02-01 03:26:00
2016-02-03 10:03:00
Closed
None
MANHATTAN
2016-02-01 03:26:00
2016-02-03 10:03:00
2 days 06:37:00
2
30
32618578
Department of Transportation
Street Light Condition
2016-02-06 15:22:00
2016-02-08 15:23:00
Closed
None
BRONX
2016-02-06 15:22:00
2016-02-08 15:23:00
2 days 00:01:00
2
31
32580055
Department of Transportation
Street Condition
2016-02-01 03:59:57
2016-02-02 07:25:35
Closed
05/01/2016 03:59:57 AM
BRONX
2016-02-01 03:59:57
2016-02-02 07:25:35
1 days 03:25:38
1
32
32574624
Department of Transportation
Traffic Signal Condition
2016-02-01 04:05:00
2016-03-31 09:45:00
Closed
None
QUEENS
2016-02-01 04:05:00
2016-03-31 09:45:00
59 days 05:40:00
59
34
32576077
Department of Transportation
Traffic Signal Condition
2016-02-01 04:48:00
2016-02-24 11:15:00
Closed
None
QUEENS
2016-02-01 04:48:00
2016-02-24 11:15:00
23 days 06:27:00
23
37
32577388
Department of Transportation
Street Light Condition
2016-02-01 05:46:00
2016-02-02 13:12:00
Closed
None
BRONX
2016-02-01 05:46:00
2016-02-02 13:12:00
1 days 07:26:00
1
38
32578910
Department of Transportation
Traffic Signal Condition
2016-02-01 05:59:00
2016-04-04 08:30:00
Closed
None
MANHATTAN
2016-02-01 05:59:00
2016-04-04 08:30:00
63 days 02:31:00
63
43
32577924
Department of Transportation
Sidewalk Condition
2016-02-01 06:48:29
2016-02-02 10:12:11
Closed
02/04/2016 06:48:29 AM
BRONX
2016-02-01 06:48:29
2016-02-02 10:12:11
1 days 03:23:42
1
45
32578695
Department of Transportation
Street Condition
2016-02-01 06:56:07
2016-02-02 09:10:00
Closed
None
STATEN ISLAND
2016-02-01 06:56:07
2016-02-02 09:10:00
1 days 02:13:53
1
47
32574619
Department of Transportation
Traffic Signal Condition
2016-02-01 06:57:00
2016-02-02 09:45:00
Closed
None
MANHATTAN
2016-02-01 06:57:00
2016-02-02 09:45:00
1 days 02:48:00
1
49
32588484
Department of Transportation
Street Condition
2016-02-01 07:00:00
2016-02-02 12:54:00
Closed
None
STATEN ISLAND
2016-02-01 07:00:00
2016-02-02 12:54:00
1 days 05:54:00
1
51
32587340
Department of Transportation
Street Condition
2016-02-01 07:00:00
2016-02-02 09:55:00
Closed
None
STATEN ISLAND
2016-02-01 07:00:00
2016-02-02 09:55:00
1 days 02:55:00
1
52
32586269
Department of Transportation
Street Condition
2016-02-01 07:00:00
2016-02-02 10:40:00
Closed
None
STATEN ISLAND
2016-02-01 07:00:00
2016-02-02 10:40:00
1 days 03:40:00
1
53
32585093
Department of Transportation
Street Condition
2016-02-01 07:00:00
2016-02-02 13:30:00
Closed
None
STATEN ISLAND
2016-02-01 07:00:00
2016-02-02 13:30:00
1 days 06:30:00
1
54
32586236
Department of Transportation
Street Condition
2016-02-01 07:00:00
2016-02-02 12:50:00
Closed
None
STATEN ISLAND
2016-02-01 07:00:00
2016-02-02 12:50:00
1 days 05:50:00
1
55
32589645
Department of Transportation
Street Condition
2016-02-01 07:00:00
2016-02-02 08:47:00
Closed
None
STATEN ISLAND
2016-02-01 07:00:00
2016-02-02 08:47:00
1 days 01:47:00
1
56
32589543
Department of Transportation
Street Condition
2016-02-01 07:00:00
2016-02-02 13:10:00
Closed
None
STATEN ISLAND
2016-02-01 07:00:00
2016-02-02 13:10:00
1 days 06:10:00
1
57
32586272
Department of Transportation
Street Condition
2016-02-01 07:00:00
2016-02-02 10:25:00
Closed
None
STATEN ISLAND
2016-02-01 07:00:00
2016-02-02 10:25:00
1 days 03:25:00
1
58
32585094
Department of Transportation
Street Condition
2016-02-01 07:00:00
2016-02-02 11:20:00
Closed
None
STATEN ISLAND
2016-02-01 07:00:00
2016-02-02 11:20:00
1 days 04:20:00
1
59
32590796
Department of Transportation
Street Condition
2016-02-01 07:00:00
2016-02-02 11:10:00
Closed
None
STATEN ISLAND
2016-02-01 07:00:00
2016-02-02 11:10:00
1 days 04:10:00
1
60
32586270
Department of Transportation
Street Condition
2016-02-01 07:00:00
2016-02-02 08:10:00
Closed
None
STATEN ISLAND
2016-02-01 07:00:00
2016-02-02 08:10:00
1 days 01:10:00
1
...
...
...
...
...
...
...
...
...
...
...
...
...
5815
32611950
Department of Transportation
Street Light Condition
2016-02-06 21:47:00
2016-02-08 12:25:00
Closed
None
BROOKLYN
2016-02-06 21:47:00
2016-02-08 12:25:00
1 days 14:38:00
1
5816
32614779
Department of Transportation
Street Condition
2016-02-06 21:52:09
2016-02-09 11:00:00
Closed
None
QUEENS
2016-02-06 21:52:09
2016-02-09 11:00:00
2 days 13:07:51
2
5817
32616693
Department of Transportation
Street Condition
2016-02-06 21:52:28
2016-02-10 07:56:00
Closed
None
QUEENS
2016-02-06 21:52:28
2016-02-10 07:56:00
3 days 10:03:32
3
5818
32618591
Department of Transportation
Street Condition
2016-02-06 21:56:54
2016-02-08 11:00:00
Closed
None
MANHATTAN
2016-02-06 21:56:54
2016-02-08 11:00:00
1 days 13:03:06
1
5819
32614712
Department of Transportation
Street Condition
2016-02-06 21:59:19
2016-02-09 13:21:00
Closed
None
QUEENS
2016-02-06 21:59:19
2016-02-09 13:21:00
2 days 15:21:41
2
5820
32619052
Department of Transportation
Street Condition
2016-02-06 22:06:38
2016-02-11 13:07:20
Closed
02/16/2016 10:06:38 PM
BROOKLYN
2016-02-06 22:06:38
2016-02-11 13:07:20
4 days 15:00:42
4
5821
32614085
Department of Transportation
Street Condition
2016-02-06 22:07:24
2016-02-09 15:23:13
Closed
02/15/2016 11:36:03 AM
BRONX
2016-02-06 22:07:24
2016-02-09 15:23:13
2 days 17:15:49
2
5822
32612339
Department of Transportation
Street Condition
2016-02-06 22:23:07
2016-02-08 03:08:37
Closed
02/16/2016 10:23:07 PM
MANHATTAN
2016-02-06 22:23:07
2016-02-08 03:08:37
1 days 04:45:30
1
5823
32612333
Department of Transportation
Street Condition
2016-02-06 22:25:28
2016-02-08 03:09:49
Closed
02/16/2016 10:25:28 PM
MANHATTAN
2016-02-06 22:25:28
2016-02-08 03:09:49
1 days 04:44:21
1
5824
32614717
Department of Transportation
Street Light Condition
2016-02-06 22:27:00
2016-02-08 11:26:00
Closed
None
QUEENS
2016-02-06 22:27:00
2016-02-08 11:26:00
1 days 12:59:00
1
5825
32617685
Department of Transportation
Street Light Condition
2016-02-06 22:32:00
2016-02-09 12:07:00
Closed
None
BROOKLYN
2016-02-06 22:32:00
2016-02-09 12:07:00
2 days 13:35:00
2
5826
32615195
Department of Transportation
Street Condition
2016-02-06 22:34:14
2016-02-08 03:08:37
Closed
02/16/2016 10:23:07 PM
MANHATTAN
2016-02-06 22:34:14
2016-02-08 03:08:37
1 days 04:34:23
1
5827
32613947
Department of Transportation
Highway Condition
2016-02-06 22:34:34
2016-02-08 14:39:00
Closed
02/16/2016 10:34:34 PM
BROOKLYN
2016-02-06 22:34:34
2016-02-08 14:39:00
1 days 16:04:26
1
5828
32612566
Department of Transportation
Street Light Condition
2016-02-06 22:35:00
2016-03-16 10:39:00
Closed
None
BROOKLYN
2016-02-06 22:35:00
2016-03-16 10:39:00
38 days 12:04:00
38
5829
32614725
Department of Transportation
Street Light Condition
2016-02-06 22:41:00
2016-02-11 09:43:00
Closed
None
QUEENS
2016-02-06 22:41:00
2016-02-11 09:43:00
4 days 11:02:00
4
5830
32617705
Department of Transportation
Street Sign - Missing
2016-02-06 22:42:29
2016-02-26 10:13:26
Closed
08/06/2016 07:35:46 AM
MANHATTAN
2016-02-06 22:42:29
2016-02-26 10:13:26
19 days 11:30:57
19
5832
32617828
Department of Transportation
Curb Condition
2016-02-06 22:47:30
2016-02-12 15:18:14
Closed
03/07/2016 10:47:30 PM
BRONX
2016-02-06 22:47:30
2016-02-12 15:18:14
5 days 16:30:44
5
5833
32618570
Department of Transportation
Street Sign - Missing
2016-02-06 22:49:43
2016-02-22 10:04:38
Closed
08/06/2016 07:43:15 AM
MANHATTAN
2016-02-06 22:49:43
2016-02-22 10:04:38
15 days 11:14:55
15
5834
32616052
Department of Transportation
Street Condition
2016-02-06 22:53:35
2016-02-17 09:21:23
Closed
02/16/2016 10:53:35 PM
BROOKLYN
2016-02-06 22:53:35
2016-02-17 09:21:23
10 days 10:27:48
10
5835
32612279
Department of Transportation
Street Condition
2016-02-06 22:55:05
2016-02-08 04:09:00
Closed
02/16/2016 10:55:05 PM
MANHATTAN
2016-02-06 22:55:05
2016-02-08 04:09:00
1 days 05:13:55
1
5836
32614241
Department of Transportation
Street Condition
2016-02-06 22:56:01
2016-02-08 13:32:54
Closed
02/16/2016 10:56:01 PM
MANHATTAN
2016-02-06 22:56:01
2016-02-08 13:32:54
1 days 14:36:53
1
5837
32617383
Department of Transportation
Street Light Condition
2016-02-06 22:59:00
2016-02-08 11:27:00
Closed
None
QUEENS
2016-02-06 22:59:00
2016-02-08 11:27:00
1 days 12:28:00
1
5838
32618573
Department of Transportation
Street Condition
2016-02-06 23:02:17
2016-02-08 23:46:00
Closed
None
MANHATTAN
2016-02-06 23:02:17
2016-02-08 23:46:00
2 days 00:43:43
2
5839
32617669
Department of Transportation
Street Condition
2016-02-06 23:02:35
2016-02-16 09:45:00
Closed
None
MANHATTAN
2016-02-06 23:02:35
2016-02-16 09:45:00
9 days 10:42:25
9
5840
32612533
Department of Transportation
Street Light Condition
2016-02-06 23:06:00
2016-02-08 21:50:00
Closed
None
MANHATTAN
2016-02-06 23:06:00
2016-02-08 21:50:00
1 days 22:44:00
1
5844
32613217
Department of Transportation
Street Condition
2016-02-06 23:36:15
2016-02-08 06:52:08
Closed
02/16/2016 11:36:15 PM
MANHATTAN
2016-02-06 23:36:15
2016-02-08 06:52:08
1 days 07:15:53
1
5846
32617301
Department of Transportation
Street Light Condition
2016-02-06 23:37:00
2016-02-08 22:50:00
Closed
None
MANHATTAN
2016-02-06 23:37:00
2016-02-08 22:50:00
1 days 23:13:00
1
5848
32612807
Department of Transportation
Street Light Condition
2016-02-06 23:53:00
2016-02-12 22:30:00
Closed
None
MANHATTAN
2016-02-06 23:53:00
2016-02-12 22:30:00
5 days 22:37:00
5
5849
32617735
Department of Transportation
Bridge Condition
2016-02-06 23:54:02
2016-02-08 10:07:29
Closed
08/04/2016 11:54:02 PM
QUEENS
2016-02-06 23:54:02
2016-02-08 10:07:29
1 days 10:13:27
1
5850
32619242
Department of Transportation
Street Light Condition
2016-02-07 00:00:00
2016-02-10 15:19:00
Closed
None
BRONX
2016-02-07 00:00:00
2016-02-10 15:19:00
3 days 15:19:00
3
2607 rows × 12 columns
In [52]:
#range
df1['cleantime'].max()-df1['cleantime'].min()
Out[52]:
108
In [79]:
#Interquartile Range: middle 50% of the data
iqr=df1['cleantime'].quantile(q=0.75)-df1['cleantime'].quantile(q=0.25)
iqr
Out[79]:
27.0
In [81]:
#ual
ual = df1['cleantime'].quantile(q=0.75) + (iqr* 1.5)
ual
Out[81]:
68.5
In [84]:
#lal
lal=df1['cleantime'].quantile(q=0.25) - (iqr*1.5)
lal
Out[84]:
-39.5
In [87]:
#outlier
outlier = df1[df1['cleantime']>ual]
In [88]:
outlier.describe()
Out[88]:
time
cleantime
count
33
33.000000
mean
78 days 14:22:40.303030
78.030303
std
6 days 14:42:00.990269
6.659039
min
69 days 00:41:00
69.000000
25%
71 days 23:58:00
71.000000
50%
78 days 18:49:00
78.000000
75%
84 days 03:08:00
84.000000
max
89 days 18:54:00
89.000000
In [ ]:
Content source: ledeprogram/algorithms
Similar notebooks: