In [53]:
import pandas as pd
import pg8000
import dateutil.parser
%matplotlib inline

In [38]:
conn = pg8000.connect(host="training.c1erymiua9dx.us-east-1.rds.amazonaws.com", user='dot_student', password='qgis', database='training')
cursor = conn.cursor()

In [39]:
cursor.execute("select * from dot_311")
data = []
for row in cursor.fetchall():
    data.append(row)

In [40]:
#column name 
statement = "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS WHERE table_name = 'dot_311'"
cursor.execute(statement)
columns = []
for row in cursor.fetchall():
    columns.append(row[0])
print(columns)


['gid', 'unique_key', 'agency', 'agency nam', 'complaint', 'descriptor', 'location t', 'incident z', 'incident a', 'street nam', 'cross stre', 'cross st_1', 'intersecti', 'intersec_1', 'address ty', 'city', 'landmark', 'facility t', 'status', 'due date', 'resolution', 'resoluti_1', 'community', 'borough', 'x coordina', 'y coordina', 'park facil', 'park borou', 'school nam', 'school num', 'school reg', 'school cod', 'school pho', 'school add', 'school cit', 'school sta', 'school zip', 'school not', 'school or', 'vehicle ty', 'taxi compa', 'taxi pick', 'bridge hig', 'bridge h_1', 'road ramp', 'bridge h_2', 'garage lot', 'ferry dire', 'ferry term', 'latitude', 'longitude', 'location', 'geom', 'created_date', 'closed_date']

In [41]:
df=pd.DataFrame(data=data, columns=columns)

In [42]:
df.head()


Out[42]:
gid unique_key agency agency nam complaint descriptor location t incident z incident a street nam ... bridge h_2 garage lot ferry dire ferry term latitude longitude location geom created_date closed_date
0 2 32570549 DOT Department of Transportation Traffic Signal Condition Controller None 10301 None None ... None None None None 40.613803753157917 -74.113373738864198 (40.61380375315792, -74.1133737388642) 0101000020E61000006829ED83418752C0D7FC121F914E... 2016-02-01 00:12:00 2016-02-01 01:15:00
1 3 32572958 DOT Department of Transportation Traffic Signal Condition Controller None 10301 None None ... None None None None 40.626545297464929 -74.091582334480577 (40.62654529746493, -74.09158233448058) 0101000020E610000011DF267CDC8552C08809E5A23250... 2016-02-01 00:14:00 2016-02-01 00:54:00
2 4 32573576 DOT Department of Transportation Traffic Signal Condition Controller None 10310 None None ... None None None None 40.629249554787492 -74.123220752604595 (40.62924955478749, -74.1232207526046) 0101000020E610000004A84BD9E28752C0DC6AD93F8B50... 2016-02-01 00:15:00 2016-02-01 01:30:00
3 5 32572389 DOT Department of Transportation Traffic Signal Condition Controller None None None None ... None None None None None None None 0101000020E610000004A84BD9E28752C0DC6AD93F8B50... 2016-02-01 00:17:00 2016-02-01 02:35:00
4 6 32570548 DOT Department of Transportation Traffic Signal Condition Controller None None None None ... None None None None None None None 0101000020E610000004A84BD9E28752C0DC6AD93F8B50... 2016-02-01 00:19:00 2016-02-01 01:00:00

5 rows × 55 columns


In [43]:
new_df=df[['unique_key','agency nam','complaint','created_date','closed_date','status','due date','borough']]

In [44]:
new_df.columns


Out[44]:
Index(['unique_key', 'agency nam', 'complaint', 'created_date', 'closed_date',
       'status', 'due date', 'borough'],
      dtype='object')

In [46]:
new_df['new_created_date'] = pd.to_datetime(new_df['created_date'],format='%m/%d/%y %H:%M:%S')


/usr/local/lib/python3.5/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':

In [47]:
new_df['new_closed_date'] = pd.to_datetime(new_df['closed_date'],format='%m/%d/%y %H:%M:%S')


/usr/local/lib/python3.5/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':

In [48]:
new_df['time']=new_df['closed_date']-new_df['created_date']


/usr/local/lib/python3.5/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':

In [49]:
# clean the dirty data
def time_to_day(td):
    try: #not sure all the NaN data types, use try
        return td.days
    except:
        return 0 #turn all the null number into 0
new_df['cleantime']=new_df['time'].apply(time_to_day)


/usr/local/lib/python3.5/site-packages/ipykernel/__main__.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

In [50]:
new_df['cleantime'].describe()


Out[50]:
count    5851.000000
mean        6.021022
std        14.192718
min       -19.000000
25%         0.000000
50%         0.000000
75%         3.000000
max        89.000000
Name: cleantime, dtype: float64

In [73]:
df1 = new_df[new_df['cleantime']>0] #remove the negative cleantime

In [74]:
#standard deviation: average distance of each data point from mean
df1


Out[74]:
unique_key agency nam complaint created_date closed_date status due date borough new_created_date new_closed_date time cleantime
11 32570682 Department of Transportation Street Light Condition 2016-02-01 00:44:00 2016-02-08 12:29:00 Closed None BROOKLYN 2016-02-01 00:44:00 2016-02-08 12:29:00 7 days 11:45:00 7
12 32572906 Department of Transportation Traffic Signal Condition 2016-02-01 00:50:00 2016-02-10 09:10:00 Closed None BROOKLYN 2016-02-01 00:50:00 2016-02-10 09:10:00 9 days 08:20:00 9
16 32570317 Department of Transportation Street Condition 2016-02-01 01:18:49 2016-02-03 16:59:00 Closed 03/02/2016 01:18:49 AM BROOKLYN 2016-02-01 01:18:49 2016-02-03 16:59:00 2 days 15:40:11 2
17 32581124 Department of Transportation Street Light Condition 2016-02-01 01:20:00 2016-02-05 12:04:00 Closed None QUEENS 2016-02-01 01:20:00 2016-02-05 12:04:00 4 days 10:44:00 4
18 32579277 Department of Transportation Street Light Condition 2016-02-01 01:36:00 2016-02-04 10:37:00 Closed None BRONX 2016-02-01 01:36:00 2016-02-04 10:37:00 3 days 09:01:00 3
19 32580720 Department of Transportation Street Light Condition 2016-02-01 01:36:00 2016-02-02 13:09:00 Closed None STATEN ISLAND 2016-02-01 01:36:00 2016-02-02 13:09:00 1 days 11:33:00 1
21 32580285 Department of Transportation Street Light Condition 2016-02-01 02:07:00 2016-02-08 15:25:00 Closed None QUEENS 2016-02-01 02:07:00 2016-02-08 15:25:00 7 days 13:18:00 7
22 32575419 Department of Transportation Sidewalk Condition 2016-02-01 02:32:08 2016-02-04 12:06:14 Closed 04/01/2016 02:32:08 AM BROOKLYN 2016-02-01 02:32:08 2016-02-04 12:06:14 3 days 09:34:06 3
24 32581170 Department of Transportation Street Light Condition 2016-02-01 03:21:00 2016-02-02 14:57:00 Closed None BROOKLYN 2016-02-01 03:21:00 2016-02-02 14:57:00 1 days 11:36:00 1
27 32577440 Department of Transportation Street Light Condition 2016-02-01 03:26:00 2016-02-03 10:03:00 Closed None MANHATTAN 2016-02-01 03:26:00 2016-02-03 10:03:00 2 days 06:37:00 2
30 32618578 Department of Transportation Street Light Condition 2016-02-06 15:22:00 2016-02-08 15:23:00 Closed None BRONX 2016-02-06 15:22:00 2016-02-08 15:23:00 2 days 00:01:00 2
31 32580055 Department of Transportation Street Condition 2016-02-01 03:59:57 2016-02-02 07:25:35 Closed 05/01/2016 03:59:57 AM BRONX 2016-02-01 03:59:57 2016-02-02 07:25:35 1 days 03:25:38 1
32 32574624 Department of Transportation Traffic Signal Condition 2016-02-01 04:05:00 2016-03-31 09:45:00 Closed None QUEENS 2016-02-01 04:05:00 2016-03-31 09:45:00 59 days 05:40:00 59
34 32576077 Department of Transportation Traffic Signal Condition 2016-02-01 04:48:00 2016-02-24 11:15:00 Closed None QUEENS 2016-02-01 04:48:00 2016-02-24 11:15:00 23 days 06:27:00 23
37 32577388 Department of Transportation Street Light Condition 2016-02-01 05:46:00 2016-02-02 13:12:00 Closed None BRONX 2016-02-01 05:46:00 2016-02-02 13:12:00 1 days 07:26:00 1
38 32578910 Department of Transportation Traffic Signal Condition 2016-02-01 05:59:00 2016-04-04 08:30:00 Closed None MANHATTAN 2016-02-01 05:59:00 2016-04-04 08:30:00 63 days 02:31:00 63
43 32577924 Department of Transportation Sidewalk Condition 2016-02-01 06:48:29 2016-02-02 10:12:11 Closed 02/04/2016 06:48:29 AM BRONX 2016-02-01 06:48:29 2016-02-02 10:12:11 1 days 03:23:42 1
45 32578695 Department of Transportation Street Condition 2016-02-01 06:56:07 2016-02-02 09:10:00 Closed None STATEN ISLAND 2016-02-01 06:56:07 2016-02-02 09:10:00 1 days 02:13:53 1
47 32574619 Department of Transportation Traffic Signal Condition 2016-02-01 06:57:00 2016-02-02 09:45:00 Closed None MANHATTAN 2016-02-01 06:57:00 2016-02-02 09:45:00 1 days 02:48:00 1
49 32588484 Department of Transportation Street Condition 2016-02-01 07:00:00 2016-02-02 12:54:00 Closed None STATEN ISLAND 2016-02-01 07:00:00 2016-02-02 12:54:00 1 days 05:54:00 1
51 32587340 Department of Transportation Street Condition 2016-02-01 07:00:00 2016-02-02 09:55:00 Closed None STATEN ISLAND 2016-02-01 07:00:00 2016-02-02 09:55:00 1 days 02:55:00 1
52 32586269 Department of Transportation Street Condition 2016-02-01 07:00:00 2016-02-02 10:40:00 Closed None STATEN ISLAND 2016-02-01 07:00:00 2016-02-02 10:40:00 1 days 03:40:00 1
53 32585093 Department of Transportation Street Condition 2016-02-01 07:00:00 2016-02-02 13:30:00 Closed None STATEN ISLAND 2016-02-01 07:00:00 2016-02-02 13:30:00 1 days 06:30:00 1
54 32586236 Department of Transportation Street Condition 2016-02-01 07:00:00 2016-02-02 12:50:00 Closed None STATEN ISLAND 2016-02-01 07:00:00 2016-02-02 12:50:00 1 days 05:50:00 1
55 32589645 Department of Transportation Street Condition 2016-02-01 07:00:00 2016-02-02 08:47:00 Closed None STATEN ISLAND 2016-02-01 07:00:00 2016-02-02 08:47:00 1 days 01:47:00 1
56 32589543 Department of Transportation Street Condition 2016-02-01 07:00:00 2016-02-02 13:10:00 Closed None STATEN ISLAND 2016-02-01 07:00:00 2016-02-02 13:10:00 1 days 06:10:00 1
57 32586272 Department of Transportation Street Condition 2016-02-01 07:00:00 2016-02-02 10:25:00 Closed None STATEN ISLAND 2016-02-01 07:00:00 2016-02-02 10:25:00 1 days 03:25:00 1
58 32585094 Department of Transportation Street Condition 2016-02-01 07:00:00 2016-02-02 11:20:00 Closed None STATEN ISLAND 2016-02-01 07:00:00 2016-02-02 11:20:00 1 days 04:20:00 1
59 32590796 Department of Transportation Street Condition 2016-02-01 07:00:00 2016-02-02 11:10:00 Closed None STATEN ISLAND 2016-02-01 07:00:00 2016-02-02 11:10:00 1 days 04:10:00 1
60 32586270 Department of Transportation Street Condition 2016-02-01 07:00:00 2016-02-02 08:10:00 Closed None STATEN ISLAND 2016-02-01 07:00:00 2016-02-02 08:10:00 1 days 01:10:00 1
... ... ... ... ... ... ... ... ... ... ... ... ...
5815 32611950 Department of Transportation Street Light Condition 2016-02-06 21:47:00 2016-02-08 12:25:00 Closed None BROOKLYN 2016-02-06 21:47:00 2016-02-08 12:25:00 1 days 14:38:00 1
5816 32614779 Department of Transportation Street Condition 2016-02-06 21:52:09 2016-02-09 11:00:00 Closed None QUEENS 2016-02-06 21:52:09 2016-02-09 11:00:00 2 days 13:07:51 2
5817 32616693 Department of Transportation Street Condition 2016-02-06 21:52:28 2016-02-10 07:56:00 Closed None QUEENS 2016-02-06 21:52:28 2016-02-10 07:56:00 3 days 10:03:32 3
5818 32618591 Department of Transportation Street Condition 2016-02-06 21:56:54 2016-02-08 11:00:00 Closed None MANHATTAN 2016-02-06 21:56:54 2016-02-08 11:00:00 1 days 13:03:06 1
5819 32614712 Department of Transportation Street Condition 2016-02-06 21:59:19 2016-02-09 13:21:00 Closed None QUEENS 2016-02-06 21:59:19 2016-02-09 13:21:00 2 days 15:21:41 2
5820 32619052 Department of Transportation Street Condition 2016-02-06 22:06:38 2016-02-11 13:07:20 Closed 02/16/2016 10:06:38 PM BROOKLYN 2016-02-06 22:06:38 2016-02-11 13:07:20 4 days 15:00:42 4
5821 32614085 Department of Transportation Street Condition 2016-02-06 22:07:24 2016-02-09 15:23:13 Closed 02/15/2016 11:36:03 AM BRONX 2016-02-06 22:07:24 2016-02-09 15:23:13 2 days 17:15:49 2
5822 32612339 Department of Transportation Street Condition 2016-02-06 22:23:07 2016-02-08 03:08:37 Closed 02/16/2016 10:23:07 PM MANHATTAN 2016-02-06 22:23:07 2016-02-08 03:08:37 1 days 04:45:30 1
5823 32612333 Department of Transportation Street Condition 2016-02-06 22:25:28 2016-02-08 03:09:49 Closed 02/16/2016 10:25:28 PM MANHATTAN 2016-02-06 22:25:28 2016-02-08 03:09:49 1 days 04:44:21 1
5824 32614717 Department of Transportation Street Light Condition 2016-02-06 22:27:00 2016-02-08 11:26:00 Closed None QUEENS 2016-02-06 22:27:00 2016-02-08 11:26:00 1 days 12:59:00 1
5825 32617685 Department of Transportation Street Light Condition 2016-02-06 22:32:00 2016-02-09 12:07:00 Closed None BROOKLYN 2016-02-06 22:32:00 2016-02-09 12:07:00 2 days 13:35:00 2
5826 32615195 Department of Transportation Street Condition 2016-02-06 22:34:14 2016-02-08 03:08:37 Closed 02/16/2016 10:23:07 PM MANHATTAN 2016-02-06 22:34:14 2016-02-08 03:08:37 1 days 04:34:23 1
5827 32613947 Department of Transportation Highway Condition 2016-02-06 22:34:34 2016-02-08 14:39:00 Closed 02/16/2016 10:34:34 PM BROOKLYN 2016-02-06 22:34:34 2016-02-08 14:39:00 1 days 16:04:26 1
5828 32612566 Department of Transportation Street Light Condition 2016-02-06 22:35:00 2016-03-16 10:39:00 Closed None BROOKLYN 2016-02-06 22:35:00 2016-03-16 10:39:00 38 days 12:04:00 38
5829 32614725 Department of Transportation Street Light Condition 2016-02-06 22:41:00 2016-02-11 09:43:00 Closed None QUEENS 2016-02-06 22:41:00 2016-02-11 09:43:00 4 days 11:02:00 4
5830 32617705 Department of Transportation Street Sign - Missing 2016-02-06 22:42:29 2016-02-26 10:13:26 Closed 08/06/2016 07:35:46 AM MANHATTAN 2016-02-06 22:42:29 2016-02-26 10:13:26 19 days 11:30:57 19
5832 32617828 Department of Transportation Curb Condition 2016-02-06 22:47:30 2016-02-12 15:18:14 Closed 03/07/2016 10:47:30 PM BRONX 2016-02-06 22:47:30 2016-02-12 15:18:14 5 days 16:30:44 5
5833 32618570 Department of Transportation Street Sign - Missing 2016-02-06 22:49:43 2016-02-22 10:04:38 Closed 08/06/2016 07:43:15 AM MANHATTAN 2016-02-06 22:49:43 2016-02-22 10:04:38 15 days 11:14:55 15
5834 32616052 Department of Transportation Street Condition 2016-02-06 22:53:35 2016-02-17 09:21:23 Closed 02/16/2016 10:53:35 PM BROOKLYN 2016-02-06 22:53:35 2016-02-17 09:21:23 10 days 10:27:48 10
5835 32612279 Department of Transportation Street Condition 2016-02-06 22:55:05 2016-02-08 04:09:00 Closed 02/16/2016 10:55:05 PM MANHATTAN 2016-02-06 22:55:05 2016-02-08 04:09:00 1 days 05:13:55 1
5836 32614241 Department of Transportation Street Condition 2016-02-06 22:56:01 2016-02-08 13:32:54 Closed 02/16/2016 10:56:01 PM MANHATTAN 2016-02-06 22:56:01 2016-02-08 13:32:54 1 days 14:36:53 1
5837 32617383 Department of Transportation Street Light Condition 2016-02-06 22:59:00 2016-02-08 11:27:00 Closed None QUEENS 2016-02-06 22:59:00 2016-02-08 11:27:00 1 days 12:28:00 1
5838 32618573 Department of Transportation Street Condition 2016-02-06 23:02:17 2016-02-08 23:46:00 Closed None MANHATTAN 2016-02-06 23:02:17 2016-02-08 23:46:00 2 days 00:43:43 2
5839 32617669 Department of Transportation Street Condition 2016-02-06 23:02:35 2016-02-16 09:45:00 Closed None MANHATTAN 2016-02-06 23:02:35 2016-02-16 09:45:00 9 days 10:42:25 9
5840 32612533 Department of Transportation Street Light Condition 2016-02-06 23:06:00 2016-02-08 21:50:00 Closed None MANHATTAN 2016-02-06 23:06:00 2016-02-08 21:50:00 1 days 22:44:00 1
5844 32613217 Department of Transportation Street Condition 2016-02-06 23:36:15 2016-02-08 06:52:08 Closed 02/16/2016 11:36:15 PM MANHATTAN 2016-02-06 23:36:15 2016-02-08 06:52:08 1 days 07:15:53 1
5846 32617301 Department of Transportation Street Light Condition 2016-02-06 23:37:00 2016-02-08 22:50:00 Closed None MANHATTAN 2016-02-06 23:37:00 2016-02-08 22:50:00 1 days 23:13:00 1
5848 32612807 Department of Transportation Street Light Condition 2016-02-06 23:53:00 2016-02-12 22:30:00 Closed None MANHATTAN 2016-02-06 23:53:00 2016-02-12 22:30:00 5 days 22:37:00 5
5849 32617735 Department of Transportation Bridge Condition 2016-02-06 23:54:02 2016-02-08 10:07:29 Closed 08/04/2016 11:54:02 PM QUEENS 2016-02-06 23:54:02 2016-02-08 10:07:29 1 days 10:13:27 1
5850 32619242 Department of Transportation Street Light Condition 2016-02-07 00:00:00 2016-02-10 15:19:00 Closed None BRONX 2016-02-07 00:00:00 2016-02-10 15:19:00 3 days 15:19:00 3

2607 rows × 12 columns


In [52]:
#range
df1['cleantime'].max()-df1['cleantime'].min()


Out[52]:
108

In [79]:
#Interquartile Range: middle 50% of the data
iqr=df1['cleantime'].quantile(q=0.75)-df1['cleantime'].quantile(q=0.25)
iqr


Out[79]:
27.0

In [81]:
#ual
ual = df1['cleantime'].quantile(q=0.75) + (iqr* 1.5)
ual


Out[81]:
68.5

In [84]:
#lal
lal=df1['cleantime'].quantile(q=0.25) - (iqr*1.5) 
lal


Out[84]:
-39.5

In [87]:
#outlier
outlier = df1[df1['cleantime']>ual]

In [88]:
outlier.describe()


Out[88]:
time cleantime
count 33 33.000000
mean 78 days 14:22:40.303030 78.030303
std 6 days 14:42:00.990269 6.659039
min 69 days 00:41:00 69.000000
25% 71 days 23:58:00 71.000000
50% 78 days 18:49:00 78.000000
75% 84 days 03:08:00 84.000000
max 89 days 18:54:00 89.000000

In [ ]: