In [2]:
#Perform a basic statistical analysis of the time DOT 311 (table is called dot_311) complaints are open
#(subtract closed date from created date)
In [3]:
import pandas as pd
import numpy as np
import pg8000
In [4]:
#pandas.read_sql_query(sql, con, index_col=None, coerce_float=True, params=None, parse_dates=None, chunksize=None)
In [5]:
con = pg8000.connect(host="training.c1erymiua9dx.us-east-1.rds.amazonaws.com", database="training", user="dot_student", password="qgis")
df = pd.read_sql_query("SELECT * FROM dot_311", con)
In [6]:
df.head()
Out[6]:
b'gid'
b'unique_key'
b'agency'
b'agency nam'
b'complaint'
b'descriptor'
b'location t'
b'incident z'
b'incident a'
b'street nam'
...
b'bridge h_2'
b'garage lot'
b'ferry dire'
b'ferry term'
b'latitude'
b'longitude'
b'location'
b'geom'
b'created_date'
b'closed_date'
0
2
32570549.0
DOT
Department of Transportation
Traffic Signal Condition
Controller
None
10301.0
None
None
...
None
None
None
None
40.613804
-74.113374
(40.61380375315792, -74.1133737388642)
0101000020E61000006829ED83418752C0D7FC121F914E...
2016-02-01 00:12:00
2016-02-01 01:15:00
1
3
32572958.0
DOT
Department of Transportation
Traffic Signal Condition
Controller
None
10301.0
None
None
...
None
None
None
None
40.626545
-74.091582
(40.62654529746493, -74.09158233448058)
0101000020E610000011DF267CDC8552C08809E5A23250...
2016-02-01 00:14:00
2016-02-01 00:54:00
2
4
32573576.0
DOT
Department of Transportation
Traffic Signal Condition
Controller
None
10310.0
None
None
...
None
None
None
None
40.629250
-74.123221
(40.62924955478749, -74.1232207526046)
0101000020E610000004A84BD9E28752C0DC6AD93F8B50...
2016-02-01 00:15:00
2016-02-01 01:30:00
3
5
32572389.0
DOT
Department of Transportation
Traffic Signal Condition
Controller
None
NaN
None
None
...
None
None
None
None
NaN
NaN
None
0101000020E610000004A84BD9E28752C0DC6AD93F8B50...
2016-02-01 00:17:00
2016-02-01 02:35:00
4
6
32570548.0
DOT
Department of Transportation
Traffic Signal Condition
Controller
None
NaN
None
None
...
None
None
None
None
NaN
NaN
None
0101000020E610000004A84BD9E28752C0DC6AD93F8B50...
2016-02-01 00:19:00
2016-02-01 01:00:00
5 rows × 55 columns
In [7]:
df.columns
Out[7]:
Index([ b'gid', b'unique_key', b'agency', b'agency nam',
b'complaint', b'descriptor', b'location t', b'incident z',
b'incident a', b'street nam', b'cross stre', b'cross st_1',
b'intersecti', b'intersec_1', b'address ty', b'city',
b'landmark', b'facility t', b'status', b'due date',
b'resolution', b'resoluti_1', b'community', b'borough',
b'x coordina', b'y coordina', b'park facil', b'park borou',
b'school nam', b'school num', b'school reg', b'school cod',
b'school pho', b'school add', b'school cit', b'school sta',
b'school zip', b'school not', b'school or', b'vehicle ty',
b'taxi compa', b'taxi pick', b'bridge hig', b'bridge h_1',
b'road ramp', b'bridge h_2', b'garage lot', b'ferry dire',
b'ferry term', b'latitude', b'longitude', b'location',
b'geom', b'created_date', b'closed_date'],
dtype='object')
In [8]:
df['open_length'] = df["closed_date"] - df["created_date"]
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
c:\users\kate\appdata\local\programs\python\python35-32\lib\site-packages\pandas\indexes\base.py in get_loc(self, key, method, tolerance)
1944 try:
-> 1945 return self._engine.get_loc(key)
1946 except KeyError:
pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:4066)()
pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:3930)()
pandas\hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12408)()
pandas\hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12359)()
KeyError: 'closed_date'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-8-252aaac1e2d7> in <module>()
----> 1 df['open_length'] = df["closed_date"] - df["created_date"]
c:\users\kate\appdata\local\programs\python\python35-32\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
1995 return self._getitem_multilevel(key)
1996 else:
-> 1997 return self._getitem_column(key)
1998
1999 def _getitem_column(self, key):
c:\users\kate\appdata\local\programs\python\python35-32\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
2002 # get column
2003 if self.columns.is_unique:
-> 2004 return self._get_item_cache(key)
2005
2006 # duplicate columns & possible reduce dimensionality
c:\users\kate\appdata\local\programs\python\python35-32\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
1348 res = cache.get(item)
1349 if res is None:
-> 1350 values = self._data.get(item)
1351 res = self._box_item_values(item, values)
1352 cache[item] = res
c:\users\kate\appdata\local\programs\python\python35-32\lib\site-packages\pandas\core\internals.py in get(self, item, fastpath)
3288
3289 if not isnull(item):
-> 3290 loc = self.items.get_loc(item)
3291 else:
3292 indexer = np.arange(len(self.items))[isnull(self.items)]
c:\users\kate\appdata\local\programs\python\python35-32\lib\site-packages\pandas\indexes\base.py in get_loc(self, key, method, tolerance)
1945 return self._engine.get_loc(key)
1946 except KeyError:
-> 1947 return self._engine.get_loc(self._maybe_cast_indexer(key))
1948
1949 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:4066)()
pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:3930)()
pandas\hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12408)()
pandas\hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12359)()
KeyError: 'closed_date'
In [ ]:
In [ ]:
df['open_length'] = df.apply df['b\\created_date\\'] - df['b\\closed_date\\']
In [ ]:
df['b'created_date']
In [ ]:
In [ ]:
Content source: ledeprogram/algorithms
Similar notebooks: