In [2]:
#Perform a basic statistical analysis of the time DOT 311 (table is called dot_311) complaints are open 
#(subtract closed date from created date)

In [3]:
import pandas as pd
import numpy as np
import pg8000

In [4]:
#pandas.read_sql_query(sql, con, index_col=None, coerce_float=True, params=None, parse_dates=None, chunksize=None)

In [5]:
con = pg8000.connect(host="training.c1erymiua9dx.us-east-1.rds.amazonaws.com", database="training", user="dot_student", password="qgis")
df = pd.read_sql_query("SELECT * FROM dot_311", con)

In [6]:
df.head()


Out[6]:
b'gid' b'unique_key' b'agency' b'agency nam' b'complaint' b'descriptor' b'location t' b'incident z' b'incident a' b'street nam' ... b'bridge h_2' b'garage lot' b'ferry dire' b'ferry term' b'latitude' b'longitude' b'location' b'geom' b'created_date' b'closed_date'
0 2 32570549.0 DOT Department of Transportation Traffic Signal Condition Controller None 10301.0 None None ... None None None None 40.613804 -74.113374 (40.61380375315792, -74.1133737388642) 0101000020E61000006829ED83418752C0D7FC121F914E... 2016-02-01 00:12:00 2016-02-01 01:15:00
1 3 32572958.0 DOT Department of Transportation Traffic Signal Condition Controller None 10301.0 None None ... None None None None 40.626545 -74.091582 (40.62654529746493, -74.09158233448058) 0101000020E610000011DF267CDC8552C08809E5A23250... 2016-02-01 00:14:00 2016-02-01 00:54:00
2 4 32573576.0 DOT Department of Transportation Traffic Signal Condition Controller None 10310.0 None None ... None None None None 40.629250 -74.123221 (40.62924955478749, -74.1232207526046) 0101000020E610000004A84BD9E28752C0DC6AD93F8B50... 2016-02-01 00:15:00 2016-02-01 01:30:00
3 5 32572389.0 DOT Department of Transportation Traffic Signal Condition Controller None NaN None None ... None None None None NaN NaN None 0101000020E610000004A84BD9E28752C0DC6AD93F8B50... 2016-02-01 00:17:00 2016-02-01 02:35:00
4 6 32570548.0 DOT Department of Transportation Traffic Signal Condition Controller None NaN None None ... None None None None NaN NaN None 0101000020E610000004A84BD9E28752C0DC6AD93F8B50... 2016-02-01 00:19:00 2016-02-01 01:00:00

5 rows × 55 columns


In [7]:
df.columns


Out[7]:
Index([         b'gid',   b'unique_key',       b'agency',   b'agency nam',
          b'complaint',   b'descriptor',   b'location t',   b'incident z',
         b'incident a',   b'street nam',   b'cross stre',   b'cross st_1',
         b'intersecti',   b'intersec_1',   b'address ty',         b'city',
           b'landmark',   b'facility t',       b'status',     b'due date',
         b'resolution',   b'resoluti_1',    b'community',      b'borough',
         b'x coordina',   b'y coordina',   b'park facil',   b'park borou',
         b'school nam',   b'school num',   b'school reg',   b'school cod',
         b'school pho',   b'school add',   b'school cit',   b'school sta',
         b'school zip',   b'school not',    b'school or',   b'vehicle ty',
         b'taxi compa',    b'taxi pick',   b'bridge hig',   b'bridge h_1',
          b'road ramp',   b'bridge h_2',   b'garage lot',   b'ferry dire',
         b'ferry term',     b'latitude',    b'longitude',     b'location',
               b'geom', b'created_date',  b'closed_date'],
      dtype='object')

In [8]:
df['open_length'] = df["closed_date"] - df["created_date"]


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
c:\users\kate\appdata\local\programs\python\python35-32\lib\site-packages\pandas\indexes\base.py in get_loc(self, key, method, tolerance)
   1944             try:
-> 1945                 return self._engine.get_loc(key)
   1946             except KeyError:

pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:4066)()

pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:3930)()

pandas\hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12408)()

pandas\hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12359)()

KeyError: 'closed_date'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-8-252aaac1e2d7> in <module>()
----> 1 df['open_length'] = df["closed_date"] - df["created_date"]

c:\users\kate\appdata\local\programs\python\python35-32\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
   1995             return self._getitem_multilevel(key)
   1996         else:
-> 1997             return self._getitem_column(key)
   1998 
   1999     def _getitem_column(self, key):

c:\users\kate\appdata\local\programs\python\python35-32\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
   2002         # get column
   2003         if self.columns.is_unique:
-> 2004             return self._get_item_cache(key)
   2005 
   2006         # duplicate columns & possible reduce dimensionality

c:\users\kate\appdata\local\programs\python\python35-32\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
   1348         res = cache.get(item)
   1349         if res is None:
-> 1350             values = self._data.get(item)
   1351             res = self._box_item_values(item, values)
   1352             cache[item] = res

c:\users\kate\appdata\local\programs\python\python35-32\lib\site-packages\pandas\core\internals.py in get(self, item, fastpath)
   3288 
   3289             if not isnull(item):
-> 3290                 loc = self.items.get_loc(item)
   3291             else:
   3292                 indexer = np.arange(len(self.items))[isnull(self.items)]

c:\users\kate\appdata\local\programs\python\python35-32\lib\site-packages\pandas\indexes\base.py in get_loc(self, key, method, tolerance)
   1945                 return self._engine.get_loc(key)
   1946             except KeyError:
-> 1947                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   1948 
   1949         indexer = self.get_indexer([key], method=method, tolerance=tolerance)

pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:4066)()

pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:3930)()

pandas\hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12408)()

pandas\hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12359)()

KeyError: 'closed_date'

In [ ]:


In [ ]:
df['open_length'] = df.apply df['b\\created_date\\'] - df['b\\closed_date\\']

In [ ]:
df['b'created_date']

In [ ]:


In [ ]: