In [9]:
import pandas as pd
import os

In [10]:
files = os.listdir('../raw_data')

In [11]:
data = {f: pd.read_excel('../raw_data/'+f) for f in files }


---------------------------------------------------------------------------
XLRDError                                 Traceback (most recent call last)
<ipython-input-11-16ab705ff402> in <module>()
----> 1 data = {f: pd.read_excel('../raw_data/'+f) for f in files }

<ipython-input-11-16ab705ff402> in <dictcomp>(.0)
----> 1 data = {f: pd.read_excel('../raw_data/'+f) for f in files }

/home/larry/envs/cityview/lib/python3.5/site-packages/pandas/io/excel.py in read_excel(io, sheetname, header, skiprows, skip_footer, index_col, names, parse_cols, parse_dates, date_parser, na_values, thousands, convert_float, has_index_names, converters, true_values, false_values, engine, squeeze, **kwds)
    189 
    190     if not isinstance(io, ExcelFile):
--> 191         io = ExcelFile(io, engine=engine)
    192 
    193     return io._parse_excel(

/home/larry/envs/cityview/lib/python3.5/site-packages/pandas/io/excel.py in __init__(self, io, **kwds)
    247             self.book = xlrd.open_workbook(file_contents=data)
    248         elif isinstance(io, compat.string_types):
--> 249             self.book = xlrd.open_workbook(io)
    250         else:
    251             raise ValueError('Must explicitly set engine if not passing in'

/home/larry/envs/cityview/lib/python3.5/site-packages/xlrd/__init__.py in open_workbook(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)
    439         formatting_info=formatting_info,
    440         on_demand=on_demand,
--> 441         ragged_rows=ragged_rows,
    442         )
    443     return bk

/home/larry/envs/cityview/lib/python3.5/site-packages/xlrd/book.py in open_workbook_xls(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)
     89         t1 = time.clock()
     90         bk.load_time_stage_1 = t1 - t0
---> 91         biff_version = bk.getbof(XL_WORKBOOK_GLOBALS)
     92         if not biff_version:
     93             raise XLRDError("Can't determine file's BIFF version")

/home/larry/envs/cityview/lib/python3.5/site-packages/xlrd/book.py in getbof(self, rqd_stream)
   1228             bof_error('Expected BOF record; met end of file')
   1229         if opcode not in bofcodes:
-> 1230             bof_error('Expected BOF record; found %r' % self.mem[savpos:savpos+8])
   1231         length = self.get2bytes()
   1232         if length == MY_EOF:

/home/larry/envs/cityview/lib/python3.5/site-packages/xlrd/book.py in bof_error(msg)
   1222         if DEBUG: print("reqd: 0x%04x" % rqd_stream, file=self.logfile)
   1223         def bof_error(msg):
-> 1224             raise XLRDError('Unsupported format, or corrupt file: ' + msg)
   1225         savpos = self._position
   1226         opcode = self.get2bytes()

XLRDError: Unsupported format, or corrupt file: Expected BOF record; found b',larry,p'

In [ ]:
parking = data['Hack4Cause_Parking Citations FY 2008.xls']
parking.columns

In [ ]:
clean = parking.dropna(how='all')
clean

In [ ]:
%load_ext sql

%sql "postgresql://cityview:testthis@localhost/cityview"

In [ ]: