In [7]:
%pylab inline
import pandas as pd
from glob import glob
import os

pd.set_option('display.max_rows', 20)
pd.set_option('display.max_columns', 12)


Populating the interactive namespace from numpy and matplotlib

In [2]:
fileArray = sort(glob('data/raw/*.csv'))
fileArray


Out[2]:
array(['data/raw/SPY_2009.csv', 'data/raw/SPY_2010.csv',
       'data/raw/SPY_2011.csv', 'data/raw/SPY_2012.csv',
       'data/raw/SPY_2013.csv', 'data/raw/SPY_2014.csv'], 
      dtype='<U21')

In [3]:
loadedDataList = [0.] * len(fileArray)

for i in range(len(fileArray)):
    
    loadedDataList[i] = pd.DataFrame.from_csv(fileArray[i],index_col=False)
    
optionsDataFrame = pd.concat(loadedDataList)


/usr/local/lib/python3.4/dist-packages/pandas/io/parsers.py:1150: DtypeWarning: Columns (19) have mixed types. Specify dtype option on import or set low_memory=False.
  data = self._reader.read(nrows)

In [4]:
optionsDataFrame.columns


Out[4]:
Index(['underlying', 'underlying_last', ' exchange', 'optionroot', 'optionext', 'type', 'expiration', 'quotedate', 'strike', 'last', 'bid', 'ask', 'volume', 'openinterest', 'impliedvol', 'delta', 'gamma', 'theta', 'vega', 'optionalias'], dtype='object')

In [8]:
optionsDataFrame


Out[8]:
underlying underlying_last exchange optionroot optionext type ... impliedvol delta gamma theta vega optionalias
0 SPY 100.20 * SXO090919C00020000 NaN call ... 2.9366 0.9979 0.0105 -1.2618 0.1445 SXOIT
1 SPY 100.20 * SXO090919C00024000 NaN call ... 2.6070 0.9976 0.0132 -1.2512 0.1611 SXOIX
2 SPY 100.20 * SXO090919C00025000 NaN call ... 2.5337 0.9975 0.0139 -1.2485 0.1653 SXOIY
3 SPY 100.20 * SXO090919C00026000 NaN call ... 2.4635 0.9975 0.0147 -1.2457 0.1695 SXOIZ
4 SPY 100.20 * SXO090919C00027000 NaN call ... 2.3964 0.9974 0.0155 -1.2438 0.1739 SXOIA
5 SPY 100.20 * SIY090919C00028000 NaN call ... 2.3315 0.9973 0.0163 -1.2408 0.1782 SIYIB
6 SPY 100.20 * SIY090919C00029000 NaN call ... 2.2690 0.9973 0.0172 -1.2372 0.1825 SIYIC
7 SPY 100.20 * SIY090919C00030000 NaN call ... 2.2087 0.9972 0.0181 -1.2339 0.1868 SIYID
8 SPY 100.20 * SIY090919C00031000 NaN call ... 2.1503 0.9971 0.0190 -1.2299 0.1911 SIYIE
9 SPY 100.20 * SIY090919C00032000 NaN call ... 2.0939 0.9970 0.0200 -1.2257 0.1955 SIYIF
... ... ... ... ... ... ... ... ... ... ... ... ... ...
679402 SPY 196.43 * SPY161216P00255000 NaN put ... 0.2077 -0.7542 0.5268 -1.0551 90.6617 NaN
679403 SPY 196.43 * SPY161216P00260000 NaN put ... 0.2121 -0.7671 0.5008 -1.0402 88.0257 NaN
679404 SPY 196.43 * SPY161216P00265000 NaN put ... 0.2166 -0.7786 0.4765 -1.0266 85.5447 NaN
679405 SPY 196.43 * SPY161216P00270000 NaN put ... 0.2217 -0.7882 0.4539 -1.0189 83.3749 NaN
679406 SPY 196.43 * SPY161216P00275000 NaN put ... 0.2281 -0.7949 0.4329 -1.0259 81.8151 NaN
679407 SPY 196.43 * SPY161216P00280000 NaN put ... 0.2338 -0.8017 0.4139 -1.0273 80.1846 NaN
679408 SPY 196.43 * SPY161216P00285000 NaN put ... 0.2393 -0.8081 0.3964 -1.0268 78.6002 NaN
679409 SPY 196.43 * SPY161216P00290000 NaN put ... 0.2441 -0.8147 0.3803 -1.0205 76.9170 NaN
679410 SPY 196.43 * SPY161216P00295000 NaN put ... 0.2495 -0.8198 0.3656 -1.0223 75.5944 NaN
679411 SPY 196.43 * SPY161216P00300000 NaN put ... 0.2549 -0.8244 0.3520 -1.0242 74.3589 NaN

3377075 rows × 20 columns


In [8]:
# optionsDataFrame['quotedate'] = pd.to_datetime(optionsDataFrame['quotedate'])
# optionsDataFrame['expiration'] = pd.to_datetime(optionsDataFrame['expiration'])

In [9]:
toSave = optionsDataFrame[['optionroot',
                           'quotedate','expiration','underlying_last','type',
                           'strike','last','bid','ask','volume',
                           'impliedvol','delta', 'gamma', 'theta', 'vega']]
toSave


Out[9]:
optionroot quotedate expiration underlying_last type strike ... volume impliedvol delta gamma theta vega
0 SXO090919C00020000 09/01/2009 09/19/2009 100.20 call 20 ... 0 2.9366 0.9979 0.0105 -1.2618 0.1445
1 SXO090919C00024000 09/01/2009 09/19/2009 100.20 call 24 ... 0 2.6070 0.9976 0.0132 -1.2512 0.1611
2 SXO090919C00025000 09/01/2009 09/19/2009 100.20 call 25 ... 0 2.5337 0.9975 0.0139 -1.2485 0.1653
3 SXO090919C00026000 09/01/2009 09/19/2009 100.20 call 26 ... 0 2.4635 0.9975 0.0147 -1.2457 0.1695
4 SXO090919C00027000 09/01/2009 09/19/2009 100.20 call 27 ... 0 2.3964 0.9974 0.0155 -1.2438 0.1739
5 SIY090919C00028000 09/01/2009 09/19/2009 100.20 call 28 ... 0 2.3315 0.9973 0.0163 -1.2408 0.1782
6 SIY090919C00029000 09/01/2009 09/19/2009 100.20 call 29 ... 0 2.2690 0.9973 0.0172 -1.2372 0.1825
7 SIY090919C00030000 09/01/2009 09/19/2009 100.20 call 30 ... 4 2.2087 0.9972 0.0181 -1.2339 0.1868
8 SIY090919C00031000 09/01/2009 09/19/2009 100.20 call 31 ... 0 2.1503 0.9971 0.0190 -1.2299 0.1911
9 SIY090919C00032000 09/01/2009 09/19/2009 100.20 call 32 ... 0 2.0939 0.9970 0.0200 -1.2257 0.1955
... ... ... ... ... ... ... ... ... ... ... ... ... ...
679402 SPY161216P00255000 10/24/2014 12/17/2016 196.43 put 255 ... 0 0.2077 -0.7542 0.5268 -1.0551 90.6617
679403 SPY161216P00260000 10/24/2014 12/17/2016 196.43 put 260 ... 0 0.2121 -0.7671 0.5008 -1.0402 88.0257
679404 SPY161216P00265000 10/24/2014 12/17/2016 196.43 put 265 ... 0 0.2166 -0.7786 0.4765 -1.0266 85.5447
679405 SPY161216P00270000 10/24/2014 12/17/2016 196.43 put 270 ... 0 0.2217 -0.7882 0.4539 -1.0189 83.3749
679406 SPY161216P00275000 10/24/2014 12/17/2016 196.43 put 275 ... 0 0.2281 -0.7949 0.4329 -1.0259 81.8151
679407 SPY161216P00280000 10/24/2014 12/17/2016 196.43 put 280 ... 0 0.2338 -0.8017 0.4139 -1.0273 80.1846
679408 SPY161216P00285000 10/24/2014 12/17/2016 196.43 put 285 ... 0 0.2393 -0.8081 0.3964 -1.0268 78.6002
679409 SPY161216P00290000 10/24/2014 12/17/2016 196.43 put 290 ... 0 0.2441 -0.8147 0.3803 -1.0205 76.9170
679410 SPY161216P00295000 10/24/2014 12/17/2016 196.43 put 295 ... 0 0.2495 -0.8198 0.3656 -1.0223 75.5944
679411 SPY161216P00300000 10/24/2014 12/17/2016 196.43 put 300 ... 106 0.2549 -0.8244 0.3520 -1.0242 74.3589

3377075 rows × 15 columns


In [10]:
toSave.to_csv('data/pandas/SPY.csv')

In [ ]: