Import necessary modules


In [1]:
import time
import datetime
import pandas as pd
import numpy as np
import datetime as dt
from collections import OrderedDict
from copy import copy
import warnings
import matplotlib.pyplot as plt
%matplotlib inline

Filepath management


In [2]:
project_dir = r'/Users/hudson/Code/marketModel/'
price_path = project_dir + r'stock_price_data/'

"BarChart" api access


In [3]:
apikey = 'a207db3b2e61eac30ed9b9cd18b2e0d0'

def construct_barChart_url(sym, start_date, freq='minutes', interval='1', api_key=apikey):
    '''Function to construct barchart api url
    Get the barChart url for
    * sym -- the ticker symbol
    * start_date -- the earliest time in format yyyymmddhhmmss
    * freq -- ['days', 'hours', 'minutes', 'seconds']
    * api_key the barChart api key
    '''
    url = 'http://marketdata.websol.barchart.com/getHistory.csv?' +\
            'key={}&symbol={}&type={}&startDate={}'\
    .format(api_key, sym, freq, start_date)

    return url

In [4]:
api_test_url = construct_barChart_url('GOOG', '20170701000000', 'minutes', apikey)
goog = pd.read_csv(api_test_url, parse_dates=['timestamp'])

In [5]:
print goog.head()
print goog.describe()


  symbol           timestamp  tradingDay     open    high       low    close  \
0   GOOG 2017-06-07 13:30:00  2017-06-07  979.650  982.64  978.9001  982.640   
1   GOOG 2017-06-07 13:31:00  2017-06-07  983.315  983.94  981.3300  981.970   
2   GOOG 2017-06-07 13:32:00  2017-06-07  981.990  982.70  981.6200  982.649   
3   GOOG 2017-06-07 13:33:00  2017-06-07  982.200  982.56  981.3000  981.520   
4   GOOG 2017-06-07 13:34:00  2017-06-07  981.920  982.26  981.5400  982.090   

   volume  
0   25071  
1    7533  
2    4347  
3    8057  
4    1329  
               open          high           low         close         volume
count  23938.000000  23938.000000  23938.000000  23938.000000   23938.000000
mean     937.034462    937.280703    936.776159    937.023761    2785.592823
std       19.890428     19.879601     19.901709     19.888237    8059.826463
min      894.860000    895.740000    894.790000    895.000000     100.000000
25%      921.810000    922.010000    921.580000    921.800000     700.000000
50%      931.650000    931.902350    931.330000    931.620000    1450.000000
75%      953.150000    953.370000    952.860000    953.103750    2979.750000
max      985.850000    986.200000    985.680000    985.860000  597376.000000

Now start pulling in data for our sample stock symbols


In [22]:
symbols = pd.read_csv(project_dir + 'data/stock_data/symbols.csv')

In [36]:
# Pull data for all the test symbols
days_prior_to_now = 30
current = datetime.datetime.now()
starttime = (current - datetime.timedelta(days=days_prior_to_now)).strftime('%Y%m%d%H%M00')
print "stock data start time: " + starttime
prices = {}

for i, symbol in enumerate(symbols.ticker_symbol):
    print "Progress: {} of {} tickers. Current ticker: {}".format(i, len(symbols), symbol)
    
    # Construct the appropriate URL
    url = construct_barChart_url(symbol, starttime, 'minutes', apikey)
        
    try:
        # Read the data from the url
        data = pd.read_csv(url, parse_dates=['timestamp']).set_index('timestamp')

        # Drop the symbol and trading day columns
        data = data.drop(['symbol','tradingDay'], axis=1)

        # Convert the times to eastern time zone
        data.index = data.index.tz_localize('utc').tz_convert('US/Eastern')

        # Add data to prices dictionary
        prices[symbol] = data
    except:
        print "Failed to load data for " + symbol
        continue


stock data start time: 20170808161300
Progress: 0 of 26 tickers. Current ticker: ARDM
Progress: 1 of 26 tickers. Current ticker: MICR
Progress: 2 of 26 tickers. Current ticker: AVIR
Progress: 3 of 26 tickers. Current ticker: SEAC
Progress: 4 of 26 tickers. Current ticker: CPST
Progress: 5 of 26 tickers. Current ticker: GBR
Progress: 6 of 26 tickers. Current ticker: DTRM
Progress: 7 of 26 tickers. Current ticker: WTT
Progress: 8 of 26 tickers. Current ticker: ASTC
Progress: 9 of 26 tickers. Current ticker: REFR
Progress: 10 of 26 tickers. Current ticker: JVA
Progress: 11 of 26 tickers. Current ticker: ATLC
Progress: 12 of 26 tickers. Current ticker: CYAN
Progress: 13 of 26 tickers. Current ticker: VSR
Progress: 14 of 26 tickers. Current ticker: MOC
Progress: 15 of 26 tickers. Current ticker: JCS
Progress: 16 of 26 tickers. Current ticker: CODA
Progress: 17 of 26 tickers. Current ticker: PAVM
Progress: 18 of 26 tickers. Current ticker: FORD
Progress: 19 of 26 tickers. Current ticker: LRAD
Progress: 20 of 26 tickers. Current ticker: UTSI
Progress: 21 of 26 tickers. Current ticker: APHB
Progress: 22 of 26 tickers. Current ticker: LEU
Progress: 23 of 26 tickers. Current ticker: BNSO
Progress: 24 of 26 tickers. Current ticker: ZDGE
Progress: 25 of 26 tickers. Current ticker: WSTL

In [37]:
# Concatenate all of the stock data into a multiIndex dataframe
stock_data = pd.concat(prices.values(), keys=prices.keys())
stock_data.index.set_names(['ticker', 'timestamp'], inplace=True)
stock_data


Out[37]:
open high low close volume
ticker timestamp
ASTC 2017-08-09 09:30:00-04:00 0.7900 0.7900 0.7900 0.7900 300
2017-08-09 09:35:00-04:00 0.7900 0.7900 0.7900 0.7900 300
2017-08-09 10:03:00-04:00 0.8399 0.8399 0.8399 0.8399 100
2017-08-09 10:31:00-04:00 0.8200 0.8399 0.8200 0.8399 1686
2017-08-09 10:32:00-04:00 0.8100 0.8100 0.8100 0.8100 400
2017-08-09 10:38:00-04:00 0.8162 0.8162 0.8162 0.8162 5000
2017-08-09 11:01:00-04:00 0.8382 0.8382 0.8382 0.8382 1200
2017-08-09 11:49:00-04:00 0.8228 0.8228 0.8228 0.8228 500
2017-08-09 12:06:00-04:00 0.8300 0.8300 0.8100 0.8100 200
2017-08-09 12:21:00-04:00 0.8380 0.8380 0.8380 0.8380 999
2017-08-09 13:27:00-04:00 0.8300 0.8300 0.8300 0.8300 100
2017-08-09 13:53:00-04:00 0.8300 0.8300 0.8300 0.8300 100
2017-08-09 15:21:00-04:00 0.8130 0.8130 0.8130 0.8130 108
2017-08-09 15:56:00-04:00 0.8205 0.8205 0.8205 0.8205 1500
2017-08-09 15:57:00-04:00 0.8200 0.8200 0.8200 0.8200 1500
2017-08-09 16:00:00-04:00 0.8100 0.8100 0.8100 0.8100 303
2017-08-10 09:33:00-04:00 0.8199 0.8199 0.8199 0.8199 100
2017-08-10 09:38:00-04:00 0.8101 0.8101 0.8100 0.8100 7040
2017-08-10 09:42:00-04:00 0.8109 0.8109 0.8109 0.8109 1000
2017-08-10 09:52:00-04:00 0.8100 0.8125 0.8100 0.8125 1000
2017-08-10 10:01:00-04:00 0.8100 0.8100 0.8100 0.8100 100
2017-08-10 12:21:00-04:00 0.8150 0.8200 0.8150 0.8200 2000
2017-08-10 12:22:00-04:00 0.8200 0.8500 0.8149 0.8500 4701
2017-08-10 12:23:00-04:00 0.8350 0.8600 0.8163 0.8363 19654
2017-08-10 12:24:00-04:00 0.8150 0.8300 0.8150 0.8200 5900
2017-08-10 12:25:00-04:00 0.8000 0.8000 0.8000 0.8000 600
2017-08-10 12:52:00-04:00 0.8012 0.8024 0.8000 0.8024 5480
2017-08-10 13:48:00-04:00 0.8100 0.8100 0.8100 0.8100 500
2017-08-10 13:49:00-04:00 0.8100 0.8100 0.8100 0.8100 4500
2017-08-10 14:11:00-04:00 0.8150 0.8150 0.8150 0.8150 100
... ... ... ... ... ... ...
MOC 2017-09-01 13:43:00-04:00 2.9300 2.9700 2.9300 2.9600 3951
2017-09-01 14:20:00-04:00 2.9632 2.9632 2.9632 2.9632 494
2017-09-01 15:17:00-04:00 2.9600 2.9600 2.9600 2.9600 200
2017-09-01 15:19:00-04:00 2.9600 2.9600 2.9600 2.9600 359
2017-09-01 15:20:00-04:00 2.9600 2.9600 2.9600 2.9600 100
2017-09-01 15:37:00-04:00 3.0000 3.0200 3.0000 3.0200 5402
2017-09-01 15:38:00-04:00 3.0000 3.0000 3.0000 3.0000 8301
2017-09-01 15:45:00-04:00 3.0200 3.0200 2.9900 2.9900 200
2017-09-05 09:35:00-04:00 2.8500 2.8500 2.8500 2.8500 100
2017-09-05 09:45:00-04:00 3.0325 3.0325 3.0325 3.0325 750
2017-09-05 09:46:00-04:00 3.0000 3.0045 2.8800 2.9161 4701
2017-09-05 09:47:00-04:00 2.8600 2.8600 2.8600 2.8600 100
2017-09-05 10:05:00-04:00 2.9600 2.9600 2.9600 2.9600 100
2017-09-05 10:15:00-04:00 2.9500 2.9500 2.9500 2.9500 700
2017-09-05 10:16:00-04:00 2.9500 3.0900 2.9500 3.0101 8100
2017-09-05 10:17:00-04:00 3.0100 3.0216 2.9900 3.0216 10339
2017-09-05 10:18:00-04:00 2.9500 2.9500 2.9500 2.9500 1800
2017-09-05 10:50:00-04:00 2.9200 2.9200 2.8600 2.9092 1300
2017-09-05 10:51:00-04:00 2.9099 2.9500 2.8700 2.9500 3800
2017-09-05 10:52:00-04:00 2.8800 2.9200 2.8800 2.9200 300
2017-09-05 10:58:00-04:00 2.9700 2.9700 2.9700 2.9700 100
2017-09-05 12:12:00-04:00 2.9600 2.9600 2.9600 2.9600 100
2017-09-05 13:47:00-04:00 2.8800 2.9300 2.8800 2.9300 4700
2017-09-05 13:48:00-04:00 2.9700 2.9700 2.9700 2.9700 200
2017-09-05 13:49:00-04:00 3.0200 3.0396 3.0000 3.0000 8500
2017-09-05 15:43:00-04:00 2.9400 2.9400 2.9000 2.9000 900
2017-09-05 15:44:00-04:00 2.9300 2.9800 2.8500 2.9800 400
2017-09-05 16:00:00-04:00 2.9900 2.9900 2.9900 2.9900 300
2017-09-06 10:30:00-04:00 2.9800 2.9800 2.9800 2.9800 100
2017-09-06 11:35:00-04:00 2.9800 2.9800 2.9800 2.9800 100

15160 rows × 5 columns

Write to hdf


In [38]:
stock_data.to_hdf(project_dir + 'data/stock_data/raw_stock_data.hdf', 'table')

In [39]:
stock_data.reset_index().loc[:,('ticker', 'timestamp')].groupby('ticker').agg([np.min,np.max])


Out[39]:
timestamp
amin amax
ticker
APHB 2017-08-09 09:30:00-04:00 2017-09-06 11:55:00-04:00
ARDM 2017-08-09 09:30:00-04:00 2017-09-06 11:53:00-04:00
ASTC 2017-08-09 09:30:00-04:00 2017-09-06 11:51:00-04:00
ATLC 2017-08-09 09:30:00-04:00 2017-09-06 10:53:00-04:00
AVIR 2017-08-09 09:30:00-04:00 2017-09-06 11:51:00-04:00
BNSO 2017-08-09 10:05:00-04:00 2017-09-05 16:00:00-04:00
CODA 2017-08-09 10:30:00-04:00 2017-09-06 11:08:00-04:00
CPST 2017-08-09 09:30:00-04:00 2017-09-06 11:58:00-04:00
CYAN 2017-08-09 12:34:00-04:00 2017-09-06 10:34:00-04:00
DTRM 2017-08-09 09:30:00-04:00 2017-09-06 11:58:00-04:00
FORD 2017-08-09 09:30:00-04:00 2017-09-06 11:41:00-04:00
GBR 2017-08-09 09:38:00-04:00 2017-09-06 11:59:00-04:00
JCS 2017-08-09 09:30:00-04:00 2017-09-06 11:54:00-04:00
JVA 2017-08-09 10:04:00-04:00 2017-09-06 10:47:00-04:00
LEU 2017-08-09 09:30:00-04:00 2017-09-06 11:43:00-04:00
LRAD 2017-08-09 09:30:00-04:00 2017-09-06 11:08:00-04:00
MICR 2017-08-09 10:05:00-04:00 2017-09-06 11:35:00-04:00
MOC 2017-08-09 10:27:00-04:00 2017-09-06 11:35:00-04:00
PAVM 2017-08-09 11:49:00-04:00 2017-09-06 11:53:00-04:00
REFR 2017-08-09 09:30:00-04:00 2017-09-06 11:58:00-04:00
SEAC 2017-08-09 09:45:00-04:00 2017-09-06 11:59:00-04:00
UTSI 2017-08-09 09:45:00-04:00 2017-09-06 11:51:00-04:00
VSR 2017-08-09 09:30:00-04:00 2017-09-06 11:57:00-04:00
WSTL 2017-08-09 09:30:00-04:00 2017-09-06 11:20:00-04:00
WTT 2017-08-09 10:14:00-04:00 2017-09-06 11:58:00-04:00
ZDGE 2017-08-09 09:30:00-04:00 2017-09-06 10:38:00-04:00

In [ ]: