In [1]:
import datetime
import datetime as dt
import os
import time
import warnings
from collections import OrderedDict
from copy import copy

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline
In [2]:
# Root directory of the project. The trailing slash matters: every path in
# this notebook is built from it by plain string concatenation.
# NOTE(review): this is an absolute, machine-specific path -- adjust it when
# running the notebook anywhere else.
project_dir = r'/Users/hudson/Code/marketModel/'
# Price-data sub-directory under the project root (not referenced by the
# other cells shown in this notebook).
price_path = '{}{}'.format(project_dir, r'stock_price_data/')
In [3]:
# BarChart API key. It is read from the BARCHART_API_KEY environment variable
# when that is set, so the credential does not have to live in the notebook.
# NOTE(review): the literal fallback is kept only so existing runs keep
# working -- rotate this key and remove the literal from the notebook.
apikey = os.environ.get('BARCHART_API_KEY', 'a207db3b2e61eac30ed9b9cd18b2e0d0')


def construct_barChart_url(sym, start_date, freq='minutes', interval='1', api_key=apikey):
    '''Function to construct barchart api url

    Get the barChart getHistory CSV url for
    * sym -- the ticker symbol
    * start_date -- the earliest time in format yyyymmddhhmmss
    * freq -- ['days', 'hours', 'minutes', 'seconds']; sent as the `type`
      query parameter
    * interval -- accepted but currently NOT included in the url. It is kept
      in the signature so existing positional calls keep working; note that
      a fourth positional argument binds here, not to api_key.
    * api_key -- the barChart api key (defaults to the module-level `apikey`
      as it was when this function was defined)

    Returns the request url as a string. Values are inserted as-is; no
    url-escaping is applied.
    '''
    query = 'key={}&symbol={}&type={}&startDate={}'.format(
        api_key, sym, freq, start_date)
    return 'http://marketdata.websol.barchart.com/getHistory.csv?' + query
In [4]:
# Smoke-test the API with a single symbol (GOOG, 'minutes' data from
# 2017-07-01 00:00:00).
# The key is passed by keyword: as a fourth positional argument it would bind
# to `interval` rather than `api_key`.
api_test_url = construct_barChart_url('GOOG', '20170701000000', 'minutes', api_key=apikey)
# Load the CSV response, parsing the `timestamp` column as datetimes.
goog = pd.read_csv(api_test_url, parse_dates=['timestamp'])
In [5]:
# Preview the test download: first rows, then summary statistics.
# print(...) with a single argument behaves the same on Python 2 and 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print(goog.head())
print(goog.describe())
In [22]:
# Load the symbols table; the download cell iterates over its
# `ticker_symbol` column.
symbols = pd.read_csv('{}data/stock_data/symbols.csv'.format(project_dir))
In [36]:
# Pull data for all the test symbols
days_prior_to_now = 30
current = datetime.datetime.now()
# Start of the download window: `days_prior_to_now` days before now, formatted
# as yyyymmddhhmmss with the seconds field forced to 00.
starttime = (current - datetime.timedelta(days=days_prior_to_now)).strftime('%Y%m%d%H%M00')
print("stock data start time: " + starttime)

# Maps ticker symbol -> DataFrame indexed by timestamp.
prices = {}
for i, symbol in enumerate(symbols.ticker_symbol):
    print("Progress: {} of {} tickers. Current ticker: {}".format(i, len(symbols), symbol))
    # Construct the appropriate URL. The key is passed by keyword because a
    # fourth positional argument would bind to `interval`, not `api_key`.
    url = construct_barChart_url(symbol, starttime, 'minutes', api_key=apikey)
    try:
        # Read the data from the url
        data = pd.read_csv(url, parse_dates=['timestamp']).set_index('timestamp')
        # Drop the symbol and trading day columns
        data = data.drop(['symbol', 'tradingDay'], axis=1)
        # Convert the times to eastern time zone
        # NOTE(review): this treats the parsed timestamps as naive UTC -- confirm
        # against the API response format.
        data.index = data.index.tz_localize('utc').tz_convert('US/Eastern')
        # Add data to prices dictionary
        prices[symbol] = data
    except Exception as err:
        # Best-effort download: report the failure and move on to the next
        # ticker. Catching Exception (not a bare except) lets
        # KeyboardInterrupt/SystemExit stop the loop, and the reason is shown.
        print("Failed to load data for {}: {!r}".format(symbol, err))
In [37]:
# Stack the per-ticker frames into a single frame whose MultiIndex is
# (ticker, timestamp); the level names are supplied directly to concat.
stock_data = pd.concat(
    list(prices.values()),
    keys=list(prices.keys()),
    names=['ticker', 'timestamp'],
)
stock_data
Out[37]:
In [38]:
# Persist the combined frame to HDF5 under the key 'table'. The key is passed
# by keyword because newer pandas deprecates passing it positionally.
stock_data.to_hdf(project_dir + 'data/stock_data/raw_stock_data.hdf', key='table')
In [39]:
# Earliest and latest timestamp present for each ticker.
(
    stock_data
    .reset_index()[['ticker', 'timestamp']]
    .groupby('ticker')
    .agg([np.min, np.max])
)
Out[39]:
In [ ]: