In [2]:
#
# stkDf 1.0 (stock Dataframe version 1.0): Python module - retrieves stock quote data from Yahoo Finance
# and enables you to create concatenated dataframes for data analysis on daily stock performance
# as compared to the S&P 500 Index
#
# Copyright (c) 2013 Joel O'Neil (joel.oneil@hotmail.com)
#
# license: GNU LGPL
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# Requires: Python 2.7/3.2+
#Importing all Necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import pandas.io.data as web
import datetime
%matplotlib inline
####Creating custom libraries#####
#Global Variables to be initialized
#stock_symbol = "AAPL"
#stock_name = "Apple"
# Date range handed to every price/volume download in this notebook.
start = datetime.datetime(year=2013, month=9, day=10)
end = datetime.datetime(year=2013, month=12, day=6)
# CSV holding the trend data read by stk_trend (expects a 'Day' column).
# NOTE(review): absolute path on the author's machine; adjust before running elsewhere.
trend_loc = '/Users/joeloneil/Python/newdata1.csv'
#reading price data from Yahoo! finance
def index_price(stock_symbol, stock_name, start, end):
    """Return the daily percentage change of an index's adjusted close.

    Parameters
    ----------
    stock_symbol : ticker passed to ``web.DataReader`` (source ``'yahoo'``).
    stock_name : label used to build the series name, ``<stock_name>_Price``.
    start, end : date bounds forwarded unchanged to ``web.DataReader``.

    Returns
    -------
    The ``'Adj Close'`` column after ``pct_change()``, named
    ``stock_name + '_Price'``.
    """
    quotes = web.DataReader(stock_symbol, 'yahoo', start, end)
    adjusted_close = quotes['Adj Close']
    daily_change = adjusted_close.pct_change()
    # Unlike stk_price, the label is built from the display name, not the ticker.
    daily_change.name = stock_name + '_Price'
    return daily_change
# Daily percentage change of the S&P 500 index ("^GSPC") over the start/end window;
# passed to stkDf as the benchmark series.
sandp_price = index_price(stock_symbol="^GSPC", stock_name="S&P", start=start, end=end)
#reading price data from Yahoo! finance
def stk_price(stock_symbol, stock_name, start, end):
    """Return the daily percentage change of a stock's adjusted close.

    Parameters
    ----------
    stock_symbol : ticker passed to ``web.DataReader`` (source ``'yahoo'``);
        also used to build the series name, ``<stock_symbol>_Price``.
    stock_name : accepted for signature parity with the other loaders but
        not used by this function.
    start, end : date bounds forwarded unchanged to ``web.DataReader``.

    Returns
    -------
    The ``'Adj Close'`` column after ``pct_change()``, named
    ``stock_symbol + '_Price'``.
    """
    adjusted_close = web.DataReader(stock_symbol, 'yahoo', start, end)['Adj Close']
    daily_change = adjusted_close.pct_change()
    daily_change.name = stock_symbol + '_Price'
    return daily_change
#reading volume data from Yahoo! finance
def stk_vol(stock_symbol, stock_name, start, end):
    """Return the daily percentage change of a stock's traded volume.

    Parameters
    ----------
    stock_symbol : ticker passed to ``web.DataReader`` (source ``'yahoo'``);
        also used to build the series name, ``<stock_symbol>_Vol``.
    stock_name : accepted for signature parity with the other loaders but
        not used by this function.
    start, end : date bounds forwarded unchanged to ``web.DataReader``.

    Returns
    -------
    The ``'Volume'`` column after ``pct_change()``, named
    ``stock_symbol + '_Vol'``.
    """
    quotes = web.DataReader(stock_symbol, 'yahoo', start, end)
    volume_change = quotes['Volume'].pct_change()
    volume_change.name = stock_symbol + '_Vol'
    return volume_change
#loading google trend data
def stk_trend(trend_loc, stock_name, stock_symbol):
    """Load one stock's trend column from a CSV file.

    Parameters
    ----------
    trend_loc : path of a comma-separated file that has a ``Day`` column
        (used as the index) and one column per stock name.
    stock_name : name of the column to extract.
    stock_symbol : ticker used to build the series name, ``<stock_symbol>_Trend``.

    Returns
    -------
    The selected column cast to ``float``, indexed by ``Day`` and named
    ``stock_symbol + '_Trend'``.
    """
    # NOTE(review): 'Day' is read without date parsing, so the index holds the
    # raw CSV values; confirm it lines up with the date index of the price data.
    trend_table = pd.read_csv(trend_loc, sep=',', index_col='Day')
    trend_series = trend_table[stock_name].astype(float)
    trend_series.name = stock_symbol + '_Trend'
    return trend_series
###creating stock dataframe####
def stkDf(stock_symbol,
          stock_name,
          start,
          end,
          trend_loc,
          sandp_price,
          targetV):
    """Assemble the per-stock dataframe of trend, volume, price and target.

    Parameters
    ----------
    stock_symbol : ticker forwarded to stk_price, stk_vol and stk_trend.
    stock_name : display name forwarded to the same loaders.
    start, end : date bounds forwarded to stk_price and stk_vol.
    trend_loc : path of the trend CSV forwarded to stk_trend.
    sandp_price : benchmark series; added as a column and compared against
        the stock price by target_variable.
    targetV : name of the target-variable column to add.

    Returns
    -------
    A dataframe whose columns are, in order, the trend, volume change, price
    change and benchmark series (outer-joined on their indexes) followed by
    the ``targetV`` column.
    """
    stkDf_price = stk_price(stock_symbol, stock_name, start, end)
    stkDf_vol = stk_vol(stock_symbol, stock_name, start, end)
    stkDf_trend = stk_trend(trend_loc, stock_name, stock_symbol)
    stock_dataframe = pd.concat(
        [stkDf_trend, stkDf_vol, stkDf_price, sandp_price],
        join='outer', axis=1)
    stkDf_tv = target_variable(stkDf_price, sandp_price)
    # target_variable yields one entry per row of stkDf_price. The outer join
    # above can contain additional rows (any index value present only in the
    # trend, volume or benchmark series), in which case assigning the bare
    # list fails with a length mismatch. Labelling the values with the price
    # index lets pandas align them; rows without a price entry get NaN.
    stock_dataframe[targetV] = pd.Series(stkDf_tv, index=stkDf_price.index)
    return stock_dataframe
###calculating target variable####
def target_variable(stock_price, sandp_price):
    """Build the 0/1 target list comparing a stock against the benchmark.

    Entry ``n`` is ``1`` when the value at position ``n`` of ``stock_price``
    is strictly greater than the value at position ``n + 1`` of
    ``sandp_price``, otherwise ``0`` (comparisons involving NaN count as
    ``0``). The final entry has no following benchmark value and is the
    placeholder string ``'??'``.

    Parameters
    ----------
    stock_price : sequence or pandas Series of stock values.
    sandp_price : sequence or pandas Series of benchmark values; must be at
        least as long as ``stock_price`` when that has two or more entries.

    Returns
    -------
    list with ``len(stock_price)`` entries (a single ``'??'`` for empty input).

    Raises
    ------
    IndexError
        If ``sandp_price`` is too short to supply the value at position
        ``n + 1`` for every compared row.
    """
    # Work on plain value lists so the comparison is strictly positional.
    # Integer subscripting of a Series is a label lookup when the index holds
    # integers, and a deprecated positional fallback otherwise.
    stock_values = list(stock_price)
    benchmark_values = list(sandp_price)
    n_rows = len(stock_values)
    if n_rows > 1 and len(benchmark_values) < n_rows:
        raise IndexError(
            'sandp_price has %d values but %d are required'
            % (len(benchmark_values), n_rows))
    # NOTE(review): row n of the stock is compared with row n + 1 of the
    # benchmark, as in the original code; confirm the one-row offset on the
    # benchmark side only is intended.
    tvlist = [int(stock > benchmark)
              for stock, benchmark in zip(stock_values[:-1], benchmark_values[1:])]
    # The last row has nothing to compare against yet.
    tvlist.append('??')
    return tvlist
In [3]:
### Generating training data by creating & concatenating per-stock dataframes ###
# NOTE(review): the original header mentioned 10 stocks; only Apple and Amazon
# are built in this cell.
# Creating dataframes
apple = stkDf('AAPL', 'Apple', start, end, trend_loc, sandp_price, 'Apple_TV')
amazon = stkDf('AMZN', 'Amazon', start, end, trend_loc, sandp_price, 'Amazon_TV')
# Concatenating dataframes column-wise
training_data = pd.concat([apple, amazon], join='outer', axis=1)
# Printing sample; the call form of print runs on both Python 2.7 and 3.x,
# matching the "Python 2.7/3.2+" requirement stated in the header.
print(training_data.head())
#create csv
#training_data.to_csv('/Users/joeloneil/Python/StockModel_TrainingData.csv')