In [2]:
# Joel O'Neil
# PDS - Final Project
#
# stkDf 1.0 (stock Dataframe version 1.0): Python module - retrieves stock quote data from Yahoo Finance
# and enables you to create concatenated dataframes for data analysis on daily stock performance
# as compared to the S&P 500 Index
#
# Copyright (c) 2013 Joel O'Neil (joel.oneil@hotmail.com)
#
#Importing all Necessary libraries
import datetime
import os

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas.io.data as web
%matplotlib inline
####Creating custom libraries#####
#Global Variables to be initialized
#start/end of the date range handed to every quote request below
start = datetime.datetime(2013, 9, 10)
end = datetime.datetime(2013, 12, 6)
#location of the trend .csv file; defaults to the original author's path but can be
#overridden with the STKDF_TREND_LOC environment variable so the notebook runs on other machines
trend_loc = os.environ.get('STKDF_TREND_LOC', '/Users/joeloneil/Python/newdata1.csv')
#reading index data from Yahoo! finance
def index_price(stock_symbol, stock_name, start, end):
    """Return the row-over-row percentage change of an index's adjusted close.

    stock_symbol -- ticker passed to the 'yahoo' data reader
    stock_name   -- label used to name the result ('<stock_name>_Price')
    start, end   -- date range passed straight through to the data reader

    Returns the 'Adj Close' column after pct_change(), renamed as above.
    """
    quotes = web.DataReader(stock_symbol, 'yahoo', start, end)
    pct_moves = quotes['Adj Close'].pct_change()
    pct_moves.name = '%s_Price' % stock_name
    return pct_moves
#loading S&P pct change data (series is named 'S&P_Price' by index_price)
sandp_price = index_price(stock_symbol="^GSPC",
                          stock_name="S&P",
                          start=start,
                          end=end)
#reading price data from Yahoo! finance
def stk_price(stock_symbol, stock_name, start, end):
    """Return the row-over-row percentage change of a stock's adjusted close.

    stock_symbol -- ticker passed to the 'yahoo' data reader; also the prefix
                    of the result's name ('<stock_symbol>_Price')
    stock_name   -- accepted for a uniform signature; not used here
    start, end   -- date range passed straight through to the data reader
    """
    adj_close_change = web.DataReader(stock_symbol, 'yahoo', start, end)['Adj Close'].pct_change()
    adj_close_change.name = '%s_Price' % stock_symbol
    return adj_close_change
#reading volume data from Yahoo! finance
def stk_vol(stock_symbol, stock_name, start, end):
    """Return the row-over-row percentage change of a stock's traded volume.

    stock_symbol -- ticker passed to the 'yahoo' data reader; also the prefix
                    of the result's name ('<stock_symbol>_Vol')
    stock_name   -- accepted for a uniform signature; not used here
    start, end   -- date range passed straight through to the data reader
    """
    volume_change = web.DataReader(stock_symbol, 'yahoo', start, end)['Volume'].pct_change()
    volume_change.name = '%s_Vol' % stock_symbol
    return volume_change
#loading google trend data
def stk_trend(trend_loc, stock_name, stock_symbol):
    """Load one stock's trend column from the trend .csv file.

    trend_loc    -- path of a comma-separated file with a 'Day' column and one
                    column per stock, headed by the stock's name
    stock_name   -- header of the column to extract (e.g. 'Apple')
    stock_symbol -- ticker used to name the result ('<stock_symbol>_Trend')

    Returns the selected column as a float Series indexed by 'Day'.
    Raises KeyError if the file has no column called stock_name.
    """
    # parse_dates=True asks pandas to parse the 'Day' index as dates, so that it
    # lines up with the date-indexed quote series when stkDf concatenates them.
    # NOTE(review): assumes 'Day' holds date strings -- confirm against the csv.
    read_trend = pd.read_csv(trend_loc, sep=',', index_col='Day', parse_dates=True)
    trend = read_trend[stock_name].astype(float)
    trend.name = stock_symbol+'_Trend'
    return trend
###creating stock dataframe####
def stkDf(stock_symbol,
          stock_name,
          start,
          end,
          trend_loc,
          sandp_price,
          targetV):
    """Assemble the per-stock dataframe used as training data.

    stock_symbol -- ticker used for the quote requests and as column prefix
    stock_name   -- header of the stock's column in the trend file
    start, end   -- date range for the quote requests
    trend_loc    -- path of the trend .csv file
    sandp_price  -- S&P percentage-change series (as built by index_price)
    targetV      -- name of the target-variable column to add

    Returns a DataFrame whose columns are the stock's trend, volume change and
    price change, the S&P series, and the target variable under targetV.
    """
    stkDf_price = stk_price(stock_symbol, stock_name, start, end)
    stkDf_vol = stk_vol(stock_symbol, stock_name, start, end)
    stkDf_trend = stk_trend(trend_loc, stock_name, stock_symbol)
    stock_dataframe = pd.concat([stkDf_trend, stkDf_vol, stkDf_price, sandp_price], join='outer', axis = 1)
    # target_variable yields one label per row of stkDf_price, in order. The
    # outer join above can contain rows the price series lacks (e.g. extra days
    # in the trend file), so the labels are keyed by the price index and
    # assigned as a Series: pandas then aligns them to the matching rows and
    # leaves NaN elsewhere, instead of raising on a length mismatch or
    # attaching labels to the wrong dates.
    stkDf_tv = pd.Series(target_variable(stkDf_price, sandp_price),
                         index=stkDf_price.index)
    stock_dataframe[targetV] = stkDf_tv
    return stock_dataframe
###calculating target variable####
def target_variable(stock_price, sandp_price):
    """Build the target-variable list for one stock.

    stock_price -- sequence (list or pandas Series) of the stock's changes
    sandp_price -- sequence of the S&P changes; needs at least as many
                   entries as stock_price

    Returns a list with one entry per element of stock_price: for every
    position i except the last, 1 if stock_price[i] > sandp_price[i+1] else 0;
    the last entry is the placeholder string '??'. Comparisons involving NaN
    give 0. An empty stock_price gives an empty list.

    Raises IndexError if sandp_price is shorter than stock_price.
    """
    # Work on plain value lists so that access is strictly positional; integer
    # [] on a date-indexed Series is label-ambiguous and deprecated in pandas.
    stock_vals = list(stock_price)
    sandp_vals = list(sandp_price)
    if not stock_vals:
        # no rows to label, so no placeholder either (keeps len(result) == len(input))
        return []
    # NOTE(review): this compares the stock at position i with the S&P at
    # position i+1, exactly as the original did. Confirm this one-step offset is
    # intended rather than same-day (i vs i) or next-day (i+1 vs i+1).
    tvlist = [int(stock_vals[i] > sandp_vals[i + 1])
              for i in range(len(stock_vals) - 1)]
    #needed to append final value
    tvlist.append('??')
    return tvlist
In [3]:
###Generating 'sample' Training data for 2 stocks by creating & concatenating dataframes####
#Creating Dataframes
apple = stkDf('AAPL', 'Apple', start, end, trend_loc, sandp_price, 'Apple_TV')
amazon = stkDf('AMZN', 'Amazon', start, end, trend_loc, sandp_price, 'Amazon_TV')
#concatenating dataframes side by side (axis = 1), keeping the union of their row labels
training_data = pd.concat([apple, amazon], join='outer', axis = 1)
#printing sample - the parenthesised form is valid on both Python 2 and Python 3
print(training_data.head())
In [ ]:
###Generating Training data for all 10 stocks by creating & concatenating dataframes####
#main function calls
#(ticker symbol, company name) pairs, in output column order; the company name is the
#stock's column header in the trend file and the prefix of its '<name>_TV' target column
stock_list = [
    ('AAPL', 'Apple'),
    ('AMZN', 'Amazon'),
    ('CSCO', 'Cisco'),
    ('FB', 'Facebook'),
    ('HPQ', 'HP'),
    ('IBM', 'IBM'),
    ('INTC', 'Intel'),
    ('LNKD', 'LinkedIn'),
    ('MSFT', 'Microsoft'),
    ('YHOO', 'Yahoo!'),
]
#Creating Dataframes - one stkDf call per stock, in the order listed above
stock_frames = [
    stkDf(stk_symbol, stk_name, start, end, trend_loc, sandp_price, stk_name + '_TV')
    for stk_symbol, stk_name in stock_list
]
#keeping the individual per-stock names bound, as before
(apple, amazon, cisco, facebook, hp,
 ibm, intel, linkedin, microsoft, yahoo) = stock_frames
#concatenating dataframes
training_data = pd.concat(stock_frames, join='outer', axis = 1)
#code provided below (commented out) to execute conversion to .csv file
#training_data.to_csv('/Users/joeloneil/Python/StockModel_TrainingData.csv')
######## NOTE: output included in .csv file posted on Github. Link included below: #########
##### --> https://github.com/joeljoneil/PDS_Fall2013_Projects/blob/master/StockModel_TrainingData.csv