In [2]:
#  Joel O'Neil
#  PDS - Final Project
#  
#  stkDf 1.0 (stock Dataframe version 1.0): Python module - retrieves stock quote data from Yahoo Finance
#  and enables you to create concatenated dataframes for data analysis on daily stock performance
#  as compared to the S&P 500 Index
#  
#  Copyright (c) 2013 Joel O'Neil (joel.oneil@hotmail.com)
#

#Importing all Necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import pandas.io.data as web
import datetime
%matplotlib inline

####Creating custom libraries#####

#Global configuration shared by every cell below
#Date window (inclusive bounds passed to the Yahoo! Finance reader)
start = datetime.datetime(year=2013, month=9, day=10)
end = datetime.datetime(year=2013, month=12, day=6)
#CSV file holding the Google Trends data (one 'Day' column plus one column per stock name)
trend_loc = '/Users/joeloneil/Python/newdata1.csv'

#reading index data from Yahoo! finance
def index_price(stock_symbol, stock_name, start, end):
    """Return the row-to-row percent change of an index's adjusted close.

    Quotes for ``stock_symbol`` between ``start`` and ``end`` are fetched
    through ``web.DataReader`` using the 'yahoo' source. The returned Series
    is named ``<stock_name>_Price``.
    """
    quotes = web.DataReader(stock_symbol, 'yahoo', start, end)
    daily_change = quotes['Adj Close'].pct_change()
    daily_change.name = stock_name + '_Price'
    return daily_change

#loading S&P 500 (^GSPC) pct change data once; it is passed to every stkDf call
sandp_price = index_price(stock_symbol="^GSPC", stock_name="S&P", start=start, end=end)

#reading price data from Yahoo! finance
def stk_price(stock_symbol, stock_name, start, end):
    """Return the row-to-row percent change of a stock's adjusted close.

    Quotes for ``stock_symbol`` between ``start`` and ``end`` are fetched
    through ``web.DataReader`` using the 'yahoo' source. The returned Series
    is named ``<stock_symbol>_Price``. ``stock_name`` is accepted but not
    used by this function.
    """
    adj_close = web.DataReader(stock_symbol, 'yahoo', start, end)['Adj Close']
    price_change = adj_close.pct_change()
    price_change.name = stock_symbol + '_Price'
    return price_change

#reading volume data from Yahoo! finance
def stk_vol(stock_symbol, stock_name, start, end):
    """Return the row-to-row percent change of a stock's traded volume.

    Quotes for ``stock_symbol`` between ``start`` and ``end`` are fetched
    through ``web.DataReader`` using the 'yahoo' source. The returned Series
    is named ``<stock_symbol>_Vol``. ``stock_name`` is accepted but not used
    by this function.
    """
    volume = web.DataReader(stock_symbol, 'yahoo', start, end)['Volume']
    volume_change = volume.pct_change()
    volume_change.name = stock_symbol + '_Vol'
    return volume_change

#loading google trend data
def stk_trend(trend_loc, stock_name, stock_symbol):
    """Load one stock's trend column from the trend CSV.

    The CSV at ``trend_loc`` is read with its 'Day' column as the index; the
    column called ``stock_name`` is converted to float and returned as a
    Series named ``<stock_symbol>_Trend``.
    """
    trend_table = pd.read_csv(trend_loc, index_col='Day', sep=',')
    interest = trend_table[stock_name].astype(float)
    interest.name = stock_symbol + '_Trend'
    return interest

###creating stock dataframe####
def stkDf(stock_symbol,
          stock_name,
          start,
          end,
          trend_loc,
          sandp_price,
          targetV):
    """Assemble the per-stock dataframe used as training data.

    Columns, in order: the stock's trend series, its volume percent change,
    its price percent change, the supplied ``sandp_price`` series, and a
    final column named ``targetV`` holding the list returned by
    ``target_variable``.

    stock_symbol -- ticker passed to the price/volume readers
    stock_name   -- column name looked up in the trend CSV
    start, end   -- date bounds forwarded to the price/volume readers
    trend_loc    -- location of the trend CSV
    sandp_price  -- S&P percent-change series to compare against
    targetV      -- name of the target-variable column to add
    """
    price_change = stk_price(stock_symbol, stock_name, start, end)
    volume_change = stk_vol(stock_symbol, stock_name, start, end)
    trend_score = stk_trend(trend_loc, stock_name, stock_symbol)

    # Side-by-side outer join on the index of the four series.
    columns = [trend_score, volume_change, price_change, sandp_price]
    frame = pd.concat(columns, join='outer', axis=1)

    # The target values come back as a plain list, so they are assigned by
    # position and must have one entry per row of the frame.
    frame[targetV] = target_variable(price_change, sandp_price)
    return frame

    
###calculating target variable####
def target_variable(stock_price, sandp_price):
    """Build the 0/1 target-variable list for one stock versus the S&P.

    Entry ``i`` is 1 when the stock value at position ``i`` is greater than
    the S&P value at position ``i + 1``, otherwise 0. Any comparison that
    involves NaN is False and therefore yields 0. The last position has no
    following S&P value, so it holds the placeholder string '??'.

    NOTE(review): the stock is read at position i but the S&P at i + 1;
    confirm that this one-step offset is the intended definition.

    Values are always taken by position (never by index label), so the
    result does not depend on the index type of the inputs.

    stock_price -- sequence/Series of stock percent changes
    sandp_price -- sequence/Series of S&P percent changes

    Returns a list with one entry per element of ``stock_price``; an empty
    input gives an empty list.

    Raises IndexError when ``sandp_price`` has too few values to supply the
    "next" S&P value for every compared stock position.
    """
    stock_values = list(stock_price)
    index_values = list(sandp_price)

    # Nothing to label: return a list that still matches the input length.
    if not stock_values:
        return []

    stock_today = stock_values[:-1]
    index_next = index_values[1:]
    if len(index_next) < len(stock_today):
        raise IndexError(
            'sandp_price has %d values but at least %d are required'
            % (len(index_values), len(stock_values)))

    #comparing stock and S&P data pairwise then populating target variable list
    tvlist = [int(stock_change > index_change)
              for stock_change, index_change in zip(stock_today, index_next)]
    #final value cannot be computed yet, so a placeholder is appended
    tvlist.append('??')
    return tvlist

In [3]:
###Generating 'sample' Training data for 2 stocks by creating & concatenating dataframes####

#Creating Dataframes
apple = stkDf('AAPL', 'Apple', start, end, trend_loc, sandp_price, 'Apple_TV')
amazon = stkDf('AMZN', 'Amazon', start, end, trend_loc, sandp_price, 'Amazon_TV')

#concatenating dataframes side by side (outer join on the index)
#NOTE: each stkDf frame carries its own S&P_Price column, so that column name repeats in the result
training_data = pd.concat([apple, amazon], join='outer', axis=1)

#printing sample (parenthesised form prints the same on Python 2 and also parses on Python 3)
print(training_data.head())


         AAPL_Trend  AAPL_Vol  AAPL_Price  S&P_Price Apple_TV  AMZN_Trend  \
Day                                                                         
9/10/13         100       NaN         NaN        NaN        0          51   
9/11/13          58  0.209233   -0.054436   0.003052        0          50   
9/12/13          35 -0.550403    0.010645  -0.003380        1          50   
9/13/13          31 -0.260402   -0.016491   0.002715        0          49   
9/16/13          26  0.819418   -0.031783   0.005693        0          52   

         AMZN_Vol  AMZN_Price  S&P_Price Amazon_TV  
Day                                                 
9/10/13       NaN         NaN        NaN         0  
9/11/13 -0.020130   -0.002397   0.003052         1  
9/12/13 -0.078274   -0.002603  -0.003380         0  
9/13/13 -0.159133   -0.003145   0.002715         0  
9/16/13  0.338960   -0.006243   0.005693         0  

In [ ]:
###Generating Training data for all 10 stocks by creating & concatenating dataframes####

#main function calls

#(ticker symbol, stock name) for each stock, in output column order.
#The stock name is both the column looked up in the trend CSV and the prefix of the '<name>_TV' target column.
STOCKS = [
    ('AAPL', 'Apple'),
    ('AMZN', 'Amazon'),
    ('CSCO', 'Cisco'),
    ('FB', 'Facebook'),
    ('HPQ', 'HP'),
    ('IBM', 'IBM'),
    ('INTC', 'Intel'),
    ('LNKD', 'LinkedIn'),
    ('MSFT', 'Microsoft'),
    ('YHOO', 'Yahoo!'),
]

#Creating Dataframes: one stkDf call per stock, in the order listed above
stock_frames = [stkDf(symbol, name, start, end, trend_loc, sandp_price, name + '_TV')
                for symbol, name in STOCKS]

#keeping the per-stock names bound so they can still be inspected individually
(apple, amazon, cisco, facebook, hp,
 ibm, intel, linkedin, microsoft, yahoo) = stock_frames

#concatenating dataframes side by side (outer join on the index)
training_data = pd.concat(stock_frames, join='outer', axis=1)


#code provided below (commented out) to execute conversion to .csv file

#training_data.to_csv('/Users/joeloneil/Python/StockModel_TrainingData.csv')

######## NOTE: output included in .csv file posted on Github. Link included below: #########
##### --> https://github.com/joeljoneil/PDS_Fall2013_Projects/blob/master/StockModel_TrainingData.csv