Part 8

Here we continue with the python for finance series and visualize the big dataframe we just created.


In [1]:
%matplotlib qt
import matplotlib.pyplot as plt
from matplotlib import style
import numpy as np
import pandas as pd

style.use('ggplot')

In [2]:
df = pd.read_csv('data/sp500_joined_closes.csv', index_col=0)

In [3]:
def visualize_data(df):
    df_corr = df.corr()
#     print(df_corr.head())
    
    data = df_corr.values
    fig = plt.figure()
    ax = fig.add_subplot(1,1,1)
    
    heatmap = ax.pcolor(data, cmap=plt.cm.RdYlGn)
    fig.colorbar(heatmap)
    ax.set_xticks(np.arange(data.shape[0]) + 0.5, minor=False)
    ax.set_yticks(np.arange(data.shape[1]) + 0.5, minor=False)
    ax.invert_yaxis()
    ax.xaxis.tick_top()
    
    column_labels = df_corr.columns
    row_labels = df_corr.index
    
    ax.set_xticklabels(column_labels)
    ax.set_yticklabels(row_labels)
    plt.xticks(rotation=90)
    heatmap.set_clim(-1, 1)
    plt.tight_layout()
    
#     df['AAPL'].plot()

In [4]:
# Careful! This will produce a BIG graph.
visualize_data(df)

Part 9

Here we start preparing the data for ML


In [5]:
import pickle

In [6]:
def process_data_for_labels(ticker, df):
    hm_days = 7 # How many days in the future are we looking
    tickers = df.columns.values
    df.fillna(0, inplace=True)
    
    for i in range(1, hm_days+1):
        df['{}_{}d'.format(ticker, i)] =\
        (df[ticker].shift(-i) - df[ticker]) / df[ticker]
        
    df.fillna(0, inplace=True)
    return tickers, df

In [7]:
process_data_for_labels('XOM', df)


Out[7]:
(array(['MMM', 'ABT', 'ABBV', 'ACN', 'ATVI', 'AYI', 'ADBE', 'AAP', 'AES',
        'AET', 'AMG', 'AFL', 'A', 'APD', 'AKAM', 'ALK', 'ALB', 'AGN', 'LNT',
        'ALXN', 'ALLE', 'ADS', 'ALL', 'GOOGL', 'GOOG', 'MO', 'AMZN', 'AEE',
        'AAL', 'AEP', 'AXP', 'AIG', 'AMT', 'AWK', 'AMP', 'ABC', 'AME',
        'AMGN', 'APH', 'APC', 'ADI', 'ANTM', 'AON', 'APA', 'AIV', 'AAPL',
        'AMAT', 'ADM', 'ARNC', 'AJG', 'AIZ', 'T', 'ADSK', 'ADP', 'AN',
        'AZO', 'AVB', 'AVY', 'BHI', 'BLL', 'BAC', 'BK', 'BCR', 'BAX', 'BBT',
        'BDX', 'BBBY', 'BRK-B', 'BBY', 'BIIB', 'BLK', 'HRB', 'BA', 'BWA',
        'BXP', 'BSX', 'BMY', 'AVGO', 'BF-B', 'CHRW', 'CA', 'COG', 'CPB',
        'COF', 'CAH', 'HSIC', 'KMX', 'CCL', 'CAT', 'CBG', 'CBS', 'CELG',
        'CNC', 'CNP', 'CTL', 'CERN', 'CF', 'SCHW', 'CHTR', 'CHK', 'CVX',
        'CMG', 'CB', 'CHD', 'CI', 'XEC', 'CINF', 'CTAS', 'CSCO', 'C', 'CFG',
        'CTXS', 'CLX', 'CME', 'CMS', 'COH', 'KO', 'CTSH', 'CL', 'CMCSA',
        'CMA', 'CAG', 'CXO', 'COP', 'ED', 'STZ', 'GLW', 'COST', 'COTY',
        'CCI', 'CSRA', 'CSX', 'CMI', 'CVS', 'DHI', 'DHR', 'DRI', 'DVA',
        'DE', 'DLPH', 'DAL', 'XRAY', 'DVN', 'DLR', 'DFS', 'DISCA', 'DISCK',
        'DG', 'DLTR', 'D', 'DOV', 'DOW', 'DPS', 'DTE', 'DD', 'DUK', 'DNB',
        'ETFC', 'EMN', 'ETN', 'EBAY', 'ECL', 'EIX', 'EW', 'EA', 'EMR',
        'ENDP', 'ETR', 'EVHC', 'EOG', 'EQT', 'EFX', 'EQIX', 'EQR', 'ESS',
        'EL', 'ES', 'EXC', 'EXPE', 'EXPD', 'ESRX', 'EXR', 'XOM', 'FFIV',
        'FB', 'FAST', 'FRT', 'FDX', 'FIS', 'FITB', 'FSLR', 'FE', 'FISV',
        'FLIR', 'FLS', 'FLR', 'FMC', 'FTI', 'FL', 'F', 'FTV', 'FBHS', 'BEN',
        'FCX', 'FTR', 'GPS', 'GRMN', 'GD', 'GE', 'GGP', 'GIS', 'GM', 'GPC',
        'GILD', 'GPN', 'GS', 'GT', 'GWW', 'HAL', 'HBI', 'HOG', 'HAR', 'HRS',
        'HIG', 'HAS', 'HCA', 'HCP', 'HP', 'HES', 'HPE', 'HOLX', 'HD', 'HON',
        'HRL', 'HST', 'HPQ', 'HUM', 'HBAN', 'IDXX', 'ITW', 'ILMN', 'IR',
        'INTC', 'ICE', 'IBM', 'IP', 'IPG', 'IFF', 'INTU', 'ISRG', 'IVZ',
        'IRM', 'JEC', 'JBHT', 'SJM', 'JNJ', 'JCI', 'JPM', 'JNPR', 'KSU',
        'K', 'KEY', 'KMB', 'KIM', 'KMI', 'KLAC', 'KSS', 'KHC', 'KR', 'LB',
        'LLL', 'LH', 'LRCX', 'LEG', 'LEN', 'LVLT', 'LUK', 'LLY', 'LNC',
        'LLTC', 'LKQ', 'LMT', 'L', 'LOW', 'LYB', 'MTB', 'MAC', 'M', 'MNK',
        'MRO', 'MPC', 'MAR', 'MMC', 'MLM', 'MAS', 'MA', 'MAT', 'MKC', 'MCD',
        'MCK', 'MJN', 'MDT', 'MRK', 'MET', 'MTD', 'KORS', 'MCHP', 'MU',
        'MSFT', 'MAA', 'MHK', 'TAP', 'MDLZ', 'MON', 'MNST', 'MCO', 'MS',
        'MOS', 'MSI', 'MUR', 'MYL', 'NDAQ', 'NOV', 'NAVI', 'NTAP', 'NFLX',
        'NWL', 'NFX', 'NEM', 'NWSA', 'NWS', 'NEE', 'NLSN', 'NKE', 'NI',
        'NBL', 'JWN', 'NSC', 'NTRS', 'NOC', 'NRG', 'NUE', 'NVDA', 'ORLY',
        'OXY', 'OMC', 'OKE', 'ORCL', 'PCAR', 'PH', 'PDCO', 'PAYX', 'PYPL',
        'PNR', 'PBCT', 'PEP', 'PKI', 'PRGO', 'PFE', 'PCG', 'PM', 'PSX',
        'PNW', 'PXD', 'PBI', 'PNC', 'RL', 'PPG', 'PPL', 'PX', 'PCLN', 'PFG',
        'PG', 'PGR', 'PLD', 'PRU', 'PEG', 'PSA', 'PHM', 'PVH', 'QRVO',
        'PWR', 'QCOM', 'DGX', 'RRC', 'RTN', 'O', 'RHT', 'REGN', 'RF', 'RSG',
        'RAI', 'RHI', 'ROK', 'COL', 'ROP', 'ROST', 'RCL', 'R', 'CRM', 'SCG',
        'SLB', 'SNI', 'STX', 'SEE', 'SRE', 'SHW', 'SIG', 'SPG', 'SWKS',
        'SLG', 'SNA', 'SO', 'LUV', 'SWN', 'SE', 'SPGI', 'SWK', 'SPLS',
        'SBUX', 'STT', 'SRCL', 'SYK', 'STI', 'SYMC', 'SYF', 'SYY', 'TROW',
        'TGT', 'TEL', 'TGNA', 'TDC', 'TSO', 'TXN', 'TXT', 'COO', 'HSY',
        'TRV', 'TMO', 'TIF', 'TWX', 'TJX', 'TMK', 'TSS', 'TSCO', 'TDG',
        'RIG', 'TRIP', 'FOXA', 'FOX', 'TSN', 'UDR', 'ULTA', 'USB', 'UA',
        'UAA', 'UNP', 'UAL', 'UNH', 'UPS', 'URI', 'UTX', 'UHS', 'UNM',
        'URBN', 'VFC', 'VLO', 'VAR', 'VTR', 'VRSN', 'VRSK', 'VZ', 'VRTX',
        'VIAB', 'V', 'VNO', 'VMC', 'WMT', 'WBA', 'DIS', 'WM', 'WAT', 'WEC',
        'WFC', 'HCN', 'WDC', 'WU', 'WRK', 'WY', 'WHR', 'WFM', 'WMB', 'WLTW',
        'WYN', 'WYNN', 'XEL', 'XRX', 'XLNX', 'XL', 'XYL', 'YHOO', 'YUM',
        'ZBH', 'ZION', 'ZTS'], dtype=object),
                    MMM        ABT       ABBV         ACN       ATVI  \
 Date                                                                  
 2000-01-03   31.333866   9.517434   0.000000    0.000000   1.259640   
 2000-01-04   30.088811   9.245507   0.000000    0.000000   1.221324   
 2000-01-05   30.960349   9.228512   0.000000    0.000000   1.226114   
 2000-01-06   33.450458   9.551425   0.000000    0.000000   1.202166   
 2000-01-07   34.114487   9.653397   0.000000    0.000000   1.235693   
 2000-01-10   33.948480   9.585416   0.000000    0.000000   1.297956   
 2000-01-11   33.367454   9.449452   0.000000    0.000000   1.355430   
 2000-01-12   33.450458   9.308037   0.000000    0.000000   1.297956   
 2000-01-13   33.450458   9.222642   0.000000    0.000000   1.312325   
 2000-01-14   32.973187   9.427590   0.000000    0.000000   1.297956   
 2000-01-18   32.246906   9.222642   0.000000    0.000000   1.302746   
 2000-01-19   32.807180   9.034774   0.000000    0.000000   1.283588   
 2000-01-20   31.582877   8.761510   0.000000    0.000000   1.211745   
 2000-01-21   31.520624   8.522405   0.000000    0.000000   1.159060   
 2000-01-24   30.794342   8.197904   0.000000    0.000000   1.149481   
 2000-01-25   30.337822   8.095431   0.000000    0.000000   1.111165   
 2000-01-26   31.977144   8.112509   0.000000    0.000000   1.216535   
 2000-01-27   31.956393   8.419931   0.000000    0.000000   1.317114   
 2000-01-28   30.628335   8.710273   0.000000    0.000000   1.254851   
 2000-01-31   31.084855   8.898142   0.000000    0.000000   1.197377   
 2000-02-01   31.250862   8.983536   0.000000    0.000000   1.293167   
 2000-02-02   30.711339   9.034774   0.000000    0.000000   1.331483   
 2000-02-03   30.441577   9.086010   0.000000    0.000000   1.345851   
 2000-02-04   29.570039   9.410511   0.000000    0.000000   1.206956   
 2000-02-07   29.092768   9.239721   0.000000    0.000000   1.149481   
 2000-02-08   29.113519   9.325116   0.000000    0.000000   1.130323   
 2000-02-09   29.092768   9.854564   0.000000    0.000000   1.113556   
 2000-02-10   28.553245   9.598379   0.000000    0.000000   1.106376   
 2000-02-11   27.536450   9.290958   0.000000    0.000000   1.139902   
 2000-02-14   27.515699   9.581300   0.000000    0.000000   1.101586   
 ...                ...        ...        ...         ...        ...   
 2016-11-17  172.770004  40.130777  61.083674  118.370003  39.939999   
 2016-11-18  172.960007  39.584307  59.915461  118.430000  38.389999   
 2016-11-21  171.520004  39.504819  59.816458  119.680000  38.430000   
 2016-11-22  171.910004  37.855473  58.341344  119.480003  37.169998   
 2016-11-23  172.250000  38.729826  59.578857  119.820000  37.259998   
 2016-11-25  173.490005  38.908671  59.905559  120.739998  37.220001   
 2016-11-28  172.500000  38.491369  58.866048  120.760002  37.369999   
 2016-11-29  172.770004  38.481431  60.974773  120.940002  37.950001   
 2016-11-30  171.740005  37.825667  60.192663  119.430000  36.610001   
 2016-12-01  172.630005  37.358682  58.569046  117.660004  35.580002   
 2016-12-02  172.429993  37.656760  58.836349  117.290001  35.709999   
 2016-12-05  171.619995  38.183357  60.252065  117.720001  36.480000   
 2016-12-06  171.800003  38.173419  61.004472  118.410004  36.700001   
 2016-12-07  176.050003  38.233035  59.390757  121.570000  37.450001   
 2016-12-08  175.880005  38.431752  60.291666  121.739998  37.389999   
 2016-12-09  178.490005  38.948416  60.925273  123.309998  36.790001   
 2016-12-12  179.639999  38.948416  61.420278  123.610001  37.169998   
 2016-12-13  178.830002  39.186873  61.370778  125.400002  37.320000   
 2016-12-14  176.600006  38.570853  61.053971  123.650002  36.740002   
 2016-12-15  176.020004  38.680148  61.400478  124.550003  36.990002   
 2016-12-16  177.449997  37.855473  61.598480  123.000000  36.470001   
 2016-12-19  178.160004  38.093934  61.598480  123.650002  36.630001   
 2016-12-20  178.649994  37.944896  61.539078  124.099998  36.830002   
 2016-12-21  178.429993  37.994578  60.836172  117.900002  36.900002   
 2016-12-22  179.199997  38.044256  61.044073  117.790001  36.439999   
 2016-12-23  178.750000  38.173419  61.717281  117.480003  36.500000   
 2016-12-27  178.919998  38.352264  61.875682  117.550003  36.529999   
 2016-12-28  178.080002  37.974706  61.647980  116.610001  36.430000   
 2016-12-29  178.410004  38.064128  62.103384  117.010002  36.480000   
 2016-12-30  178.570007  38.163485  61.994483  117.129997  36.110001   
 
                    AYI        ADBE         AAP        AES         AET  \
 Date                                                                    
 2000-01-03    0.000000   16.274673    0.000000  32.346420    6.290820   
 2000-01-04    0.000000   14.909400    0.000000  31.063717    6.184675   
 2000-01-05    0.000000   15.204175    0.000000  31.398335    6.142218   
 2000-01-06    0.000000   15.328291    0.000000  31.649299    6.149294   
 2000-01-07    0.000000   16.072985    0.000000  32.262765    6.552642   
 2000-01-10    0.000000   16.693563    0.000000  33.712777    6.432345   
 2000-01-11    0.000000   15.545493    0.000000  33.880086    6.439422   
 2000-01-12    0.000000   15.467921    0.000000  34.577207    6.418193   
 2000-01-13    0.000000   16.290187    0.000000  35.162789    6.488956   
 2000-01-14    0.000000   16.460846    0.000000  37.142613    6.368659   
 2000-01-18    0.000000   15.886811    0.000000  34.772401    6.368659   
 2000-01-19    0.000000   15.669609    0.000000  34.967595    6.340354   
 2000-01-20    0.000000   15.840268    0.000000  34.660862    6.297896   
 2000-01-21    0.000000   15.607551    0.000000  35.692601    6.304972   
 2000-01-24    0.000000   15.033516    0.000000  35.692601    6.269591   
 2000-01-25    0.000000   15.638580    0.000000  33.907971    6.262515   
 2000-01-26    0.000000   15.188660    0.000000  32.318535    6.278139   
 2000-01-27    0.000000   15.561008    0.000000  33.852201    6.086386   
 2000-01-28    0.000000   14.288821    0.000000  34.075280    6.079284   
 2000-01-31    0.000000   13.668243    0.000000  35.748371    6.050876   
 2000-02-01    0.000000   13.869931    0.000000  36.027219    6.434383   
 2000-02-02    0.000000   15.219689    0.000000  35.385868    6.292343   
 2000-02-03    0.000000   17.143483    0.000000  35.859910    6.334955   
 2000-02-04    0.000000   18.369126    0.000000  33.573353    6.363363   
 2000-02-07    0.000000   19.129334    0.000000  35.692601    6.334955   
 2000-02-08    0.000000   20.789382    0.000000  37.672425    5.227048   
 2000-02-09    0.000000   21.735764    0.000000  38.313777    4.829338   
 2000-02-10    0.000000   22.651118    0.000000  36.975304    4.573667   
 2000-02-11    0.000000   22.449430    0.000000  36.584916    4.545259   
 2000-02-14    0.000000   22.247742    0.000000  38.146468    4.431628   
 ...                ...         ...         ...        ...         ...   
 2016-11-17  251.571356  105.809998  166.263183  11.309685  125.943869   
 2016-11-18  248.803112  105.019997  162.514465  11.280000  124.626549   
 2016-11-21  251.391476  105.650002  166.842972  11.428421  126.602533   
 2016-11-22  253.470167  105.209999  170.231813  11.289895  128.159365   
 2016-11-23  254.619436  104.209999  170.981557  11.220632  129.067522   
 2016-11-25  257.367703  105.019997  170.801626  11.507579  129.307040   
 2016-11-28  250.621957  104.500000  170.711660  11.814316  128.129427   
 2016-11-29  252.001092  104.949997  169.741990  11.754948  131.762039   
 2016-11-30  251.251565  102.809998  169.662016  11.329474  130.574452   
 2016-12-01  251.301537   99.510002  170.701654  10.913895  134.626209   
 2016-12-02  249.832462   99.730003  170.081870  11.062316  133.219082   
 2016-12-05  252.160995  101.949997  170.471737  11.220632  129.187273   
 2016-12-06  251.691290  101.550003  173.250786  11.210737  128.279116   
 2016-12-07  254.099760  103.360001  176.719601  11.497684  128.388894   
 2016-12-08  246.614490  103.500000  176.459696  11.715369  129.037584   
 2016-12-09  248.673189  104.309998  175.520014  11.794527  128.648376   
 2016-12-12  248.163516  105.160004  172.940894  11.883579  128.009675   
 2016-12-13  249.182862  106.150002  171.811275  12.061684  127.740219   
 2016-12-14  243.956163  105.809998  171.451397  11.814316  126.402942   
 2016-12-15  243.676340  105.099998  172.860919  11.675790  125.165453   
 2016-12-16  248.003613  103.550003  171.851270  11.626316  126.552632   
 2016-12-19  245.415249  105.290001  173.760621  11.715369  123.848133   
 2016-12-20  231.573971  105.769997  175.570000  11.814316  124.377056   
 2016-12-21  230.194836  105.510002  173.509995  11.853894  123.099655   
 2016-12-22  232.153608  104.720001  170.389999  11.656000  124.436939   
 2016-12-23  234.771952  105.019997  170.889999  11.606526  125.694377   
 2016-12-27  235.441528  104.980003  171.839996  11.735158  125.973815   
 2016-12-28  230.654553  103.769997  170.419998  11.517474  124.327162   
 2016-12-29  232.643289  103.680000  170.279999  11.576842  124.197421   
 2016-12-30  230.714513  102.949997  169.119995  11.497684  123.758319   
 
               ...            ZBH       ZION        ZTS    XOM_1d    XOM_2d  \
 Date          ...                                                            
 2000-01-03    ...       0.000000  43.725422   0.000000 -0.019154  0.034318   
 2000-01-04    ...       0.000000  41.608087   0.000000  0.054516  0.109032   
 2000-01-05    ...       0.000000  41.558847   0.000000  0.051698  0.048611   
 2000-01-06    ...       0.000000  42.149731   0.000000 -0.002935 -0.016875   
 2000-01-07    ...       0.000000  42.248211   0.000000 -0.013981 -0.011038   
 2000-01-10    ...       0.000000  41.484986   0.000000  0.002985 -0.002985   
 2000-01-11    ...       0.000000  39.293791   0.000000 -0.005952  0.017113   
 2000-01-12    ...       0.000000  39.638473   0.000000  0.023204  0.002994   
 2000-01-13    ...       0.000000  43.085297   0.000000 -0.019751 -0.008047   
 2000-01-14    ...       0.000000  43.823902   0.000000  0.011940  0.021642   
 2000-01-18    ...       0.000000  41.608087   0.000000  0.009587 -0.012537   
 2000-01-19    ...       0.000000  42.051250   0.000000 -0.021914 -0.006574   
 2000-01-20    ...       0.000000  41.312645   0.000000  0.015683  0.006721   
 2000-01-21    ...       0.000000  41.657327   0.000000 -0.008824 -0.011029   
 2000-01-24    ...       0.000000  40.967963   0.000000 -0.002226 -0.008902   
 2000-01-25    ...       0.000000  42.297452   0.000000 -0.006691 -0.036431   
 2000-01-26    ...       0.000000  42.198971   0.000000 -0.029940 -0.055389   
 2000-01-27    ...       0.000000  45.745752   0.000000 -0.026235  0.023148   
 2000-01-28    ...       0.000000  44.458533   0.000000  0.050713  0.054675   
 2000-01-31    ...       0.000000  46.834936   0.000000  0.003771  0.011312   
 2000-02-01    ...       0.000000  47.330020   0.000000  0.007513 -0.009767   
 2000-02-02    ...       0.000000  46.438869   0.000000 -0.017151 -0.035048   
 2000-02-03    ...       0.000000  45.894277   0.000000 -0.018209 -0.019727   
 2000-02-04    ...       0.000000  43.814924   0.000000 -0.001546 -0.023957   
 2000-02-07    ...       0.000000  43.765416   0.000000 -0.022446 -0.050435   
 2000-02-08    ...       0.000000  45.993293   0.000000 -0.028632 -0.033409   
 2000-02-09    ...       0.000000  43.666399   0.000000 -0.004918 -0.031148   
 2000-02-10    ...       0.000000  43.517874   0.000000 -0.026359  0.007413   
 2000-02-11    ...       0.000000  43.319840   0.000000  0.034687  0.073604   
 2000-02-14    ...       0.000000  42.874265   0.000000  0.037612  0.046607   
 ...           ...            ...        ...        ...       ...       ...   
 2016-11-17    ...     100.565451  37.910000  49.112912  0.000587  0.014783   
 2016-11-18    ...     100.405820  38.209999  49.791571  0.014189  0.016417   
 2016-11-21    ...     101.852443  38.540001  49.901354  0.002197  0.004972   
 2016-11-22    ...     100.575422  38.660000  49.532085  0.002769  0.005076   
 2016-11-23    ...     100.764983  39.119999  50.080999  0.002301 -0.005177   
 2016-11-25    ...     102.121819  39.099998  50.080999 -0.007461 -0.014004   
 2016-11-28    ...     101.742706  38.410000  50.051060 -0.006592  0.009599   
 2016-11-29    ...     103.159399  38.500000  50.350467  0.016298  0.015599   
 2016-11-30    ...     101.622982  39.790001  50.280606 -0.000687 -0.002978   
 2016-12-01    ...      99.557799  40.410000  49.063011 -0.002292  0.002751   
 2016-12-02    ...      99.966841  40.240002  49.003128  0.005055  0.005974   
 2016-12-05    ...     101.193982  40.549999  49.721709  0.000914  0.006744   
 2016-12-06    ...     104.366583  41.919998  50.120921  0.005825  0.008680   
 2016-12-07    ...     104.177023  42.290001  51.158867  0.002839  0.010560   
 2016-12-08    ...     104.206952  43.090000  51.578040  0.007699  0.030118   
 2016-12-09    ...     104.975164  43.230000  51.787625  0.022247  0.040225   
 2016-12-12    ...     105.563788  42.470001  50.819537  0.017586 -0.004397   
 2016-12-13    ...     106.481653  42.230000  50.949282 -0.021603 -0.018255   
 2016-12-14    ...     104.625980  42.200001  50.739697  0.003422  0.006624   
 2016-12-15    ...     102.411145  43.040001  52.216777  0.003191 -0.005171   
 2016-12-16    ...     103.259165  42.029999  52.356500 -0.008335 -0.008225   
 2016-12-19    ...     102.620655  42.700001  52.286638  0.000111 -0.001548   
 2016-12-20    ...     101.613003  43.910000  52.995237 -0.001659  0.004866   
 2016-12-21    ...     101.423450  43.639999  53.015198  0.006535  0.004763   
 2016-12-22    ...     101.333656  43.740002  53.154921 -0.001761 -0.001321   
 2016-12-23    ...     102.081911  43.860001  53.673895  0.000441 -0.004520   
 2016-12-27    ...     102.900002  43.930000  53.614016 -0.004959 -0.004408   
 2016-12-28    ...     103.059998  43.279999  53.334566  0.000554 -0.000443   
 2016-12-29    ...     103.500000  42.790001  53.514211 -0.000996  0.000000   
 2016-12-30    ...     103.199997  43.040001  53.424389  0.000000  0.000000   
 
               XOM_3d    XOM_4d    XOM_5d    XOM_6d    XOM_7d  
 Date                                                          
 2000-01-03  0.087789  0.084597  0.069433  0.072626  0.066241  
 2000-01-04  0.105777  0.090317  0.093572  0.087063  0.112286  
 2000-01-05  0.033951  0.037037  0.030864  0.054784  0.033951  
 2000-01-06 -0.013940 -0.019809  0.002935 -0.016875 -0.005136  
 2000-01-07 -0.016924  0.005887 -0.013981 -0.002208  0.007358  
 2000-01-10  0.020149  0.000000  0.011940  0.021642 -0.000746  
 2000-01-11 -0.002976  0.008929  0.018601 -0.003720  0.011905  
 2000-01-12  0.014970  0.024701  0.002246  0.017964  0.008982  
 2000-01-13  0.001463 -0.020483 -0.005121 -0.013899 -0.016094  
 2000-01-14 -0.000746  0.014925  0.005970  0.003731 -0.002985  
 2000-01-18  0.002950 -0.005900 -0.008112 -0.014749 -0.044248  
 2000-01-19 -0.015340 -0.017531 -0.024105 -0.053324 -0.078159  
 2000-01-20  0.004481 -0.002240 -0.032114 -0.057506 -0.009709  
 2000-01-21 -0.017647 -0.047059 -0.072059 -0.025000 -0.021324  
 2000-01-24 -0.038576 -0.063798 -0.016320 -0.012611 -0.005193  
 2000-01-25 -0.061710 -0.014126 -0.010409 -0.002974 -0.020074  
 2000-01-26 -0.007485 -0.003743  0.003743 -0.013473 -0.031437  
 2000-01-27  0.027006  0.034722  0.016975 -0.001543 -0.003086  
 2000-01-28  0.062599  0.044374  0.025357  0.023772  0.000792  
 2000-01-31 -0.006033 -0.024133 -0.025641 -0.047511 -0.074782  
 2000-02-01 -0.027799 -0.029301 -0.051089 -0.078258 -0.082791  
 2000-02-02 -0.036540 -0.058166 -0.085132 -0.089631 -0.113628  
 2000-02-03 -0.041730 -0.069167 -0.073744 -0.098160 -0.066878  
 2000-02-04 -0.051902 -0.056565 -0.081433 -0.049571 -0.013823  
 2000-02-07 -0.055105 -0.080011 -0.048100 -0.012296 -0.003735  
 2000-02-08 -0.058887 -0.026243  0.010382  0.019141 -0.007930  
 2000-02-09  0.002459  0.040164  0.049180  0.021311 -0.016393  
 2000-02-10  0.045305  0.054366  0.026359 -0.011532 -0.005766  
 2000-02-11  0.082910  0.054146  0.015228  0.021151  0.010152  
 2000-02-14  0.018806 -0.018806 -0.013083 -0.023712 -0.047424  
 ...              ...       ...       ...       ...       ...  
 2016-11-17  0.017013  0.019829  0.022175  0.014549  0.007861  
 2016-11-18  0.019231  0.021576  0.013954  0.007270  0.023687  
 2016-11-21  0.007284 -0.000231 -0.006822  0.009365  0.008672  
 2016-11-22 -0.002423 -0.008999  0.007153  0.006461  0.004153  
 2016-11-23 -0.011735  0.004372  0.003682  0.001381  0.006443  
 2016-11-25  0.002066  0.001377 -0.000918  0.004132  0.005050  
 2016-11-28  0.008905  0.006592  0.011680  0.012605  0.018504  
 2016-11-29  0.013271  0.018394  0.019325  0.025262  0.028172  
 2016-11-30  0.002062  0.002978  0.008820  0.011684  0.019473  
 2016-12-01  0.003668  0.009514  0.012380  0.020174  0.042870  
 2016-12-02  0.011834  0.014706  0.022518  0.045267  0.063649  
 2016-12-05  0.009602  0.017375  0.040009  0.058299  0.035437  
 2016-12-06  0.016446  0.039059  0.057332  0.034491  0.038031  
 2016-12-07  0.033042  0.051209  0.028500  0.032020  0.035313  
 2016-12-08  0.048234  0.025589  0.029099  0.032382  0.023777  
 2016-12-09  0.017753  0.021236  0.024494  0.015955  0.016067  
 2016-12-12 -0.000989  0.002198 -0.006155 -0.006045 -0.007694  
 2016-12-13 -0.015122 -0.023331 -0.023223 -0.024843 -0.018471  
 2016-12-14 -0.001766 -0.001656 -0.003312  0.003202  0.001435  
 2016-12-15 -0.005061 -0.006711 -0.000220 -0.001980 -0.001540  
 2016-12-16 -0.009871 -0.003400 -0.005155 -0.004716 -0.009651  
 2016-12-19  0.004977  0.003207  0.003650 -0.001327 -0.000774  
 2016-12-20  0.003096  0.003539 -0.001438 -0.000885 -0.001880  
 2016-12-21  0.005206  0.000222  0.000775 -0.000222  0.000000  
 2016-12-22 -0.006273 -0.005723 -0.006713  0.000000  0.000000  
 2016-12-23 -0.003969 -0.004961  0.000000  0.000000  0.000000  
 2016-12-27 -0.005399  0.000000  0.000000  0.000000  0.000000  
 2016-12-28  0.000000  0.000000  0.000000  0.000000  0.000000  
 2016-12-29  0.000000  0.000000  0.000000  0.000000  0.000000  
 2016-12-30  0.000000  0.000000  0.000000  0.000000  0.000000  
 
 [4277 rows x 512 columns])

Part 10

Next we start creating the labels for future supervised learning by creating a helper function.


In [23]:
def buy_sell_hold(*args):
    cols = [c for c in args]
    requirement = 0.028 # This is the threshold for buying/selling
    for col in cols:
        if col > requirement:
            return 1
        elif col < -requirement:
            return -1
        else:
            return 0

Part 11

Now we use our helper function to mapaour data to buy/sell/hold accordingly.


In [20]:
from collections import Counter
def extract_featuresets(ticker, df):
    tickers, df = process_data_for_labels(ticker, df)
    
    hm_days = 7 # How many days in the future are we looking
#     for i in range(1, hm_days+1):
#         df['{}_{}d'.format(ticker, i)] =\
#         (df[ticker].shift(-i) - df[ticker]) / df[ticker]
    df['{}_target'.format(ticker)] = list(
        map(buy_sell_hold, *[df['{}_{}d'.format(ticker, i)] for i in range(1, hm_days+1)]))
    
    vals = df['{}_target'.format(ticker)].values.tolist()
    str_vals = [str(i) for i in vals]
    print('Data spread: {}'.format(Counter(str_vals)))
    
    df.fillna(0, inplace=True)
    df.replace([np.inf, -np.inf], np.nan)
    df.dropna(inplace=True)
    
    df_vals = df[[ticker for ticker in tickers]].pct_change()
    df_vals = df_vals.replace([np.inf, -np.inf], 0)
    df_vals.fillna(0, inplace=True)
    
    X, y = df_vals.values, df['{}_target'.format(ticker)].values
    
    return X, y, df

# extract_featuresets('XOM', df) # Just to check if it works

Part 12

Here we use our created features to train a classifier with scikit-learn.


In [21]:
from sklearn import svm, neighbors, cross_validation
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import VotingClassifier, RandomForestClassifier

In [24]:
def do_ml(ticker, df):
    X, y, df = extract_featuresets(ticker, df)
    
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.25)
    
#     clf = neighbors.KNeighborsClassifier()
#     print(X_train.shape, y_train.shape); exit()
    clf = VotingClassifier([('lsvc', svm.LinearSVC()),
                            ('knn', neighbors.KNeighborsClassifier()),
                           ('rfor', RandomForestClassifier())], n_jobs=-1)
    
    clf.fit(X_train, y_train)
    
    confidence = clf.score(X_test, y_test)
    print("Accuracy:", confidence)
    predictions = clf.predict(X_test)
    print("Predicted spread: {}".format(Counter(predictions)))
    
    return confidence

do_ml('BAC', df)


Data spread: Counter({'0': 3578, '1': 365, '-1': 334})
Accuracy: 0.831775700935
Predicted spread: Counter({0: 1063, -1: 5, 1: 2})
Out[24]:
0.83177570093457942

In [ ]: