In [1]:
import graphlab as gl

In [2]:
# Glob pattern matching one CSV per half-year of EUR/USD H1 (hourly) candles.
filename = "./price_EUR_USD_*H1.csv"
# Load all matching files into a single SFrame; column types are inferred
# from the first line (see the inference notice in the output below).
sf_eur_usd_h1 = gl.SFrame.read_csv(filename, verbose =False)


[INFO] This non-commercial license of GraphLab Create is assigned to yuecong1104@gmail.com and will expire on August 13, 2016. For commercial licensing options, visit https://dato.com/buy/.

[INFO] Start server at: ipc:///tmp/graphlab_server-14259 - Server binary: /home/cyue/anaconda/lib/python2.7/site-packages/graphlab/unity_server - Server log: /tmp/graphlab_server_1440461372.log
[INFO] GraphLab Server Version: 1.5.2
------------------------------------------------------
Inferred types from first line of file as 
column_type_hints=[int,int,int,int,int,int,float,float,float,float,float,float,float,float,int,str]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------
PROGRESS: Finished parsing file /home/cyue/autotrade/price_EUR_USD_2013-07-01T00%3A00%3A00Z_H1.csv
PROGRESS: Read 3135 lines. Lines per second: 139831
PROGRESS: Finished parsing file /home/cyue/autotrade/price_EUR_USD_2003-01-01T00%3A00%3A00Z_H1.csv
PROGRESS: Finished parsing file /home/cyue/autotrade/price_EUR_USD_2005-01-01T00%3A00%3A00Z_H1.csv
PROGRESS: Finished parsing file /home/cyue/autotrade/price_EUR_USD_2002-07-01T00%3A00%3A00Z_H1.csv
PROGRESS: Finished parsing file /home/cyue/autotrade/price_EUR_USD_2009-01-01T00%3A00%3A00Z_H1.csv
PROGRESS: Finished parsing file /home/cyue/autotrade/price_EUR_USD_2014-01-01T00%3A00%3A00Z_H1.csv
PROGRESS: Finished parsing file /home/cyue/autotrade/price_EUR_USD_2010-07-01T00%3A00%3A00Z_H1.csv
PROGRESS: Finished parsing file /home/cyue/autotrade/price_EUR_USD_2006-01-01T00%3A00%3A00Z_H1.csv
PROGRESS: Finished parsing file /home/cyue/autotrade/price_EUR_USD_2012-01-01T00%3A00%3A00Z_H1.csv
PROGRESS: Finished parsing file /home/cyue/autotrade/price_EUR_USD_2007-07-01T00%3A00%3A00Z_H1.csv
PROGRESS: Finished parsing file /home/cyue/autotrade/price_EUR_USD_2013-01-01T00%3A00%3A00Z_H1.csv
PROGRESS: Finished parsing file /home/cyue/autotrade/price_EUR_USD_2003-07-01T00%3A00%3A00Z_H1.csv
PROGRESS: Finished parsing file /home/cyue/autotrade/price_EUR_USD_2015-01-01T00%3A00%3A00Z_H1.csv
PROGRESS: Finished parsing file /home/cyue/autotrade/price_EUR_USD_2005-07-01T00%3A00%3A00Z_H1.csv
PROGRESS: Finished parsing file /home/cyue/autotrade/price_EUR_USD_2008-07-01T00%3A00%3A00Z_H1.csv
PROGRESS: Finished parsing file /home/cyue/autotrade/price_EUR_USD_2015-07-01T00%3A00%3A00Z_H1.csv
PROGRESS: Finished parsing file /home/cyue/autotrade/price_EUR_USD_2008-01-01T00%3A00%3A00Z_H1.csv
PROGRESS: Finished parsing file /home/cyue/autotrade/price_EUR_USD_2004-07-01T00%3A00%3A00Z_H1.csv
PROGRESS: Finished parsing file /home/cyue/autotrade/price_EUR_USD_2006-07-01T00%3A00%3A00Z_H1.csv
PROGRESS: Finished parsing file /home/cyue/autotrade/price_EUR_USD_2011-07-01T00%3A00%3A00Z_H1.csv
PROGRESS: Finished parsing file /home/cyue/autotrade/price_EUR_USD_2004-01-01T00%3A00%3A00Z_H1.csv
PROGRESS: Finished parsing file /home/cyue/autotrade/price_EUR_USD_2002-01-01T00%3A00%3A00Z_H1.csv
PROGRESS: Finished parsing file /home/cyue/autotrade/price_EUR_USD_2009-07-01T00%3A00%3A00Z_H1.csv
PROGRESS: Finished parsing file /home/cyue/autotrade/price_EUR_USD_2012-07-01T00%3A00%3A00Z_H1.csv
PROGRESS: Finished parsing file /home/cyue/autotrade/price_EUR_USD_2011-01-01T00%3A00%3A00Z_H1.csv
PROGRESS: Finished parsing file /home/cyue/autotrade/price_EUR_USD_2007-01-01T00%3A00%3A00Z_H1.csv
PROGRESS: Finished parsing file /home/cyue/autotrade/price_EUR_USD_2014-07-01T00%3A00%3A00Z_H1.csv
PROGRESS: Finished parsing file /home/cyue/autotrade/price_EUR_USD_2010-01-01T00%3A00%3A00Z_H1.csv
PROGRESS: Parsing completed. Parsed 69296 lines in 0.359264 secs.

In [3]:
# Silence graphlab's INFO-level progress chatter; warnings/errors still print.
import logging
logging.getLogger('graphlab').setLevel(logging.WARNING)

In [4]:
# Build human-readable date keys from the timestamp parts (e.g. "2015-7-1").
# Note: components are not zero-padded, so these are grouping keys, not
# lexicographically sortable strings.
sf_eur_usd_h1 = sf_eur_usd_h1.add_column( sf_eur_usd_h1['year'].astype(str) + '-' + 
                                          sf_eur_usd_h1['month'].astype(str) + '-' + 
                                         sf_eur_usd_h1['day'].astype(str) , 'date' )
sf_eur_usd_h1 = sf_eur_usd_h1.add_column( sf_eur_usd_h1['year'].astype(str)+ '-' + 
                                         sf_eur_usd_h1['month'].astype(str) + '-' + 
                                         sf_eur_usd_h1['day'].astype(str) +':' +
                                         sf_eur_usd_h1['hour'].astype(str), 'date-hour' )
# Collapse each bid/ask quote pair into a mid price per OHLC component.
sf_eur_usd_h1 = sf_eur_usd_h1.add_column((sf_eur_usd_h1['openBid'] + sf_eur_usd_h1['openAsk']) /2 , 'open')
sf_eur_usd_h1 = sf_eur_usd_h1.add_column((sf_eur_usd_h1['highBid'] + sf_eur_usd_h1['highAsk']) /2 , 'high')
sf_eur_usd_h1 = sf_eur_usd_h1.add_column((sf_eur_usd_h1['lowBid'] + sf_eur_usd_h1['lowAsk']) /2 , 'low')
sf_eur_usd_h1 = sf_eur_usd_h1.add_column((sf_eur_usd_h1['closeBid'] + sf_eur_usd_h1['closeAsk']) /2 , 'close')
# Raw bid/ask columns (and the 'complete' flag) are no longer needed.
sf_eur_usd_h1 = sf_eur_usd_h1.remove_columns(['openBid','openAsk','highBid','highAsk','lowBid','lowAsk','closeBid','closeAsk','complete'])

In [5]:
# Ensure chronological order before any lag/window features are computed.
sf_eur_usd_h1 = sf_eur_usd_h1.sort(['year','month','day','hour','minute','second'])

In [6]:
# Sanity check: count hourly bars with "normal" (>1000) traded volume.
sf_eur_usd_h1[sf_eur_usd_h1['volume'] >1000].shape


Out[6]:
(40595, 13)

In [7]:
#sf_eur_usd_h1 = sf_eur_usd_h1[sf_eur_usd_h1['volume'] >1000]
# NOTE(review): this binds a second name to the same SFrame, not a copy —
# presumably in-place column updates below are meant to affect both; confirm.
sf_eur_usd_h1_tmp = sf_eur_usd_h1

In [8]:
def reconstruc_volume(min_volume=5):
    """Replace implausibly small volume readings with the mean 'normal' volume.

    Hours whose volume is <= min_volume (holidays, feed gaps) would distort
    the volume-weighted features built later, so they are overwritten with
    the mean volume of all hours above the threshold.  Mutates
    sf_eur_usd_h1_tmp in place.

    min_volume: cutoff below which a reading is treated as bogus
                (default 5, the previously hard-coded constant).
    """
    normal_volume_mean = sf_eur_usd_h1_tmp[sf_eur_usd_h1_tmp['volume'] > min_volume]['volume'].mean()
    volume_list = sf_eur_usd_h1_tmp['volume']
    new_volume_list = volume_list.apply(lambda x: normal_volume_mean if x <= min_volume else x)
    # Cast back to int: the mean is a float but the column holds integer counts.
    sf_eur_usd_h1_tmp['volume'] = new_volume_list.astype(int)
reconstruc_volume()
sf_eur_usd_h1= sf_eur_usd_h1_tmp

In [9]:
# Pre-multiply each price column by volume so the daily groupby-SUM below
# can form volume-weighted prices: vwap = sum(price*volume) / sum(volume).
for price_col in ('open', 'high', 'low', 'close'):
    sf_eur_usd_h1_tmp = sf_eur_usd_h1_tmp.add_column(
        sf_eur_usd_h1_tmp[price_col] * sf_eur_usd_h1_tmp['volume'], price_col + '_volume')

In [10]:
# Aggregate hourly bars to daily bars: sum volume and the price*volume
# products (the weighted daily prices are derived from these sums below).
sf_eur_usd_d1_tmp = sf_eur_usd_h1_tmp.groupby(['day','month','year','date'],
                                              [gl.aggregate.SUM('open_volume'),
                                               gl.aggregate.SUM('high_volume'),
                                               gl.aggregate.SUM('low_volume'),
                                               gl.aggregate.SUM('close_volume'),
                                               gl.aggregate.SUM('volume'),
                                              ]
                                             )

In [11]:
# groupby does not preserve row order; restore chronological order.
sf_eur_usd_d1_tmp = sf_eur_usd_d1_tmp.sort(['year','month','day'])

In [12]:
# Convert the per-day SUM columns into volume-weighted average prices:
# price = sum(price * volume) / sum(volume).
for price_col in ('open', 'high', 'low', 'close'):
    sf_eur_usd_d1_tmp = sf_eur_usd_d1_tmp.add_column(
        sf_eur_usd_d1_tmp['Sum of %s_volume' % price_col] / sf_eur_usd_d1_tmp['Sum of volume'], price_col)
sf_eur_usd_d1_tmp = sf_eur_usd_d1_tmp.rename({'Sum of volume': 'volume'})
# Drop the intermediate SUM columns now that the weighted prices exist.
sf_eur_usd_d1_tmp = sf_eur_usd_d1_tmp.remove_columns(
    ['Sum of %s_volume' % c for c in ('open', 'high', 'low', 'close')])

In [13]:
# From here on, work with the daily (D1) bars.
sf_eur_usd_d1 = sf_eur_usd_d1_tmp

# Use D1 to calculate features, h1 also could be used to calculate features with same logic
sf_eur_usd_features = sf_eur_usd_d1 
#sf_eur_usd_features = sf_eur_usd_h1

In [14]:
pip_unit = 10000  # EUR/USD is quoted to 4 decimal places; 1 pip = 1/10000

close_list = sf_eur_usd_features ['close']

def close_minus_n_time_unit(time_unit):
    """Return the close series lagged by `time_unit` rows.

    The first `time_unit` rows are front-padded with the series' own leading
    values so the result keeps the same length as the original.

    Raises ValueError for time_unit < 1 (previously this printed a message
    and then raised NameError by returning an unbound local).
    """
    if time_unit < 1:
        raise ValueError("close_minus_n_time_unit: time_unit must be >= 1, got %r" % time_unit)
    close_list = sf_eur_usd_features['close']
    return close_list[0:time_unit].append(close_list[:-1 * time_unit])

close_minus_1 = close_minus_n_time_unit(1)
close_minus_2 = close_minus_n_time_unit(2)
close_minus_3 = close_minus_n_time_unit(3)

In [15]:
# One-period price change expressed in pips (1 pip = 1/10000 of the quote).
sf_eur_usd_features = sf_eur_usd_features.add_column((close_list - close_minus_1) *pip_unit ,'pips')

In [16]:
# Label construction: the *next* row's pips becomes this row's target.
# The final row has no successor, so it is padded with 0.0.
predict_pips_list = sf_eur_usd_features['pips'][1:].append(gl.SArray([0.0]))
sf_eur_usd_features = sf_eur_usd_features.add_column(predict_pips_list,'predict pips')

In [17]:
# Binary classification target: 'sell' if the next move is negative, else 'buy'.
sf_eur_usd_features = sf_eur_usd_features.add_column(predict_pips_list.apply(lambda x: 'sell' if x<0 else 'buy'),'predict action')

In [18]:
def calculate_momentum(time_unit):
    """n-period momentum: close[t] - close[t - time_unit] (front rows padded)."""
    current_close = sf_eur_usd_features['close']
    lagged_close = close_minus_n_time_unit(time_unit)
    return current_close - lagged_close

def calculate_roc(time_unit):
    """n-period rate of change: (close[t] - close[t-n]) / close[t-n]."""
    current_close = sf_eur_usd_features['close']
    lagged_close = close_minus_n_time_unit(time_unit)
    return (current_close - lagged_close) / lagged_close

In [19]:
# Momentum and rate-of-change features for several look-back lengths
# (was 16 copy-pasted add_column lines).
for n in (3, 4, 5, 8, 9, 10):
    sf_eur_usd_features = sf_eur_usd_features.add_column(calculate_momentum(n), 'momentum_%d' % n)
for n in (3, 4, 5, 8, 9, 10, 12, 13, 14, 15):
    sf_eur_usd_features = sf_eur_usd_features.add_column(calculate_roc(n), 'roc_%d' % n)

In [20]:
def calculate_fast_k_d(time_unit):
    """Fast stochastic oscillator %K and %D series.

    Fast %K = 100 * (C - L(n)) / (H(n) - L(n)), where L(n)/H(n) are the
    lowest low / highest high over the trailing window; Fast %D is a
    3-period simple moving average of %K.
    http://investexcel.net/how-to-calculate-the-stochastic-oscillator/

    NOTE(review): the slice [max(0, i-time_unit) : i+1] spans up to
    time_unit + 1 rows, not time_unit — confirm the off-by-one is intended
    (calculate_william_r and calculate_avg_price use the same convention,
    so it is at least internally consistent).
    """
    high_python_list = list(sf_eur_usd_features ['high'])
    low_python_list = list(sf_eur_usd_features ['low'])
    high_high_list =[]
    low_low_list=[]
    # Relies on the module-level close_list defined earlier (same length
    # as the high/low series).
    for i in range(close_list.size()):
        start = max(0,i-time_unit)
        high_high = max(high_python_list[start:i+1])
        high_high_list.append(high_high)
        low_low = min(low_python_list[start:i+1])
        low_low_list.append(low_low)
    high_high_list_gl = gl.SArray(high_high_list)
    low_low_list_gl = gl.SArray(low_low_list)
    fast_k = 100 *(close_list - low_low_list_gl) / (high_high_list_gl - low_low_list_gl)
    # Where the window's high equals its low the division yields NaN; use 0.
    fast_k = fast_k.fillna(0)
    # %K lagged by 1 and 2 rows (front-padded with its own leading values).
    fast_k_1 = fast_k[0:1].append(fast_k[:-1])
    fast_k_2 = fast_k[0:2].append(fast_k[:-2])
    fast_d = (fast_k + fast_k_1 + fast_k_2 ) /3
    return fast_k, fast_d

In [21]:
# Fast %K / %D stochastic features for several look-back lengths
# (was 18 copy-pasted lines; the per-n tuples were not reused elsewhere).
for n in (3, 4, 5, 8, 9, 10):
    fast_k, fast_d = calculate_fast_k_d(n)
    sf_eur_usd_features = sf_eur_usd_features.add_column(fast_k, 'fast_k_%d' % n)
    sf_eur_usd_features = sf_eur_usd_features.add_column(fast_d, 'fast_d_%d' % n)

In [22]:
#calculate average price list

def calculate_avg_price(price_list,time_unit):
    price_list_python = list(price_list)
    avg_price_list_python=[]
    for i in range(price_list.size()):
        start = max(0, i-time_unit)
        avg_price = sum(price_list_python[start:i+1])/(i-start +1.0)
        avg_price_list_python.append(avg_price)
    return gl.SArray(avg_price_list_python)

In [23]:
#Weighted Closing Price
def calculate_weighted_close_price(time_unit):
    close_list = sf_eur_usd_features ['close']
    avg_close_list = calculate_avg_price(close_list,15)
    high_list = sf_eur_usd_features ['high']
    avg_high_list = calculate_avg_price(high_list,15)
    low_list = sf_eur_usd_features ['low']
    avg_low_list = calculate_avg_price(low_list,15)
    weighted_close_price_list = (avg_close_list * 2 + avg_high_list + avg_low_list) / 4.0
    return weighted_close_price_list
    
sf_eur_usd_features = sf_eur_usd_features.add_column(calculate_weighted_close_price(15),'weighted close price_15')

In [24]:
def calculate_william_r(time_unit):
    """Williams %R over a trailing look-back window.

    %R = (Highest High - Close) / (Highest High - Lowest Low) * -100,
    where Highest High / Lowest Low are taken over the window.
    http://stockcharts.com/school/doku.php?id=chart_school:technical_indicators:williams_r

    NOTE(review): the slice [max(0, i-time_unit) : i+1] spans up to
    time_unit + 1 rows — same convention as calculate_fast_k_d; confirm
    the off-by-one is intended.
    """
    high_python_list = list(sf_eur_usd_features ['high'])
    low_python_list = list(sf_eur_usd_features ['low'])
    high_high_list =[]
    low_low_list=[]
    # Relies on the module-level close_list defined earlier.
    for i in range(close_list.size()):
        start = max(0,i-time_unit)
        high_high = max(high_python_list[start:i+1])
        high_high_list.append(high_high)
        low_low = min(low_python_list[start:i+1])
        low_low_list.append(low_low)
    high_high_list_gl = gl.SArray(high_high_list)
    low_low_list_gl = gl.SArray(low_low_list)
    
    william_r = -100 *(high_high_list_gl - close_list ) / (high_high_list_gl - low_low_list_gl)
    # NaN where the window's high equals its low; treat as 0.
    william_r = william_r.fillna(0)
    return william_r

# Williams %R features for several look-back lengths.
for n in (6, 7, 8, 9, 10):
    sf_eur_usd_features = sf_eur_usd_features.add_column(calculate_william_r(n), 'william_r_%d' % n)

In [25]:
def calculate_william_a_d():
    """Williams Accumulation/Distribution-style series.

    true_high/true_low extend today's high/low with yesterday's values.
    Today's A/D contribution is close - true_low on up days,
    close - true_high on down days, and 0 on unchanged days.

    NOTE(review): the classic Williams A/D is a *cumulative* running sum;
    this version only adds today's and yesterday's contributions — confirm
    the 2-term variant is intended.
    """
    high_list = sf_eur_usd_features['high']
    high_list_1 = high_list[0:1].append(high_list[:-1])
    low_list = sf_eur_usd_features['low']
    low_list_1 = low_list[0:1].append(low_list[:-1])
    high_sf = gl.SFrame({'now':high_list,'yesterday':high_list_1})
    low_sf = gl.SFrame({'now':low_list,'yesterday':low_list_1})
    true_high = high_sf.apply(lambda x: max(x['now'],x['yesterday']))
    true_low = low_sf.apply(lambda x: min(x['now'],x['yesterday']))
    close_list = sf_eur_usd_features['close']
    close_list_1 = close_list[0:1].append(close_list[:-1])
    close_sf = gl.SFrame({'now_close':close_list,
                          'yesterday_close':close_list_1, 
                          'true_high':true_high,
                          'true_low':true_low})
    today_a_d = close_sf.apply(
                lambda x: ( (x['now_close'] - x['true_low'])  if (x['now_close'] - x['yesterday_close']) > 0 else 
                            (x['now_close'] - x['true_high']) if (x['now_close'] - x['yesterday_close']) < 0 else
                            0)
                )
    # Sum today's contribution with yesterday's (front-padded) contribution.
    today_a_d_1 = today_a_d[0:1].append(today_a_d[:-1])
    william_a_d = today_a_d + today_a_d_1
    return william_a_d

In [26]:
# Attach the Williams A/D series computed above as a feature column.
sf_eur_usd_features = sf_eur_usd_features.add_column(calculate_william_a_d(),'william_a_d')

In [27]:
def calculate_a_d_line(time_unit):
    """Accumulation/Distribution line summed over a trailing window.

    Each row's Close Location Value (CLV) is scaled by volume, then summed
    over the slice [max(0, i - time_unit), i].
    http://www.metastock.com/Customer/Resources/TAAZ/?c=3&p=27
    """
    highs = sf_eur_usd_features['high']
    lows = sf_eur_usd_features['low']
    closes = sf_eur_usd_features['close']
    volumes = sf_eur_usd_features['volume']
    # CLV * volume; bars where high == low divide by zero -> NaN -> 0.
    clv = ((closes - lows) - (highs - closes)) / (highs - lows) * volumes
    clv = clv.fillna(0)
    clv_values = list(clv)
    window_sums = []
    for idx in range(closes.size()):
        lo = max(0, idx - time_unit)
        window_sums.append(sum(clv_values[lo:idx + 1]))
    return gl.SArray(window_sums)

In [28]:
# Accumulation/Distribution oscillator over windows of 1..5 periods.
for n in range(1, 6):
    sf_eur_usd_features = sf_eur_usd_features.add_column(calculate_a_d_line(n), 'adsoc_%d' % n)

In [29]:
#sf_eur_usd_features.show()

In [30]:
def calculate_ema(time_unit):
    """Exponential moving average of the close series.

    Warm-up (first time_unit rows) uses the simple average of all rows seen
    so far; afterwards the usual EMA recursion applies with smoothing
    factor 2 / (time_unit + 1).
    http://investexcel.net/how-to-calculate-macd-in-excel/
    """
    prices = list(sf_eur_usd_features['close'])
    alpha = 2.0 / (time_unit + 1.0)
    ema_values = []
    for idx, price in enumerate(prices):
        if idx < time_unit:
            # Not enough history yet: fall back to the cumulative mean.
            ema_values.append(sum(prices[:idx + 1]) / float(idx + 1))
        else:
            ema_values.append(price * alpha + ema_values[idx - 1] * (1.0 - alpha))
    return gl.SArray(ema_values)

# MACD line: EMA(12) - EMA(26) of the volume-weighted daily close.
macd = calculate_ema(12) - calculate_ema(26)
sf_eur_usd_features = sf_eur_usd_features.add_column(macd,'macd')

In [31]:
def calculate_cci_20():
    """20-period Commodity Channel Index (CCI), truncated to int.

    CCI = (TP - SMA(TP, 20)) / (0.015 * mean deviation), where TP is the
    typical price (high + low + close) / 3 and the mean deviation is the
    average absolute difference between each typical price in the window
    and the window's SMA.

    Bug fix: the mean deviation was previously computed over the running
    *SMA series* instead of over the typical prices themselves, which is
    not the CCI definition.
    """
    high_list = sf_eur_usd_features['high']
    low_list = sf_eur_usd_features['low']
    close_list = sf_eur_usd_features['close']
    typical_price_list = (high_list + low_list + close_list) /3.0
    typical_price_list_python  = list(typical_price_list)
    cci_list_python =[]
    for i in range(close_list.size()):
        start = max(0, i-19)  # trailing window of up to 20 rows
        window = typical_price_list_python[start:i+1]
        sma_typical_price = sum(window) / float(len(window))
        mean_deviation = sum(
            abs(tp - sma_typical_price) for tp in window
        ) / float(len(window))
        if mean_deviation == 0.0:
            # Flat window: avoid division by zero; define CCI as 0.
            cci = 0.0
        else:
            cci = ( (typical_price_list_python[i] - sma_typical_price ) /
               (0.015 * mean_deviation) )
        cci_list_python.append(cci)
    # Truncate to int, matching the original feature column's dtype.
    return gl.SArray(cci_list_python).astype(int)

sf_eur_usd_features = sf_eur_usd_features.add_column(calculate_cci_20(),'cci_20')

In [32]:
def calculate_Bollinger_Bands_20():
    """20-period Bollinger-style bands around the close SMA.

    Returns (lower_band, upper_band) = SMA -/+ 2 * deviation.

    NOTE(review): the "std" computed here is the mean *absolute* deviation
    from the window SMA, not the standard deviation used in classic
    Bollinger Bands — confirm this is intentional.
    """
    close_list = sf_eur_usd_features['close']
    close_list_python = list(close_list)
    std_close_list_python = []
    sma_list_python = []
    for i in range(close_list.size()):
        start = max(0, i-19)
        sma_close = sum(close_list_python[start:i+1])/ (i-start +1.0)
        std_close = sum(
            [abs(tmp_close_price - sma_close ) 
                for tmp_close_price in close_list_python[start:i+1] 
            ]) /(i-start +1.0)
        std_close_list_python.append(std_close)
        sma_list_python.append(sma_close)
    sma_list = gl.SArray(sma_list_python)
    std_close_list = gl.SArray(std_close_list_python)
    return (sma_list - std_close_list *2), (sma_list + std_close_list *2)

# Attach the lower/upper band columns (function returns (down, up)).
bb_list = calculate_Bollinger_Bands_20()
sf_eur_usd_features = sf_eur_usd_features.add_column(bb_list[0],'Bollinger_Bands_20_down')
sf_eur_usd_features = sf_eur_usd_features.add_column(bb_list[1],'Bollinger_Bands_20_up')

In [33]:
#sf_eur_usd_features[['Bollinger_Bands_20_down','Bollinger_Bands_20_up','close']]

In [34]:
def calculate_heikin_ashi(time_unit):
    """Heikin-Ashi style candles built from smoothed OHLC series.

    Each OHLC series is first replaced by its trailing moving average
    (calculate_avg_price); the Heikin-Ashi recurrences are then applied:
      xClose = (open + high + low + close) / 4
      xOpen  = (prev open + prev close) / 2
      xHigh  = max(high, xOpen, xClose)
      xLow   = min(low, xOpen, xClose)
    http://www.investopedia.com/articles/technical/04/092204.asp

    Returns (xopen, xhigh, xlow, xclose) as SArrays.
    """
    high_list = sf_eur_usd_features['high']
    high_list_avg =  calculate_avg_price(high_list,time_unit)
    
    low_list = sf_eur_usd_features['low']
    low_list_avg =  calculate_avg_price(low_list,time_unit)
    
    close_list = sf_eur_usd_features['close']
    close_list_avg =  calculate_avg_price(close_list,time_unit)
    # Lagged (previous-row) series, front-padded with the first value.
    close_list_avg_1 = close_list_avg[0:1].append(close_list_avg[:-1])    
    
    open_list = sf_eur_usd_features['open']
    open_list_avg =  calculate_avg_price(open_list,time_unit)
    open_list_avg_1 = open_list_avg[0:1].append(open_list_avg[:-1])
    
    xclose_list = (open_list_avg + high_list_avg + low_list_avg + close_list_avg)/4.0
    
    xopen_list = (open_list_avg_1 + close_list_avg_1) /2.0
    
    sf_heikin_ashi = gl.SFrame({
                                'high': high_list_avg,
                                'low': low_list_avg,
                                'xopen': xopen_list,
                                'xclose': xclose_list
                                 })
    #(xhigh_list,xlow_list) = sf_heikin_ashi.apply(
    #    lambda x: (max(x['high'],x['xopen'],x['xclose']), min(x['low'],x['xopen'],x['xclose'])))
    xhigh_list = sf_heikin_ashi.apply(
        lambda x: max(x['high'],x['xopen'],x['xclose']) )
    xlow_list = sf_heikin_ashi.apply(
        lambda x: min(x['low'],x['xopen'],x['xclose']) )
    return xopen_list, xhigh_list, xlow_list, xclose_list

# 15-period smoothed Heikin-Ashi candles.
heikin_ashi_lists_15 =calculate_heikin_ashi(15)

In [35]:
# Attach the four Heikin-Ashi series (tuple order: open, high, low, close).
for idx, part in enumerate(('open', 'high', 'low', 'close')):
    sf_eur_usd_features = sf_eur_usd_features.add_column(heikin_ashi_lists_15[idx], 'heikin_ashi_' + part)

In [36]:
#2day high/low average
# Simple 1- and 2-day averages of daily highs/lows; the *_1 series are the
# same columns lagged one row (front-padded with the first value).
high_list = sf_eur_usd_features['high']
high_list_1 = high_list[0:1].append(high_list[:-1])
low_list = sf_eur_usd_features['low']
low_list_1 = low_list[0:1].append(low_list[:-1])
high_low_avg_2day = ( high_list + high_list_1 + low_list + low_list_1 ) /4.0
high_avg_2day = ( high_list + high_list_1) /2.0
low_avg_2day = ( low_list + low_list_1) /2.0
high_low_avg_1day = ( high_list + low_list ) /2.0

sf_eur_usd_features = sf_eur_usd_features.add_column(high_low_avg_2day,'high_low_avg_2day')
sf_eur_usd_features = sf_eur_usd_features.add_column(high_low_avg_1day,'high_low_avg_1day')
sf_eur_usd_features = sf_eur_usd_features.add_column(high_avg_2day,'high_avg_2day')
sf_eur_usd_features = sf_eur_usd_features.add_column(low_avg_2day,'low_avg_2day')

In [37]:
def calculate_slope(time_unit):
    """Average per-period slope of the close over the last time_unit rows:
    (close[t] - close[t - time_unit]) / time_unit.
    """
    close_list = sf_eur_usd_features['close']
    # Reuse the shared lag helper instead of duplicating its front-padding
    # logic inline (identical construction).
    close_list_n = close_minus_n_time_unit(time_unit)
    slope_list = (close_list - close_list_n) / time_unit
    return slope_list

# Close-slope features for several look-back lengths.
for n in (3, 4, 5, 8, 10, 12, 15, 20, 25, 30):
    sf_eur_usd_features = sf_eur_usd_features.add_column(calculate_slope(n), 'close_slope_%d' % n)

In [38]:
import math

def calculate_garch_1_1():
    """GARCH(1,1) conditional-variance features of day-over-day close changes.

    Follows the spreadsheet construction at http://investexcel.net/garch-excel/:
    residual_t = close_t - close_{t-1}, and
        var_t = w + alpha * residual_{t-1}^2 + beta * var_{t-1},
    seeded with the unconditional variance of the close series.

    Returns (per-observation Gaussian log-likelihood series,
             sqrt-of-variance i.e. volatility series).

    NOTE(review): w/alpha/beta look like parameters fitted offline (e.g. via
    the solver in the referenced spreadsheet) — confirm their provenance.
    """
    #http://investexcel.net/garch-excel/
    close_list = sf_eur_usd_features['close']
    close_list_1 = close_list[0:1].append(close_list[:-1])
    Residual_list =  close_list - close_list_1
    suqred_Residual_list = Residual_list * Residual_list
    # Squared residuals lagged one row (front-padded with the first value).
    lagged_1_suqred_Residual_list = suqred_Residual_list[0:1].append(suqred_Residual_list[:-1])
    unconditional_variance = close_list.var()
    # Pre-fitted GARCH(1,1) parameters (omega, alpha, beta).
    w = 0.00000397106501352437
    alpha = 0.0824292201426092
    beta = 0.874056639703639
    conditional_variance_list =[]
    sqrt_conditional_variance_list =[]
    log_like_conditional_variance_list =[]
    for i in range(close_list.size()):
        if i==0:
            # Seed the recursion with the unconditional variance.
            conditional_variance = unconditional_variance
        else:
            conditional_variance = w + alpha*lagged_1_suqred_Residual_list[i] + beta*conditional_variance_list[i-1]
        
        conditional_variance_list.append(conditional_variance)
        
        sqrt_conditional_variance_list.append(
            math.sqrt(conditional_variance)
        )
        
        # Gaussian log-likelihood of the residual under the current variance;
        # 3.1415927 approximates pi.
        log_like_conditional_variance_list.append(
            math.log(
                (1.0/math.sqrt(2*3.1415927*conditional_variance))*
                  math.exp(-0.5*suqred_Residual_list[i]/conditional_variance)
            )
        )
    return gl.SArray(log_like_conditional_variance_list), gl.SArray(sqrt_conditional_variance_list)

# Attach the GARCH(1,1) log-likelihood and volatility (sqrt variance) series.
garch_1_1_list = calculate_garch_1_1()
sf_eur_usd_features = sf_eur_usd_features.add_column(garch_1_1_list[0],'garch_1_1_log_like')
sf_eur_usd_features = sf_eur_usd_features.add_column(garch_1_1_list[1],'garch_1_1_sqrt')

In [39]:
import numpy as np
def calculate_fft(time_unit):
    """Trailing-window FFT features of the close series.

    For each row with at least time_unit rows of history (inclusive), runs
    an FFT over the last time_unit closes and keeps only the lowest-frequency
    coefficients: a0 (DC term) and the real/imaginary parts of the first two
    harmonics (a1, b1, a2, b2).  Rows without a full window get 0.0 for all
    five features.

    Raises ValueError if time_unit < 5 (previously printed a message and
    called exit(-1), which would kill the interpreter instead of signalling
    the caller).
    """
    if time_unit < 5:
        raise ValueError("calculate_fft: time_unit must be >= 5, got %r" % time_unit)
    close_list = sf_eur_usd_features['close']
    close_list_python = list(close_list)
    fft_a0 =[]
    fft_a1 =[]
    fft_b1 =[]
    fft_a2 =[]
    fft_b2 =[]
    for i in range(close_list.size()):
        if i-time_unit +1 < 0:
            # Not enough history for a full window yet.
            a0=a1=b1=a2=b2=0.0
        else:
            start = i-time_unit +1
            fft_source = close_list_python[start:i+1]
            fft_trans = np.fft.fft(fft_source)
            a0 = fft_trans[0].real
            a1 = fft_trans[1].real
            a2 = fft_trans[2].real
            b1 = fft_trans[1].imag
            b2 = fft_trans[2].imag
        fft_a0.append(a0)
        fft_a1.append(a1)
        fft_b1.append(b1)
        fft_a2.append(a2)
        fft_b2.append(b2)
    return gl.SArray(fft_a0),gl.SArray(fft_a1),gl.SArray(fft_b1),gl.SArray(fft_a2),gl.SArray(fft_b2)


# FFT features for several window lengths (was 24 copy-pasted lines; the
# per-window tuples fft_5/fft_10/... were not reused elsewhere).
for n in (5, 10, 20, 30):
    fft_coeffs = calculate_fft(n)
    for idx, part in enumerate(('a0', 'a1', 'b1', 'a2', 'b2')):
        sf_eur_usd_features = sf_eur_usd_features.add_column(fft_coeffs[idx], 'fft_%d_%s' % (n, part))

In [40]:
def generate_time_serials_windows(feature_array,columne_base_name,time_unit):
    """Return an SFrame holding feature_array plus time_unit lagged copies.

    Column '<base>_<i>' is the series shifted down by i rows, with the first
    i rows front-padded by the series' own leading i values.
    """
    sf_ts_window_feature = gl.SFrame({columne_base_name:feature_array})
    for i in range(1,time_unit+1):
        column_name = columne_base_name + '_'+str(i)
        column_data = feature_array[0:i].append(feature_array[:-i])
        # SFrame.add_column mutates sf_ts_window_feature in place here;
        # the returned value is intentionally ignored.
        sf_ts_window_feature.add_column(column_data,name=column_name)
    return sf_ts_window_feature

# Expand each base feature into a 61-column history window (t, t-1..t-60):
# drop the original column, then re-attach it together with its 60 lags.
# (Was 7 copy-pasted three-line stanzas; column order is preserved.)
for base_col in ('momentum_3', 'momentum_4', 'momentum_5',
                 'momentum_8', 'momentum_9', 'momentum_10', 'close'):
    sf_1 = generate_time_serials_windows(sf_eur_usd_features[base_col], base_col, 60)
    sf_eur_usd_features.remove_column(base_col)
    sf_eur_usd_features = sf_eur_usd_features.add_columns(sf_1)

In [52]:
# Expand 'volume' into its 60-lag history window (same pattern as above).
sf_1 = generate_time_serials_windows(sf_eur_usd_features['volume'],'volume',60)
sf_eur_usd_features.remove_column('volume')                            
sf_eur_usd_features = sf_eur_usd_features.add_columns(sf_1)

In [55]:
# Expand 'garch_1_1_log_like' into its 60-lag history window.
sf_1 = generate_time_serials_windows(sf_eur_usd_features['garch_1_1_log_like'],'garch_1_1_log_like',60)
sf_eur_usd_features.remove_column('garch_1_1_log_like')                            
sf_eur_usd_features = sf_eur_usd_features.add_columns(sf_1)

In [57]:
# Expand 'garch_1_1_sqrt' into its 60-lag history window.
sf_1 = generate_time_serials_windows(sf_eur_usd_features['garch_1_1_sqrt'],'garch_1_1_sqrt',60)
sf_eur_usd_features.remove_column('garch_1_1_sqrt')                            
sf_eur_usd_features = sf_eur_usd_features.add_columns(sf_1)

# Expand 'adsoc_1' into its 60-lag history window.
sf_1 = generate_time_serials_windows(sf_eur_usd_features['adsoc_1'],'adsoc_1',60)
sf_eur_usd_features.remove_column('adsoc_1')                            
sf_eur_usd_features = sf_eur_usd_features.add_columns(sf_1)

In [41]:
target_column= 'predict action'
# Full candidate feature list (pre-windowing); superseded by the windowed
# list rebuilt in the final cell before training.
features_columns= ['volume','open','high','low','close',
 'momentum_3','momentum_4','momentum_5','momentum_8','momentum_9','momentum_10',
 'roc_3','roc_4','roc_5','roc_8','roc_9','roc_10','roc_12','roc_13','roc_14','roc_15',
 'fast_k_3','fast_d_3','fast_k_4','fast_d_4','fast_k_5','fast_d_5','fast_k_8','fast_d_8','fast_k_9','fast_d_9','fast_k_10','fast_d_10',
 'weighted close price_15','william_r_6','william_r_7','william_r_8','william_r_9','william_r_10',
 'william_a_d','adsoc_1','adsoc_2','adsoc_3','adsoc_4','adsoc_5',
 'macd','cci_20','Bollinger_Bands_20_down','Bollinger_Bands_20_up',
 'heikin_ashi_open','heikin_ashi_high','heikin_ashi_low','heikin_ashi_close',
 'high_low_avg_2day','high_low_avg_1day','high_avg_2day','low_avg_2day',
 'close_slope_3','close_slope_4','close_slope_5','close_slope_8','close_slope_10','close_slope_12','close_slope_15',
 'close_slope_20','close_slope_25','close_slope_30',
 'garch_1_1_log_like','garch_1_1_sqrt',
 'fft_5_a0','fft_5_a1','fft_5_b1','fft_5_a2','fft_5_b2',
 'fft_10_a0','fft_10_a1','fft_10_b1','fft_10_a2','fft_10_b2',
 'fft_20_a0', 'fft_20_a1','fft_20_b1','fft_20_a2','fft_20_b2',
 'fft_30_a0','fft_30_a1','fft_30_b1','fft_30_a2','fft_30_b2']

In [63]:
# NOTE(review): both assignments below are dead stores — features_columns is
# rebuilt from scratch a few lines further down before it is ever read.
features_columns= ['volume','open','high','low','close']
features_columns= [ 'momentum_3','momentum_4','momentum_5','momentum_8','momentum_9','momentum_10',
 'roc_3','roc_4','roc_5','roc_8','roc_9','roc_10','roc_12','roc_13','roc_14','roc_15']
def generate_feature_column_names(columne_base_name,time_unit):
    column_names =[columne_base_name]
    for i in range(1,time_unit+1):
        column_name = columne_base_name + '_'+str(i)
        column_names.append(column_name)
    return column_names

# Build the full lagged feature list. A single loop over the base
# names replaces the previous nine copy-pasted extend() calls.
lag_window = 60  # must match the window size used when expanding columns
base_feature_names = [
    'momentum_3', 'momentum_4', 'momentum_5',
    'momentum_8', 'momentum_9', 'momentum_10',
    'close',
    # 'volume',    # excluded in the original run
    'garch_1_1_log_like',
    'garch_1_1_sqrt',
    # 'adsoc_1',   # excluded in the original run
]
features_columns = []
for base_name in base_feature_names:
    features_columns.extend(generate_feature_column_names(base_name, lag_window))

model = gl.classifier.create(sf_eur_usd_features,
                             target=target_column,
                             features=features_columns)
# NOTE(review): evaluating on the same SFrame used for training
# overstates accuracy; prefer a held-out test split.
results = model.evaluate(sf_eur_usd_features)


PROGRESS: Creating a validation set from 5 percent of training data. This may take a while.
          You can set ``validation_set=None`` to disable validation tracking.

PROGRESS: The following methods are available for this type of problem.
PROGRESS: LogisticClassifier, SVMClassifier
PROGRESS: The returned model will be chosen according to validation accuracy.
PROGRESS: Logistic regression:
PROGRESS: --------------------------------------------------------
PROGRESS: Number of examples          : 4020
PROGRESS: Number of classes           : 2
PROGRESS: Number of feature columns   : 549
PROGRESS: Number of unpacked features : 549
PROGRESS: Number of coefficients    : 550
PROGRESS: Starting L-BFGS
PROGRESS: --------------------------------------------------------
PROGRESS: +-----------+----------+-----------+--------------+-------------------+---------------------+
PROGRESS: | Iteration | Passes   | Step size | Elapsed Time | Training-accuracy | Validation-accuracy |
PROGRESS: +-----------+----------+-----------+--------------+-------------------+---------------------+
PROGRESS: | 1         | 6        | 0.000011  | 0.139480     | 0.511194          | 0.543689            |
PROGRESS: | 2         | 9        | 5.000000  | 0.232855     | 0.525871          | 0.475728            |
PROGRESS: | 3         | 10       | 5.000000  | 0.279489     | 0.532587          | 0.432039            |
PROGRESS: | 4         | 12       | 1.000000  | 0.348166     | 0.538060          | 0.500000            |
PROGRESS: | 5         | 13       | 1.000000  | 0.395483     | 0.548507          | 0.504854            |
PROGRESS: | 6         | 14       | 1.000000  | 0.443750     | 0.547264          | 0.490291            |
PROGRESS: +-----------+----------+-----------+--------------+-------------------+---------------------+
PROGRESS: SVM:
PROGRESS: --------------------------------------------------------
PROGRESS: Number of examples          : 4020
PROGRESS: Number of classes           : 2
PROGRESS: Number of feature columns   : 549
PROGRESS: Number of unpacked features : 549
PROGRESS: Number of coefficients    : 550
PROGRESS: Starting L-BFGS
PROGRESS: --------------------------------------------------------
PROGRESS: +-----------+----------+-----------+--------------+-------------------+---------------------+
PROGRESS: | Iteration | Passes   | Step size | Elapsed Time | Training-accuracy | Validation-accuracy |
PROGRESS: +-----------+----------+-----------+--------------+-------------------+---------------------+
PROGRESS: | 1         | 7        | 0.000051  | 0.147656     | 0.511194          | 0.543689            |
PROGRESS: | 2         | 12       | 0.124990  | 0.281378     | 0.511194          | 0.543689            |
PROGRESS: | 3         | 13       | 0.124990  | 0.327889     | 0.511194          | 0.543689            |
PROGRESS: | 4         | 14       | 0.124990  | 0.374183     | 0.510945          | 0.543689            |
PROGRESS: | 5         | 15       | 0.124990  | 0.423491     | 0.511194          | 0.543689            |
PROGRESS: | 6         | 16       | 0.124990  | 0.469900     | 0.510697          | 0.543689            |
PROGRESS: +-----------+----------+-----------+--------------+-------------------+---------------------+
PROGRESS: Model selection based on validation accuracy:
PROGRESS: ---------------------------------------------
PROGRESS: LogisticClassifier              : 0.558252
PROGRESS: SVMClassifier                   : 0.519417
PROGRESS: ---------------------------------------------
PROGRESS: Selecting LogisticClassifier based on validation set performance.

In [50]:
results


Out[50]:
{'accuracy': 0.5655466161855183, 'confusion_matrix': Columns:
 	target_label	int
 	predicted_label	int
 	count	int
 
 Rows: 4
 
 Data:
 +--------------+-----------------+-------+
 | target_label | predicted_label | count |
 +--------------+-----------------+-------+
 |      1       |        0        |  1143 |
 |      0       |        0        |  1473 |
 |      0       |        1        |  693  |
 |      1       |        1        |  917  |
 +--------------+-----------------+-------+
 [4 rows x 3 columns]}

In [44]:
valid_len = 15
#test_day = -3 #0 today, -1 yesterday
def predict_profit(start_date_index,end_date_index):#0 today, -1 yesterday
    for test_day in range(start_date_index,end_date_index +1):    
        #train and valid are used for model training with parameter search
        train = sf_eur_usd_features[:test_day -1 - valid_len]
        valid = sf_eur_usd_features[test_day -1 - valid_len:test_day -1]
        
        #Test are for prediction
        if test_day ==0:
            test = sf_eur_usd_features[-1:] #Only one date of data
        else:
            test = sf_eur_usd_features[test_day -1 :test_day] #Only one date of data
            
        params = {'target': target_column,
                  'features': [features_columns]
                 }
        
        ## SVM classifier
        j_svm = gl.random_search.create((train, valid), 
                                 gl.svm_classifier.create, 
                                 params)
        results_svm= j_svm.get_results()
        model_svm_id = results_svm[results_svm['validation_accuracy'].argmax()]['model_id']
        model_svm = j_svm.get_models()[model_svm_id]
        accuracy_svm = gl.SArray([
                results_svm['validation_accuracy'].max()
                ])

        prediction_svm = gl.SArray(model_svm.classify(test)['class'])
        
        ## GBM classifier

        j_gbm = gl.random_search.create((train, valid), 
                                     gl.boosted_trees_classifier.create, 
                                     params)
        results_gbm= j_gbm.get_results()
        model_gbm_id = results_gbm[results_gbm['validation_accuracy'].argmax()]['model_id']
        #print results_gbm
        model_gbm = j_gbm.get_models()[model_gbm_id]
        accuracy_gbm = gl.SArray([
                results_gbm['validation_accuracy'].max()
                ])
        prediction_gbm = gl.SArray(model_gbm.classify(test)['class'])

        ## Logistic classifier

        j_logistic = gl.random_search.create((train, valid), 
                                     gl.logistic_classifier.create, 
                                     params)
        results_logistic= j_logistic.get_results()
        model_logistic_id = results_logistic[results_logistic['validation_accuracy'].argmax()]['model_id']
        model_logistic = j_logistic.get_models()[model_logistic_id]
        accuracy_logistic = gl.SArray([
                results_logistic['validation_accuracy'].max()
                ])
        prediction_logistic = gl.SArray(model_logistic.classify(test)['class'])
        
        ## Neural Network classifier
        j_neuralnet = gl.random_search.create((train, valid), 
                                     gl.neuralnet_classifier.create, 
                                     params)
        results_neuralnet= j_neuralnet.get_results()
        model_neuralnet_id = results_neuralnet[results_neuralnet['validation_accuracy'].argmax()]['model_id']
        model_neuralnet = j_neuralnet.get_models()[model_neuralnet_id]
        accuracy_neuralnet = gl.SArray([
                results_neuralnet['validation_accuracy'].max()
                ])
        #print model_neuralnet
        prediction_neuralnet = gl.SArray(model_neuralnet.classify(test)['class'])
        
        
        test = test.add_column( prediction_svm,'svm_estimate')
        test = test.add_column( accuracy_svm,'svm_accuracy')
        
        test = test.add_column( prediction_gbm,'gbm_estimate')
        test = test.add_column( accuracy_gbm,'gbm_accuracy')
        
        test = test.add_column( prediction_logistic,'logistic_estimate')
        test = test.add_column( accuracy_logistic,'logistic_accuracy')
        
        test = test.add_column( prediction_neuralnet,'neuralnet_estimate')
        test = test.add_column( accuracy_neuralnet,'neuralnet_accuracy')
        
        if test_day == start_date_index: # For the first element
            results_sf = test
        else: #From second element, append it to the results
            results_sf= results_sf.append(test)
        print test_day
        results_sf[
            ['date','predict action',
             'svm_estimate','svm_accuracy',
             'gbm_estimate','gbm_accuracy',
             'logistic_estimate','logistic_accuracy',
             'neuralnet_estimate','neuralnet_accuracy']
        ].show()
    return results_sf

In [45]:
#results_sf =  predict_profit(-300,0)
#results_sf[['date','predict action','svm_estimate','gbm_estimate','logistic_estimate','neuralnet_estimate']].show()

In [46]:
#results_sf[['date','predict action','svm_estimate','gbm_estimate','logistic_estimate','neuralnet_estimate']].show()