In [1]:
import graphlab as gl
In [2]:
# Load the EUR/USD H1 price CSV into an SFrame with the parser's progress
# output suppressed.
# NOTE(review): the '*' in the path is presumably expanded as a glob by
# read_csv -- confirm that only the intended file(s) match.
filename = "./price_EUR_USD_*H1.csv"
sf_eur_usd_h1 = gl.SFrame.read_csv(filename, verbose =False)
In [3]:
import logging
# Raise the 'graphlab' logger threshold so only WARNING and above are emitted.
gl_logger = logging.getLogger('graphlab')
gl_logger.setLevel(logging.WARNING)
In [4]:
# Build string keys identifying the calendar day ('date') and the calendar
# hour ('date-hour') of every bar.
_year_str = sf_eur_usd_h1['year'].astype(str)
_month_str = sf_eur_usd_h1['month'].astype(str)
_day_str = sf_eur_usd_h1['day'].astype(str)
_hour_str = sf_eur_usd_h1['hour'].astype(str)
_date_str = _year_str + '-' + _month_str + '-' + _day_str
sf_eur_usd_h1 = sf_eur_usd_h1.add_column(_date_str, 'date')
sf_eur_usd_h1 = sf_eur_usd_h1.add_column(_date_str + ':' + _hour_str, 'date-hour')

# Collapse each bid/ask pair into one mid-price column (open/high/low/close).
for _price in ('open', 'high', 'low', 'close'):
    _mid_price = (sf_eur_usd_h1[_price + 'Bid'] + sf_eur_usd_h1[_price + 'Ask']) / 2
    sf_eur_usd_h1 = sf_eur_usd_h1.add_column(_mid_price, _price)

# Drop the raw bid/ask quotes and the 'complete' column.
sf_eur_usd_h1 = sf_eur_usd_h1.remove_columns([
    'openBid', 'openAsk',
    'highBid', 'highAsk',
    'lowBid', 'lowAsk',
    'closeBid', 'closeAsk',
    'complete',
])
In [5]:
# Order the bars by their timestamp components so that the positional shifts
# used later for lagged features line up in time.
sf_eur_usd_h1 = sf_eur_usd_h1.sort(['year','month','day','hour','minute','second'])
In [6]:
# Sanity check: shape of the subset of bars whose volume is above 1000.
sf_eur_usd_h1[sf_eur_usd_h1['volume'] >1000].shape
Out[6]:
In [7]:
# The volume > 1000 filter below is disabled, so every bar is kept.
#sf_eur_usd_h1 = sf_eur_usd_h1[sf_eur_usd_h1['volume'] >1000]
# Plain assignment: sf_eur_usd_h1_tmp is a second name for the same SFrame
# object, not a copy.
sf_eur_usd_h1_tmp = sf_eur_usd_h1
In [8]:
def reconstruc_volume():
    """Replace abnormally small volumes with the mean of the normal ones.

    Operates on the module-level ``sf_eur_usd_h1_tmp`` SFrame: every
    ``volume`` value that is <= 5 is replaced by the mean of the values that
    are > 5. The repaired column is cast to ``int`` and written back into
    ``sf_eur_usd_h1_tmp['volume']``. Returns nothing.
    """
    normal_rows = sf_eur_usd_h1_tmp[sf_eur_usd_h1_tmp['volume'] > 5]
    normal_volume_mean = normal_rows['volume'].mean()

    def _repair(volume):
        # Keep normal volumes, substitute the mean for the tiny ones.
        if volume <= 5:
            return normal_volume_mean
        return volume

    repaired = sf_eur_usd_h1_tmp['volume'].apply(_repair)
    sf_eur_usd_h1_tmp['volume'] = repaired.astype(int)
# Repair the low-volume rows, then bind sf_eur_usd_h1 to the repaired frame.
reconstruc_volume()
sf_eur_usd_h1= sf_eur_usd_h1_tmp
In [9]:
# Price * volume for each of the four prices; these products are summed per
# day further down to obtain volume-weighted daily prices.
for _price in ('open', 'high', 'low', 'close'):
    sf_eur_usd_h1_tmp = sf_eur_usd_h1_tmp.add_column(
        sf_eur_usd_h1_tmp[_price] * sf_eur_usd_h1_tmp['volume'],
        _price + '_volume')
In [10]:
# Aggregate the hourly rows into one row per calendar day, summing the
# price*volume products and the volume itself.
_day_keys = ['day', 'month', 'year', 'date']
_daily_sums = [
    gl.aggregate.SUM('open_volume'),
    gl.aggregate.SUM('high_volume'),
    gl.aggregate.SUM('low_volume'),
    gl.aggregate.SUM('close_volume'),
    gl.aggregate.SUM('volume'),
]
sf_eur_usd_d1_tmp = sf_eur_usd_h1_tmp.groupby(_day_keys, _daily_sums)
In [11]:
# Order the daily rows by calendar date; groupby output order is not relied on.
sf_eur_usd_d1_tmp = sf_eur_usd_d1_tmp.sort(['year','month','day'])
In [12]:
# Daily volume-weighted prices: sum(price * volume) / sum(volume).
_total_volume = sf_eur_usd_d1_tmp['Sum of volume']
for _price in ('open', 'high', 'low', 'close'):
    sf_eur_usd_d1_tmp = sf_eur_usd_d1_tmp.add_column(
        sf_eur_usd_d1_tmp['Sum of ' + _price + '_volume'] / _total_volume,
        _price)

# Keep the summed volume under its plain name and drop the helper sums.
sf_eur_usd_d1_tmp = sf_eur_usd_d1_tmp.rename({'Sum of volume': 'volume'})
sf_eur_usd_d1_tmp = sf_eur_usd_d1_tmp.remove_columns([
    'Sum of open_volume',
    'Sum of high_volume',
    'Sum of low_volume',
    'Sum of close_volume',
])
In [13]:
# Bind the finished daily frame to its final name (same object, no copy).
sf_eur_usd_d1 = sf_eur_usd_d1_tmp
# Use D1 to calculate features, h1 also could be used to calculate features with same logic
# All feature code below works on sf_eur_usd_features; switch the assignment
# to the commented-out line to compute the features on hourly data instead.
sf_eur_usd_features = sf_eur_usd_d1
#sf_eur_usd_features = sf_eur_usd_h1
In [14]:
# Multiplier that turns a price difference into pips (one pip = 1/10000).
pip_unit = 10000
# Module-level handle on the close column, used for the 'pips' feature.
close_list = sf_eur_usd_features ['close']
def close_minus_n_time_unit(time_unit):
    """Return the ``close`` column lagged by ``time_unit`` rows.

    The result has the same length as the ``close`` column of the
    module-level ``sf_eur_usd_features``. The first ``time_unit`` positions
    have no earlier row to look at and are filled with the first
    ``time_unit`` close values themselves.

    Args:
        time_unit: number of rows to lag by; must be >= 1.

    Returns:
        SArray of lagged close prices.

    Raises:
        ValueError: if ``time_unit`` is smaller than 1. (Previously a message
            was printed and the function then failed on an unbound local.)
    """
    # Validate first so a bad argument fails with a clear error.
    if time_unit < 1:
        raise ValueError(
            "close_minus_n_time_unit() parameter error! "
            "time_unit must be >= 1, got %r" % (time_unit,))
    close_list = sf_eur_usd_features['close']
    return close_list[0:time_unit].append(close_list[:-1 * time_unit])
# Close prices lagged by one, two and three rows.
close_minus_1 = close_minus_n_time_unit(1)
close_minus_2 = close_minus_n_time_unit(2)
close_minus_3 = close_minus_n_time_unit(3)
In [15]:
# 'pips': change of the close versus the previous row, scaled by pip_unit.
sf_eur_usd_features = sf_eur_usd_features.add_column((close_list - close_minus_1) *pip_unit ,'pips')
In [16]:
# Prediction target: the NEXT row's 'pips', i.e. the column shifted up by one.
# The last row has no successor and is padded with 0.0.
predict_pips_list = sf_eur_usd_features['pips'][1:].append(gl.SArray([0.0]))
sf_eur_usd_features = sf_eur_usd_features.add_column(predict_pips_list,'predict pips')
In [17]:
# Class label derived from the target: 'sell' when the next move is negative,
# 'buy' otherwise (a move of exactly 0, including the padded last row, is
# therefore labelled 'buy').
sf_eur_usd_features = sf_eur_usd_features.add_column(predict_pips_list.apply(lambda x: 'sell' if x<0 else 'buy'),'predict action')
In [18]:
def calculate_momentum(time_unit):
    """Return close minus the close ``time_unit`` rows earlier.

    Reads the ``close`` column of the module-level ``sf_eur_usd_features``
    and uses ``close_minus_n_time_unit`` for the lagged series.
    """
    current_close = sf_eur_usd_features['close']
    return current_close - close_minus_n_time_unit(time_unit)
def calculate_roc(time_unit):
    """Return the rate of change of the close over ``time_unit`` rows.

    Computed as (close - lagged close) / lagged close, where the lagged
    series comes from ``close_minus_n_time_unit``.
    """
    current_close = sf_eur_usd_features['close']
    lagged_close = close_minus_n_time_unit(time_unit)
    change = current_close - lagged_close
    return change / lagged_close
In [19]:
# Momentum features for each look-back length.
for _n in (3, 4, 5, 8, 9, 10):
    sf_eur_usd_features = sf_eur_usd_features.add_column(
        calculate_momentum(_n), 'momentum_%d' % _n)

# Rate-of-change features for each look-back length.
for _n in (3, 4, 5, 8, 9, 10, 12, 13, 14, 15):
    sf_eur_usd_features = sf_eur_usd_features.add_column(
        calculate_roc(_n), 'roc_%d' % _n)
In [20]:
def calculate_fast_k_d(time_unit):
    """Compute the stochastic oscillator Fast %K and Fast %D series.

    Fast %K = 100 * (C - L(n)) / (H(n) - L(n)), where L(n) is the lowest low
    and H(n) the highest high of the look-back window. For row i the window
    covers rows ``max(0, i - time_unit)`` through ``i`` inclusive, so the
    first rows use whatever history is available.
    Fast %D is the mean of Fast %K and its one-row and two-row lags (the lags
    repeat the leading values so the length is preserved).

    Reference: http://investexcel.net/how-to-calculate-the-stochastic-oscillator/

    Args:
        time_unit: how many rows before the current one the window reaches.

    Returns:
        Tuple ``(fast_k, fast_d)`` of SArrays with one value per row of the
        module-level ``sf_eur_usd_features``.
    """
    # Read the close column here instead of relying on a module-level
    # ``close_list`` created in another cell, like the other indicators do.
    close_list = sf_eur_usd_features['close']
    high_python_list = list(sf_eur_usd_features['high'])
    low_python_list = list(sf_eur_usd_features['low'])

    high_high_list = []
    low_low_list = []
    for i in range(len(high_python_list)):
        start = max(0, i - time_unit)
        high_high_list.append(max(high_python_list[start:i + 1]))
        low_low_list.append(min(low_python_list[start:i + 1]))

    high_high_list_gl = gl.SArray(high_high_list)
    low_low_list_gl = gl.SArray(low_low_list)
    fast_k = 100 * (close_list - low_low_list_gl) / (high_high_list_gl - low_low_list_gl)
    # Replace missing values with zero.
    fast_k = fast_k.fillna(0)

    fast_k_1 = fast_k[0:1].append(fast_k[:-1])
    fast_k_2 = fast_k[0:2].append(fast_k[:-2])
    fast_d = (fast_k + fast_k_1 + fast_k_2) / 3
    return fast_k, fast_d
In [21]:
# Fast %K / %D for each look-back length; the (k, d) tuples stay available
# under their fast_k_d_<n> names.
fast_k_d_3 = calculate_fast_k_d(3)
fast_k_d_4 = calculate_fast_k_d(4)
fast_k_d_5 = calculate_fast_k_d(5)
fast_k_d_8 = calculate_fast_k_d(8)
fast_k_d_9 = calculate_fast_k_d(9)
fast_k_d_10 = calculate_fast_k_d(10)

_fast_k_d_by_window = (
    (3, fast_k_d_3),
    (4, fast_k_d_4),
    (5, fast_k_d_5),
    (8, fast_k_d_8),
    (9, fast_k_d_9),
    (10, fast_k_d_10),
)
for _n, _fast_k_d in _fast_k_d_by_window:
    sf_eur_usd_features = sf_eur_usd_features.add_column(_fast_k_d[0], 'fast_k_%d' % _n)
    sf_eur_usd_features = sf_eur_usd_features.add_column(_fast_k_d[1], 'fast_d_%d' % _n)
In [22]:
def calculate_avg_price(price_list, time_unit):
    """Return the trailing mean of ``price_list`` as an SArray.

    For row i the mean is taken over rows ``max(0, i - time_unit)`` through
    ``i`` inclusive, so early rows average over fewer values.

    Args:
        price_list: SArray of prices.
        time_unit: how many rows before the current one the window reaches.
    """
    prices = list(price_list)

    def _trailing_mean(idx):
        first = max(0, idx - time_unit)
        return sum(prices[first:idx + 1]) / (idx - first + 1.0)

    return gl.SArray([_trailing_mean(idx) for idx in range(price_list.size())])
In [23]:
def calculate_weighted_close_price(time_unit):
    """Return the weighted closing price built from trailing averages.

    The result is ``(2 * avg_close + avg_high + avg_low) / 4``, where each
    average is the trailing mean produced by ``calculate_avg_price`` over the
    columns of the module-level ``sf_eur_usd_features``.

    Args:
        time_unit: look-back passed to ``calculate_avg_price``. It is now
            honoured; before, the averages always used a fixed look-back of
            15 whatever was passed in.

    Returns:
        SArray with one weighted close price per row.
    """
    avg_close_list = calculate_avg_price(sf_eur_usd_features['close'], time_unit)
    avg_high_list = calculate_avg_price(sf_eur_usd_features['high'], time_unit)
    avg_low_list = calculate_avg_price(sf_eur_usd_features['low'], time_unit)
    # The close carries twice the weight of the high and of the low.
    return (avg_close_list * 2 + avg_high_list + avg_low_list) / 4.0
# Add the weighted close price feature (called with 15).
sf_eur_usd_features = sf_eur_usd_features.add_column(calculate_weighted_close_price(15),'weighted close price_15')
In [24]:
def calculate_william_r(time_unit):
    """Compute Williams %R.

    %R = (Highest High - Close) / (Highest High - Lowest Low) * -100, where
    the highest high and lowest low are taken over the look-back window. For
    row i the window covers rows ``max(0, i - time_unit)`` through ``i``
    inclusive. Missing values in the result are replaced with 0.

    Reference:
    http://stockcharts.com/school/doku.php?id=chart_school:technical_indicators:williams_r

    Args:
        time_unit: how many rows before the current one the window reaches.

    Returns:
        SArray with one %R value per row of the module-level
        ``sf_eur_usd_features``.
    """
    # Read the close column here instead of relying on a module-level
    # ``close_list`` created in another cell, like the other indicators do.
    close_list = sf_eur_usd_features['close']
    high_python_list = list(sf_eur_usd_features['high'])
    low_python_list = list(sf_eur_usd_features['low'])

    high_high_list = []
    low_low_list = []
    for i in range(len(high_python_list)):
        start = max(0, i - time_unit)
        high_high_list.append(max(high_python_list[start:i + 1]))
        low_low_list.append(min(low_python_list[start:i + 1]))

    high_high_list_gl = gl.SArray(high_high_list)
    low_low_list_gl = gl.SArray(low_low_list)
    william_r = -100 * (high_high_list_gl - close_list) / (high_high_list_gl - low_low_list_gl)
    # Replace missing values with zero.
    william_r = william_r.fillna(0)
    return william_r
# Williams %R features for each look-back length.
for _n in (6, 7, 8, 9, 10):
    sf_eur_usd_features = sf_eur_usd_features.add_column(
        calculate_william_r(_n), 'william_r_%d' % _n)
In [25]:
def calculate_william_a_d():
    """Compute a two-row Williams accumulation/distribution value.

    For each row, "true high" is the larger of the current and previous high
    and "true low" the smaller of the current and previous low. The row's
    A/D is ``close - true_low`` when the close rose versus the previous row,
    ``close - true_high`` when it fell, and 0 when it is unchanged. The
    returned SArray is each row's A/D plus the previous row's A/D.
    """
    def _lag_one(values):
        # Shift down by one row, repeating the first value to keep the length.
        return values[0:1].append(values[:-1])

    high_list = sf_eur_usd_features['high']
    low_list = sf_eur_usd_features['low']
    close_list = sf_eur_usd_features['close']

    high_pairs = gl.SFrame({'now': high_list, 'yesterday': _lag_one(high_list)})
    low_pairs = gl.SFrame({'now': low_list, 'yesterday': _lag_one(low_list)})
    true_high = high_pairs.apply(lambda row: max(row['now'], row['yesterday']))
    true_low = low_pairs.apply(lambda row: min(row['now'], row['yesterday']))

    def _row_a_d(row):
        change = row['now_close'] - row['yesterday_close']
        if change > 0:
            return row['now_close'] - row['true_low']
        if change < 0:
            return row['now_close'] - row['true_high']
        return 0

    close_sf = gl.SFrame({
        'now_close': close_list,
        'yesterday_close': _lag_one(close_list),
        'true_high': true_high,
        'true_low': true_low,
    })
    today_a_d = close_sf.apply(_row_a_d)
    return today_a_d + _lag_one(today_a_d)
In [26]:
# Add the Williams accumulation/distribution feature.
sf_eur_usd_features = sf_eur_usd_features.add_column(calculate_william_a_d(),'william_a_d')
In [27]:
def calculate_a_d_line(time_unit):
    """Return a windowed accumulation/distribution sum.

    Each row's money flow is
    ``((close - low) - (high - close)) / (high - low) * volume`` with missing
    values replaced by 0. The result for row i is the sum of the money flow
    over rows ``max(0, i - time_unit)`` through ``i`` inclusive.

    Reference: http://www.metastock.com/Customer/Resources/TAAZ/?c=3&p=27
    """
    high = sf_eur_usd_features['high']
    low = sf_eur_usd_features['low']
    close = sf_eur_usd_features['close']
    volume = sf_eur_usd_features['volume']

    money_flow = ((close - low) - (high - close)) / (high - low) * volume
    flows = list(money_flow.fillna(0))

    window_sums = [
        sum(flows[max(0, idx - time_unit):idx + 1])
        for idx in range(close.size())
    ]
    return gl.SArray(window_sums)
In [28]:
# Windowed accumulation/distribution sums for look-backs 1 through 5.
for _n in range(1, 6):
    sf_eur_usd_features = sf_eur_usd_features.add_column(
        calculate_a_d_line(_n), 'adsoc_%d' % _n)
In [29]:
#sf_eur_usd_features.show()
In [30]:
def calculate_ema(time_unit):
    """Return the exponential moving average of the close as an SArray.

    The first ``time_unit`` rows are seeded with the plain running mean of
    the closes seen so far; from then on each value is
    ``close * k + previous_ema * (1 - k)`` with ``k = 2 / (time_unit + 1)``.

    Reference: http://investexcel.net/how-to-calculate-macd-in-excel/
    """
    closes = list(sf_eur_usd_features['close'])
    smoothing = 2.0 / (time_unit + 1.0)

    ema_values = []
    for idx, price in enumerate(closes):
        if idx < time_unit:
            # Warm-up: simple average of everything seen so far.
            value = sum(closes[0:idx + 1]) / float(idx + 1)
        else:
            value = price * smoothing + ema_values[idx - 1] * (1.0 - smoothing)
        ema_values.append(value)
    return gl.SArray(ema_values)
# MACD line: EMA(12) of the close minus EMA(26) of the close.
macd = calculate_ema(12) - calculate_ema(26)
sf_eur_usd_features = sf_eur_usd_features.add_column(macd,'macd')
In [31]:
def calculate_cci_20():
    """Compute the 20-row Commodity Channel Index (CCI).

    With typical price TP = (high + low + close) / 3, for each row:
        CCI = (TP - SMA(TP)) / (0.015 * mean deviation)
    where SMA(TP) is the mean of TP over the current row and up to 19
    previous rows, and the mean deviation is the mean of |TP_j - SMA(TP)|
    over those same rows. Rows whose mean deviation is 0 get a CCI of 0.

    Returns:
        SArray of CCI values cast to ``int``, one per row of the module-level
        ``sf_eur_usd_features``.
    """
    high_list = sf_eur_usd_features['high']
    low_list = sf_eur_usd_features['low']
    close_list = sf_eur_usd_features['close']
    typical_prices = list((high_list + low_list + close_list) / 3.0)

    cci_values = []
    for i, typical_price in enumerate(typical_prices):
        start = max(0, i - 19)
        window = typical_prices[start:i + 1]
        count = i - start + 1.0
        sma_typical_price = sum(window) / count
        # Mean deviation must be taken over the typical prices in the window;
        # the earlier code iterated over the list of SMA values by mistake.
        mean_deviation = sum(abs(price - sma_typical_price) for price in window) / count
        if mean_deviation == 0.0:
            cci = 0.0
        else:
            cci = (typical_price - sma_typical_price) / (0.015 * mean_deviation)
        cci_values.append(cci)
    return gl.SArray(cci_values).astype(int)
# Add the CCI feature.
sf_eur_usd_features = sf_eur_usd_features.add_column(calculate_cci_20(),'cci_20')
In [32]:
def calculate_Bollinger_Bands_20():
    """Return the lower and upper 20-row bands around the close's moving mean.

    For each row the centre is the mean of the close over the current row and
    up to 19 previous rows; the band half-width is twice the mean absolute
    deviation of those closes from that mean.
    NOTE(review): the dispersion measure here is the mean absolute deviation,
    not a standard deviation -- confirm this is intended.

    Returns:
        Tuple ``(lower_band, upper_band)`` of SArrays.
    """
    closes = list(sf_eur_usd_features['close'])

    centres = []
    deviations = []
    for idx in range(len(closes)):
        first = max(0, idx - 19)
        window = closes[first:idx + 1]
        count = idx - first + 1.0
        mean_close = sum(window) / count
        mean_abs_dev = sum([abs(price - mean_close) for price in window]) / count
        deviations.append(mean_abs_dev)
        centres.append(mean_close)

    sma_list = gl.SArray(centres)
    deviation_list = gl.SArray(deviations)
    lower_band = sma_list - deviation_list * 2
    upper_band = sma_list + deviation_list * 2
    return lower_band, upper_band
# bb_list is (lower band, upper band); add both as features.
bb_list = calculate_Bollinger_Bands_20()
sf_eur_usd_features = sf_eur_usd_features.add_column(bb_list[0],'Bollinger_Bands_20_down')
sf_eur_usd_features = sf_eur_usd_features.add_column(bb_list[1],'Bollinger_Bands_20_up')
In [33]:
#sf_eur_usd_features[['Bollinger_Bands_20_down','Bollinger_Bands_20_up','close']]
In [34]:
def calculate_heikin_ashi(time_unit):
    """Compute Heikin-Ashi style candles from trailing price averages.

    Every price column is first smoothed with ``calculate_avg_price`` using
    ``time_unit``. Then:
        xclose = (open + high + low + close) / 4      (smoothed values)
        xopen  = (previous open + previous close) / 2 (smoothed, lagged one row)
        xhigh  = max(high, xopen, xclose)
        xlow   = min(low, xopen, xclose)

    Reference: http://www.investopedia.com/articles/technical/04/092204.asp

    Returns:
        Tuple ``(xopen, xhigh, xlow, xclose)`` of SArrays.
    """
    def _lag_one(values):
        # Shift down by one row, repeating the first value to keep the length.
        return values[0:1].append(values[:-1])

    open_avg = calculate_avg_price(sf_eur_usd_features['open'], time_unit)
    high_avg = calculate_avg_price(sf_eur_usd_features['high'], time_unit)
    low_avg = calculate_avg_price(sf_eur_usd_features['low'], time_unit)
    close_avg = calculate_avg_price(sf_eur_usd_features['close'], time_unit)

    xclose_list = (open_avg + high_avg + low_avg + close_avg) / 4.0
    xopen_list = (_lag_one(open_avg) + _lag_one(close_avg)) / 2.0

    candles = gl.SFrame({
        'high': high_avg,
        'low': low_avg,
        'xopen': xopen_list,
        'xclose': xclose_list,
    })
    xhigh_list = candles.apply(
        lambda row: max(row['high'], row['xopen'], row['xclose']))
    xlow_list = candles.apply(
        lambda row: min(row['low'], row['xopen'], row['xclose']))
    return xopen_list, xhigh_list, xlow_list, xclose_list
# Heikin-Ashi series as a tuple (xopen, xhigh, xlow, xclose), look-back 15.
heikin_ashi_lists_15 =calculate_heikin_ashi(15)
In [35]:
# Add the four Heikin-Ashi series in (open, high, low, close) order.
_heikin_ashi_names = (
    'heikin_ashi_open',
    'heikin_ashi_high',
    'heikin_ashi_low',
    'heikin_ashi_close',
)
for _position, _name in enumerate(_heikin_ashi_names):
    sf_eur_usd_features = sf_eur_usd_features.add_column(
        heikin_ashi_lists_15[_position], _name)
In [36]:
# Averages of the high and low over the current and the previous row.
high_list = sf_eur_usd_features['high']
low_list = sf_eur_usd_features['low']
# Previous-row series; the first row is paired with itself.
high_list_1 = high_list[0:1].append(high_list[:-1])
low_list_1 = low_list[0:1].append(low_list[:-1])

high_low_avg_2day = (high_list + high_list_1 + low_list + low_list_1) / 4.0
high_avg_2day = (high_list + high_list_1) / 2.0
low_avg_2day = (low_list + low_list_1) / 2.0
high_low_avg_1day = (high_list + low_list) / 2.0

_two_day_features = (
    ('high_low_avg_2day', high_low_avg_2day),
    ('high_low_avg_1day', high_low_avg_1day),
    ('high_avg_2day', high_avg_2day),
    ('low_avg_2day', low_avg_2day),
)
for _name, _values in _two_day_features:
    sf_eur_usd_features = sf_eur_usd_features.add_column(_values, _name)
In [37]:
def calculate_slope(time_unit):
    """Return the average per-row change of the close over ``time_unit`` rows.

    Computed as (close - close lagged by ``time_unit`` rows) / ``time_unit``;
    the lagged series repeats the first ``time_unit`` closes at its start.
    """
    current_close = sf_eur_usd_features['close']
    lagged_close = current_close[0:time_unit].append(current_close[:-1 * time_unit])
    return (current_close - lagged_close) / time_unit
# Close-slope features for each look-back length.
for _n in (3, 4, 5, 8, 10, 12, 15, 20, 25, 30):
    sf_eur_usd_features = sf_eur_usd_features.add_column(
        calculate_slope(_n), 'close_slope_%d' % _n)
In [38]:
import math
def calculate_garch_1_1():
    """Compute GARCH(1,1) conditional volatility and log-likelihood series.

    The residual of a row is its close minus the previous close (0 for the
    first row). The conditional variance starts at the variance of the close
    column and then follows
        var[i] = w + alpha * residual[i-1]**2 + beta * var[i-1]
    with the fixed coefficients below. For each row the Gaussian
    log-likelihood of its residual under var[i] is also computed.

    Reference: http://investexcel.net/garch-excel/

    Returns:
        Tuple ``(log_likelihood, sqrt_conditional_variance)`` of SArrays.
    """
    close_list = sf_eur_usd_features['close']
    unconditional_variance = close_list.var()
    # Work on a plain Python list: indexing an SArray element by element
    # inside the loop is needlessly slow.
    closes = list(close_list)
    squared_residuals = [0.0] * len(closes)
    for i in range(1, len(closes)):
        residual = closes[i] - closes[i - 1]
        squared_residuals[i] = residual * residual

    # Fixed GARCH(1,1) coefficients.
    w = 0.00000397106501352437
    alpha = 0.0824292201426092
    beta = 0.874056639703639

    conditional_variance_list = []
    sqrt_conditional_variance_list = []
    log_like_conditional_variance_list = []
    for i in range(len(closes)):
        if i == 0:
            conditional_variance = unconditional_variance
        else:
            conditional_variance = (w
                                    + alpha * squared_residuals[i - 1]
                                    + beta * conditional_variance_list[i - 1])
        conditional_variance_list.append(conditional_variance)
        sqrt_conditional_variance_list.append(math.sqrt(conditional_variance))
        # Closed form of log(N(residual; 0, variance)). Taking log(exp(...))
        # literally raises a math domain error once exp() underflows to 0.
        log_like_conditional_variance_list.append(
            -0.5 * math.log(2.0 * math.pi * conditional_variance)
            - 0.5 * squared_residuals[i] / conditional_variance
        )
    return gl.SArray(log_like_conditional_variance_list), gl.SArray(sqrt_conditional_variance_list)
# garch_1_1_list is (log-likelihood series, sqrt of conditional variance).
garch_1_1_list = calculate_garch_1_1()
sf_eur_usd_features = sf_eur_usd_features.add_column(garch_1_1_list[0],'garch_1_1_log_like')
sf_eur_usd_features = sf_eur_usd_features.add_column(garch_1_1_list[1],'garch_1_1_sqrt')
In [39]:
import numpy as np
def calculate_fft(time_unit):
    """Compute low-order FFT coefficients of the close over a sliding window.

    For each row the FFT of the last ``time_unit`` closes (ending at that
    row) is taken and only five numbers are kept: a0 = Re(X[0]),
    a1 = Re(X[1]), b1 = Im(X[1]), a2 = Re(X[2]) and b2 = Im(X[2]).
    Rows that do not yet have ``time_unit`` closes of history get 0.0 for
    all five values.

    Args:
        time_unit: window length in rows; must be at least 5.

    Returns:
        Tuple ``(a0, a1, b1, a2, b2)`` of SArrays, one value per row of the
        module-level ``sf_eur_usd_features``.

    Raises:
        ValueError: if ``time_unit`` is smaller than 5. (Previously the
            function printed a message and called ``exit(-1)``, which takes
            the whole kernel down.)
    """
    if time_unit < 5:
        raise ValueError("calculate_fft: time_unit parameter is too small.")

    close_list_python = list(sf_eur_usd_features['close'])
    fft_a0 = []
    fft_a1 = []
    fft_b1 = []
    fft_a2 = []
    fft_b2 = []
    for i in range(len(close_list_python)):
        start = i - time_unit + 1
        if start < 0:
            # Not enough history for a full window yet.
            a0 = a1 = b1 = a2 = b2 = 0.0
        else:
            fft_trans = np.fft.fft(close_list_python[start:i + 1])
            a0 = fft_trans[0].real
            a1 = fft_trans[1].real
            a2 = fft_trans[2].real
            b1 = fft_trans[1].imag
            b2 = fft_trans[2].imag
        fft_a0.append(a0)
        fft_a1.append(a1)
        fft_b1.append(b1)
        fft_a2.append(a2)
        fft_b2.append(b2)
    return gl.SArray(fft_a0), gl.SArray(fft_a1), gl.SArray(fft_b1), gl.SArray(fft_a2), gl.SArray(fft_b2)
# FFT coefficient tuples (a0, a1, b1, a2, b2) for each window length.
fft_5 = calculate_fft(5)
fft_10 = calculate_fft(10)
fft_20 = calculate_fft(20)
fft_30 = calculate_fft(30)

# Add one column per coefficient, named fft_<window>_<coefficient>.
_fft_coefficient_names = ('a0', 'a1', 'b1', 'a2', 'b2')
for _n, _coefficients in ((5, fft_5), (10, fft_10), (20, fft_20), (30, fft_30)):
    for _position, _coefficient_name in enumerate(_fft_coefficient_names):
        sf_eur_usd_features = sf_eur_usd_features.add_column(
            _coefficients[_position], 'fft_%d_%s' % (_n, _coefficient_name))
In [40]:
def generate_time_serials_windows(feature_array,columne_base_name,time_unit):
    """Build an SFrame holding a feature and its lagged copies.

    The first column is ``feature_array`` itself, named
    ``columne_base_name``. For each lag ``i`` from 1 to ``time_unit`` a
    column ``<columne_base_name>_<i>`` is added that holds the feature
    shifted down by ``i`` rows, with the first ``i`` positions filled by the
    feature's own first ``i`` values.

    Args:
        feature_array: SArray with the feature values.
        columne_base_name: name of the unshifted column and prefix of the
            lagged column names.
        time_unit: number of lagged columns to generate.

    Returns:
        SFrame with ``time_unit + 1`` columns.
    """
    sf_ts_window_feature = gl.SFrame({columne_base_name: feature_array})
    for i in range(1, time_unit + 1):
        column_name = columne_base_name + '_' + str(i)
        column_data = feature_array[0:i].append(feature_array[:-i])
        # Keep the returned frame, as the rest of the notebook does, instead
        # of depending on add_column mutating the frame in place.
        sf_ts_window_feature = sf_ts_window_feature.add_column(column_data, name=column_name)
    return sf_ts_window_feature
# Replace each listed column with itself plus 60 lagged copies.
for _base_name in ('momentum_3', 'momentum_4', 'momentum_5',
                   'momentum_8', 'momentum_9', 'momentum_10',
                   'close'):
    sf_1 = generate_time_serials_windows(sf_eur_usd_features[_base_name], _base_name, 60)
    # The windowed frame already contains the base column, so drop the
    # original first. The result is reassigned, as elsewhere in the notebook,
    # instead of depending on remove_column mutating the frame in place.
    sf_eur_usd_features = sf_eur_usd_features.remove_column(_base_name)
    sf_eur_usd_features = sf_eur_usd_features.add_columns(sf_1)
In [52]:
# Replace 'volume' with itself plus 60 lagged copies.
sf_1 = generate_time_serials_windows(sf_eur_usd_features['volume'], 'volume', 60)
# Reassign the result, as elsewhere in the notebook, instead of depending on
# remove_column mutating the frame in place.
sf_eur_usd_features = sf_eur_usd_features.remove_column('volume')
sf_eur_usd_features = sf_eur_usd_features.add_columns(sf_1)
In [55]:
# Replace 'garch_1_1_log_like' with itself plus 60 lagged copies.
sf_1 = generate_time_serials_windows(sf_eur_usd_features['garch_1_1_log_like'], 'garch_1_1_log_like', 60)
# Reassign the result, as elsewhere in the notebook, instead of depending on
# remove_column mutating the frame in place.
sf_eur_usd_features = sf_eur_usd_features.remove_column('garch_1_1_log_like')
sf_eur_usd_features = sf_eur_usd_features.add_columns(sf_1)
In [57]:
# Replace each listed column with itself plus 60 lagged copies.
for _base_name in ('garch_1_1_sqrt', 'adsoc_1'):
    sf_1 = generate_time_serials_windows(sf_eur_usd_features[_base_name], _base_name, 60)
    # Reassign the result, as elsewhere in the notebook, instead of depending
    # on remove_column mutating the frame in place.
    sf_eur_usd_features = sf_eur_usd_features.remove_column(_base_name)
    sf_eur_usd_features = sf_eur_usd_features.add_columns(sf_1)
In [41]:
# Label column the classifiers are trained to predict.
target_column = 'predict action'

# Catalogue of the engineered feature columns, grouped by indicator family.
features_columns = (
    ['volume', 'open', 'high', 'low', 'close']
    + list('momentum_%d' % n for n in (3, 4, 5, 8, 9, 10))
    + list('roc_%d' % n for n in (3, 4, 5, 8, 9, 10, 12, 13, 14, 15))
    + list('fast_%s_%d' % (kind, n)
           for n in (3, 4, 5, 8, 9, 10) for kind in ('k', 'd'))
    + ['weighted close price_15']
    + list('william_r_%d' % n for n in (6, 7, 8, 9, 10))
    + ['william_a_d']
    + list('adsoc_%d' % n for n in (1, 2, 3, 4, 5))
    + ['macd', 'cci_20', 'Bollinger_Bands_20_down', 'Bollinger_Bands_20_up']
    + ['heikin_ashi_open', 'heikin_ashi_high', 'heikin_ashi_low', 'heikin_ashi_close']
    + ['high_low_avg_2day', 'high_low_avg_1day', 'high_avg_2day', 'low_avg_2day']
    + list('close_slope_%d' % n for n in (3, 4, 5, 8, 10, 12, 15, 20, 25, 30))
    + ['garch_1_1_log_like', 'garch_1_1_sqrt']
    + list('fft_%d_%s' % (n, part)
           for n in (5, 10, 20, 30) for part in ('a0', 'a1', 'b1', 'a2', 'b2'))
)
In [63]:
# Two earlier feature selections: raw prices only, then momentum and
# rate-of-change only. Each assignment replaces the previous value.
features_columns = ['volume', 'open', 'high', 'low', 'close']
features_columns = (
    list('momentum_%d' % n for n in (3, 4, 5, 8, 9, 10))
    + list('roc_%d' % n for n in (3, 4, 5, 8, 9, 10, 12, 13, 14, 15))
)
def generate_feature_column_names(columne_base_name,time_unit):
    """List a base column name followed by its numbered lag names.

    Args:
        columne_base_name: base column name (a string).
        time_unit: number of lagged names to generate.

    Returns:
        ``[base, base_1, base_2, ..., base_<time_unit>]``.
    """
    lagged_names = [
        columne_base_name + '_' + str(lag)
        for lag in range(1, time_unit + 1)
    ]
    return [columne_base_name] + lagged_names
# Final feature selection: each listed base column plus its 60 lagged copies.
# The 'volume' and 'adsoc_1' windows are not included.
features_columns = []
for _base_name in ('momentum_3', 'momentum_4', 'momentum_5',
                   'momentum_8', 'momentum_9', 'momentum_10',
                   'close',
                   'garch_1_1_log_like', 'garch_1_1_sqrt'):
    features_columns.extend(generate_feature_column_names(_base_name, 60))

# Let GraphLab pick and train a classifier on the selected features.
model = gl.classifier.create(
    sf_eur_usd_features,
    target=target_column,
    features=features_columns,
)
# NOTE(review): the model is evaluated on the same SFrame it was trained on.
results = model.evaluate(sf_eur_usd_features)
In [50]:
# Display the metrics returned by model.evaluate().
results
Out[50]:
In [44]:
# Number of rows directly before the test row that are held out as the
# validation set during the parameter search.
valid_len = 15
#test_day = -3 #0 today, -1 yesterday
def _search_and_classify(create_function, train, valid, test, params):
    """Random-search one classifier family and classify ``test`` with the best model.

    Returns a tuple ``(prediction, accuracy)``: the predicted classes for
    ``test`` as an SArray, and a one-element SArray holding the best
    validation accuracy found by the search.
    """
    job = gl.random_search.create((train, valid), create_function, params)
    search_results = job.get_results()
    best_model_id = search_results[search_results['validation_accuracy'].argmax()]['model_id']
    best_model = job.get_models()[best_model_id]
    accuracy = gl.SArray([search_results['validation_accuracy'].max()])
    prediction = gl.SArray(best_model.classify(test)['class'])
    return prediction, accuracy


def predict_profit(start_date_index,end_date_index):#0 today, -1 yesterday
    """Walk forward over a range of days and predict each day's action.

    Day indices count back from the end of the module-level
    ``sf_eur_usd_features``: 0 is the last row, -1 the row before it, and so
    on. For every day in ``[start_date_index, end_date_index]``:

    * the single row of that day is the test set,
    * the ``valid_len`` rows directly before it are the validation set,
    * all earlier rows are the training set,
    * SVM, boosted trees, logistic regression and neural network classifiers
      are each tuned with a random search, and the model with the best
      validation accuracy classifies the test row.

    The test row, extended with ``<name>_estimate`` and ``<name>_accuracy``
    columns for each classifier, is appended to the result. The day index is
    printed after each day and a summary view is shown at the end.

    Args:
        start_date_index: first day to predict (0 = today, -1 = yesterday).
        end_date_index: last day to predict, inclusive.

    Returns:
        SFrame with one row per predicted day.

    Raises:
        ValueError: if ``start_date_index`` is greater than
            ``end_date_index``, i.e. there is nothing to predict.
    """
    if start_date_index > end_date_index:
        raise ValueError(
            "predict_profit: start_date_index (%r) must not be greater than "
            "end_date_index (%r)" % (start_date_index, end_date_index))

    # (column prefix, model factory), in the order the columns are added.
    classifiers = (
        ('svm', gl.svm_classifier.create),
        ('gbm', gl.boosted_trees_classifier.create),
        ('logistic', gl.logistic_classifier.create),
        ('neuralnet', gl.neuralnet_classifier.create),
    )

    results_sf = None
    for test_day in range(start_date_index, end_date_index + 1):
        # train and valid are used for model training with parameter search.
        train = sf_eur_usd_features[:test_day - 1 - valid_len]
        valid = sf_eur_usd_features[test_day - 1 - valid_len:test_day - 1]
        # test holds exactly one row; [-1:0] would be empty, so the last row
        # needs its own slice.
        if test_day == 0:
            test = sf_eur_usd_features[-1:]
        else:
            test = sf_eur_usd_features[test_day - 1:test_day]

        params = {'target': target_column,
                  'features': [features_columns]
                  }

        # Run every search against the unmodified test row before any result
        # column is attached to it.
        outcomes = []
        for name, create_function in classifiers:
            prediction, accuracy = _search_and_classify(
                create_function, train, valid, test, params)
            outcomes.append((name, prediction, accuracy))

        for name, prediction, accuracy in outcomes:
            test = test.add_column(prediction, name + '_estimate')
            test = test.add_column(accuracy, name + '_accuracy')

        if results_sf is None:
            # First predicted day.
            results_sf = test
        else:
            results_sf = results_sf.append(test)
        print(test_day)

    results_sf[
        ['date', 'predict action',
         'svm_estimate', 'svm_accuracy',
         'gbm_estimate', 'gbm_accuracy',
         'logistic_estimate', 'logistic_accuracy',
         'neuralnet_estimate', 'neuralnet_accuracy']
    ].show()
    return results_sf
In [45]:
#results_sf = predict_profit(-300,0)
#results_sf[['date','predict action','svm_estimate','gbm_estimate','logistic_estimate','neuralnet_estimate']].show()
In [46]:
#results_sf[['date','predict action','svm_estimate','gbm_estimate','logistic_estimate','neuralnet_estimate']].show()