In [1]:
import h2o

In [2]:
h2o.__version__


Out[2]:
'3.0.1.3'

In [3]:
import pandas as pd
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
df_eur_usd = pd.read_csv("price_EUR_USD_D1_merge.csv", header=0, index_col=0)
df_eur_usd.columns


Out[4]:
Index([u'Currency_pair', u'Prediction_action', u'Day Open', u'Day Close', u'Day High', u'Day Low', u'Day Average', u'Momentum_3day', u'Momentum_4day', u'Momentum_5day', u'Momentum_8day', u'Momentum_9day', u'Momentum_10day', u'Roc_3day', u'Roc_4day', u'Roc_5day', u'Roc_8day', u'Roc_9day', u'Roc_10day', u'Fast_k_3day', u'Fast_d_3day', u'Fast_k_4day', u'Fast_d_4day', u'Fast_k_5day', u'Fast_d_5day', u'Fast_k_8day', u'Fast_d_8day', u'Fast_k_9day', u'Fast_d_9day', u'Fast_k_10day', u'Fast_d_10day', u'PROC_12day', u'PROC_13day', u'PROC_14day', u'PROC_15day', u'Weighted_Close_Price', u'WILLIAM_A_D', u'ADOSC_1day', u'ADOSC_2day', u'ADOSC_3day', u'ADOSC_4day', u'ADOSC_5day', u'EMA_12Day', u'EMA_26Day', u'MACD', u'CCI', u'BOLLINGER_BANDS_LOW', u'BOLLINGER_BANDS_HIGH', u'HEIKIN_ASHI_XCLOSE', u'HEIKIN_ASHI_XOPEN', u'HEIKIN_ASHI_XHIGH', u'HEIKIN_ASHI_XLOW', u'2DAY_HIGH', u'2DAY_LOW', u'1DAY_HIGH_LOW_AVG', u'2DAY_HIGH_LOW_AVG', u'High_slope_3day', u'High_slope_4day', u'High_slope_5day', u'High_slope_8day', u'High_slope_10day', u'High_slope_12day', u'High_slope_15day', u'High_slope_20day', u'High_slope_25day', u'High_slope_30day', u'Pips', u'Prediction_Pips', u'Volume', u'Active Hour', u'Active Hour Volume'], dtype='object')

In [5]:
#df_eur_usd_h2o = h2o.import_frame(df_eur_usd)
h2o.init()
df_eur_usd_h2o = h2o.import_frame(path="./price_EUR_USD_D1_merge.csv")



No instance found at ip and port: localhost:54321. Trying to start local jar...


JVM stdout: /tmp/tmpnMFyx0/h2o_cyue_started_from_python.out
JVM stderr: /tmp/tmpKvwATr/h2o_cyue_started_from_python.err
Using ice_root: /tmp/tmplAAfHj


Java Version: java version "1.7.0_75"
OpenJDK Runtime Environment (IcedTea 2.5.4) (7u75-2.5.4-1~trusty1)
OpenJDK 64-Bit Server VM (build 24.75-b04, mixed mode)


Starting H2O JVM and connecting: ............... Connection sucessful!
H2O cluster uptime: 1 seconds 725 milliseconds
H2O cluster version: 3.0.1.3
H2O cluster name: H2O_started_from_python
H2O cluster total nodes: 1
H2O cluster total memory: 3.46 GB
H2O cluster total cores: 8
H2O cluster allowed cores: 8
H2O cluster healthy: True
H2O Connection ip: 127.0.0.1
H2O Connection port: 54321
Parse Progress: [##################################################] 100%
Imported ./price_EUR_USD_D1_merge.csv. Parsed 3,393 rows and 72 cols

In [6]:
df_eur_usd_h2o.describe()


Rows: 3,393 Cols: 72

Chunk compression summary:

chunk_type chunk_name count count_percentage size size_percentage
C0L Constant Integers 1 1.388889 80 B 0.014582893
C1 1-Byte Integers 1 1.388889 3.4 KB 0.6308924
C1N 1-Byte Integers (w/o NAs) 13 18.055555 43.9 KB 8.201601
C1S 1-Byte Fractions 10 13.888889 34.0 KB 6.33809
C2S 2-Byte Fractions 28 38.88889 187.9 KB 35.064568
C4 4-Byte Integers 6 8.333334 79.9 KB 14.918299
C4S 4-Byte Fractions 12 16.666668 160.0 KB 29.871597
C8 64-bit Integers 1 1.388889 26.6 KB 4.960371
Frame distribution summary:

size number_of_rows number_of_chunks_per_column number_of_chunks
127.0.0.1:54321 535.7 KB 3393.0 1.0 72.0
mean 535.7 KB 3393.0 1.0 72.0
min 535.7 KB 3393.0 1.0 72.0
max 535.7 KB 3393.0 1.0 72.0
stddev 0 B 0.0 0.0 0.0
total 535.7 KB 3393.0 1.0 72.0
Column-by-Column Summary:

Date Currency_pair Prediction_action Day Open Day Close Day High Day Low Day Average Momentum_3day Momentum_4day Momentum_5day Momentum_8day Momentum_9day Momentum_10day Roc_3day Roc_4day Roc_5day Roc_8day Roc_9day Roc_10day Fast_k_3day Fast_d_3day Fast_k_4day Fast_d_4day Fast_k_5day Fast_d_5day Fast_k_8day Fast_d_8day Fast_k_9day Fast_d_9day Fast_k_10day Fast_d_10day PROC_12day PROC_13day PROC_14day PROC_15day Weighted_Close_Price WILLIAM_A_D ADOSC_1day ADOSC_2day ADOSC_3day ADOSC_4day ADOSC_5day EMA_12Day EMA_26Day MACD CCI BOLLINGER_BANDS_LOW BOLLINGER_BANDS_HIGH HEIKIN_ASHI_XCLOSE HEIKIN_ASHI_XOPEN HEIKIN_ASHI_XHIGH HEIKIN_ASHI_XLOW 2DAY_HIGH 2DAY_LOW 1DAY_HIGH_LOW_AVG 2DAY_HIGH_LOW_AVG High_slope_3day High_slope_4day High_slope_5day High_slope_8day High_slope_10day High_slope_12day High_slope_15day High_slope_20day High_slope_25day High_slope_30day Pips Prediction_Pips Volume Active Hour Active Hour Volume
type time enum enum real real real real real real real real real real real real real real real real real int int int int int int int int int int int int real real real real real real int int int int int real real real real real real real real real real real real real real real real real real real real real real real real real real int int int
mins 1.1046528e+12 0.0 0.0 1.04704 1.04841 1.05058 1.04624 1.04925 -0.06717 -0.07515 -0.08255 -0.10946 -0.11478 -0.11353 -0.05 -0.05 -0.06 -0.07 -0.08 -0.08 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 -0.09 -0.09 -0.09 -0.09 1.04857 -0.03244 -207004.0 -207004.0 -207004.0 -207004.0 -207004.0 1.07153 1.07786 -0.03893 -377.52 1.03792 1.0943 1.04823 1.04773 1.05615 1.04624 1.06197 1.04624 1.57402 1.05179 -15.6888 -14.586 -15.085 -11.2006 -10.4055 -9.1946 -7.3409 -7.324 -7.9 -7.2214 -445.4 -445.4 1.0 0.0 1.0
maxs 1.4380668e+12 0.0 1.0 1.59746 1.59742 1.60389 1.58976 1.59529 0.09612 0.10363 0.10746 0.149 0.16732 0.16849 0.07 0.08 0.08 0.12 0.13 0.13 100.0 100.0 100.0 100.0 100.0 100.0 100.0 100.0 100.0 100.0 100.0 100.0 0.13 0.13 0.13 0.14 1.59506 0.04021 185223.0 185223.0 185223.0 185223.0 185223.0 1.58496 1.57789 0.03453 295.66 1.56658 1.60065 1.59356 1.59442 1.60389 1.58658 1.60389 1.58418 2.39718 1.59284 24.8275 25.259 21.7517 19.1033 18.0373 14.7708 12.2106 8.0395 8.1794 6.1955 417.3 417.3 272386.0 23.0 53617.0
sigma 95799910926.9 0.0 0.500073154428 0.0993138736801 0.0993492984856 0.0995336706348 0.0989684291404 0.0992432183772 0.0132968294257 0.0152689862588 0.0170371692267 0.0213814520456 0.0227410669794 0.0240586401942 0.0103727327852 0.0118129952895 0.0130779473453 0.0162617358148 0.017306836767 0.0183581180772 30.7843335332 25.2075055585 30.9321608659 26.2995848456 31.0852000026 27.1437001934 31.208557902 28.4637919507 31.331586942 28.7958399037 31.4840323528 29.0794201629 0.0201033902637 0.0208983371048 0.0216585199773 0.0225261107494 0.0992482599726 0.00754408843594 28946.8570451 28946.829822 28945.2208407 28945.2200189 28943.9593173 0.0980321042487 0.0963963348941 0.00885542682022 110.558681617 0.0972445078738 0.0994536517443 0.0992201256414 0.0991870318393 0.0994784077374 0.0989192838128 0.0995320936289 0.0987541068149 0.148935133826 0.0991078084268 3.31627867318 3.06447757825 2.8390058017 2.38500025748 2.1950945712 2.03439110975 1.87324123551 1.66955995531 1.52929194029 1.41531560368 61.7398271544 61.7398271544 31380.4989879 4.91957965947 3918.90684396
zero_count 0 3393 1694 0 0 0 0 0 4 4 6 8 10 10 1557 1379 1192 930 871 861 30 0 28 0 27 0 31 0 33 0 35 0 748 752 731 708 0 4 72 73 74 75 76 0 0 12 19 0 0 0 0 0 0 0 0 0 0 1 1 2 1 1 1 2 1 1 1 1 1 0 38 0
missing_count 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

In [7]:
r = df_eur_usd_h2o[0].runif()
df_eur_usd_h2o_train = df_eur_usd_h2o[r<0.8]
df_eur_usd_h2o_valid = df_eur_usd_h2o[r>=0.8]

In [8]:
features = [ u'Day Open', u'Day Close', u'Day High', u'Day Low', u'Day Average', u'Momentum_3day', u'Momentum_4day',
            u'Momentum_5day', u'Momentum_8day', u'Momentum_9day', u'Momentum_10day',
            u'Roc_3day', u'Roc_4day', u'Roc_5day', u'Roc_8day', u'Roc_9day', u'Roc_10day', u'Fast_k_3day', 
            u'Fast_d_3day', u'Fast_k_4day', u'Fast_d_4day', u'Fast_k_5day', u'Fast_d_5day', u'Fast_k_8day', 
            u'Fast_d_8day', u'Fast_k_9day', u'Fast_d_9day', u'Fast_k_10day', u'Fast_d_10day', u'PROC_12day',
            u'PROC_13day', u'PROC_14day', u'PROC_15day', u'Weighted_Close_Price', u'WILLIAM_A_D', u'ADOSC_1day',
            u'ADOSC_2day', u'ADOSC_3day', u'ADOSC_4day', u'ADOSC_5day', u'EMA_12Day', u'EMA_26Day', u'MACD', 
            u'CCI', u'BOLLINGER_BANDS_LOW', u'BOLLINGER_BANDS_HIGH', u'HEIKIN_ASHI_XCLOSE', u'HEIKIN_ASHI_XOPEN', 
            u'HEIKIN_ASHI_XHIGH', u'HEIKIN_ASHI_XLOW', u'2DAY_HIGH', u'2DAY_LOW', u'1DAY_HIGH_LOW_AVG', 
            u'2DAY_HIGH_LOW_AVG', u'High_slope_3day', u'High_slope_4day', u'High_slope_5day', u'High_slope_8day',
            u'High_slope_10day', u'High_slope_12day', u'High_slope_15day', u'High_slope_20day', u'High_slope_25day', 
            u'High_slope_30day', u'Pips',  u'Volume', u'Active Hour', u'Active Hour Volume']

label = [u'Prediction_action']

In [9]:
model_gbm = h2o.gbm(
               y=  df_eur_usd_h2o_train[label],
    validation_y = df_eur_usd_h2o_valid[label],
               x = df_eur_usd_h2o_train[features],
    validation_x = df_eur_usd_h2o_valid[features],
    balance_classes=True,
    ntrees         =50,
    max_depth      =5,
    distribution   ="bernoulli",
    learn_rate     =0.1
)


gbm Model Build Progress: [##################################################] 100%

In [10]:
train_metrics_gbm = model_gbm.model_performance(df_eur_usd_h2o_train)
train_metrics_gbm.show()


ModelMetricsBinomial: gbm
** Reported on test data. **

MSE: 0.130085047517
R^2: 0.47965736504
LogLoss: 0.420820522553
AUC: 0.916562817974
Gini: 0.833125635948

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.473003559639:

buy sell Error Rate
buy 1114.0 267.0 0.1933 (267.0/1381.0)
sell 194.0 1193.0 0.1399 (194.0/1387.0)
Total 1308.0 1460.0 0.3332 (0.3332/2768.0)
Maximum Metrics:

metric threshold value idx
max f1 0.473003559639 0.838075166842 209.0
max f2 0.286754346608 0.899973607812 293.0
max f0point5 0.61918496935 0.852426483854 144.0
max accuracy 0.519411358621 0.834537572254 189.0
max precision 0.940437478446 1.0 0.0
max absolute_MCC 0.519411358621 0.669650924384 189.0
max min_per_class_accuracy 0.503846246777 0.830569574621 196.0

In [11]:
val_metrics_gbm = model_gbm.model_performance(df_eur_usd_h2o_valid)
val_metrics_gbm.show()


ModelMetricsBinomial: gbm
** Reported on test data. **

MSE: 0.197227991945
R^2: 0.211086012602
LogLoss: 0.578944802454
AUC: 0.765375399361
Gini: 0.530750798722

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.363500559137:

buy sell Error Rate
buy 158.0 155.0 0.4952 (155.0/313.0)
sell 42.0 270.0 0.1346 (42.0/312.0)
Total 200.0 425.0 0.6298 (0.6298/625.0)
Maximum Metrics:

metric threshold value idx
max f1 0.363500559137 0.732700135685 267.0
max f2 0.107144016006 0.837359098229 390.0
max f0point5 0.711954597576 0.701844262295 99.0
max accuracy 0.45230256254 0.7024 220.0
max precision 0.940130731529 1.0 0.0
max absolute_MCC 0.45230256254 0.409805455597 220.0
max min_per_class_accuracy 0.525198772636 0.690095846645 189.0

In [12]:
predict_df_eur_usd_h2o_valid = model_gbm.predict(df_eur_usd_h2o_valid[features])

In [13]:
predict_df_eur_usd_h2o_valid.show()


First 10 rows and first 3 columns: 
predict buy sell
sell 0.4095038217375754 0.5904961782624246
sell 0.23823921110682197 0.7617607888931779
sell 0.5079194988403115 0.4920805011596885
buy 0.6779709312067647 0.3220290687932353
sell 0.32849428427209526 0.6715057157279049
sell 0.17971226450357558 0.8202877354964244
buy 0.8325757524150315 0.16742424758496854
sell 0.30556808470912017 0.6944319152908798
sell 0.3708645111594556 0.6291354888405444
sell 0.12573948636417556 0.8742605136358245

In [14]:
predict_df_eur_usd_valid = predict_df_eur_usd_h2o_valid.as_data_frame()
df_eur_usd_valid = df_eur_usd_h2o_valid.as_data_frame()

In [15]:
predict_df_eur_usd_valid[:5]


Out[15]:
predict buy sell
0 sell 0.409504 0.590496
1 sell 0.238239 0.761761
2 sell 0.507919 0.492081
3 buy 0.677971 0.322029
4 sell 0.328494 0.671506

In [16]:
df_eur_usd_valid[:5]


Out[16]:
Date Currency_pair Prediction_action Day Open Day Close Day High Day Low Day Average Momentum_3day Momentum_4day ... High_slope_12day High_slope_15day High_slope_20day High_slope_25day High_slope_30day Pips Prediction_Pips Volume Active Hour Active Hour Volume
0 1104652800000 EUR_USD sell 1.35600 1.35795 1.35795 1.35410 1.35675 0.00000 0.00000 ... 0.0000 0.0000 0.0000 0.0000 0.0000 0.0 -78.5 901 23 468
1 1104825600000 EUR_USD sell 1.34680 1.32820 1.34960 1.32488 1.33562 0.00000 0.00000 ... -2.7833 -2.7833 -2.7833 -2.7833 -2.7833 -132.9 -95.1 42391 14 3162
2 1106467200000 EUR_USD buy 1.30420 1.30400 1.30540 1.30370 1.30437 0.00390 0.00269 ... -0.1538 -1.5547 -2.7658 -2.7658 -2.7658 44.7 19.4 740 20 228
3 1107244800000 EUR_USD buy 1.30488 1.30458 1.30668 1.30014 1.30303 0.00087 0.00057 ... -0.0642 -0.9019 -0.0345 -1.9798 -1.8991 3.8 26.9 35494 15 2920
4 1109145600000 EUR_USD buy 1.32597 1.32010 1.32732 1.31859 1.32179 0.01330 0.01290 ... 3.5408 2.4978 0.9829 0.6312 0.8053 11.6 12.2 34572 14 2745

5 rows × 72 columns


In [17]:
result = df_eur_usd_valid.join(predict_df_eur_usd_valid)

In [18]:
result_col = ['Prediction_action','predict','buy','sell','Prediction_Pips']
result[result_col][:5]


Out[18]:
Prediction_action predict buy sell Prediction_Pips
0 sell sell 0.409504 0.590496 -78.5
1 sell sell 0.238239 0.761761 -95.1
2 buy sell 0.507919 0.492081 19.4
3 buy buy 0.677971 0.322029 26.9
4 buy sell 0.328494 0.671506 12.2

In [19]:
wrong_result = result[result['Prediction_action'] != result['predict']]

In [20]:
#Possbile loss in pips
sum(abs(wrong_result[abs(wrong_result['buy'] - wrong_result['sell']) >0.25]['Prediction_Pips']))


Out[20]:
3627.2999999999984

In [21]:
#possible profit in pips
correct_result = result[result['Prediction_action'] == result['predict']]
sum(abs(correct_result['Prediction_Pips']))


Out[21]:
20550.199999999986

In [ ]: