In [1]:
import h2o
import os.path
from collections import OrderedDict
from builtins import range
from h2o.estimators.deepwater import H2ODeepWaterEstimator
from h2o.grid.grid_search import H2OGridSearch
# Root directory that holds the demo data (presumably a local h2o-3 checkout --
# confirm). Keep the trailing slash: the data cell builds the CSV path from it.
PATH = os.path.expanduser("~/h2o-3/")

# Attach to the H2O instance on localhost (the banner below reports "connected").
# nthreads=-1 presumably lets H2O use every available core -- TODO confirm.
h2o.init(nthreads=-1)


Checking whether there is an H2O instance running at http://localhost:54321. connected.
H2O cluster uptime: 57 mins 57 secs
H2O cluster version: 3.11.0.99999
H2O cluster version age: 20 hours and 30 minutes
H2O cluster name: arno
H2O cluster total nodes: 1
H2O cluster free memory: 13.57 Gb
H2O cluster total cores: 12
H2O cluster allowed cores: 12
H2O cluster status: locked, healthy
H2O connection url: http://localhost:54321
H2O connection proxy: None
Python version: 2.7.12 final

In [7]:
# Shell escape: print the NVIDIA driver's view of the GPUs on this machine
# (model, memory use, utilisation and the processes currently holding them).
!nvidia-smi


Sun Oct 23 23:16:00 2016       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 367.44                 Driver Version: 367.44                    |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|===============================+======================+======================|
|   0  GeForce GTX 980 Ti  Off  | 0000:01:00.0      On |                  N/A |
|  9%   62C    P0    74W / 275W |    854MiB /  6076MiB |      2%      Default |
+-------------------------------+----------------------+----------------------+
|   1  TITAN X (Pascal)    Off  | 0000:02:00.0     Off |                  N/A |
| 27%   48C    P2    53W / 250W |    456MiB / 12189MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID  Type  Process name                               Usage      |
|=============================================================================|
|    0      1642    G   /usr/lib/xorg/Xorg                             588MiB |
|    0      2590    G   compiz                                         183MiB |
|    0     10779    G   ...ivePortalInterstitial/Enabled/ChildAccoun    78MiB |
|    1     13724    C   /usr/lib/jvm/java-8-oracle/bin/java            453MiB |
+-----------------------------------------------------------------------------+

In [2]:
# Columns are addressed by position: 0..3 are the predictors, 4 is the response.
predictors = list(range(4))
response_col = 4

# Load the iris CSV (with header) from the data directory into an H2O frame.
train = h2o.import_file(os.path.join(PATH, "smalldata/iris/iris_wheader.csv"))


Parse progress: |█████████████████████████████████████████████████████████████████████████████| 100%

In [3]:
# Search space: each key is an estimator argument, each value the list of
# candidates the grid search may draw from.
hyper_parameters = dict(
    hidden=[[20] * 2, [50] * 3, [200] * 2, [50] * 5],
    activation=["tanh", "rectifier"],
    learning_rate=[step / 1e3 for step in range(1, 10)],  # 0.001 .. 0.009
)

# Settings shared by every model in the grid (passed through to train()).
parameters = dict(
    seed=42,
    epochs=500,
    nfolds=3,
    # Early stopping of each individual model in the hyperparameter search:
    # stop once the validation logloss of the cv models stops improving enough.
    stopping_rounds=3,
    stopping_metric="logloss",
    stopping_tolerance=1e-3,
)

# Controls for the search itself rather than for any single model.
search_criteria = dict(
    strategy="RandomDiscrete",
    max_runtime_secs=30,   # cap the whole search at 30 seconds
    max_models=100,        # and at 100 models
    seed=42,
    # Early stopping of the overall leaderboard.
    stopping_rounds=5,
    stopping_metric="logloss",
    stopping_tolerance=1e-4,
)
print(hyper_parameters)


{'learning_rate': [0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009], 'hidden': [[20, 20], [50, 50, 50], [200, 200], [50, 50, 50, 50, 50]], 'activation': ['tanh', 'rectifier']}

In [4]:
# Grid of Deep Water models: the candidate values come from hyper_parameters,
# the search strategy and its budget from search_criteria.
gs = H2OGridSearch(H2ODeepWaterEstimator,
                   search_criteria=search_criteria,
                   hyper_params=hyper_parameters)

# Train on the iris frame; the fixed per-model settings are forwarded via **parameters.
gs.train(training_frame=train,
         y=response_col,
         x=predictors,
         **parameters)


deepwater Grid Build progress: |██████████████████████████████████████████████████████████████| 100%

In [5]:
# Show the trained models ranked by logloss (lowest first in the output below).
gs.get_grid(sort_by="logloss")


    activation                hidden learning_rate  \
0         Tanh              [20, 20]         0.009   
1         Tanh              [20, 20]         0.007   
2         Tanh          [50, 50, 50]         0.004   
3         Tanh            [200, 200]         0.006   
4         Tanh  [50, 50, 50, 50, 50]         0.003   
5         Tanh            [200, 200]         0.002   
6    Rectifier            [200, 200]         0.001   
7    Rectifier            [200, 200]         0.004   
8    Rectifier              [20, 20]         0.001   

                                                               model_ids  \
0  Grid_DeepWater_iris_wheader4.hex_model_python_1477286246751_9_model_3   
1  Grid_DeepWater_iris_wheader4.hex_model_python_1477286246751_9_model_4   
2  Grid_DeepWater_iris_wheader4.hex_model_python_1477286246751_9_model_1   
3  Grid_DeepWater_iris_wheader4.hex_model_python_1477286246751_9_model_2   
4  Grid_DeepWater_iris_wheader4.hex_model_python_1477286246751_9_model_7   
5  Grid_DeepWater_iris_wheader4.hex_model_python_1477286246751_9_model_6   
6  Grid_DeepWater_iris_wheader4.hex_model_python_1477286246751_9_model_0   
7  Grid_DeepWater_iris_wheader4.hex_model_python_1477286246751_9_model_5   
8  Grid_DeepWater_iris_wheader4.hex_model_python_1477286246751_9_model_8   

              logloss  
0   0.174867823409404  
1  0.2548120069902128  
2  0.2847309554355027  
3  0.5744841418551249  
4  0.6931066094644429  
5  0.6965300796568423  
6  1.7458040234667165  
7   2.747305024267217  
8  6.0184638863365345  
Out[5]: