In [39]:
from os import path
import os
import pandas as pd

pd.set_option('display.max_columns', None)

In [64]:
# Load one hyperopt result set into `df`; uncomment exactly one read_csv
# line to switch between runs (kept here as provenance of the experiments).

# 100 trees with Aug-26 features:
# df = pd.read_csv('opt-aug-26/hyperopt-2015-08-26T14:15:39.704429.csv')

# 1000 trees with Aug-26 features:
df = pd.read_csv('opt-aug-26/hyperopt-2015-08-27T01:32:24.393333.csv')

# var trees with Aug-26 features:
# df = pd.read_csv('opt-aug-26/hyperopt-2015-08-27T21:23:25.051104.csv')

# 1000 trees with Aug-27 features:
# df = pd.read_csv('ec2/hyperopt-2015-08-28T06:00:21.522118.csv')


# Sanity check: number of hyperopt trials in the loaded run.
len(df)


Out[64]:
200

In [73]:
dirname = 'opt-aug-27-layer2'

# Combine every per-worker CSV in the directory into a single frame.
# Collect the pieces in a list and concatenate once: the old pattern
# `df = df.append(tmp_df)` in a loop is quadratic, and DataFrame.append
# was removed in pandas 2.0.
frames = []
for filename in os.listdir(dirname):
    if filename.endswith('.csv'):
        # `csv_path` (not `path`) so we don't shadow `from os import path`
        # done in the imports cell.
        csv_path = os.path.join(dirname, filename)
        frames.append(pd.read_csv(csv_path))

# pd.concat without ignore_index preserves each file's row index, matching
# the behavior of the original append-based loop. Guard against an empty
# directory, where concat([]) would raise.
df = pd.concat(frames) if frames else pd.DataFrame()

len(df)


Out[73]:
215

In [74]:
# Rank trials by cross-validated loss (best first).
# `DataFrame.sort` was deprecated in pandas 0.17 and removed in 0.20 —
# `sort_values` is the modern equivalent. Reassigning instead of
# `inplace=True` keeps the cell idempotent and chain-friendly.
df = df.sort_values('loss')
df[:10]


Out[74]:
colsample_bytree eta finish_time gamma loss loss_variance max_depth min_child_weight num_rounds objective silent status subsample test_rmsle_avg test_rmsle_std test_wall_time_avg test_wall_time_std train_rmsle_avg train_rmsle_std train_wall_time_avg train_wall_time_std
0 0.6 0.01 2015-08-28T21:48:00.233367 0 0.341850 0.000349 8 6 10000 reg:linear 1 ok 0.7 0.341850 0.018689 0.265725 0.011343 0.055346 0.001680 45.819323 0.753687
0 0.6 0.01 2015-08-28T21:56:27.033100 0 0.342765 0.000400 8 6 10000 reg:linear 1 ok 0.6 0.342765 0.019989 0.270967 0.024388 0.057522 0.001454 48.027613 0.517519
0 0.6 0.01 2015-08-28T22:04:46.025134 0 0.343033 0.000419 8 6 10000 reg:linear 1 ok 0.5 0.343033 0.020478 0.271046 0.017145 0.060748 0.001809 47.014007 0.561492
0 0.6 0.01 2015-08-28T22:46:16.742786 0 0.343751 0.000350 9 6 10000 reg:linear 1 ok 0.7 0.343751 0.018719 0.384650 0.045097 0.042072 0.001477 52.202696 0.475943
0 0.6 0.02 2015-08-28T21:13:21.317481 0 0.344216 0.000318 8 6 10000 reg:linear 1 ok 0.7 0.344216 0.017841 0.233732 0.010945 0.033646 0.001303 45.741396 0.243314
0 0.6 0.01 2015-08-28T22:21:47.026220 0 0.344307 0.000365 8 6 10000 reg:linear 1 ok 0.4 0.344307 0.019094 0.272304 0.025813 0.065740 0.001593 42.879817 0.520202
0 0.6 0.01 2015-08-28T22:56:37.039359 0 0.344554 0.000348 10 8 10000 reg:linear 1 ok 0.7 0.344554 0.018664 0.434465 0.027190 0.037309 0.001418 57.568272 0.975558
0 0.6 0.01 2015-08-28T23:09:09.058688 0 0.345173 0.000377 10 8 10000 reg:linear 1 ok 0.5 0.345173 0.019420 0.454519 0.032030 0.042504 0.001621 58.629495 0.508990
0 0.6 0.02 2015-08-28T22:13:44.242073 0 0.345190 0.000372 8 6 10000 reg:linear 1 ok 0.5 0.345190 0.019287 0.246473 0.015126 0.037551 0.001499 46.902920 0.994743
71 0.6 0.03 2015-08-28T19:11:53.076045 0 0.345264 0.000390 7 3 1000 reg:linear 1 ok 0.7 0.345264 0.019752 0.015605 0.000472 0.147586 0.001838 4.156122 0.059006

In [60]:
subset = df[:20]  # top 20 trials — assumes df was sorted by loss in the cell above; TODO confirm execution order

In [61]:
# Distribution of max_depth among the best trials.
subset.max_depth.value_counts()


Out[61]:
10    7
11    6
12    3
9     3
8     1
dtype: int64

In [68]:
# Best 10 trials restricted to the 1000-round runs (df sorted by loss earlier).
df[df.num_rounds == 1000][:10]


Out[68]:
colsample_bytree eta finish_time gamma loss loss_variance max_depth min_child_weight num_rounds objective silent status subsample test_rmsle_avg test_rmsle_std test_wall_time_avg test_wall_time_std train_rmsle_avg train_rmsle_std train_wall_time_avg train_wall_time_std
132 0.5 0.05 2015-08-28T02:11:34.280924 0 0.216013 0.000220 10 8 1000 reg:linear 1 ok 1.0 0.216013 0.014819 0.065007 0.004652 0.074987 0.001584 11.506769 0.118684
196 0.5 0.05 2015-08-28T04:04:38.542555 0 0.216278 0.000248 9 11 1000 reg:linear 1 ok 0.8 0.216278 0.015745 0.056121 0.001194 0.086882 0.000917 11.505737 0.061602
189 0.5 0.05 2015-08-28T03:51:45.124211 0 0.216278 0.000248 9 11 1000 reg:linear 1 ok 0.8 0.216278 0.015745 0.058815 0.003881 0.086882 0.000917 11.860194 0.076347
58 0.5 0.03 2015-08-27T23:36:36.610086 0 0.216593 0.000213 11 8 1000 reg:linear 1 ok 0.8 0.216593 0.014589 0.076059 0.007537 0.076943 0.001321 14.633820 0.184894
192 0.5 0.05 2015-08-28T03:57:35.088035 0 0.216607 0.000232 9 12 1000 reg:linear 1 ok 0.8 0.216607 0.015236 0.057141 0.002076 0.088672 0.001205 11.896833 0.111567
100 0.5 0.03 2015-08-28T01:13:57.900256 0 0.216610 0.000236 10 7 1000 reg:linear 1 ok 0.7 0.216610 0.015376 0.071279 0.014609 0.087027 0.001466 13.900144 0.085076
101 0.5 0.03 2015-08-28T01:16:22.366081 0 0.216610 0.000236 10 7 1000 reg:linear 1 ok 0.7 0.216610 0.015376 0.067546 0.002541 0.087027 0.001466 13.843891 0.076682
155 0.6 0.02 2015-08-27T10:23:50.581279 0 0.216727 0.000186 20 18 1000 reg:linear 1 ok 0.8 0.216727 0.013627 0.130519 0.011778 0.070265 0.000735 29.059517 0.144029
95 0.6 0.03 2015-08-28T01:01:54.238169 0 0.216756 0.000202 11 4 1000 reg:linear 1 ok 0.6 0.216756 0.014204 0.079268 0.001851 0.066980 0.000738 18.133134 0.245733
152 0.6 0.03 2015-08-27T10:09:52.757867 0 0.216787 0.000181 17 15 1000 reg:linear 1 ok 0.8 0.216787 0.013469 0.110305 0.005467 0.057522 0.000639 25.438890 0.616696

In [ ]: