In [39]:
from os import path
import os
import pandas as pd
# Lift the cap on displayed columns so wide result frames are shown in full.
pd.options.display.max_columns = None
In [64]:
# Load one hyperopt results file into `df` and report its row count.
# Alternative result files (assign one to `results_csv` to switch):
#   100 trees with Aug-26 features:  'opt-aug-26/hyperopt-2015-08-26T14:15:39.704429.csv'
#   var trees with Aug-26 features:  'opt-aug-26/hyperopt-2015-08-27T21:23:25.051104.csv'
#   1000 trees with Aug-27 features: 'ec2/hyperopt-2015-08-28T06:00:21.522118.csv'
# Currently selected: 1000 trees with Aug-26 features.
results_csv = 'opt-aug-26/hyperopt-2015-08-27T01:32:24.393333.csv'
df = pd.read_csv(results_csv)
len(df)
Out[64]:
In [73]:
# Gather every CSV found directly inside `dirname` into a single frame `df`.
dirname = 'opt-aug-27-layer2'

# Sorted so the row order does not depend on the filesystem's listing order.
# Named `csv_paths` / `csv_path` rather than `path` so the module imported by
# `from os import path` is not overwritten.
csv_paths = [
    os.path.join(dirname, filename)
    for filename in sorted(os.listdir(dirname))
    if filename.endswith('.csv')
]

# DataFrame.append was removed in pandas 2.0 (and re-copied the accumulated
# frame on every iteration); read all files first and concatenate once.
frames = [pd.read_csv(csv_path) for csv_path in csv_paths]

# pd.concat raises ValueError on an empty list; fall back to an empty frame,
# which is what the original loop produced when no CSV files were present.
df = pd.concat(frames) if frames else pd.DataFrame()
len(df)
Out[73]:
In [74]:
# Order the rows by ascending 'loss' and show the ten lowest.
# DataFrame.sort was removed in pandas 0.20; sort_values is its replacement.
# Rebinding `df` (instead of inplace=True) keeps the cell safe to re-run.
df = df.sort_values('loss')
df.head(10)
Out[74]:
In [60]:
# Keep the first 20 rows of `df` for closer inspection.
subset = df.head(20)
In [61]:
# Frequency of each max_depth value within `subset`.
subset['max_depth'].value_counts()
Out[61]:
In [68]:
# First ten rows of `df` whose num_rounds equals 1000.
is_1000_rounds = df['num_rounds'] == 1000
df.loc[is_1000_rounds].head(10)
Out[68]:
In [ ]: