In [1]:
#----------------------------------------------------------------------
# Slice a frame's rows by comparing a factor column to a level with != (and ==)
#----------------------------------------------------------------------

In [2]:
import h2o

In [3]:
# Start the h2o session; the call prints a cluster status summary
# (uptime, version, node/core counts, connection ip and port).
h2o.init()


H2O cluster uptime: 14 minutes 30 seconds 918 milliseconds
H2O cluster version: 3.5.0.99999
H2O cluster name: ece
H2O cluster total nodes: 1
H2O cluster total memory: 10.67 GB
H2O cluster total cores: 8
H2O cluster allowed cores: 8
H2O cluster healthy: True
H2O Connection ip: 127.0.0.1
H2O Connection port: 54321

In [4]:
# `_locate` is a private h2o function; it is used here to find files
# within the h2o git project directory.
from h2o.utils.shared_utils import _locate

# Import the zipped airlines sample and bind the parsed frame to `air`.
air = h2o.import_file(
    path=_locate("smalldata/airlines/allyears2k_headers.zip")
)


Parse Progress: [##################################################] 100%
Imported /Users/ece/0xdata/h2o-dev/smalldata/airlines/allyears2k_headers.zip. Parsed 43,978 rows and 31 cols

In [5]:
# `air.dim` unpacks into the frame's row count and column count; keep both
# as the baseline for the full, unsliced frame.
rows, cols = air.dim
# Show the baseline dimensions as a [rows, cols] list.
print([rows, cols])


[43978, 31]

In [6]:
#
# Partition the flights by origin airport using boolean row masks:
#   not_sfo -- every flight that does NOT depart from SFO (the `!=` slice)
#   sfo     -- the complementary flights that DO depart from SFO (the `==` slice)
#
not_sfo = air[air["Origin"] != "SFO"]
sfo = air[air["Origin"] == "SFO"]

# Dimensions of each slice, unpacked as (row count, column count).
no_rows, no_cols = not_sfo.dim
yes_rows, yes_cols = sfo.dim
print("no_rows: {0}".format(no_rows))
print("yes_rows: {0}".format(yes_rows))
print("no_cols: {0}".format(no_cols))
print("yes_cols: {0}".format(yes_cols))

# Verify the slices instead of relying on eyeballing the printed counts.
# `rows` and `cols` are the dimensions of the full frame taken from `air.dim`
# in the earlier cell.
#
# Row slicing must not drop or add columns.
assert no_cols == cols and yes_cols == cols, \
    "row slices should keep all {0} columns".format(cols)
# The `!=` and `==` masks are complementary, so together they should account
# for every row of the full frame (recorded run: 42434 + 1544 = 43978).
# NOTE(review): this presumably would not hold if "Origin" contained missing
# values -- confirm how h2o compares NAs before reusing on other data.
assert no_rows + yes_rows == rows, \
    "!= and == slices should partition all {0} rows".format(rows)


no_rows: 42434
yes_rows: 1544
no_cols: 31
yes_cols: 31