In [1]:
import h2o
In [2]:
# Set up H2O before any data is uploaded in the cells below.
h2o.init()
In [3]:
# Locate the airlines sample archive, then upload it as the frame `air`.
airlines_zip = h2o.locate("smalldata/airlines/allyears2k_headers.zip")
air = h2o.upload_file(airlines_zip)
In [4]:
# Show the frame's dimensions as this cell's output (presumably [rows, columns] — confirm).
air.dim
Out[4]:
In [5]:
# Count the missing entries in DepTime before any imputation is applied.
numNAs = air["DepTime"].isna().sum()
# print() with a single argument produces the same output on Python 2 and
# Python 3; the bare print statement is a SyntaxError on Python 3.
print(numNAs)
In [6]:
# Mean of DepTime with missing values excluded (na_rm=True).
DepTime_mean = air["DepTime"].mean(na_rm=True)
# print() with a single argument produces the same output on Python 2 and
# Python 3; the bare print statement is a SyntaxError on Python 3.
print(DepTime_mean)
In [7]:
# Fill the missing DepTime values using the column median. The return value is
# deliberately unused: the recount below reads `air` itself, so this cell
# relies on impute() modifying the frame in place.
# NOTE(review): combine_method="low" presumably selects the lower of the two
# middle values when the count is even — confirm against the H2O docs.
air.impute("DepTime", method="median", combine_method="low")

# Recount the missing entries to check the effect of the imputation.
numNAs = air["DepTime"].isna().sum()
# print() with a single argument produces the same output on Python 2 and
# Python 3; the bare print statement is a SyntaxError on Python 3.
print(numNAs)
In [8]:
# Upload a fresh copy of the airlines data into `air`, replacing the frame
# that the median-imputation cell above operated on.
airlines_zip = h2o.locate("smalldata/airlines/allyears2k_headers.zip")
air = h2o.upload_file(airlines_zip)
In [9]:
# Impute DepTime with the mean computed within each (Origin, Distance) group.
# impute() updates `air` in place, and what it returns differs between H2O
# releases (the imputed frame in old ones; a list of fill values or the
# group-by summary in current ones). Display `air` itself instead of chaining
# .show() onto the call so the imputed data is shown on every release.
air.impute("DepTime", method="mean", by=["Origin", "Distance"])
air.show()
In [10]:
# Upload a fresh copy of the airlines data into `air`, replacing the frame
# that the grouped mean-imputation cell above operated on.
airlines_zip = h2o.locate("smalldata/airlines/allyears2k_headers.zip")
air = h2o.upload_file(airlines_zip)
In [11]:
# Impute TailNum with its mode (most frequent value).
# impute() updates `air` in place; without `by`, current H2O releases return a
# plain list of the fill values, which has no .show(). Display `air` itself so
# the imputed frame is shown regardless of the release in use.
air.impute("TailNum", method="mode")
air.show()
In [12]:
# Upload a fresh copy of the airlines data into `air`, replacing the frame
# that the mode-imputation cell above operated on.
airlines_zip = h2o.locate("smalldata/airlines/allyears2k_headers.zip")
air = h2o.upload_file(airlines_zip)
In [13]:
# Impute TailNum with the mode computed within each (Month, Year) group.
# impute() updates `air` in place; with `by`, current H2O releases return the
# group-by summary rather than the imputed frame, so chaining .show() would
# display the wrong table. Display `air` itself to show the imputed data.
air.impute("TailNum", method="mode", by=["Month", "Year"])
air.show()