In [1]:
import h2o

In [2]:
# Connect to a cluster
h2o.init()


H2O cluster uptime: 17 seconds 548 milliseconds
H2O cluster version: 3.1.0.99999
H2O cluster name: anqi_fu
H2O cluster total nodes: 1
H2O cluster total memory: 1.78 GB
H2O cluster total cores: 8
H2O cluster allowed cores: 8
H2O cluster healthy: True
H2O Connection ip: 127.0.0.1
H2O Connection port: 54321
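
The bare h2o.init() call above attaches to the cluster at the default address. If the cluster runs on a different host or port, the same call accepts them explicitly; a minimal sketch, using the connection details reported in the summary above:

import h2o
# Attach to a cluster at an explicit address; these values simply mirror
# the H2O Connection ip/port printed in the summary above.
h2o.init(ip="127.0.0.1", port=54321)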

In [3]:
weather_path = h2o.locate("smalldata/chicago/chicagoAllWeather.csv")
census_path = h2o.locate("smalldata/chicago/chicagoCensus.csv")
crimes_path = h2o.locate("smalldata/chicago/chicagoCrimes10k.csv.zip")

print "Import and Parse weather data"
weather = h2o.import_frame(path=weather_path)
weather.drop("date")
weather.describe()

print "Import and Parse census data"
census = h2o.import_frame(path=census_path)
census.describe()

print "Import and Parse crimes data"
crimes = h2o.import_frame(path=crimes_path)
crimes.describe()


Import and Parse weather data

Parse Progress: [##################################################] 100%
Imported  /Users/anqi_fu/Documents/workspace/h2o-3/smalldata/chicago/chicagoAllWeather.csv . Parsed 5,162 rows and 7 cols
Rows: 5,162 Cols: 7

Chunk compression summary:

chunk_type  chunk_name                 count  count_percentage  size      size_percentage
C1N         1-Byte Integers (w/o NAs)  2      28.57143          10.2 KB   11.221008
C1S         1-Byte Fractions           4      57.14286          20.5 KB   22.510675
CStr        String                     1      14.285715         60.3 KB   66.26832

Frame distribution summary:

                   size      number_of_rows  number_of_chunks_per_column  number_of_chunks
172.16.2.17:54321  91.0 KB   5162.0          1.0                          7.0
mean               91.0 KB   5162.0          1.0                          7.0
min                91.0 KB   5162.0          1.0                          7.0
max                91.0 KB   5162.0          1.0                          7.0
stddev             0 B       0.0             0.0                          0.0
total              91.0 KB   5162.0          1.0                          7.0
Column-by-Column Summary:

date month day year maxTemp meanTemp minTemp
type string int int int int int int
mins NaN 1.0 1.0 2001.0 -2.0 -9.0 -18.0
maxs NaN 12.0 31.0 2015.0 103.0 93.0 82.0
sigma NaN 3.46905171694 8.79895173997 4.0773409057 21.4829777237 19.9302399266 19.0207297123
zero_count 0 0 0 0 0 2 16
missing_count 0 0 0 0 13 13 13
Import and Parse census data

Parse Progress: [##################################################] 100%
Imported  /Users/anqi_fu/Documents/workspace/h2o-3/smalldata/chicago/chicagoCensus.csv . Parsed 79 rows and 9 cols
Rows: 79 Cols: 9

Chunk compression summary:

chunk_type  chunk_name                 count  count_percentage  size      size_percentage
C1          1-Byte Integers            2      22.222223         294 B     9.312638
C1S         1-Byte Fractions           1      11.111112         163 B     5.1631293
C2S         2-Byte Fractions           4      44.444447         968 B     30.662022
C4          4-Byte Integers            1      11.111112         384 B     12.163446
CStr        String                     1      11.111112         1.3 KB    42.698765

Frame distribution summary:

                   size      number_of_rows  number_of_chunks_per_column  number_of_chunks
172.16.2.17:54321  3.1 KB    79.0            1.0                          9.0
mean               3.1 KB    79.0            1.0                          9.0
min                3.1 KB    79.0            1.0                          9.0
max                3.1 KB    79.0            1.0                          9.0
stddev             0 B       0.0             0.0                          0.0
total              3.1 KB    79.0            1.0                          9.0
Column-by-Column Summary:

Community Area Number COMMUNITY AREA NAME PERCENT OF HOUSING CROWDED PERCENT HOUSEHOLDS BELOW POVERTY PERCENT AGED 16 UNEMPLOYED PERCENT AGED 25 WITHOUT HIGH SCHOOL DIPLOMA PERCENT AGED UNDER 18 OR OVER 64 PER CAPITA INCOME HARDSHIP INDEX
type int string real real real real real int int
mins 1.0 NaN 0.3 3.3 4.7 2.5 13.5 8201.0 1.0
maxs 77.0 NaN 15.8 56.5 35.9 54.8 51.5 88669.0 98.0
sigma 22.3718573212 NaN 3.65898144135 11.457230913 7.49949670861 11.7465143511 7.28442108494 15196.4055413 28.6905556516
zero_count 0 0 0 0 0 0 0 0 0
missing_count 2 0 1 1 1 1 1 1 2
Import and Parse crimes data

Parse Progress: [##################################################] 100%
Imported  /Users/anqi_fu/Documents/workspace/h2o-3/smalldata/chicago/chicagoCrimes10k.csv.zip . Parsed 9,999 rows and 22 cols
Rows: 9,999 Cols: 22

Chunk compression summary:

chunk_type  chunk_name                 count  count_percentage  size      size_percentage
C0L         Constant Integers          4      4.5454545         320 B     0.03695244
C1          1-Byte Integers            32     36.363636         80.2 KB   9.488462
C1N         1-Byte Integers (w/o NAs)  8      9.090909          20.1 KB   2.3721156
C2          2-Byte Integers            16     18.181818         79.2 KB   9.362824
C4          4-Byte Integers            12     13.636364         118.0 KB  13.950008
CStr        String                     8      9.090909          391.1 KB  46.252445
C8D         64-bit Reals               8      9.090909          156.8 KB  18.537191

Frame distribution summary:

                   size      number_of_rows  number_of_chunks_per_column  number_of_chunks
172.16.2.17:54321  845.7 KB  9999.0          4.0                          88.0
mean               845.7 KB  9999.0          4.0                          88.0
min                845.7 KB  9999.0          4.0                          88.0
max                845.7 KB  9999.0          4.0                          88.0
stddev             0 B       0.0             0.0                          0.0
total              845.7 KB  9999.0          4.0                          88.0
Column-by-Column Summary:

ID Case Number Date Block IUCR Primary Type Description Location Description Arrest Domestic Beat District Ward Community Area FBI Code X Coordinate Y Coordinate Year Updated On Latitude Longitude Location
type int string string enum int enum enum enum enum enum int int int int int int int int enum real real enum
mins 21735.0 NaN NaN 0.0 110.0 0.0 0.0 0.0 0.0 0.0 111.0 1.0 1.0 1.0 2.0 1100317.0 1814255.0 2015.0 0.0 41.64507243 -87.906463888 0.0
maxs 9962898.0 NaN NaN 6517.0 5131.0 26.0 198.0 90.0 1.0 1.0 2535.0 25.0 50.0 77.0 26.0 1205069.0 1951533.0 2015.0 32.0 42.022646183 -87.524773286 8603.0
sigma 396787.564221 NaN NaN 1915.88517194 927.751435583 9.16241735944 60.1059382029 25.5963972463 0.455083515588 0.35934414686 695.76029875 6.94547493301 13.6495661144 21.2748762223 7.57423857911 16496.4493681 31274.0163199 0.0 10.0824464345 0.0860186579359 0.0600357970653 2469.64729385
zero_count 0 0 0 3 0 11 933 19 7071 8476 0 0 0 0 0 0 0 0 603 0 0 1
missing_count 0 0 0 0 419 0 0 6 0 0 0 162 0 0 2557 162 162 0 0 162 162 162
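
One detail worth flagging in the output above: the date column still appears in the weather summary, because drop() returns a new H2OFrame rather than removing the column in place, so the bare weather.drop("date") call is a no-op. The reassignment pattern used for crimes in the next cell works here too; a small sketch:

# drop() returns a new frame; reassign to actually remove the column
weather = weather.drop("date")
weather.describe()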

In [4]:
def refine_date_col(data, col, pattern):
    data[col]         = data[col].as_date(pattern)
    data["Day"]       = data[col].day()
    data["Month"]     = data[col].month() + 1     # Since H2O indexes from 0
    data["Year"]      = data[col].year() + 1900   # Start of epoch is 1900
    data["WeekNum"]   = data[col].week()
    data["WeekDay"]   = data[col].dayOfWeek()
    data["HourOfDay"] = data[col].hour()
    
    data.describe()  # HACK: Force evaluation before ifelse and cut. See PUBDEV-1425.
    
    # Create weekend and season cols
    # Spring = Mar-May, Summer = Jun-Aug, Autumn = Sep-Oct, Winter = Nov-Feb (note: the breaks below actually put Aug in Autumn)
    # data["Weekend"]   = [1 if x in ("Sun", "Sat") else 0 for x in data["WeekDay"]]
    data["Weekend"] = h2o.ifelse(data["WeekDay"] == "Sun" or data["WeekDay"] == "Sat", 1, 0)[0]
    data["Season"] = data["Month"].cut([0, 2, 5, 7, 10, 12], ["Winter", "Spring", "Summer", "Autumn", "Winter"])
    
refine_date_col(crimes, "Date", "%m/%d/%Y %I:%M:%S %p")
crimes = crimes.drop("Date")
crimes.describe()


Rows: 9,999 Cols: 27

Chunk compression summary:

chunk_type  chunk_name                 count  count_percentage  size      size_percentage
C0L         Constant Integers          9      8.333334          720 B     0.10067465
C1          1-Byte Integers            32     29.62963          80.2 KB   11.489216
C1N         1-Byte Integers (w/o NAs)  23     21.296297         57.9 KB   8.29671
C2          2-Byte Integers            16     14.814815         79.2 KB   11.337085
C4          4-Byte Integers            12     11.111112         118.0 KB  16.891531
C8          64-bit Integers            4      3.7037036         78.4 KB   11.222987
CStr        String                     4      3.7037036         127.2 KB  18.215822
C8D         64-bit Reals               8      7.4074073         156.8 KB  22.445974

Frame distribution summary:

                   size      number_of_rows  number_of_chunks_per_column  number_of_chunks
172.16.2.17:54321  698.4 KB  9999.0          4.0                          108.0
mean               698.4 KB  9999.0          4.0                          108.0
min                698.4 KB  9999.0          4.0                          108.0
max                698.4 KB  9999.0          4.0                          108.0
stddev             0 B       0.0             0.0                          0.0
total              698.4 KB  9999.0          4.0                          108.0
Column-by-Column Summary:

ID Case Number Date Block IUCR Primary Type Description Location Description Arrest Domestic Beat District Ward Community Area FBI Code X Coordinate Y Coordinate Year Updated On Latitude Longitude Location Day Month WeekNum WeekDay HourOfDay
type int string int enum int enum enum enum enum enum int int int int int int int int enum real real enum int int int enum int
mins 21735.0 NaN 1.42203063e+12 0.0 110.0 0.0 0.0 0.0 0.0 0.0 111.0 1.0 1.0 1.0 2.0 1100317.0 1814255.0 3915.0 0.0 41.64507243 -87.906463888 0.0 1.0 2.0 4.0 0.0 0.0
maxs 9962898.0 NaN 1.42346782e+12 6517.0 5131.0 26.0 198.0 90.0 1.0 1.0 2535.0 25.0 50.0 77.0 26.0 1205069.0 1951533.0 3915.0 32.0 42.022646183 -87.524773286 8603.0 31.0 3.0 6.0 6.0 23.0
sigma 396787.564221 NaN 433879245.188 1915.88517194 927.751435583 9.16241735944 60.1059382029 25.5963972463 0.455083515588 0.35934414686 695.76029875 6.94547493301 13.6495661144 21.2748762223 7.57423857911 16496.4493681 31274.0163199 0.0 10.0824464345 0.0860186579359 0.0600357970653 2469.64729385 11.1801043358 0.493492406787 0.738929830409 1.93284056432 6.47321735807
zero_count 0 0 0 3 0 11 933 19 7071 8476 0 0 0 0 0 0 0 0 603 0 0 1 0 0 0 1038 374
missing_count 0 0 0 0 419 0 0 6 0 0 0 162 0 0 2557 162 162 0 0 162 162 162 0 0 0 0 0
Rows: 9,999 Cols: 28

Chunk compression summary:

chunk_type  chunk_name                 count  count_percentage  size      size_percentage
C0L         Constant Integers          13     11.607142         1.0 KB    0.16332634
CBS         Bits                       4      3.5714288         1.5 KB    0.2404352
C1          1-Byte Integers            32     28.57143          80.2 KB   12.9040365
C1N         1-Byte Integers (w/o NAs)  23     20.535715         57.9 KB   9.318395
C2          2-Byte Integers            16     14.285715         79.2 KB   12.733171
C4          4-Byte Integers            12     10.714286         118.0 KB  18.97161
CStr        String                     4      3.5714288         127.2 KB  20.458979
C8D         64-bit Reals               8      7.1428576         156.8 KB  25.210047

Frame distribution summary:

                   size      number_of_rows  number_of_chunks_per_column  number_of_chunks
172.16.2.17:54321  621.8 KB  9999.0          4.0                          112.0
mean               621.8 KB  9999.0          4.0                          112.0
min                621.8 KB  9999.0          4.0                          112.0
max                621.8 KB  9999.0          4.0                          112.0
stddev             0 B       0.0             0.0                          0.0
total              621.8 KB  9999.0          4.0                          112.0
Column-by-Column Summary:

ID Case Number Block IUCR Primary Type Description Location Description Arrest Domestic Beat District Ward Community Area FBI Code X Coordinate Y Coordinate Year Updated On Latitude Longitude Location Day Month WeekNum WeekDay HourOfDay Weekend Season
type int string enum int enum enum enum enum enum int int int int int int int int enum real real enum int int int enum int int enum
mins 21735.0 NaN 0.0 110.0 0.0 0.0 0.0 0.0 0.0 111.0 1.0 1.0 1.0 2.0 1100317.0 1814255.0 3915.0 0.0 41.64507243 -87.906463888 0.0 1.0 2.0 4.0 0.0 0.0 0.0 0.0
maxs 9962898.0 NaN 6517.0 5131.0 26.0 198.0 90.0 1.0 1.0 2535.0 25.0 50.0 77.0 26.0 1205069.0 1951533.0 3915.0 32.0 42.022646183 -87.524773286 8603.0 31.0 3.0 6.0 6.0 23.0 1.0 1.0
sigma 396787.564221 NaN 1915.88517194 927.751435583 9.16241735944 60.1059382029 25.5963972463 0.455083515588 0.35934414686 695.76029875 6.94547493301 13.6495661144 21.2748762223 7.57423857911 16496.4493681 31274.0163199 0.0 10.0824464345 0.0860186579359 0.0600357970653 2469.64729385 11.1801043358 0.493492406787 0.738929830409 1.93284056432 6.47321735807 0.365802434041 0.493492406787
zero_count 0 0 3 0 11 933 19 7071 8476 0 0 0 0 0 0 0 0 603 0 0 1 0 0 0 1038 374 8408 5805
missing_count 0 0 0 419 0 0 6 0 0 0 162 0 0 2557 162 162 0 0 162 162 162 0 0 0 0 0 0 0

In [5]:
# Merge crimes data with weather and census
census["Community Area Number"]._name = "Community Area"
weather["month"]._name = "Month"
weather["day"]  ._name = "Day"
weather["year"] ._name = "Year"
crimes.merge(census, allLeft=True, allRite=False)
crimes.merge(weather, allLeft=True, allRite=False)


---------------------------------------------------------------------------
EnvironmentError                          Traceback (most recent call last)
<ipython-input-5-e946a6af6204> in <module>()
      4 weather["day"]  ._name = "Day"
      5 weather["year"] ._name = "Year"
----> 6 crimes.merge(census, allLeft=True, allRite=False)
      7 crimes.merge(weather, allLeft=True, allRite=False)

/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/frame.pyc in merge(self, other, allLeft, allRite)
   1022     expr2 = "(, "+expr+" (del %"+lkey+" #0) (del %"+rkey+" #0) )"
   1023 
-> 1024     h2o.rapids(expr2)       # merge in h2o
   1025     # Make backing H2OVecs for the remote h2o vecs
   1026     j = h2o.frame(tmp_key)  # Fetch the frame as JSON

/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/h2o.pyc in rapids(expr)
    487   :return: The JSON response of the Rapids execution
    488   """
--> 489   result = H2OConnection.post_json("Rapids", ast=urllib.quote(expr), _rest_version=99)
    490   if result['error'] is not None:
    491     raise EnvironmentError("rapids expression not evaluated: {0}".format(str(result['error'])))

/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/connection.pyc in post_json(url_suffix, file_upload_info, **kwargs)
    360     if __H2OCONN__ is None:
    361       raise ValueError("No h2o connection. Did you run `h2o.init()` ?")
--> 362     return __H2OCONN__._rest_json(url_suffix, "POST", file_upload_info, **kwargs)
    363 
    364   def _rest_json(self, url_suffix, method, file_upload_info, **kwargs):

/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/connection.pyc in _rest_json(self, url_suffix, method, file_upload_info, **kwargs)
    363 
    364   def _rest_json(self, url_suffix, method, file_upload_info, **kwargs):
--> 365     raw_txt = self._do_raw_rest(url_suffix, method, file_upload_info, **kwargs)
    366     return self._process_tables(raw_txt.json())
    367 

/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/connection.pyc in _do_raw_rest(self, url_suffix, method, file_upload_info, **kwargs)
    429       raise EnvironmentError(("h2o-py got an unexpected HTTP status code:\n {} {} (method = {}; url = {}). \n"+ \
    430                               "detailed error messages: {}")
--> 431                              .format(http_result.status_code,http_result.reason,method,url,detailed_error_msgs))
    432 
    433     # TODO: is.logging? -> write to logs

EnvironmentError: h2o-py got an unexpected HTTP status code:
 412 Precondition Failed (method = POST; url = http://localhost:54321/99/Rapids). 
detailed error messages: water.DException$DistributedException: from /172.16.2.17:54321; by class water.rapids.ASTMerge$MergeSet$MakeHash; class water.exceptions.H2OIllegalArgumentException: unimplemented
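
The 412 from Rapids means this merge hits a code path that is not implemented in this build (the DistributedException above comes from ASTMerge), so the join never happens. Note also that merge(), like drop(), returns a new frame, and the modeling cells below refer to a frame named data, so the intent appears to be to keep the merge results and carry the joined frame forward. A sketch of that intent, assuming a build where the merge succeeds:

# Intended flow (sketch; requires a build where this merge is implemented):
# keep the results of merge(), which returns a new frame, and pass the fully
# joined frame to the modeling cells below under the name `data`.
crimes = crimes.merge(census,  allLeft=True, allRite=False)
crimes = crimes.merge(weather, allLeft=True, allRite=False)
data   = crimes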

In [12]:
# Create test/train split
data_split = h2o.split_frame(data, ratios = [0.8,0.2])
train = data_split[1]
test  = data_split[2]

# Simple GBM - Predict Arrest
data_gbm = h2o.gbm(x              =train.drop("Arrest"),
                   y              =train     ["Arrest"],
                   validation_x   =test .drop("Arrest"),
                   validation_y   =test      ["Arrest"],
                   ntrees         =10,
                   max_depth      =6,
                   distribution   ="bernoulli")

# Simple Deep Learning
data_dl = h2o.deeplearning(x                   =train.drop("Arrest"),
                           y                   =train     ["Arrest"],
                           validation_x        =test .drop("Arrest"),
                           validation_y        =test      ["Arrest"],
                           variable_importances=True,
                           loss                ="Automatic")


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-12-347776b381b3> in <module>()
      1 # Create test/train split
----> 2 data_split = h2o.split_frame(data, ratios = [0.8,0.2])
      3 train = data_split[1]
      4 test  = data_split[2]
      5 

NameError: name 'data' is not defined
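
This NameError is downstream of the failed merge in In [5]: data was never created. Separately, the splits come back as a Python list, so indexing starts at 0; the [1]/[2] indexing above assumes a particular return shape, and the exact behaviour of split_frame varies across H2O versions. A hedged sketch, assuming data holds the merged frame and that split_frame returns one frame per listed ratio:

# Sketch (assumes `data` is the merged crimes/census/weather frame and that
# h2o.split_frame returns one H2OFrame per listed ratio, indexed from 0).
data_split = h2o.split_frame(data, ratios=[0.8, 0.2])
train = data_split[0]   # 80% split
test  = data_split[1]   # 20% split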

In [2]:
# GBM performance on train/test data
train_auc_gbm = data_gbm.model_performance(train).auc()
test_auc_gbm  = data_gbm.model_performance(test) .auc()

# Deep Learning performance on train/test data
train_auc_dl = data_dl.model_performance(train).auc()
test_auc_dl  = data_dl.model_performance(test) .auc()

# Make a pretty HTML table printout of the results
header = ["Model", "AUC Train", "AUC Test"]
table  = [
           ["GBM", train_auc_gbm, test_auc_gbm],
           ["DL ", train_auc_dl,  test_auc_dl]
         ]
h2o.H2ODisplay(table, header)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-2-f7c2ab3a3e26> in <module>()
      1 # GBM performance on train/test data
----> 2 train_auc_gbm = data_gbm.model_performance(train).auc()
      3 test_auc_gbm  = data_gbm.model_performance(test) .auc()
      4 
      5 # Deep Learning performance on train/test data

NameError: name 'data_gbm' is not defined

In [6]:
# Create new H2OFrame of crime observations
examples = {
            "Date":                 ["02/08/2015 11:43:58 PM", "02/08/2015 11:00:39 PM"],
            "IUCR":                 [1811, 1150],
            "Primary.Type":         ["NARCOTICS", "DECEPTIVE PRACTICE"],
            "Location.Description": ["STREET", "RESIDENCE"],
            "Domestic":             ["false", "false"],
            "Beat":                 [422, 923],
            "District":             [4, 9],
            "Ward":                 [7, 14],
            "Community.Area":       [46, 63],
            "FBI.Code":             [18, 11]
            }

crime_examples = h2o.H2OFrame(python_obj = examples)

# Refine date column and merge with census data
refine_date_col(crime_examples, "Date", "%m/%d/%Y %I:%M:%S %p")
crime_examples.drop("Date")
crime_examples.merge(census, allLeft=True, allRite=False)


Parse Progress: [##################################################] 100%
Uploaded py634b18a9-7e84-40ca-b265-b2fe43e064aa into cluster with 2 rows and 10 cols
Rows: 2 Cols: 16

Chunk compression summary:

chunk_type  chunk_name                 count  count_percentage  size      size_percentage
C0L         Constant Integers          7      43.75             560 B     43.818466
C1N         1-Byte Integers (w/o NAs)  4      25.0              280 B     21.909233
C2          2-Byte Integers            2      12.5              144 B     11.267606
C2S         2-Byte Fractions           1      6.25              88 B      6.885759
CStr        String                     2      12.5              206 B     16.118937

Frame distribution summary:

                   size      number_of_rows  number_of_chunks_per_column  number_of_chunks
172.16.2.17:54321  1.2 KB    2.0             1.0                          16.0
mean               1.2 KB    2.0             1.0                          16.0
min                1.2 KB    2.0             1.0                          16.0
max                1.2 KB    2.0             1.0                          16.0
stddev             0 B       0.0             0.0                          0.0
total              1.2 KB    2.0             1.0                          16.0
Column-by-Column Summary:

Location.Description FBI.Code Primary.Type Community.Area District Beat Domestic IUCR Date Ward Day Month Year WeekNum WeekDay HourOfDay
type string int string int int int enum int int int int int int int enum int
mins NaN 11.0 NaN 46.0 4.0 422.0 0.0 1150.0 1.423465239e+12 7.0 8.0 3.0 3915.0 6.0 6.0 23.0
maxs NaN 18.0 NaN 63.0 9.0 923.0 0.0 1811.0 1.423467838e+12 14.0 8.0 3.0 3915.0 6.0 6.0 23.0
sigma NaN 4.94974746831 NaN 12.0208152802 3.53553390593 354.260497374 0.0 467.397582364 1837770.5243 4.94974746831 0.0 0.0 0.0 0.0 0.0 0.0
zero_count 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0
missing_count 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
---------------------------------------------------------------------------
EnvironmentError                          Traceback (most recent call last)
<ipython-input-6-85bb7c75c897> in <module>()
     16 
     17 # Refine date column and merge with census data
---> 18 refine_date_col(crime_examples, "Date", "%m/%d/%Y %I:%M:%S %p")
     19 crime_examples.drop("Date")
     20 crime_examples.merge(census, allLeft=True, allRite=False)

<ipython-input-4-c2702228f9f1> in refine_date_col(data, col, pattern)
     15     # data["Weekend"] = h2o.ifelse(data["WeekDay"] in ("Sun", "Sat"), 1, 0)[0]
     16     data["Weekend"] = h2o.ifelse(data["WeekDay"] == "Sun" or data["WeekDay"] == "Sat", 1, 0)[0]
---> 17     data["Season"] = data["Month"].cut([0, 2, 5, 7, 10, 12], ["Winter", "Spring", "Summer", "Autumn", "Winter"])
     18 
     19 refine_date_col(crimes, "Date", "%m/%d/%Y %I:%M:%S %p")

/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/frame.pyc in cut(self, breaks, labels, include_lowest, right, dig_lab)
   1256 
   1257     expr = "(cut '{}' {} {} {} {} #{}".format(self.key(), breaks_list, labels_list, "%TRUE" if include_lowest else "%FALSE", "%TRUE" if right else "%FALSE", dig_lab)
-> 1258     res = h2o.rapids(expr)
   1259     return H2OVec(self._name, Expr(op=res["vec_ids"][0]["name"], length=res["num_rows"]))
   1260 

/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/h2o.pyc in rapids(expr)
    487   :return: The JSON response of the Rapids execution
    488   """
--> 489   result = H2OConnection.post_json("Rapids", ast=urllib.quote(expr), _rest_version=99)
    490   if result['error'] is not None:
    491     raise EnvironmentError("rapids expression not evaluated: {0}".format(str(result['error'])))

/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/connection.pyc in post_json(url_suffix, file_upload_info, **kwargs)
    360     if __H2OCONN__ is None:
    361       raise ValueError("No h2o connection. Did you run `h2o.init()` ?")
--> 362     return __H2OCONN__._rest_json(url_suffix, "POST", file_upload_info, **kwargs)
    363 
    364   def _rest_json(self, url_suffix, method, file_upload_info, **kwargs):

/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/connection.pyc in _rest_json(self, url_suffix, method, file_upload_info, **kwargs)
    363 
    364   def _rest_json(self, url_suffix, method, file_upload_info, **kwargs):
--> 365     raw_txt = self._do_raw_rest(url_suffix, method, file_upload_info, **kwargs)
    366     return self._process_tables(raw_txt.json())
    367 

/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/connection.pyc in _do_raw_rest(self, url_suffix, method, file_upload_info, **kwargs)
    429       raise EnvironmentError(("h2o-py got an unexpected HTTP status code:\n {} {} (method = {}; url = {}). \n"+ \
    430                               "detailed error messages: {}")
--> 431                              .format(http_result.status_code,http_result.reason,method,url,detailed_error_msgs))
    432 
    433     # TODO: is.logging? -> write to logs

EnvironmentError: h2o-py got an unexpected HTTP status code:
 412 Precondition Failed (method = POST; url = http://localhost:54321/99/Rapids). 
detailed error messages: Data vector is constant!
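
This failure is unrelated to the earlier merge error: cut() rejects the Month column because it is constant for these two examples (both fall in the same month; the summary above shows Month mins and maxs of 3.0). One possible workaround, sketched under the assumption that elementwise comparisons and arithmetic on H2O columns behave like the month() + 1 expression in refine_date_col, is to bucket months with indicator sums instead of cut(), which has no problem with a constant column. The result is a numeric season code rather than a labelled enum:

# Hedged sketch: numeric season coding (0 = Winter, 1 = Spring, 2 = Summer, 3 = Autumn)
# built from indicator comparisons, so a constant Month column is fine.
# The bins mirror the cut() breaks in refine_date_col.
def season_from_month(data):
    m = data["Month"]
    data["Season"] = (m > 2) + (m > 5) + (m > 7) - 3 * (m > 10)

season_from_month(crime_examples)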

In [ ]:
# Predict probability of arrest from new observations
gbm_pred = data_gbm.predict(crime_examples)
dl_pred  = data_dl .predict(crime_examples)

# TODO: Replace with a pretty HTML table
gbm_pred.describe()
dl_pred.describe()