In [ ]:
import h2o
import numpy as np
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt

In [ ]:
# Initialize h2o with its default settings; every later cell goes through the
# h2o module, so this cell has to run first.
h2o.init()

In [ ]:
# Load the walking-gait sample (subject 01, walk 1) into H2O and summarize it.
# h2o.locate() resolves the repository-relative path before the upload.
gait = h2o.upload_file(
    path=h2o.locate("smalldata/glrm_test/subject01_walk1.csv")
)
gait.describe()

In [ ]:
# Baseline GLRM: quadratic loss with no regularization on X or Y, i.e. PCA.
# The model is fit on gait[1:] with k=5 archetypes.
model = h2o.glrm(
    x=gait[1:],
    k=5,
    init="PlusPlus",
    loss="Quadratic",
    regularization_x="None",
    regularization_y="None",
    max_iterations=1000
)
model.show()

In [ ]:
# Archetype to feature mapping (Y)
gait_y = model._model_json["output"]["archetypes"]
# print() with a single argument behaves the same on Python 2 and Python 3.
print(gait_y)

gait_y_np = np.array(model.archetypes())
# Every third column starting at index 2. Per the plot title these are the
# z-coordinate features -- assumes the columns come in X/Y/Z triplets.
feat_cols = list(range(2, gait_y_np.shape[1], 3))
num_feat = len(feat_cols)

# Plot first archetype on z-coordinate features
x_pts = list(range(1, num_feat + 1))
y_pts = gait_y_np[0, feat_cols]
plt.plot(x_pts, y_pts, 'bo')
plt.plot(x_pts, [0] * num_feat, 'k--')   # Dashed line at zero

# Add feature labels to each point.
# feat_names drops the first header entry so that (presumably) it lines up with
# the columns of gait_y_np -- TODO confirm against the archetypes table layout.
# Each point must be labelled with the name of the column it was taken from
# (feat_cols), not with its position in the plot; indexing by plot position
# would label the points with the first num_feat feature names instead.
feat_names = gait_y.col_header[1:]
for x_pt, y_pt, col in zip(x_pts, y_pts, feat_cols):
    plt.annotate(feat_names[col], xy=(x_pt, y_pt))
plt.title("First Archetype's Z-Coordinate Feature Weights")
plt.xlabel("Feature")
plt.ylabel("Archetypal Weight")
plt.show()

In [ ]:
# Projection into archetype space (X)
# The model output records the key of the X frame; look the frame up by name.
x_key = model._model_json["output"]["loading_key"]["name"]
gait_x = h2o.get_frame(x_key)
gait_x.show()

# Pull the time column and the projections out of H2O as numpy arrays.
# time_np is reused by the later plotting cells.
time_np = np.array(h2o.as_list(gait["Time"]))
gait_x_np = np.array(h2o.as_list(gait_x))

# Plot archetypes over time: one line per column of X.
lines = []
# range instead of the Python 2-only xrange, so the cell also runs on Python 3.
for i in range(gait_x_np.shape[1]):
    # plt.plot returns a list of line handles; collect them for the legend.
    lines += plt.plot(time_np, gait_x_np[:, i], '-')
plt.title("Archetypes over Time")
plt.xlabel("Time")
plt.ylabel("Archetypal Projection")
plt.legend(lines, gait_x.col_names)
plt.show()

In [ ]:
# Reconstruct data from matrix product XY
# predict() is given the full `gait` frame, while the model itself was fit on
# gait[1:]; head() previews the result.
pred = model.predict(gait)
pred.head()

In [ ]:
# Plot original and reconstructed L.Acromium.X over time
# NOTE(review): this cell is disabled. As written it hands H2O frame columns
# straight to np.array(), whereas the imputation plot at the end of this
# notebook converts them with h2o.as_list() first -- presumably the same
# conversion is needed here before re-enabling. plt.plot() also returns a list
# of handles, so line_orig / line_imp would need unpacking for plt.legend(),
# and the "Imputed" label should probably read "Reconstructed" for this plot.
# lacro_np = np.array(gait["L.Acromium.X"])
# lacro_pred_np = np.array(pred["reconstr_L.Acromium.X"])
# line_orig = plt.plot(time_np, lacro_np, '-')
# line_imp = plt.plot(time_np, lacro_pred_np, '-')

# plt.title("Position of Left Acromium over Time")
# plt.xlabel("Time")
# plt.ylabel("X-Coordinate of Left Acromium")
# plt.legend([line_orig, line_imp], ["Original", "Imputed"])
# plt.show()

In [ ]:
# Load the variant of the walking-gait sample that has missing values
# (file suffix "miss15") and summarize it.
gait_miss = h2o.upload_file(
    h2o.locate("smalldata/glrm_test/subject01_walk1_miss15.csv")
)
gait_miss.describe()

In [ ]:
# GLRM with quadratic loss and no regularization, fit on the frame with
# missing values; the complete frame (gait[1:]) is passed as validation data.
model2 = h2o.glrm(
    x=gait_miss[1:],
    validation_x=gait[1:],
    k=15,
    init="PlusPlus",
    loss="Quadratic",
    regularization_x="None",
    regularization_y="None",
    max_iterations=500,
    min_step_size=1e-7
)
model2.show()

In [ ]:
# Impute missing data from X and Y
# predict() is run on the frame that contains the missing values; the later
# plotting cell reads the "reconstr_L.Acromium.X" column from this result.
pred2 = model2.predict(gait_miss)
pred2.head()

In [ ]:
# Compare the original L.Acromium.X series with the one imputed by model2.
# Both columns are pulled out of H2O and converted to numpy arrays first.
lacro_np2 = np.array(h2o.as_list(gait["L.Acromium.X"]))
lacro_pred_np2 = np.array(h2o.as_list(pred2["reconstr_L.Acromium.X"]))

# Original in blue, imputed in green, both against the shared time axis.
for series, fmt in ((lacro_np2, 'b-'), (lacro_pred_np2, 'g-')):
    plt.plot(time_np, series, fmt)

# Legend entries are colour patches matching the two line colours.
blue_patch = mpatches.Patch(color='blue', label='Original')
green_patch = mpatches.Patch(color='green', label='Imputed')
plt.legend([blue_patch, green_patch], ["Original", "Imputed"])

plt.title("Position of Left Acromium over Time")
plt.xlabel("Time")
plt.ylabel("X-Coordinate of Left Acromium")
plt.show()

In [ ]: