In [ ]:
def trial(df_train, test_data):
    """
    Fit a random forest on ``df_train`` and predict 'state' for ``test_data``.

    The training features are every column of ``df_train`` except
    'avg_stand', 'stand', 'state' and 'index'; the target is the 'state'
    column. Features are expanded with the module-level
    ``polynomial_features`` transformer before fitting. One tenth of the
    training rows is split off by ``train_test_split`` and is not used for
    fitting (nor evaluated here).

    The raw predictions are printed, then passed through
    ``convert_to_words``; that result is handed to ``print_full`` and
    ``get_position_stats``.

    Args:
        df_train: training DataFrame containing the columns named above.
        test_data: DataFrame to predict for. It must contain an 'avg_stand'
            column, which is dropped before prediction.

    Returns:
        ``test_data`` itself, modified in place with a new 'state' column
        holding the predicted values.

    Raises:
        ValueError: if the training features contain NaN values.
    """
    my_test_data = test_data.drop(['avg_stand'], axis=1)
    y = df_train['state'].values
    X = df_train.drop(['avg_stand', 'stand', 'state', 'index'], axis=1)

    # Fail fast with a clear error. Previously this case only printed a
    # message and then crashed on the unbound ``rf`` / ``X_train`` below.
    if X.isnull().values.any():
        raise ValueError("Found NaN values")

    rf = RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                                max_depth=None, max_features='auto', max_leaf_nodes=None,
                                min_samples_leaf=8, min_samples_split=4,
                                min_weight_fraction_leaf=0.0, n_estimators=500, n_jobs=-1,
                                oob_score=False, random_state=None, verbose=0,
                                warm_start=False)
    X = polynomial_features.fit_transform(X)
    # The held-out split is not used in this function, so it is discarded.
    X_train, _, y_train, _ = cross_validation.train_test_split(X, y, test_size=0.1)
    rf.fit(X_train, y_train)

    # Reuse the transformer fitted on the training features rather than
    # re-fitting it on the prediction data.
    # NOTE(review): assumes ``polynomial_features`` is an sklearn-style
    # transformer exposing ``transform`` -- confirm against its definition.
    p_test_data = polynomial_features.transform(my_test_data)
    rf_pred2 = rf.predict(p_test_data)
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(rf_pred2)
    test_data['state'] = rf_pred2

    final_prediction = convert_to_words(rf_pred2)
    print_full(final_prediction)
    get_position_stats(final_prediction)
    return test_data
def trial_standup(df_train, test_data):
    """
    Fit a random forest on ``df_train`` and predict 'avg_stand' for ``test_data``.

    The training features are every column of ``df_train`` except
    'avg_stand', 'stand', 'state' and 'index'; the target is the 'avg_stand'
    column. Features are expanded with the module-level
    ``polynomial_features`` transformer before fitting. One tenth of the
    training rows is split off by ``train_test_split`` and is not used for
    fitting (nor evaluated here).

    The transformed prediction features and the raw predictions are printed.

    Args:
        df_train: training DataFrame containing the columns named above.
        test_data: DataFrame to predict for. All of its columns are used as
            features, so it is presumably expected to hold the same feature
            columns as the training set -- verify against the caller.

    Returns:
        ``test_data`` itself, modified in place with an 'avg_stand' column
        holding the predicted values.

    Raises:
        ValueError: if the training features contain NaN values.
    """
    y = df_train['avg_stand'].values
    X = df_train.drop(['avg_stand', 'stand', 'state', 'index'], axis=1)

    # Fail fast with a clear error. Previously this case only printed a
    # message and then crashed on the unbound ``rf`` / ``X_train`` below.
    if X.isnull().values.any():
        raise ValueError("Found NaN values")

    rf = RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                                max_depth=None, max_features='auto', max_leaf_nodes=None,
                                min_samples_leaf=8, min_samples_split=4,
                                min_weight_fraction_leaf=0.0, n_estimators=500, n_jobs=-1,
                                oob_score=False, random_state=None, verbose=0,
                                warm_start=False)
    X = polynomial_features.fit_transform(X)
    # The held-out split is not used in this function, so it is discarded.
    X_train, _, y_train, _ = cross_validation.train_test_split(X, y, test_size=0.1)
    rf.fit(X_train, y_train)

    # Reuse the transformer fitted on the training features rather than
    # re-fitting it on the prediction data.
    # NOTE(review): assumes ``polynomial_features`` is an sklearn-style
    # transformer exposing ``transform`` -- confirm against its definition.
    p_test_data = polynomial_features.transform(test_data)
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(p_test_data)
    rf_pred2 = rf.predict(p_test_data)
    print(rf_pred2)

    # Store the estimated stand-up values as a new feature on the frame that
    # was passed in.
    test_data['avg_stand'] = rf_pred2
    return test_data