In [16]:
%matplotlib inline
import numpy as np
import pandas as pd
from sklearn import grid_search  # removed in scikit-learn 0.20 (see the model_selection note below)
from sklearn import metrics
from sklearn import cross_validation  # removed in scikit-learn 0.20
from sklearn.externals import joblib  # removed in scikit-learn 0.23; use the standalone joblib package on newer installs
import xgboost as xgb
import matplotlib.pyplot as plt
import seaborn as sns
import operator
import itertools
import random
import os
import pickle
In [17]:
if not os.path.exists("results"):
os.makedirs("results")
In [18]:
PREPROCESSED_DIRECTORY = "E:\\eaglesense\\data\\topviewkinect\\all"
In [19]:
FEATURE_SET = "weak"
In [30]:
s1_data_path = "{root}/{tag}_s1_data.pickle".format(root=PREPROCESSED_DIRECTORY, tag=FEATURE_SET)
with open(s1_data_path, "rb") as f:
s1_data = pickle.load(f)
s2_data_path = "{root}/{tag}_s2_data.pickle".format(root=PREPROCESSED_DIRECTORY, tag=FEATURE_SET)
with open(s2_data_path, "rb") as f:
s2_data = pickle.load(f)
cs_data_path = "{root}/{tag}_cs_data.pickle".format(root=PREPROCESSED_DIRECTORY, tag=FEATURE_SET)
with open(cs_data_path, "rb") as f:
cs_data = pickle.load(f)
In [21]:
XGBOOST_SEED = 0
In [22]:
xgboost_clf = xgb.XGBClassifier(learning_rate=0.3, n_estimators=100, objective="multi:softmax", seed=XGBOOST_SEED)
In [44]:
max_depth = [5, 6, 7, 8]
gamma = [1, 2, 3]
subsample = [0.5, 1]
colsample_bytree = [0.5, 1]
colsample_bylevel = [0.5, 1]
reg_alpha = [1, 2, 3]
reg_lambda = [1, 2, 3]
xgboost_knobs = {
"max_depth": max_depth,
"gamma": gamma,
"subsample": subsample,
"colsample_bytree": colsample_bytree,
"colsample_bylevel": colsample_bylevel,
"reg_alpha": reg_alpha,
"reg_lambda": reg_lambda
}
In [45]:
num_combinations = len(list(itertools.product(max_depth, gamma, subsample, colsample_bytree, colsample_bylevel, reg_alpha, reg_lambda)))
num_combinations
Out[45]:
864
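In [ ]:
# Equivalent closed form (a sketch): the grid size is just the product of the
# option counts, 4 * 3 * 2 * 2 * 2 * 3 * 3 = 864, so no materialized product list is needed.
int(np.prod([len(v) for v in xgboost_knobs.values()]))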
In [25]:
RAND_SEED = 42
In [47]:
num_grid_searches = int(num_combinations / 3)
In [48]:
params_search = grid_search.RandomizedSearchCV(estimator=xgboost_clf, param_distributions=xgboost_knobs, cv=5,
n_iter=num_grid_searches, random_state=RAND_SEED, verbose=1)
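In [ ]:
# Note: sklearn.grid_search was removed in scikit-learn 0.20. On a current install,
# a roughly equivalent search (a sketch, untested against this data) would be:
# from sklearn.model_selection import RandomizedSearchCV
# params_search = RandomizedSearchCV(estimator=xgboost_clf, param_distributions=xgboost_knobs,
#                                    cv=5, n_iter=num_grid_searches, random_state=RAND_SEED, verbose=1)
# There, grid_scores_ no longer exists; use params_search.cv_results_ instead.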
In [15]:
s1_params_path = "{root}/{tag}_s1_params.pickle".format(root=PREPROCESSED_DIRECTORY, tag=FEATURE_SET)
s2_params_path = "{root}/{tag}_s2_params.pickle".format(root=PREPROCESSED_DIRECTORY, tag=FEATURE_SET)
cs_params_path = "{root}/{tag}_cs_params.pickle".format(root=PREPROCESSED_DIRECTORY, tag=FEATURE_SET)
In [49]:
s1_X_train = s1_data["X_train"]
s1_y_train = s1_data["y_train"]
In [50]:
params_search.fit(s1_X_train, s1_y_train.ravel())
In [14]:
params_search.best_params_
In [31]:
params_search.best_score_
In [16]:
s1_params = {
"best_params": params_search.best_params_,
"best_score": params_search.best_score_,
"grid_scores": params_search.grid_scores_
}
with open(s1_params_path, "wb") as f:
pickle.dump(s1_params, f)
In [20]:
s2_X_train = s2_data["X_train"]
s2_y_train = s2_data["y_train"]
In [21]:
params_search.fit(s2_X_train, s2_y_train.ravel())
In [26]:
params_search.best_params_
In [30]:
params_search.best_score_
In [32]:
s2_params = {
"best_params": params_search.best_params_,
"best_score": params_search.best_score_,
"grid_scores": params_search.grid_scores_
}
with open(s2_params_path, "wb") as f:
pickle.dump(s2_params, f)
In [34]:
cs_X_train = cs_data["X_train"]
cs_y_train = cs_data["y_train"]
In [35]:
params_search.fit(cs_X_train, cs_y_train.ravel())
In [36]:
params_search.best_params_
In [37]:
params_search.best_score_
In [39]:
cs_params = {
"best_params": params_search.best_params_,
"best_score": params_search.best_score_,
"grid_scores": params_search.grid_scores_
}
with open(cs_params_path, "wb") as f:
pickle.dump(cs_params, f)
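In [ ]:
# The three save cells above share one shape; a small helper (a sketch) would
# keep them in sync if the saved fields ever change.
def save_search_results(search, path):
    """Pickle the best parameters, best score, and full grid scores of a fitted search."""
    results = {
        "best_params": search.best_params_,
        "best_score": search.best_score_,
        "grid_scores": search.grid_scores_,
    }
    with open(path, "wb") as f:
        pickle.dump(results, f)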
In [ ]:
subject_kfolds_indices = list()
In [ ]:
subject_kfolds = list(itertools.combinations(unique_subjects, len(unique_subjects) - 1))
In [ ]:
for kth_fold in subject_kfolds:
print(kth_fold)
kth_fold_train_indices = list()
kth_fold_test_indices = list()
for subject_id in unique_subjects:
subject_features = features_df[:][features_df["subject"] == subject_id]
if subject_id in kth_fold:
kth_fold_train_indices.extend(subject_features.index)
else:
kth_fold_test_indices.extend(subject_features.index)
subject_kfolds_indices.append((kth_fold_train_indices, kth_fold_test_indices))
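In [ ]:
# The loop above builds leave-one-subject-out folds by hand. A more concise
# equivalent (a sketch, assuming features_df["subject"] is aligned row-for-row
# with X and y) would be:
# from sklearn.model_selection import LeaveOneGroupOut
# subject_kfolds_indices = list(LeaveOneGroupOut().split(X, y, groups=features_df["subject"]))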
In [ ]:
clf = xgb.XGBClassifier(learning_rate=0.3, n_estimators=100, objective="multi:softmax", seed=M_XGB_SEED)
In [ ]:
num_searches = 30
random_search = grid_search.RandomizedSearchCV(clf, param_distributions=tuning_params, cv=subject_kfolds_indices,
verbose=2, n_iter=num_searches, random_state=M_RAND_SEED)
In [24]:
random_search.fit(X, y.ravel())
In [ ]:
random_search.grid_scores_
In [ ]:
random_search.best_score_
In [ ]:
random_search.best_params_
In [ ]:
all_xgbmatrix = xgb.DMatrix(X, y)
In [ ]:
params_cv_results = xgb.cv(params=XGB_PARAM_CV, dtrain=all_xgbmatrix, num_boost_round=200, nfold=12, folds=subject_kfolds_indices,
                           verbose_eval=True, early_stopping_rounds=50)  # when folds= is given, it takes precedence over nfold
In [ ]:
params_cv_results
In [ ]:
sample_1_accuracy
In [ ]:
sample_1_cm = metrics.confusion_matrix(sample_1_y_test, sample_1_y_predicted)
sample_1_cm_normalized = sample_1_cm.astype("float") / sample_1_cm.sum(axis=1)[:, np.newaxis]
sample_1_cm_normalized *= 100
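In [ ]:
# This row-normalization recurs for every confusion matrix below; a small helper
# (a sketch) would avoid repeating the three lines each time.
def normalize_cm(cm):
    """Return a confusion matrix as row-wise percentages."""
    return cm.astype("float") / cm.sum(axis=1)[:, np.newaxis] * 100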
In [ ]:
with sns.axes_style("ticks"):
fig, ax = plt.subplots(figsize=thesis_figsize)
sns.heatmap(data=sample_1_cm_normalized, annot=True, fmt=".2f", linewidths=0.5, square=True,
vmin=0, vmax=100, ax=ax, xticklabels=ACTIVITIES, yticklabels=ACTIVITIES)
plt.yticks(rotation=0)
sns.despine()
In [ ]:
SAMPLE_2_TRAIN_SIZE = 2 / 3
In [ ]:
sample_2_train_size_by_subject = np.zeros((len(unique_subjects), NUM_ACTIVITIES))
In [ ]:
for subject_idx, subject_id in enumerate(unique_subjects):
subject_y = labels_df[labels_df["subject"] == subject_id]["label"].values
subject_activities_bin = np.bincount(np.squeeze(subject_y))
sample_2_train_size_by_subject[subject_idx] = np.array([int(size * SAMPLE_2_TRAIN_SIZE) for size in subject_activities_bin])
In [ ]:
sample_2_X_train = np.array([], dtype=np.float64).reshape(0, num_features)
sample_2_y_train = np.array([], dtype=np.int32).reshape(0, 1)
sample_2_X_test = np.array([], dtype=np.float64).reshape(0, num_features)
sample_2_y_test = np.array([], dtype=np.int32).reshape(0, 1)
for subject_idx, subject_id in enumerate(unique_subjects):
print(subject_id)
subject_features = features_df[:][features_df["subject"] == subject_id]
subject_features.drop(["subject"], axis=1, inplace=True)
subject_labels = labels_df[:][labels_df["subject"] == subject_id]
subject_labels.drop(["subject"], axis=1, inplace=True)
for activity_idx in range(NUM_ACTIVITIES):
subject_activity_train_size = sample_2_train_size_by_subject[subject_idx, activity_idx]
subject_activity_labels_df = subject_labels[subject_labels["label"] == activity_idx]
        subject_activity_train_labels_df = subject_activity_labels_df.sample(n=int(subject_activity_train_size), replace=False, random_state=M_RAND_SEED)
subject_activity_all_indices = list(subject_activity_labels_df.index.values)
subject_activity_train_indices = list(subject_activity_train_labels_df.index.values)
subject_activity_test_indices = [idx for idx in subject_activity_all_indices if idx not in subject_activity_train_indices]
        subject_activity_X_train = subject_features.loc[subject_activity_train_indices]
        subject_activity_y_train = subject_labels.loc[subject_activity_train_indices]
        subject_activity_X_test = subject_features.loc[subject_activity_test_indices]
        subject_activity_y_test = subject_labels.loc[subject_activity_test_indices]
        sample_2_X_train = np.vstack([sample_2_X_train, subject_activity_X_train.values])
        sample_2_y_train = np.vstack([sample_2_y_train, subject_activity_y_train.values])
        sample_2_X_test = np.vstack([sample_2_X_test, subject_activity_X_test.values])
        sample_2_y_test = np.vstack([sample_2_y_test, subject_activity_y_test.values])
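In [ ]:
# The per-subject, per-activity sampling above is a hand-rolled stratified split.
# A compact per-subject equivalent (a sketch, approximate because the original
# floors each class count with int()) would be:
# from sklearn.model_selection import train_test_split
# X_tr, X_te, y_tr, y_te = train_test_split(
#     subject_features.values, subject_labels.values,
#     train_size=SAMPLE_2_TRAIN_SIZE, stratify=subject_labels["label"],
#     random_state=M_RAND_SEED)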
In [ ]:
sample_2_X_train.shape
In [ ]:
sample_2_X_test.shape
In [ ]:
sample_2_train_xgbmatrix = xgb.DMatrix(sample_2_X_train, sample_2_y_train)
sample_2_test_xgbmatrix = xgb.DMatrix(sample_2_X_test, sample_2_y_test)
# sample_2_watchlist = [(sample_2_train_xgbmatrix, "train"), (sample_2_test_xgbmatrix, "eval")]
In [ ]:
# sample_2_results = {}
# xgb.train(params=XGB_PARAM, dtrain=sample_2_train_xgbmatrix, num_boost_round=XGB_NUM_ROUNDS,
# evals=sample_2_watchlist, evals_result=sample_2_results, early_stopping_rounds=XGB_EARLYSTOPPING_ROUNDS)
In [ ]:
# iterations = list(range(len(sample_2_results["eval"]["merror"]))) * 2
# errors = sample_2_results["eval"]["merror"] + sample_2_results["train"]["merror"]
# types = ["Test"] * int(len(iterations)/2) + ["Train"] * int(len(iterations)/2)
# units = [0] * len(iterations)
# data = {
# "iteration": iterations,
# "error": errors,
# "type": types,
# "unit": units
# }
# samples_test2_df = pd.DataFrame(data)
In [ ]:
# with sns.axes_style("ticks"):
# fig, ax = plt.subplots(figsize=(10, 7.5))
# sns.tsplot(data=samples_test2_df, time="iteration", value="error", condition="type", unit="unit",
# color=sns.color_palette("Set1"), ax=ax)
# ax.set_xlabel("Iteration")
# ax.set_ylabel("Mean Error")
# sns.despine()
# plt.tight_layout()
In [ ]:
sample_2_booster = xgb.train(params=XGB_PARAM_FINAL, dtrain=sample_2_train_xgbmatrix, num_boost_round=XGB_NUM_ROUNDS_FINAL)
In [ ]:
sample_2_y_predicted = sample_2_booster.predict(sample_2_test_xgbmatrix)
In [ ]:
sample_2_accuracy = metrics.accuracy_score(sample_2_y_test, sample_2_y_predicted)
In [ ]:
sample_2_accuracy
In [ ]:
sample_2_cm = metrics.confusion_matrix(sample_2_y_test, sample_2_y_predicted)
sample_2_cm_normalized = sample_2_cm.astype("float") / sample_2_cm.sum(axis=1)[:, np.newaxis]
sample_2_cm_normalized *= 100
In [ ]:
with sns.axes_style("ticks"):
fig, ax = plt.subplots(figsize=thesis_figsize)
sns.heatmap(data=sample_2_cm_normalized, annot=True, fmt=".2f", linewidths=0.5, square=True,
vmin=0, vmax=100, ax=ax, xticklabels=ACTIVITIES, yticklabels=ACTIVITIES)
sns.despine()
plt.yticks(rotation=0)
In [ ]:
crosssubject_1_X_train = np.array([], dtype=np.float64).reshape(0, num_features)
crosssubject_1_y_train = np.array([], dtype=np.int32).reshape(0, 1)
crosssubject_1_X_test = np.array([], dtype=np.float64).reshape(0, num_features)
crosssubject_1_y_test = np.array([], dtype=np.int32).reshape(0, 1)
for subject_id in unique_subjects:
subject_features = features_df[:][features_df["subject"] == subject_id]
subject_features.drop(["subject"], axis=1, inplace=True)
subject_labels = labels_df[:][labels_df["subject"] == subject_id]
subject_labels.drop(["subject"], axis=1, inplace=True)
subject_X = subject_features.values
subject_y = subject_labels.values
if subject_id % 2 == 1:
print(subject_id, "\tTrain")
crosssubject_1_X_train = np.vstack([crosssubject_1_X_train, subject_X])
crosssubject_1_y_train = np.vstack([crosssubject_1_y_train, subject_y])
else:
print(subject_id, "\tTest")
crosssubject_1_X_test = np.vstack([crosssubject_1_X_test, subject_X])
crosssubject_1_y_test = np.vstack([crosssubject_1_y_test, subject_y])
In [ ]:
crosssubject_1_X_train.shape
In [ ]:
crosssubject_1_X_test.shape
In [ ]:
crosssubject_1_train_xgbmatrix = xgb.DMatrix(crosssubject_1_X_train, crosssubject_1_y_train)
crosssubject_1_test_xgbmatrix = xgb.DMatrix(crosssubject_1_X_test, crosssubject_1_y_test)
crosssubject_1_watchlist = [(crosssubject_1_train_xgbmatrix, "train"), (crosssubject_1_test_xgbmatrix, "eval")]
In [ ]:
crosssubject_1_results = {}
xgb.train(params=XGB_PARAM_FINAL, dtrain=crosssubject_1_train_xgbmatrix, num_boost_round=XGB_NUM_ROUNDS,
evals=crosssubject_1_watchlist, evals_result=crosssubject_1_results, early_stopping_rounds=50)
In [ ]:
iterations = list(range(len(crosssubject_1_results["eval"]["merror"]))) * 2
errors = crosssubject_1_results["eval"]["merror"] + crosssubject_1_results["train"]["merror"]
types = ["Test"] * int(len(iterations)/2) + ["Train"] * int(len(iterations)/2)
units = [0] * len(iterations)
data = {
"iteration": iterations,
"error": errors,
"Type": types,
"unit": units
}
crosssubject_1_df = pd.DataFrame(data)
In [ ]:
with sns.axes_style("ticks"):
fig, ax = plt.subplots(figsize=(10, 7.5))
sns.tsplot(data=crosssubject_1_df, time="iteration", value="error", condition="Type", unit="unit",
color=sns.color_palette("Set1"), ax=ax)
ax.set_xlabel("Iteration")
ax.set_ylabel("Mean Error")
sns.despine()
plt.tight_layout()
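In [ ]:
# Note: sns.tsplot was removed in seaborn 0.9. On a current install the same
# figure is roughly (a sketch):
# sns.lineplot(data=crosssubject_1_df, x="iteration", y="error", hue="Type", ax=ax)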
In [ ]:
crosssubject_1_booster = xgb.train(params=XGB_PARAM_FINAL, dtrain=crosssubject_1_train_xgbmatrix,
num_boost_round=XGB_NUM_ROUNDS_FINAL)
In [ ]:
feature_importance = crosssubject_1_booster.get_fscore()
In [ ]:
xgb.plot_importance(crosssubject_1_booster)
In [ ]:
len(features_vector.columns)
In [ ]:
features_importance_formatted = dict()
for feature_idx, feature_name in enumerate(features_vector.columns):
    old_key = "f{}".format(feature_idx)
    if old_key not in feature_importance:
        continue
    new_key = feature_name
    features_importance_formatted[new_key] = feature_importance[old_key]
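In [ ]:
# The f0/f1/... remapping above can be avoided by naming the features up front
# (a sketch): pass feature_names when building the DMatrix, and get_fscore()
# returns real column names. Note that get_fscore() reports split counts (the
# "weight" importance), not information gain.
# named_dmatrix = xgb.DMatrix(crosssubject_1_X_train, crosssubject_1_y_train,
#                             feature_names=list(features_vector.columns))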
In [ ]:
features_importance_formatted = sorted(features_importance_formatted.items(), key=operator.itemgetter(1))
In [ ]:
features_importance_formatted
In [ ]:
features_importance_formatted_last = features_importance_formatted[-10:]
In [ ]:
features_importance_formatted_last
In [ ]:
features_importance_df = pd.DataFrame(features_importance_formatted_last, columns=["feature", "weight"])
features_importance_df["weight"] = features_importance_df["weight"] / features_importance_df["weight"].sum()
In [ ]:
with sns.axes_style("ticks"):
fig, ax = plt.subplots(figsize=(10, 7.5))
sns.barplot(x="feature", y="gain", data=features_importance_df, label="Total", color="#3498db", ax=ax)
plt.xticks(rotation=90)
plt.xlabel("")
plt.ylabel("")
sns.despine()
In [ ]:
np.set_printoptions(formatter={'float': lambda x: "{:.2f}".format(x)})
In [ ]:
crosssubject_individual_cm_list = list()
for subject_id in unique_subjects:
print(subject_id)
subject_features = features_df[:][features_df["subject"] == subject_id]
subject_features.drop(["subject"], axis=1, inplace=True)
subject_labels = labels_df[:][labels_df["subject"] == subject_id]
subject_labels.drop(["subject"], axis=1, inplace=True)
subject_X = subject_features.values
subject_y = subject_labels.values
subject_xgbmatrix = xgb.DMatrix(subject_X, subject_y)
subject_y_predicted = crosssubject_1_booster.predict(subject_xgbmatrix)
subject_accuracy = metrics.accuracy_score(subject_y, subject_y_predicted)
print("accuracy:", subject_accuracy)
subject_cm = metrics.confusion_matrix(subject_y, subject_y_predicted)
subject_cm_normalized = subject_cm.astype("float") / subject_cm.sum(axis=1)[:, np.newaxis]
subject_cm_normalized *= 100
print("confusion matrix:\n", subject_cm_normalized, "\n")
if subject_id % 2 == 0:
crosssubject_individual_cm_list.append((subject_id, subject_cm_normalized))
In [ ]:
crosssubject_individual_results_list = list()
for subject_id, subject_cm in crosssubject_individual_cm_list:
subject_string = str(subject_id)[2:4]
for activity_id, activity in enumerate(ACTIVITIES):
crosssubject_subject_result = {
"Activity": activity,
"Accuracy": subject_cm[activity_id, activity_id],
"Subject": subject_string
}
crosssubject_individual_results_list.append(crosssubject_subject_result)
crosssubject_individuals_df = pd.DataFrame(crosssubject_individual_results_list)
In [ ]:
with sns.axes_style("ticks"):
    g = sns.FacetGrid(data=crosssubject_individuals_df, col="Subject", col_wrap=3)
g = (g.map(sns.barplot, "Activity", "Accuracy", palette=sns.color_palette(), ci=None)
.set_xlabels("")
.set_ylabels("")
.set_xticklabels(rotation=90))
plt.yticks(np.arange(0, 110, 20.0))
g.despine()
In [ ]:
crosssubject_1_y_predicted = crosssubject_1_booster.predict(crosssubject_1_test_xgbmatrix)
In [ ]:
crosssubject_1_accuracy = metrics.accuracy_score(crosssubject_1_y_test, crosssubject_1_y_predicted)
In [ ]:
crosssubject_1_accuracy
In [ ]:
crosssubject_1_cm = metrics.confusion_matrix(crosssubject_1_y_test, crosssubject_1_y_predicted)
crosssubject_1_cm_normalized = crosssubject_1_cm.astype("float") / crosssubject_1_cm.sum(axis=1)[:, np.newaxis]
crosssubject_1_cm_normalized *= 100
In [ ]:
with sns.axes_style("ticks"):
fig, ax = plt.subplots(figsize=thesis_figsize)
sns.heatmap(data=crosssubject_1_cm_normalized, annot=True, fmt=".2f", linewidths=0.5, square=True,
vmin=0, vmax=100, ax=ax, xticklabels=ACTIVITIES, yticklabels=ACTIVITIES)
plt.yticks(rotation=0)
sns.despine()
In [ ]:
noinfrared_features_cols = [c for c in features_df.columns if not c.startswith("extreme_infrared_")]
In [ ]:
noinfrared_features_df = features_df[noinfrared_features_cols]
In [ ]:
noinfrared_features_df.shape
In [ ]:
num_noinfrared_features = noinfrared_features_df.shape[1] - 1
In [ ]:
noinfrared_X_train = np.array([], dtype=np.float64).reshape(0, num_noinfrared_features)
noinfrared_y_train = np.array([], dtype=np.int32).reshape(0, 1)
noinfrared_X_test = np.array([], dtype=np.float64).reshape(0, num_noinfrared_features)
noinfrared_y_test = np.array([], dtype=np.int32).reshape(0, 1)
for subject_id in unique_subjects:
subject_features = noinfrared_features_df[:][noinfrared_features_df["subject"] == subject_id]
subject_features.drop(["subject"], axis=1, inplace=True)
subject_labels = labels_df[:][labels_df["subject"] == subject_id]
subject_labels.drop(["subject"], axis=1, inplace=True)
subject_X = subject_features.values
subject_y = subject_labels.values
if subject_id % 2 == 1:
print(subject_id, "\tTrain")
noinfrared_X_train = np.vstack([noinfrared_X_train, subject_X])
noinfrared_y_train = np.vstack([noinfrared_y_train, subject_y])
else:
print(subject_id, "\tTest")
noinfrared_X_test = np.vstack([noinfrared_X_test, subject_X])
noinfrared_y_test = np.vstack([noinfrared_y_test, subject_y])
In [ ]:
noinfrared_train_xgbmatrix = xgb.DMatrix(noinfrared_X_train, noinfrared_y_train)
noinfrared_test_xgbmatrix = xgb.DMatrix(noinfrared_X_test, noinfrared_y_test)
In [ ]:
noinfrared_booster = xgb.train(params=XGB_PARAM_FINAL, dtrain=noinfrared_train_xgbmatrix, num_boost_round=XGB_NUM_ROUNDS_FINAL)
In [ ]:
noinfrared_y_predicted = noinfrared_booster.predict(noinfrared_test_xgbmatrix)
In [ ]:
noinfrared_accuracy = metrics.accuracy_score(noinfrared_y_test, noinfrared_y_predicted)
In [ ]:
noinfrared_accuracy
In [ ]:
noinfrared_cm = metrics.confusion_matrix(noinfrared_y_test, noinfrared_y_predicted)
noinfrared_cm_normalized = noinfrared_cm.astype("float") / noinfrared_cm.sum(axis=1)[:, np.newaxis]
noinfrared_cm_normalized *= 100
In [ ]:
with sns.axes_style("ticks"):
fig, ax = plt.subplots(figsize=(10, 7.5))
sns.heatmap(data=noinfrared_cm_normalized, annot=True, fmt=".2f", linewidths=0.5, square=True,
vmin=0, vmax=100, ax=ax, xticklabels=ACTIVITIES, yticklabels=ACTIVITIES)
sns.despine()
plt.yticks(rotation=0)
In [ ]:
crosssubject_infrared_comparisons_list = list()
for activity_id, activity in enumerate(ACTIVITIES):
crosssubject_result = {
"Activity": activity,
"Accuracy": crosssubject_1_cm_normalized[activity_id, activity_id],
"Condition": "All"
}
crosssubject_infrared_comparisons_list.append(crosssubject_result)
for activity_id, activity in enumerate(ACTIVITIES):
crosssubject_result = {
"Activity": activity,
"Accuracy": noinfrared_cm_normalized[activity_id, activity_id],
"Condition": "Without infrared"
}
crosssubject_infrared_comparisons_list.append(crosssubject_result)
crosssubject_infrared_comparisons_df = pd.DataFrame(crosssubject_infrared_comparisons_list)
In [ ]:
sns.palplot(sns.color_palette("coolwarm", n_colors=7))
In [ ]:
sns.palplot(sns.color_palette("hls", 8))
In [ ]:
sns.palplot(sns.color_palette("Set2", 10))
In [ ]:
flatui = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"]
In [ ]:
sns.palplot(sns.color_palette(flatui))
In [ ]:
with sns.axes_style("ticks"):
fig, ax = plt.subplots(figsize=thesis_figsize)
sns.barplot(x="Activity", y="Accuracy", hue="Condition", palette=[flatui[1], flatui[4]],
data=crosssubject_infrared_comparisons_df, ax=ax)
ax.set_xlabel("")
ax.set_ylabel("")
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05), frameon=True, framealpha=1, fancybox=True, shadow=True, ncol=2)
sns.despine()
    for p in ax.patches:
        height = p.get_height()
        ax.text(p.get_x(), height + 3, "%1.2f" % (height / 100))
In [ ]:
crosssubject_infrared_comparisons_df
In [ ]:
# reading paper: accuracy difference between the two conditions
91.858038 - 92.790536
In [ ]:
# phone: accuracy difference between the two conditions
77.292500 - 64.903428
In [ ]:
# tablet: accuracy difference between the two conditions
99.087616 - 64.430431
In [ ]:
XGB_PARAM_DEMO = {}
XGB_PARAM_DEMO["eta"] = 0.3
XGB_PARAM_DEMO["gamma"] = 1
XGB_PARAM_DEMO["lambda"] = 3
XGB_PARAM_DEMO["alpha"] = 1
XGB_PARAM_DEMO["max_depth"] = 6
XGB_PARAM_DEMO["colsample_bytree"] = 0.8
XGB_PARAM_DEMO["subsample"] = 0.5
XGB_PARAM_DEMO["objective"] = "multi:softmax"
XGB_PARAM_DEMO["eval_metric"] = "merror"
XGB_PARAM_DEMO["num_class"] = len(ACTIVITIES)
XGB_PARAM_DEMO["silent"] = 0
XGB_NUM_ROUNDS_DEMO = 40
In [ ]:
X.shape
In [ ]:
y.shape
In [ ]:
demo_train_xgbmatrix = xgb.DMatrix(X, y)
demo_test_xgbmatrix = xgb.DMatrix(X, y)
demo_watchlist = [(demo_train_xgbmatrix, "train"), (demo_test_xgbmatrix, "eval")]
In [ ]:
demo_results = {}
demo_booster = xgb.train(XGB_PARAM_DEMO, demo_train_xgbmatrix, XGB_NUM_ROUNDS_DEMO, demo_watchlist, evals_result=demo_results, early_stopping_rounds=20)
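In [ ]:
# Note: demo_test_xgbmatrix wraps the same data as demo_train_xgbmatrix, so the
# "eval" error (which also drives early stopping, since it is the last watchlist
# entry) is really training error; this demo model is not validated on held-out data.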
In [ ]:
demo_booster.save_model("demo-xgboost.model")
In [ ]:
bst2 = xgb.Booster(model_file="demo-xgboost.model")
In [ ]:
test_dmatrix = xgb.DMatrix(X)
y_predicted = bst2.predict(test_dmatrix)
accuracy = metrics.accuracy_score(y, y_predicted)
In [ ]:
accuracy
In [ ]:
cs_combinations = list(itertools.combinations(unique_subjects, int(len(unique_subjects)/2)))
In [ ]:
len(cs_combinations)
In [ ]:
cs_combinations_results_csv = "E:\\angel\\data\\topviewkinect\\all\\iss-cs_combinations.csv"
In [ ]:
open(cs_combinations_results_csv, "w").close()
with open(cs_combinations_results_csv, "a") as f:
    data_columns = pd.DataFrame(columns=["combination", "activity", "a1", "a2", "a3", "a4", "a5", "a6"])
    data_columns.to_csv(f, header=True, index=False)
In [ ]:
with open(cs_combinations_results_csv, "a") as f:
for cs_combination_idx, subjects_combination in enumerate(cs_combinations):
print(cs_combination_idx)
cs_combination_X_train = np.array([], dtype=np.float64).reshape(0, num_features)
cs_combination_y_train = np.array([], dtype=np.int32).reshape(0, 1)
cs_combination_X_test = np.array([], dtype=np.float64).reshape(0, num_features)
cs_combination_y_test = np.array([], dtype=np.int32).reshape(0, 1)
for subject_id in unique_subjects:
subject_features = features_df[:][features_df["subject"] == subject_id]
subject_features.drop(["subject"], axis=1, inplace=True)
subject_labels = labels_df[:][labels_df["subject"] == subject_id]
subject_labels.drop(["subject"], axis=1, inplace=True)
subject_X = subject_features.values
subject_y = subject_labels.values
if subject_id in subjects_combination:
cs_combination_X_train = np.vstack([cs_combination_X_train, subject_X])
cs_combination_y_train = np.vstack([cs_combination_y_train, subject_y])
else:
cs_combination_X_test = np.vstack([cs_combination_X_test, subject_X])
cs_combination_y_test = np.vstack([cs_combination_y_test, subject_y])
cs_combination_train_xgbmatrix = xgb.DMatrix(cs_combination_X_train, cs_combination_y_train)
cs_combination_test_xgbmatrix = xgb.DMatrix(cs_combination_X_test, cs_combination_y_test)
cs_combination_booster = xgb.train(XGB_PARAM_FINAL, dtrain=cs_combination_train_xgbmatrix,
num_boost_round=XGB_NUM_ROUNDS_FINAL)
cs_combination_y_predicted = cs_combination_booster.predict(cs_combination_test_xgbmatrix)
result = metrics.confusion_matrix(cs_combination_y_test, cs_combination_y_predicted)
data = pd.DataFrame(columns=["combination", "activity", "a1", "a2", "a3", "a4", "a5", "a6"])
for activity_id, activity in enumerate(ACTIVITIES):
            data.loc[activity_id] = [cs_combination_idx, activity] + list(result[activity_id])
data.to_csv(f, header=False, index=False)
In [ ]:
cs_combinations_results_pd = pd.read_csv(cs_combinations_results_csv)
In [ ]:
cs_combinations_results_pd
In [ ]:
noinfrared_cm
In [ ]:
# per-activity rows, aggregated over all train/test combinations
combinations_standing = cs_combinations_results_pd[:][cs_combinations_results_pd["activity"] == "Standing"]
combinations_sitting = cs_combinations_results_pd[:][cs_combinations_results_pd["activity"] == "Sitting"]
combinations_pointing = cs_combinations_results_pd[:][cs_combinations_results_pd["activity"] == "Pointing"]
combinations_phone = cs_combinations_results_pd[:][cs_combinations_results_pd["activity"] == "Phone"]
combinations_tablet = cs_combinations_results_pd[:][cs_combinations_results_pd["activity"] == "Tablet"]
combinations_paper = cs_combinations_results_pd[:][cs_combinations_results_pd["activity"] == "Paper"]
standing_cm = list()
sitting_cm = list()
pointing_cm = list()
phone_cm = list()
tablet_cm = list()
paper_cm = list()
for col in ["a1", "a2", "a3", "a4", "a5", "a6"]:
standing_cm.append(combinations_standing[col].sum())
sitting_cm.append(combinations_sitting[col].sum())
pointing_cm.append(combinations_pointing[col].sum())
phone_cm.append(combinations_phone[col].sum())
tablet_cm.append(combinations_tablet[col].sum())
paper_cm.append(combinations_paper[col].sum())
combinations_cm = np.array([
standing_cm, sitting_cm, pointing_cm, phone_cm, tablet_cm, paper_cm
])
In [ ]:
combinations_cm
In [ ]:
all_samples = np.sum(combinations_cm)
In [ ]:
accurate_samples = np.trace(combinations_cm)
In [ ]:
combinations_accuracy = accurate_samples / all_samples
In [ ]:
combinations_accuracy
In [ ]:
combinations_cm_normalized = combinations_cm.astype("float") / combinations_cm.sum(axis=1)[:, np.newaxis]
combinations_cm_normalized *= 100
In [ ]:
combinations_cm_normalized
In [ ]:
with sns.axes_style("ticks"):
fig, ax = plt.subplots(figsize=(10, 7.5))
sns.heatmap(data=combinations_cm_normalized, annot=True, fmt=".2f", linewidths=0.5, square=True,
vmin=0, vmax=100, ax=ax, xticklabels=ACTIVITIES, yticklabels=ACTIVITIES)
sns.despine()
plt.yticks(rotation=0)