In [1]:
%matplotlib inline

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import os
import math
import itertools

In [2]:
DATA_INPUT_DIR = "D:\\p_eaglesense\\eaglesense\\data\\topviewkinect"

In [3]:
DATA_OUTPUT_DIR = DATA_INPUT_DIR + "\\v2-models"
if not os.path.exists(DATA_OUTPUT_DIR):
    os.makedirs(DATA_OUTPUT_DIR)

In [4]:
data_dirs = []
for subdir in sorted(next(os.walk(DATA_INPUT_DIR))[1]):
    if subdir.isdigit():
        data_dirs.append(int(subdir))
data_dirs.sort()

In [5]:
data_dirs


Out[5]:
[1, 2, 3, 4, 5, 6]

In [6]:
ignored_X_cols = ["frame_id", "skeleton_id", "x", "y", "z"]
ignored_y_cols = ["frame_id", "skeleton_id", "orientation", "orientation_accurate"]

In [7]:
all_X_csv = "{d}/{tag}_X.csv".format(d=DATA_OUTPUT_DIR, tag="v2")
all_y_csv = "{d}/{tag}_y.csv".format(d=DATA_OUTPUT_DIR, tag="v2")

In [42]:
all_X_file = open(all_X_csv, "w")
all_y_file = open(all_y_csv, "w")

write_header = True

for dataset_id in data_dirs:
    # read data
    X_csv = "{d}/{dataset}/features.csv".format(d=DATA_INPUT_DIR, dataset=dataset_id)
    X_df = pd.read_csv(X_csv, dtype=np.float64)
    y_csv = "{d}/{dataset}/labels.csv".format(d=DATA_INPUT_DIR, dataset=dataset_id)
    y_df = pd.read_csv(y_csv, dtype=np.float64)

    # get single skeleton activities
    y_df = y_df.loc[y_df["skeleton_id"] == 0]
    y_df = y_df.loc[y_df["activity"].isin(list(range(0, 6)))]

    # get common frame indices
    X_df = X_df.loc[X_df["frame_id"].isin(y_df["frame_id"].values)]
    y_df = y_df.loc[y_df["frame_id"].isin(X_df["frame_id"].values)]
    
    # drop unnecessary columns
    X_df = X_df.drop(labels=ignored_X_cols, axis=1)
    y_df = y_df.drop(labels=ignored_y_cols, axis=1)
    
    # add dataset id
    X_df["dataset_id"] = dataset_id
    y_df["dataset_id"] = dataset_id
    
    # save to csv
    X_df = X_df.astype("float64")
    X_df.to_csv(all_X_file, header=write_header, index=False)
    y_df = y_df.astype("int")
    y_df.to_csv(all_y_file, header=write_header, index=False)
    
    write_header = False
    
    print(dataset_id, "Done!")
    
all_X_file.close()
all_y_file.close()


1 Done!
2 Done!
3 Done!
4 Done!
5 Done!
6 Done!
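As an aside, the same concatenation could be done in memory with pd.concat and written once, avoiding the open file handles. A minimal sketch (an addition, not part of the original run), reusing the paths and column lists defined above:

frames_X, frames_y = [], []
for dataset_id in data_dirs:
    X_df = pd.read_csv("{d}/{dataset}/features.csv".format(d=DATA_INPUT_DIR, dataset=dataset_id), dtype=np.float64)
    y_df = pd.read_csv("{d}/{dataset}/labels.csv".format(d=DATA_INPUT_DIR, dataset=dataset_id), dtype=np.float64)
    # single-skeleton frames with a valid activity label
    y_df = y_df.loc[(y_df["skeleton_id"] == 0) & (y_df["activity"].isin(range(6)))]
    # keep only the frames both tables share
    X_df = X_df.loc[X_df["frame_id"].isin(y_df["frame_id"])]
    y_df = y_df.loc[y_df["frame_id"].isin(X_df["frame_id"])]
    frames_X.append(X_df.drop(labels=ignored_X_cols, axis=1).assign(dataset_id=dataset_id))
    frames_y.append(y_df.drop(labels=ignored_y_cols, axis=1).assign(dataset_id=dataset_id))
pd.concat(frames_X, ignore_index=True).astype("float64").to_csv(all_X_csv, index=False)
pd.concat(frames_y, ignore_index=True).astype("int").to_csv(all_y_csv, index=False)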

In [9]:
ACTIVITIES = ["Standing", "Sitting", "Pointing", "Phone", "Tablet", "Paper"]

In [10]:
num_activities = len(ACTIVITIES)

In [11]:
num_activities


Out[11]:
6

In [12]:
X_all_df = pd.read_csv(all_X_csv, dtype=np.float64)
y_all_df = pd.read_csv(all_y_csv, dtype=np.int64)

In [13]:
X_all_df.shape, y_all_df.shape


Out[13]:
((7762, 73), (7752, 2))
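Note that the row counts differ: X_all_df has 7762 rows but y_all_df has 7752, so features.csv evidently carries rows for more than one skeleton in some frames and the two tables do not align one-to-one by position. A minimal guard (an addition; here it would fire, flagging the mismatch that the positional pairing below quietly assumes away):

assert len(X_all_df) == len(y_all_df), "features/labels row counts differ; positional pairing assumes alignment"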

In [14]:
X_all_df.head()


Out[14]:
layer_area_0 layer_area_1 layer_area_2 layer_contours_0 layer_contours_1 layer_distance_0 layer_distance_1 layer_distance_2 layer_distance_3 layer_distance_4 ... interlayer_pos_16 interlayer_pos_17 extremities0 extreme_infrared_0 extreme_infrared_1 extreme_infrared_2 extreme_infrared_3 extreme_infrared_4 extreme_infrared_5 dataset_id
0 0.433004 0.425952 0.141044 3.0 3.0 29.8329 26.3059 27.4591 26.3059 267.5 ... -33.0 -194.0 4.0 71.5 61.5 86.5 81.5 0.0 197.5 1.0
1 0.345013 0.502695 0.152291 3.0 3.0 31.0161 38.0132 29.0172 38.0132 297.5 ... -32.0 -191.0 4.0 53.5 54.0 74.5 94.0 0.0 103.5 1.0
2 0.418367 0.428571 0.153061 3.0 3.0 29.5466 27.7308 26.6833 27.7308 216.0 ... -32.0 -190.0 2.0 52.5 54.0 0.0 0.0 0.0 54.0 1.0
3 0.452675 0.397119 0.150206 3.0 3.0 26.4008 23.2594 25.0799 23.2594 164.0 ... -31.0 -186.0 2.0 15.5 57.5 0.0 0.0 0.0 57.5 1.0
4 0.486784 0.400881 0.112335 3.0 3.0 25.0599 19.6469 24.0208 19.6469 157.0 ... -31.0 -188.0 5.0 12.0 19.5 68.0 83.5 87.5 131.0 1.0

5 rows × 73 columns


In [15]:
y_all_df.head()


Out[15]:
activity dataset_id
0 0 1
1 0 1
2 0 1
3 0 1
4 0 1

In [16]:
X_vec = X_all_df.drop(["dataset_id"], axis=1)
y_vec = y_all_df.drop(["dataset_id"], axis=1)

In [19]:
y_bins = np.bincount(np.squeeze(y_vec))

In [20]:
y_bins


Out[20]:
array([ 912,  900, 1161, 1497, 1561, 1721], dtype=int64)
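The imbalance is easier to see plotted. A quick sketch using the seaborn import above (an addition; figure not shown):

fig, ax = plt.subplots(figsize=(8, 4))
sns.barplot(x=ACTIVITIES, y=y_bins, ax=ax)
ax.set_xlabel("Activity")
ax.set_ylabel("Number of samples")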

In [21]:
y_min_count = min(y_bins)

In [22]:
y_min_count


Out[22]:
900

In [23]:
X_balanced_df = pd.DataFrame(columns=X_all_df.columns, dtype=np.float64)
y_balanced_df = pd.DataFrame(columns=y_all_df.columns, dtype=np.int64)

In [25]:
for activity_id in range(num_activities):
    samples_y = y_all_df.loc[y_all_df["activity"] == activity_id]
    samples_y = samples_y.sample(y_min_count, replace=False, random_state=42)
    # pair features by row position (assumes X_all_df and y_all_df rows align one-to-one)
    samples_X = X_all_df.iloc[samples_y.index]
    X_balanced_df = pd.concat([X_balanced_df, samples_X], ignore_index=True)
    y_balanced_df = pd.concat([y_balanced_df, samples_y], ignore_index=True)

In [26]:
X_balanced_df = X_balanced_df.astype(np.float64)

In [27]:
y_balanced_df = y_balanced_df.astype(np.int64)

In [28]:
X_balanced_df.shape, y_balanced_df.shape


Out[28]:
((5400, 73), (5400, 2))
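A quick verification (an addition) that each activity now contributes exactly y_min_count samples:

print(np.bincount(y_balanced_df["activity"]))  # expected: [900 900 900 900 900 900]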

In [30]:
X_final_df = X_balanced_df.drop(labels=["dataset_id"], axis=1)
y_final_df = y_balanced_df.drop(labels=["dataset_id"], axis=1)

In [31]:
X_final_df.head()


Out[31]:
layer_area_0 layer_area_1 layer_area_2 layer_contours_0 layer_contours_1 layer_distance_0 layer_distance_1 layer_distance_2 layer_distance_3 layer_distance_4 ... interlayer_pos_15 interlayer_pos_16 interlayer_pos_17 extremities0 extreme_infrared_0 extreme_infrared_1 extreme_infrared_2 extreme_infrared_3 extreme_infrared_4 extreme_infrared_5
0 0.493197 0.362245 0.144558 3.0 3.0 26.5707 25.9422 26.9258 25.9422 180.5 ... -14.0 -33.0 -174.0 3.0 34.5 43.0 81.0 0.0 0.0 102.0
1 0.155372 0.598347 0.246281 3.0 3.0 13.1529 29.1548 23.0217 29.1548 326.0 ... -10.0 -26.0 -154.0 5.0 10.0 37.0 72.5 57.0 56.5 97.5
2 0.455904 0.393124 0.150972 3.0 3.0 26.4008 28.2312 29.6142 28.2312 224.5 ... -14.0 -33.0 -169.0 5.0 55.5 26.0 83.0 93.5 60.5 192.5
3 0.157895 0.441426 0.400679 3.0 3.0 12.3693 27.6586 25.3180 27.6586 226.5 ... -17.0 -27.0 -137.0 5.0 3.5 62.0 55.0 48.5 17.5 116.5
4 0.170498 0.419540 0.409962 3.0 3.0 13.0384 26.0768 25.0200 26.0768 189.5 ... -17.0 -24.0 -137.0 3.0 11.5 28.5 22.0 0.0 0.0 28.5

5 rows × 72 columns


In [32]:
y_final_df.head()


Out[32]:
activity
0 0
1 0
2 0
3 0
4 0

In [43]:
X_final_csv = "{d}/{tag}_X.csv".format(d=DATA_OUTPUT_DIR, tag="v2")
y_final_csv = "{d}/{tag}_y.csv".format(d=DATA_OUTPUT_DIR, tag="v2")

In [44]:
X_final_df.to_csv(X_final_csv, header=False, index=False)
y_final_df.to_csv(y_final_csv, header=False, index=False)

In [45]:
def sample_test_split(X_df, y_df, train_ratio, seed=42):
    
    train_size = math.floor(len(X_df) * train_ratio)
    
    X_train_df = X_df.sample(train_size, replace=False, random_state=seed)
    y_train_df = y_df.loc[X_train_df.index]
    
    X_test_df = X_df.loc[~X_df.index.isin(X_train_df.index)]
    y_test_df = y_df.loc[X_test_df.index]
    
    X_train, y_train = X_train_df.values, y_train_df.values
    X_test, y_test = X_test_df.values, y_test_df.values
    
    return X_train, y_train, X_test, y_test
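Because this helper draws a plain random sample, the 70/30 split is not guaranteed to keep the six classes exactly balanced in each half. A stratified alternative using scikit-learn (a sketch; train_test_split is not used elsewhere in this notebook):

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X_final_df.values, y_final_df.values.ravel(),
    train_size=0.7, stratify=y_final_df.values.ravel(), random_state=42)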

In [77]:
X, y = X_final_df.values, y_final_df.values

In [78]:
X_train, y_train, X_test, y_test = sample_test_split(X_final_df, y_final_df, 7/10)

In [79]:
X.shape, y.shape


Out[79]:
((5400, 72), (5400, 1))

In [47]:
X_train.shape, y_train.shape


Out[47]:
((3779, 72), (3779, 1))

In [48]:
X_test.shape, y_test.shape


Out[48]:
((1621, 72), (1621, 1))

In [49]:
import xgboost as xgb
from sklearn import metrics

In [57]:
XGB_PARAM_FINAL = {}
XGB_PARAM_FINAL["eta"] = 0.3
XGB_PARAM_FINAL["gamma"] = 1
XGB_PARAM_FINAL["lambda"] = 1
XGB_PARAM_FINAL["alpha"] = 0
XGB_PARAM_FINAL["max_depth"] = 6
XGB_PARAM_FINAL["colsample_bytree"] = 0.5
XGB_PARAM_FINAL["colsample_bylevel"] = 0.5
XGB_PARAM_FINAL["subsample"] = 0.5
XGB_PARAM_FINAL["objective"] = "multi:softmax"
XGB_PARAM_FINAL["eval_metric"] = "merror"
XGB_PARAM_FINAL["num_class"] = len(ACTIVITIES)
XGB_PARAM_FINAL["silent"] = 0
XGB_PARAM_FINAL["seed"] = 42
XGB_NUM_ROUNDS = 200
XGB_EARLYSTOPPING_ROUNDS = 30

In [58]:
train_xgbmatrix = xgb.DMatrix(X_train, y_train)
test_xgbmatrix = xgb.DMatrix(X_test, y_test)
watchlist = [(train_xgbmatrix, "train"), (test_xgbmatrix, "eval")]
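The number of boosting rounds could also be chosen by cross-validation rather than a single held-out split. A minimal sketch with xgboost's built-in xgb.cv (an addition, not part of the original run):

cv_results = xgb.cv(params=XGB_PARAM_FINAL, dtrain=train_xgbmatrix,
                    num_boost_round=XGB_NUM_ROUNDS, nfold=5,
                    early_stopping_rounds=XGB_EARLYSTOPPING_ROUNDS, seed=42)
print(len(cv_results))  # rounds kept after early stopping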

In [59]:
eval_results = {}
validation = xgb.train(params=XGB_PARAM_FINAL, dtrain=train_xgbmatrix, evals=watchlist, evals_result=eval_results, 
                       num_boost_round=XGB_NUM_ROUNDS, early_stopping_rounds=XGB_EARLYSTOPPING_ROUNDS, verbose_eval=100)


[0]	train-merror:0.365705	eval-merror:0.42628
Multiple eval metrics have been passed: 'eval-merror' will be used for early stopping.

Will train until eval-merror hasn't improved in 30 rounds.
[100]	train-merror:0.014554	eval-merror:0.164713
Stopping. Best iteration:
[106]	train-merror:0.013496	eval-merror:0.159161


In [65]:
validation.best_iteration+1


Out[65]:
107

In [66]:
booster = xgb.train(params=XGB_PARAM_FINAL, dtrain=train_xgbmatrix, num_boost_round=validation.best_iteration+1)

In [67]:
y_predicted = booster.predict(test_xgbmatrix)

In [68]:
accuracy = metrics.accuracy_score(y_test, y_predicted)

In [69]:
accuracy


Out[69]:
0.8408389882788402
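Overall accuracy hides per-class behavior. A per-class breakdown with scikit-learn's classification_report (an addition, not in the original run):

print(metrics.classification_report(y_test, y_predicted, target_names=ACTIVITIES))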

In [71]:
def get_normalized_confusion_matrix(y_true, y_predicted):
    confusion_matrix = metrics.confusion_matrix(y_true, y_predicted)
    confusion_matrix_normalized = confusion_matrix.astype("float") / confusion_matrix.sum(axis=1)[:, np.newaxis]
    confusion_matrix_normalized *= 100
    return confusion_matrix_normalized
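For example, a row of raw counts [30, 10] (40 true samples of a class, 30 classified correctly) normalizes to [75.00, 25.00], so every row of the heatmap below sums to 100.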

In [72]:
confusion_matrix = get_normalized_confusion_matrix(y_test, y_predicted)

In [73]:
fig, ax = plt.subplots(figsize=(10,7.5))
sns.heatmap(data=confusion_matrix, annot=True, fmt=".2f", linewidths=1, square=True,
            vmin=0, vmax=100, ax=ax, xticklabels=ACTIVITIES, yticklabels=ACTIVITIES, cmap=sns.cubehelix_palette(8))
plt.yticks(rotation=0)
sns.despine(top=False, right=False, left=False, bottom=False)



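The trained booster's feature importances can also be inspected. A short sketch using xgboost's plotting helper (an addition; feature names default to f0, f1, ... because the DMatrix was built from a bare NumPy array):

fig, ax = plt.subplots(figsize=(8, 10))
xgb.plot_importance(booster, ax=ax, max_num_features=20)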
In [81]:
all_dmatrix = xgb.DMatrix(X, y)

In [82]:
final_booster = xgb.train(params=XGB_PARAM_FINAL, dtrain=all_dmatrix, num_boost_round=validation.best_iteration+1)

In [83]:
final_booster.save_model("v2.model")
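As a round-trip sanity check (an addition, not in the original notebook), the saved model can be reloaded and used for prediction, assuming the same 72-column feature layout:

restored_booster = xgb.Booster()
restored_booster.load_model("v2.model")
restored_predictions = restored_booster.predict(xgb.DMatrix(X_test))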