In [1]:
# import modules as usual
import os
import glob
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import cv2
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
from xgboost import XGBClassifier
In [2]:
# Directories holding the sensor CSV recordings, relative to the notebook's working directory.
# path_positive is read by load_positive_data (label 1) and path_negative by
# load_negative_data (label 0) further down.
path_positive = '../eskin_data/yamamoto/throw/'
path_negative = '../eskin_data/yamamoto/others/'
In [3]:
# extract the moment of throwing based on accel values
def extract_action(df, half_window=90):
    """Cut a window of rows around the strongest acceleration peak of one recording.

    The peak row is the truncated mean of the three rows at which ``|accelX|``,
    ``|accelY|`` and ``|accelZ|`` are largest. The window spans ``half_window``
    rows on each side of that row, both ends included, so it normally holds
    ``2 * half_window + 1`` rows (181 by default). It is shorter when the peak
    lies closer than ``half_window`` rows to the start or end of the recording.

    Parameters
    ----------
    df : pandas.DataFrame
        One recording. After ``reset_index()`` it must expose the columns
        ``time``, ``accelX``, ``accelY`` and ``accelZ``. The caller's frame is
        not modified.
    half_window : int, default 90
        Number of rows kept before and after the peak row.

    Returns
    -------
    numpy.ndarray
        2-D array with the values of the selected rows; ``time`` is moved to
        the index first and is therefore not part of the returned columns.
    """
    # A fresh 0..n-1 index makes index labels and row positions interchangeable.
    df = df.reset_index()
    peak_rows = [df[axis].abs().idxmax() for axis in ("accelX", "accelY", "accelZ")]
    mom_action = int(sum(peak_rows) / 3)

    # The removed ``.ix[a:b]`` sliced by label, i.e. inclusive at both ends and
    # tolerant of a start below 0. ``iloc`` is end-exclusive and would wrap a
    # negative start around, hence the ``+ 1`` and the clamp.
    start = max(mom_action - half_window, 0)
    window = df.iloc[start:mom_action + half_window + 1]

    # ``set_index`` returns a new frame, so no slice is mutated in place.
    return window.set_index("time").values
In [4]:
def load_positive_data(path):
    """Load one positive clip per CSV file found directly inside ``path``.

    Each file is read with its first column as index and reduced to a single
    window by ``extract_action``.

    Parameters
    ----------
    path : str
        Directory that contains the ``*.csv`` recordings.

    Returns
    -------
    X_positives : numpy.ndarray
        The clips, in sorted file-name order. A regular 3-D array when all
        clips share one shape; otherwise a 1-D object array holding one 2-D
        array per file.
    y_positives : numpy.ndarray
        Vector of ones with one entry per clip.
    """
    pattern = os.path.join(path, '*.csv')
    # glob returns files in filesystem order; sorting keeps the sample order
    # (and therefore any later seeded split) identical across machines.
    files = sorted(glob.glob(pattern))

    clips = [extract_action(pd.read_csv(file_path, index_col=0)) for file_path in files]

    if len({clip.shape for clip in clips}) > 1:
        # Clips cut short at the edge of a recording have fewer rows. Recent
        # NumPy versions refuse to build an array from such a ragged list, so
        # the object array is filled element by element.
        X_positives = np.empty(len(clips), dtype=object)
        for position, clip in enumerate(clips):
            X_positives[position] = clip
    else:
        X_positives = np.array(clips)

    y_positives = np.ones(len(X_positives))
    return X_positives, y_positives
In [5]:
def load_negative_data(path, num_clip=100, random_state=71, clip_len=180):
    """Draw random fixed-length clips from every CSV file found directly inside ``path``.

    Parameters
    ----------
    path : str
        Directory that contains the ``*.csv`` recordings.
    num_clip : int, default 100
        Number of clips drawn from EACH file (with replacement, so clips may
        overlap or repeat).
    random_state : int, default 71
        Seed passed to ``np.random.seed``; note that this reseeds NumPy's
        global random generator.
    clip_len : int, default 180
        Number of consecutive rows per clip.

    Returns
    -------
    X_negatives : numpy.ndarray
        Array of shape ``(n_files * num_clip, clip_len, n_columns)`` when all
        files have the same number of columns.
    y_negatives : numpy.ndarray
        Vector of zeros with one entry per clip.

    Raises
    ------
    ValueError
        If clips are requested from a file that has ``clip_len`` rows or fewer.
    """
    np.random.seed(random_state)
    pattern = os.path.join(path, '*.csv')
    # The clips are drawn file after file from one seeded stream, so the file
    # order must be fixed for the result to be reproducible across machines.
    files = sorted(glob.glob(pattern))

    X_negatives = []
    for file_path in files:
        values = pd.read_csv(file_path, index_col=0).values
        # Start rows are drawn from 0 .. len - clip_len - 1, the same range the
        # notebook has always used (the very last possible start is excluded).
        n_starts = len(values) - clip_len
        if num_clip > 0 and n_starts <= 0:
            raise ValueError(
                "{} has {} rows; more than {} are needed to cut a clip".format(
                    file_path, len(values), clip_len))
        for _ in range(num_clip):
            # choice(int) samples as if from np.arange(int), without building
            # a range object for every draw.
            start = np.random.choice(n_starts)
            X_negatives.append(values[start:start + clip_len])

    X_negatives = np.array(X_negatives)
    y_negatives = np.zeros(len(X_negatives))
    return X_negatives, y_negatives
In [6]:
def resize_matrix(X, size = (20, 20), flatten=False):
    """Rescale every 2-D sample in ``X`` to a common size with bilinear interpolation.

    Parameters
    ----------
    X : sequence of 2-D arrays
        Samples to rescale; accessed as ``X[0] .. X[len(X) - 1]``.
    size : tuple of int, default (20, 20)
        Target size, passed to ``cv2.resize`` as its size argument.
    flatten : bool, default False
        When True each rescaled sample is unrolled into a 1-D vector
        (for XGBoost etc.); when False it stays 2-D (for CNNs, i.e.
        Convolutional Neural Networks).

    Returns
    -------
    numpy.ndarray
        The rescaled samples stacked into one array.
    """
    def _rescale(sample):
        # Dividing by a float promotes integer samples to floating point
        # before they are handed to OpenCV.
        as_float = sample / 1.
        scaled = cv2.resize(as_float, size, interpolation = cv2.INTER_LINEAR)
        return scaled.ravel() if flatten == True else scaled

    return np.array([_rescale(X[idx]) for idx in range(len(X))])
In [7]:
# Positive class: one clip per CSV file in path_positive.
X_positives, y_positives = load_positive_data(path_positive)
In [8]:
X_negatives, y_negatives = load_negative_data(path_negative, num_clip=500) # 500 random clips from each negative file
In [9]:
# check the shape of positive data
X_positives.shape, y_positives.shape
Out[9]:
In [10]:
# check the shape of negative data
X_negatives.shape, y_negatives.shape
Out[10]:
In [11]:
X_positives = resize_matrix(X_positives, flatten=True)
In [12]:
X_negatives = resize_matrix(X_negatives, flatten=True)
In [13]:
X = np.concatenate((X_positives, X_negatives), axis=0)
y = np.concatenate((y_positives, y_negatives), axis=0)
In [14]:
# Stratified 5-fold cross-validation of an XGBoost classifier, scored by ROC-AUC.
# NOTE(review): the negative clips are random windows cut from the same files,
# so clips in different folds may overlap - confirm the scores are not inflated.
xgb_params = dict(
    base_score=0.5, colsample_bylevel=1, colsample_bytree=0.7,
    gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=3,
    min_child_weight=1, missing=None, n_estimators=100, nthread=-1,
    objective='binary:logistic', reg_alpha=0, reg_lambda=1,
    scale_pos_weight=1, seed=0, silent=True, subsample=0.7,
)

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=71)
scores = []
for fold, (train_idx, test_idx) in enumerate(skf.split(X, y)):
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]

    # A fresh model per fold; the instance from the last fold is refit on the
    # full data set in a later cell.
    clf_xgb = XGBClassifier(**xgb_params)
    clf_xgb.fit(X_train, y_train)

    # Probability of the positive class (second column) for the held-out fold.
    probs = clf_xgb.predict_proba(X_test)[:, 1]
    score = roc_auc_score(y_test, probs)
    print(fold, score)
    scores.append(score)

# The reported total is the mean of the per-fold scores.
print("Total ROC-AUC:", np.mean(scores))
In [15]:
# Reference recording from the positive directory; only its column names are
# used below, as the column labels of the feature-importance table.
# NOTE(review): the directory part duplicates path_positive - keep the two in sync.
df_ref = pd.read_csv("../eskin_data/yamamoto/throw/eskin131418286838246619.csv", index_col=0)
In [16]:
# Refit the classifier object left over from the last cross-validation fold on
# the complete data set, then read its per-feature importances.
clf_xgb.fit(X, y)
feature_importance = clf_xgb.feature_importances_
In [17]:
# Arrange the importances back into the 20x20 grid produced by resize_matrix:
# 20 rows along the clip, 20 columns labelled with the reference file's columns.
# Row labels are formatted with two decimals because str(x * 0.15) leaks binary
# rounding noise into the labels (e.g. "0.44999999999999996_msec").
# NOTE(review): assumes df_ref has exactly 20 columns and that one resized row
# spans 0.15 of the labelled unit - confirm against the recording format.
df_imp = DataFrame(feature_importance.reshape(20,20), index=["{:.2f}_msec".format(x * 0.15) for x in range(20)], columns=df_ref.columns)
In [18]:
# Draw the feature-importance table as a heatmap on an explicitly created axes.
fig, ax = plt.subplots(figsize=[20,20])
sns.heatmap(df_imp, ax=ax)
plt.show()
In [ ]: