In [1]:
# import modules as usual
import os
import glob
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import cv2
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
from xgboost import XGBClassifier
In [2]:
# path of files
path_positive_1 = '../eskin_data/yamamoto/throw/'
path_negative_1 = '../eskin_data/yamamoto/others/'
path_positive_2 = '../eskin_data/zennra/throw/'
path_negative_2 = '../eskin_data/zennra/others/'
In [3]:
# extract the moment of throwing based on accel values
def extract_action(df):
    df = df.reset_index()
    # average the positions of the peak absolute acceleration on each axis
    mom_action = int((np.argmax(abs(df.accelX)) + np.argmax(abs(df.accelY)) + np.argmax(abs(df.accelZ))) / 3)
    # keep 90 samples on either side of the peak (.ix is deprecated, so use .loc)
    df = df.loc[mom_action - 90:mom_action + 90]
    df.index = df.time
    df.drop(["time"], axis=1, inplace=True)
    return df.values  # .as_matrix() is deprecated
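As a quick sanity check (not in the original notebook), extract_action can be exercised on synthetic accelerometer data; the real CSVs also carry the other e-skin channels, so the column set below is only illustrative.

rng = np.random.RandomState(0)
demo = pd.DataFrame(rng.randn(1000, 3), columns=["accelX", "accelY", "accelZ"])
demo.index.name = "time"
demo.loc[500] = 10.0  # inject an obvious acceleration peak at sample 500
print(extract_action(demo).shape)  # -> (181, 3): 90 samples on either side of the peak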
In [4]:
def load_positive_data(path):
    path = os.path.join(path, '*.csv')
    files = glob.glob(path)
    X_positives = []
    for file_path in files:
        df = pd.read_csv(file_path, index_col=0)
        df = extract_action(df)
        X_positives.append(df)
    X_positives = np.array(X_positives)
    y_positives = np.ones(len(X_positives))
    return X_positives, y_positives
In [5]:
def load_negative_data(path, num_clip=100, random_state=71):
    np.random.seed(random_state)
    path = os.path.join(path, '*.csv')
    files = glob.glob(path)
    X_negatives = []
    for file_path in files:
        df = pd.read_csv(file_path, index_col=0)
        # cut num_clip random 180-sample windows from each file
        for i in range(num_clip):
            start = np.random.choice(range(len(df) - 180))
            df_extracted = df.iloc[start:start + 180].values  # .as_matrix() is deprecated
            X_negatives.append(df_extracted)
    X_negatives = np.array(X_negatives)
    y_negatives = np.zeros(len(X_negatives))
    return X_negatives, y_negatives
In [6]:
def resize_matrix(X, size=(20, 20), flatten=False):
    X_resized = []
    for i in range(len(X)):
        X_ = X[i] / 1.  # cast to float for cv2.resize
        X_ = cv2.resize(X_, size, interpolation=cv2.INTER_LINEAR)
        if flatten:  # True for XGBoost etc., False for CNNs (Convolutional Neural Networks)
            X_ = X_.ravel()
        X_resized.append(X_)
    X_resized = np.array(X_resized)
    return X_resized
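Purely illustrative (the channel count below is hypothetical): resize_matrix maps clips of different lengths to the same fixed-size 20x20 representation, so they can be stacked into a single feature matrix.

clip_a = np.random.randn(181, 16)  # e.g. a positive clip
clip_b = np.random.randn(180, 16)  # e.g. a negative clip
print(resize_matrix([clip_a, clip_b], flatten=True).shape)   # -> (2, 400), for XGBoost
print(resize_matrix([clip_a, clip_b], flatten=False).shape)  # -> (2, 20, 20), for a CNN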
In [7]:
# load the first subject's (Yamamoto's) motion data
X_positives_1, y_positives_1 = load_positive_data(path_positive_1)
X_negatives_1, y_negatives_1 = load_negative_data(path_negative_1, num_clip=500) # 500 random clips per negative file
# check the shape of positive data
print(X_positives_1.shape, y_positives_1.shape)
# check the shape of negative data
print(X_negatives_1.shape, y_negatives_1.shape)
X_positives_1 = resize_matrix(X_positives_1, flatten=True)
X_negatives_1 = resize_matrix(X_negatives_1, flatten=True)
X_1 = np.concatenate((X_positives_1, X_negatives_1), axis=0)
y_1 = np.concatenate((y_positives_1, y_negatives_1), axis=0)
In [8]:
# load the second subject's (Mr. Takei, a.k.a. "zennra") motion data
X_positives_2, y_positives_2 = load_positive_data(path_positive_2)
X_negatives_2, y_negatives_2 = load_negative_data(path_negative_2, num_clip=500) # 500 random clips per negative file
# check the shape of positive data
print(X_positives_2.shape, y_positives_2.shape)
# check the shape of negative data
print(X_negatives_2.shape, y_negatives_2.shape)
X_positives_2 = resize_matrix(X_positives_2, flatten=True)
X_negatives_2 = resize_matrix(X_negatives_2, flatten=True)
X_2 = np.concatenate((X_positives_2, X_negatives_2), axis=0)
y_2 = np.concatenate((y_positives_2, y_negatives_2), axis=0)
In [9]:
# train on Yamamoto's data and predict on Takei's data. The AUC is 0.98, which is quite good.
clf_xgb = XGBClassifier(base_score=0.5, colsample_bylevel=1, colsample_bytree=0.7,
gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=3,
min_child_weight=1, missing=None, n_estimators=100, nthread=-1,
objective='binary:logistic', reg_alpha=0, reg_lambda=1,
scale_pos_weight=1, seed=0, silent=True, subsample=0.7)
clf_xgb.fit(X_1, y_1)
probs = clf_xgb.predict_proba(X_2)[:,1]
score = roc_auc_score(y_2, probs)
print(score)
In [10]:
# train on Takei's data and predict on Yamamoto's data. The AUC drops to 0.84, but that's still not bad.
clf_xgb = XGBClassifier(base_score=0.5, colsample_bylevel=1, colsample_bytree=0.7,
gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=3,
min_child_weight=1, missing=None, n_estimators=100, nthread=-1,
objective='binary:logistic', reg_alpha=0, reg_lambda=1,
scale_pos_weight=1, seed=0, silent=True, subsample=0.7)
clf_xgb.fit(X_2, y_2)
probs = clf_xgb.predict_proba(X_1)[:,1]
score = roc_auc_score(y_1, probs)
print(score)
In [11]:
# pool both subjects' data
X = np.concatenate((X_1, X_2), axis=0)
y = np.concatenate((y_1, y_2), axis=0)
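StratifiedKFold is imported above but never used; a minimal sketch (not part of the original analysis) of a pooled cross-validation on the combined data might look like the following. Note that it mixes both subjects across folds, so it is more optimistic than the cross-subject checks above; the fold count and classifier settings are arbitrary.

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=71)
cv_scores = []
for train_idx, test_idx in skf.split(X, y):
    clf = XGBClassifier(max_depth=3, n_estimators=100, learning_rate=0.1,
                        subsample=0.7, colsample_bytree=0.7, seed=0)
    clf.fit(X[train_idx], y[train_idx])
    fold_probs = clf.predict_proba(X[test_idx])[:, 1]
    cv_scores.append(roc_auc_score(y[test_idx], fold_probs))
print(np.mean(cv_scores))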
In [12]:
# read one positive CSV just as a reference for the sensor channel (column) names
df_ref = pd.read_csv("../eskin_data/yamamoto/throw/eskin131418286838246619.csv", index_col=0)
In [13]:
# retrain on the pooled data (both subjects) and extract feature importances
clf_xgb.fit(X, y)
feature_importance = clf_xgb.feature_importances_
In [14]:
# reshape the 400 importances back into a (resized time step) x (sensor channel) grid
df_imp = DataFrame(feature_importance.reshape(20, 20),
                   index=[str(x * 0.15) + "_msec" for x in range(20)],
                   columns=df_ref.columns)
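A small hypothetical follow-up, not in the original: list the most important (time step, sensor channel) cells before plotting the heatmap.

top_cells = df_imp.stack().sort_values(ascending=False).head(10)
print(top_cells)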
In [15]:
# show heatmap of feature importances.
plt.figure(figsize=[20,20])
sns.heatmap(df_imp)
plt.show()