In [1]:
import pandas as pd
import numpy as np
COLUMNS = ['timestamp', 'xAxis', 'yAxis', 'zAxis']
STANDING = pd.read_csv('../Data/Standing_1462486804782.csv', header=None, names=COLUMNS)[:3000]
WALKING = pd.read_csv('../Data/Walking_1462487070722.csv', header=None, names=COLUMNS)[:3000]
RUNNING = pd.read_csv('../Data/Running_1462487326006.csv', header=None, names=COLUMNS)[:3000]
STAIRS = pd.read_csv('../Data/Stairs_1462487242397.csv', header=None, names=COLUMNS)[:3000]
ON_TRAIN = pd.read_csv('../Data/Train_1462518004872.csv', header=None, names=COLUMNS)[:3000]
STANDING.head()
Out[1]:
In [2]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
def plot_axis(ax, x, y, title):
ax.plot(x, y)
ax.set_title(title)
ax.xaxis.set_visible(False)
ax.set_ylim([min(y) - np.std(y), max(y) + np.std(y)])
ax.set_xlim([min(x), max(x)])
ax.grid(True)
def plot_activity(activity):
fig, (ax0, ax1, ax2) = plt.subplots(nrows=3, figsize=(15, 10), sharex=True)
plot_axis(ax0, activity['timestamp'], activity['xAxis'], 'x Axis')
plot_axis(ax1, activity['timestamp'], activity['yAxis'], 'y Axis')
plot_axis(ax2, activity['timestamp'], activity['zAxis'], 'z Axis')
plt.subplots_adjust(hspace=0.2)
plt.show()
In [3]:
plot_activity(STANDING)
In [4]:
plot_activity(WALKING)
In [5]:
plot_activity(RUNNING)
In [6]:
plot_activity(STAIRS)
In [7]:
plot_activity(ON_TRAIN)
In [8]:
import math
def magnitude(activity):
x2 = activity['xAxis'] * activity['xAxis']
y2 = activity['yAxis'] * activity['yAxis']
z2 = activity['zAxis'] * activity['zAxis']
m2 = x2 + y2 + z2
m = m2.apply(lambda x: math.sqrt(x))
return m
In [9]:
STANDING['magnitude'] = magnitude(STANDING)
WALKING['magnitude'] = magnitude(WALKING)
RUNNING['magnitude'] = magnitude(RUNNING)
STAIRS['magnitude'] = magnitude(STAIRS)
ON_TRAIN['magnitude'] = magnitude(ON_TRAIN)
In [10]:
def plot_magnitudes(activities, titles):
fig, axs = plt.subplots(nrows=len(activities), figsize=(15, 15))
for i in range(0, len(activities)):
plot_axis(axs[i], activities[i]['timestamp'], activities[i]['magnitude'], titles[i])
plt.subplots_adjust(hspace=0.2)
plt.show()
In [11]:
plot_magnitudes([STANDING, WALKING, RUNNING, STAIRS, ON_TRAIN],
['Standing', 'Walking', 'Running', 'Stairs', 'On Train'])
In [14]:
def windows(df, size=100):
start = 0
while start < df.count():
yield start, start + size
start += (size / 2)
fig, ax = plt.subplots(nrows=1, figsize=(15, 3))
plot_axis(ax, WALKING['timestamp'], WALKING['magnitude'], 'Walking')
for (start, end) in windows(WALKING['timestamp']):
ax.axvline(WALKING['timestamp'][start], color='r')
plt.show()
In [41]:
from scipy.stats import skew, kurtosis
from statsmodels.tsa import stattools
import numpy as np
import math
def jitter(axis, start, end):
j = float(0)
for i in xrange(start, min(end, axis.count())):
if start != 0:
j += abs(axis[i] - axis[i-1])
return j / (end-start)
def mean_crossing_rate(axis, start, end):
cr = 0
m = axis.mean()
for i in xrange(start, min(end, axis.count())):
if start != 0:
p = axis[i-1] > m
c = axis[i] > m
if p != c:
cr += 1
return float(cr) / (end-start-1)
def window_summary(axis, start, end):
acf = stattools.acf(axis[start:end])
acv = stattools.acovf(axis[start:end])
sqd_error = (axis[start:end] - axis[start:end].mean()) ** 2
return [
jitter(axis, start, end),
mean_crossing_rate(axis, start, end),
axis[start:end].mean(),
axis[start:end].std(),
axis[start:end].var(),
axis[start:end].min(),
axis[start:end].max(),
acf.mean(), # mean auto correlation
acf.std(), # standard deviation auto correlation
acv.mean(), # mean auto covariance
acv.std(), # standard deviation auto covariance
skew(axis[start:end]),
kurtosis(axis[start:end]),
math.sqrt(sqd_error.mean())
]
def features(activity):
for (start, end) in windows(activity['timestamp']):
features = []
for axis in ['xAxis', 'yAxis', 'zAxis', 'magnitude']:
features += window_summary(activity[axis], start, end)
yield features
In [43]:
import csv
activities = [STANDING, WALKING, RUNNING, STAIRS, ON_TRAIN]
with open('../Data/Features.csv', 'w') as out:
rows = csv.writer(out)
for i in range(0, len(activities)):
for f in features(activities[i]):
rows.writerow([i] + f)
In [45]:
dataset = np.loadtxt('../Data/Features.csv', delimiter=",")
X = dataset[:, 1:]
y = dataset[:, 0]
In [48]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.dummy import DummyClassifier
from sklearn.cross_validation import train_test_split
c = RandomForestClassifier()
b = DummyClassifier() # generates predictions by respecting the training set's class distribution
results = []
baselines = []
for i in range(0, 10):
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4)
c.fit(X_train, y_train)
b.fit(X_train, y_train)
res = c.score(X_test, y_test)
bas = b.score(X_test, y_test)
print 'Loop', i, res, bas
results.append(res)
baselines.append(bas)
print '\nBaseline', np.mean(baselines), np.std(baselines)
print 'Random Forest', np.mean(results), np.std(results)
In [ ]: