SmartHARP

Useful libraries for this assignment

  • numpy, for arrays
  • matplotlib, for plotting
  • sklearn, for applying machine learning algorithms, reporting, and plotting

In [25]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model

Loading the training and test data sets using numpy


In [26]:
#Data Loading

#Training Data 
x_train=np.loadtxt("../data/Train/X_train.txt")
y_train=np.loadtxt("../data/Train/y_train.txt")
subjects_train=np.loadtxt("../data/Train/subject_id_train.txt")
#Test Data
x_test=np.loadtxt("../data/Test/X_test.txt")
y_test=np.loadtxt("../data/Test/y_test.txt")
subjects_test=np.loadtxt("../data/Test/subject_id_test.txt")

x_total=np.concatenate((x_train,x_test))
y_total=np.concatenate((y_train,y_test))
print "x_train:\n"
print x_train
print "y_train:\n"
print y_train
print "x_test:\n"
print x_test
print "y_test:\n"
print y_test


x_train:

[[ 0.04 -0.01 -0.04 ..., -0.84  0.18 -0.05]
 [ 0.04 -0.   -0.03 ..., -0.85  0.18 -0.05]
 [ 0.04 -0.01 -0.02 ..., -0.85  0.18 -0.04]
 ..., 
 [ 0.04 -0.    0.02 ..., -0.78  0.25  0.05]
 [ 0.04 -0.   -0.05 ..., -0.79  0.25  0.03]
 [ 0.07  0.   -0.08 ..., -0.78  0.25  0.04]]
y_train:

[ 5.  5.  5. ...,  2.  2.  2.]
x_test:

[[ 0.03 -0.01  0.04 ..., -0.72  0.28 -0.05]
 [ 0.04  0.   -0.03 ..., -0.7   0.28 -0.08]
 [ 0.04 -0.01 -0.03 ..., -0.7   0.28 -0.07]
 ..., 
 [ 0.07  0.04 -0.02 ..., -0.66  0.27  0.19]
 [ 0.02  0.03 -0.01 ..., -0.66  0.26  0.19]
 [-0.01 -0.   -0.04 ..., -0.66  0.26  0.19]]
y_test:

[ 5.  5.  5. ...,  2.  2.  2.]

Plots for Activity patterns

STANDING:


In [4]:
%matplotlib inline
#Standing
#Each curve below is the feature vector of one fixed-width window labelled STANDING;
#the x-axis is the feature index scaled by 10000 (first plot: test-set windows, second plot: training-set windows).
plt.plot([k*10000 for k in range(len(x_test[0]))],x_test[0])
plt.plot([k*10000 for k in range(len(x_test[1]))],x_test[1])
#plt.plot([k*10000 for k in range(len(x_test[329]))],x_test[329])
plt.plot([k*10000 for k in range(len(x_test[2]))],x_test[2])
plt.plot([k*10000 for k in range(len(x_test[3]))],x_test[3])
plt.show()

plt.plot([k*10000 for k in range(len(x_train[0]))],x_train[0])
plt.plot([k*10000 for k in range(len(x_train[1]))],x_train[1])
#plt.plot([k*10000 for k in range(len(x_test[329]))],x_test[329])
plt.plot([k*10000 for k in range(len(x_train[2]))],x_train[2])
plt.plot([k*10000 for k in range(len(x_train[3]))],x_train[3])
plt.show()


SITTING:


In [5]:
#Sitting
plt.plot([k*10000 for k in range(len(x_test[17]))],x_test[17])
plt.plot([k*10000 for k in range(len(x_test[18]))],x_test[18])
plt.plot([k*10000 for k in range(len(x_test[19]))],x_test[19])
plt.plot([k*10000 for k in range(len(x_test[20]))],x_test[20])
plt.show()

plt.plot([k*10000 for k in range(len(x_train[30]))],x_train[30])
plt.plot([k*10000 for k in range(len(x_train[31]))],x_train[31])
#plt.plot([k*10000 for k in range(len(x_test[329]))],x_test[329])
plt.plot([k*10000 for k in range(len(x_train[32]))],x_train[32])
plt.plot([k*10000 for k in range(len(x_train[33]))],x_train[33])
plt.show()


STAND_TO_LIE:


In [6]:
#STAND_TO_LIE
plt.plot([k*10000 for k in range(len(x_test[47]))],x_test[47])
plt.plot([k*10000 for k in range(len(x_test[48]))],x_test[48])
plt.plot([k*10000 for k in range(len(x_test[49]))],x_test[49])
plt.plot([k*10000 for k in range(len(x_test[50]))],x_test[50])
plt.show()

plt.plot([k*10000 for k in range(len(x_train[3882]))],x_train[3882])
plt.plot([k*10000 for k in range(len(x_train[3883]))],x_train[3883])
#plt.plot([k*10000 for k in range(len(x_test[329]))],x_test[329])
plt.plot([k*10000 for k in range(len(x_train[3884]))],x_train[3884])
plt.plot([k*10000 for k in range(len(x_train[4068]))],x_train[4068])
plt.show()


LAYING:


In [7]:
#LAYING
plt.plot([k*10000 for k in range(len(x_test[52]))],x_test[52])
plt.plot([k*10000 for k in range(len(x_test[53]))],x_test[53])
plt.plot([k*10000 for k in range(len(x_test[54]))],x_test[54])
plt.plot([k*10000 for k in range(len(x_test[55]))],x_test[55])
plt.show()

plt.plot([k*10000 for k in range(len(x_train[60]))],x_train[60])
plt.plot([k*10000 for k in range(len(x_train[61]))],x_train[61])
#plt.plot([k*10000 for k in range(len(x_test[329]))],x_test[329])
plt.plot([k*10000 for k in range(len(x_train[62]))],x_train[62])
plt.plot([k*10000 for k in range(len(x_train[63]))],x_train[63])
plt.show()


LIE_TO_SIT:


In [8]:
#LIE_TO_SIT
plt.plot([k*10000 for k in range(len(x_test[63]))],x_test[63])
plt.plot([k*10000 for k in range(len(x_test[64]))],x_test[64])
plt.plot([k*10000 for k in range(len(x_test[65]))],x_test[65])
plt.plot([k*10000 for k in range(len(x_test[221]))],x_test[221])
plt.show()

plt.plot([k*10000 for k in range(len(x_train[3112]))],x_train[3112])
plt.plot([k*10000 for k in range(len(x_train[3500]))],x_train[3500])
#plt.plot([k*10000 for k in range(len(x_test[329]))],x_test[329])
plt.plot([k*10000 for k in range(len(x_train[3501]))],x_train[3501])
plt.plot([k*10000 for k in range(len(x_train[3700]))],x_train[3700])
plt.show()


SIT_TO_LIE:


In [9]:
#SIT_TO_LIE
plt.plot([k*10000 for k in range(len(x_test[76]))],x_test[76])
plt.plot([k*10000 for k in range(len(x_test[77]))],x_test[77])
plt.plot([k*10000 for k in range(len(x_test[233]))],x_test[233])
plt.plot([k*10000 for k in range(len(x_test[401]))],x_test[401])
plt.show()

plt.plot([k*10000 for k in range(len(x_train[2595]))],x_train[2595])
plt.plot([k*10000 for k in range(len(x_train[2762]))],x_train[2762])
#plt.plot([k*10000 for k in range(len(x_test[329]))],x_test[329])
plt.plot([k*10000 for k in range(len(x_train[2763]))],x_train[2763])
plt.plot([k*10000 for k in range(len(x_train[2945]))],x_train[2945])
plt.show()


LIE_TO_STAND:


In [10]:
#LIE_TO_STAND
plt.plot([k*10000 for k in range(len(x_test[92]))],x_test[92])
plt.plot([k*10000 for k in range(len(x_test[248]))],x_test[248])
plt.plot([k*10000 for k in range(len(x_test[417]))],x_test[417])
plt.plot([k*10000 for k in range(len(x_test[584]))],x_test[584])
plt.show()

plt.plot([k*10000 for k in range(len(x_train[4350]))],x_train[4350])
plt.plot([k*10000 for k in range(len(x_train[4351]))],x_train[4351])
#plt.plot([k*10000 for k in range(len(x_test[329]))],x_test[329])
plt.plot([k*10000 for k in range(len(x_train[4742]))],x_train[4742])
plt.plot([k*10000 for k in range(len(x_train[4743]))],x_train[4743])
plt.show()


WALKING:


In [11]:
#WALKING
plt.plot([k*10000 for k in range(len(x_test[93]))],x_test[93])
plt.plot([k*10000 for k in range(len(x_test[94]))],x_test[94])
plt.plot([k*10000 for k in range(len(x_test[95]))],x_test[95])
plt.plot([k*10000 for k in range(len(x_test[96]))],x_test[96])
plt.show()

plt.plot([k*10000 for k in range(len(x_train[80]))],x_train[80])
plt.plot([k*10000 for k in range(len(x_train[81]))],x_train[81])
#plt.plot([k*10000 for k in range(len(x_test[329]))],x_test[329])
plt.plot([k*10000 for k in range(len(x_train[82]))],x_train[82])
plt.plot([k*10000 for k in range(len(x_train[83]))],x_train[83])
plt.show()


WALKING_UPSTAIRS:


In [12]:
#WALKING_UPSTAIRS
plt.plot([k*10000 for k in range(len(x_test[132]))],x_test[132])
plt.plot([k*10000 for k in range(len(x_test[133]))],x_test[133])
plt.plot([k*10000 for k in range(len(x_test[134]))],x_test[134])
plt.plot([k*10000 for k in range(len(x_test[135]))],x_test[135])
plt.show()

plt.plot([k*10000 for k in range(len(x_train[160]))],x_train[160])
plt.plot([k*10000 for k in range(len(x_train[161]))],x_train[161])
#plt.plot([k*10000 for k in range(len(x_test[329]))],x_test[329])
plt.plot([k*10000 for k in range(len(x_train[162]))],x_train[162])
plt.plot([k*10000 for k in range(len(x_train[163]))],x_train[163])
plt.show()


WALKING_DOWNSTAIRS:


In [13]:
#WALKING_DOWNSTAIRS
plt.plot([k*10000 for k in range(len(x_test[123]))],x_test[123])
plt.plot([k*10000 for k in range(len(x_test[124]))],x_test[124])
plt.plot([k*10000 for k in range(len(x_test[125]))],x_test[125])
plt.plot([k*10000 for k in range(len(x_test[126]))],x_test[126])
plt.show()

plt.plot([k*10000 for k in range(len(x_train[130]))],x_train[130])
plt.plot([k*10000 for k in range(len(x_train[131]))],x_train[131])
#plt.plot([k*10000 for k in range(len(x_test[329]))],x_test[329])
plt.plot([k*10000 for k in range(len(x_train[132]))],x_train[132])
plt.plot([k*10000 for k in range(len(x_train[133]))],x_train[133])
plt.show()


STAND_TO_SIT:


In [14]:
#STAND_TO_SIT
plt.plot([k*10000 for k in range(len(x_test[183]))],x_test[183])
plt.plot([k*10000 for k in range(len(x_test[340]))],x_test[340])
plt.plot([k*10000 for k in range(len(x_test[341]))],x_test[341])
plt.plot([k*10000 for k in range(len(x_test[515]))],x_test[515])
plt.show()

plt.plot([k*10000 for k in range(len(x_train[2360]))],x_train[2360])
plt.plot([k*10000 for k in range(len(x_train[2361]))],x_train[2361])
#plt.plot([k*10000 for k in range(len(x_test[329]))],x_test[329])
plt.plot([k*10000 for k in range(len(x_train[2362]))],x_train[2362])
plt.plot([k*10000 for k in range(len(x_train[2534]))],x_train[2534])
plt.show()


SIT_TO_STAND:


In [15]:
#SIT_TO_STAND
plt.plot([k*10000 for k in range(len(x_test[30]))],x_test[30])
plt.plot([k*10000 for k in range(len(x_test[195]))],x_test[195])
plt.plot([k*10000 for k in range(len(x_test[354]))],x_test[354])
plt.plot([k*10000 for k in range(len(x_test[530]))],x_test[530])
plt.show()

plt.plot([k*10000 for k in range(len(x_train[2595]))],x_train[2595])
plt.plot([k*10000 for k in range(len(x_train[2762]))],x_train[2762])
#plt.plot([k*10000 for k in range(len(x_test[329]))],x_test[329])
plt.plot([k*10000 for k in range(len(x_train[2763]))],x_train[2763])
plt.plot([k*10000 for k in range(len(x_train[2945]))],x_train[2945])
plt.show()


Performance Metrics

Precision

Precision (also called positive predictive value) is the fraction of retrieved instances that are relevant; in classification terms, it is the fraction of windows predicted as a given activity that truly belong to that activity.

Recall

Recall (also known as sensitivity) is the fraction of relevant instances that are retrieved; in classification terms, it is the fraction of windows of a given activity that the model correctly identifies.

F1-Score

In statistical analysis of binary classification, the F1 score (also F-score or F-measure) is a measure of a test's accuracy that combines precision and recall; it is their harmonic mean.
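
As a quick, self-contained illustration (a sketch on made-up labels, not the HAPT data), the snippet below computes all three metrics for a toy binary prediction with sklearn.metrics:

from sklearn.metrics import precision_score, recall_score, f1_score

y_true = [1, 1, 1, 1, 0, 0, 0, 0, 0, 0]
y_pred = [1, 1, 1, 0, 1, 0, 0, 0, 0, 0]   # 3 true positives, 1 false positive, 1 false negative

print("precision: %.2f" % precision_score(y_true, y_pred))   # 3 / (3 + 1) = 0.75
print("recall:    %.2f" % recall_score(y_true, y_pred))      # 3 / (3 + 1) = 0.75
print("f1:        %.2f" % f1_score(y_true, y_pred))          # harmonic mean of the two = 0.75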

Confusion matrix

  • Used to describe the performance of a classification model on a set of test data for which the true values are known.
  • Visualizes the precision of the model for each class in the test data.

    Why the confusion matrix? Accuracy alone is not a reliable metric for the real performance of a classifier, because it yields misleading results when the data set is unbalanced (that is, when the numbers of samples in the different classes vary greatly). A small numeric illustration follows below.
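
As that illustration (hypothetical labels, not the HAPT data): a classifier that always predicts the majority class can reach 95% accuracy while never finding the minority class at all.

from sklearn.metrics import accuracy_score, recall_score

y_true = [0] * 95 + [1] * 5     # 95 majority-class samples, 5 minority-class samples
y_pred = [0] * 100              # a classifier that always predicts the majority class

print("accuracy: %.2f" % accuracy_score(y_true, y_pred))              # 0.95, looks good
print("minority-class recall: %.2f" % recall_score(y_true, y_pred))   # 0.00, the minority class is never found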


In [16]:
#plt.cm.Blues
def plot_confusion_matrix(cm, title='Confusion matrix', cmap="Set3"):
    # Plots the matrix as a colour-coded image; relies on the global target_names
    # list (defined in the classifier cells below) for the axis tick labels.
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(target_names))
    plt.xticks(tick_marks, target_names, rotation=70)
    plt.yticks(tick_marks, target_names)
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

Classifiers

Logistic Regression

In statistics, logistic regression is a regression model where the dependent variable (DV) is categorical.
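
As a rough sketch of the idea (not the internals of sklearn's solver), logistic regression passes a linear combination of the features through the logistic (sigmoid) function to get a class probability; the weights, intercept and feature values below are made up for illustration, and sklearn extends this to the 12-class case automatically.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

w = np.array([0.5, -1.2, 2.0])      # hypothetical learned weights
b = 0.1                             # hypothetical intercept
x = np.array([0.04, -0.01, -0.04])  # hypothetical feature values

p = sigmoid(np.dot(w, x) + b)       # probability assigned to the positive class
print("P(class = 1 | x) = %.3f" % p)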


In [17]:
#Logistic Regression
from sklearn.metrics import confusion_matrix, classification_report,f1_score,accuracy_score
from sklearn.linear_model import LogisticRegression
labels=[1,2,3,4,5,6,7,8,9,10,11,12]
target_names=['WALKING','WALKING_UPSTAIRS','WALKING_DOWNSTAIRS','SITTING','STANDING','LAYING','STAND_TO_SIT','SIT_TO_STAND','SIT_TO_LIE','LIE_TO_SIT','STAND_TO_LIE','LIE_TO_STAND']
logReg=LogisticRegression()
logReg.fit(x_train,y_train)
yLogRegPredict=logReg.predict(x_test)
clf_rpt=classification_report(y_test,yLogRegPredict,target_names=target_names)
print(clf_rpt)
fLogReg=f1_score(y_test,yLogRegPredict,labels=labels,average=None)
print(fLogReg)
accuracyLogReg=accuracy_score(y_test,yLogRegPredict)
print(accuracyLogReg)

# Compute confusion matrix for Logistic Regression
cm = confusion_matrix(y_test,yLogRegPredict)
np.set_printoptions(precision=2)

plt.figure()
plot_confusion_matrix(cm,title='Confusion matrix for Logistic Regression')

# Normalize the confusion matrix by row (i.e by the number of samples
# in each class)
cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

plt.figure()
plot_confusion_matrix(cm_normalized, title='Normalized confusion matrix for Logistic Regression')

plt.show()
print('Confusion matrix, without normalization')
print(cm)
print('Normalized confusion matrix')
print(cm_normalized)


                    precision    recall  f1-score   support

           WALKING       0.93      0.99      0.96       496
  WALKING_UPSTAIRS       0.95      0.94      0.95       471
WALKING_DOWNSTAIRS       0.99      0.97      0.98       420
           SITTING       0.96      0.88      0.92       508
          STANDING       0.90      0.97      0.93       556
            LAYING       1.00      1.00      1.00       545
      STAND_TO_SIT       1.00      0.70      0.82        23
      SIT_TO_STAND       1.00      1.00      1.00        10
        SIT_TO_LIE       0.71      0.75      0.73        32
        LIE_TO_SIT       0.81      0.84      0.82        25
      STAND_TO_LIE       0.72      0.63      0.67        49
      LIE_TO_STAND       0.77      0.63      0.69        27

       avg / total       0.95      0.95      0.94      3162

[ 0.9600779   0.94533762  0.97949337  0.91803279  0.93483927  0.99908341
  0.82051282  1.          0.72727273  0.82352941  0.67391304  0.69387755]
0.945604048071
Confusion matrix, without normalization
[[493   0   3   0   0   0   0   0   0   0   0   0]
 [ 30 441   0   0   0   0   0   0   0   0   0   0]
 [  4  10 406   0   0   0   0   0   0   0   0   0]
 [  0   5   0 448  55   0   0   0   0   0   0   0]
 [  2   0   0  16 538   0   0   0   0   0   0   0]
 [  0   0   0   0   0 545   0   0   0   0   0   0]
 [  0   2   0   2   2   0  16   0   0   0   1   0]
 [  0   0   0   0   0   0   0  10   0   0   0   0]
 [  0   0   0   0   0   0   0   0  24   0   7   1]
 [  0   0   0   0   0   0   0   0   0  21   0   4]
 [  2   3   0   2   0   1   0   0  10   0  31   0]
 [  0   1   0   0   0   0   0   0   0   5   4  17]]
Normalized confusion matrix
[[ 0.99  0.    0.01  0.    0.    0.    0.    0.    0.    0.    0.    0.  ]
 [ 0.06  0.94  0.    0.    0.    0.    0.    0.    0.    0.    0.    0.  ]
 [ 0.01  0.02  0.97  0.    0.    0.    0.    0.    0.    0.    0.    0.  ]
 [ 0.    0.01  0.    0.88  0.11  0.    0.    0.    0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.03  0.97  0.    0.    0.    0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.    0.    1.    0.    0.    0.    0.    0.    0.  ]
 [ 0.    0.09  0.    0.09  0.09  0.    0.7   0.    0.    0.    0.04  0.  ]
 [ 0.    0.    0.    0.    0.    0.    0.    1.    0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.    0.    0.    0.    0.    0.75  0.    0.22  0.03]
 [ 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.84  0.    0.16]
 [ 0.04  0.06  0.    0.04  0.    0.02  0.    0.    0.2   0.    0.63  0.  ]
 [ 0.    0.04  0.    0.    0.    0.    0.    0.    0.    0.19  0.15  0.63]]

Random Forest

Random forests, or random decision forests, are an ensemble learning method for classification, regression and other tasks. They operate by constructing a multitude of decision trees at training time and outputting the class that is the mode of the classes (classification) or the mean prediction (regression) of the individual trees. Random decision forests correct for decision trees' habit of overfitting to their training set.
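
The sketch below (on synthetic data, independent of the HAPT set) shows the "mode of the classes" idea: each fitted tree votes and the majority vote is taken. Note that sklearn's RandomForestClassifier actually averages the trees' class probabilities, which usually, but not always, coincides with the hard majority vote.

import numpy as np
from sklearn.ensemble import RandomForestClassifier

rng = np.random.RandomState(0)
X_toy = rng.randn(200, 4)
y_toy = (X_toy[:, 0] + X_toy[:, 1] > 0).astype(int)   # synthetic binary labels

forest = RandomForestClassifier(n_estimators=5, random_state=0)
forest.fit(X_toy, y_toy)

sample = X_toy[:1]
votes = [int(tree.predict(sample)[0]) for tree in forest.estimators_]   # one vote per tree
print("tree votes: %s" % votes)
print("majority vote: %d" % np.bincount(votes).argmax())
print("forest prediction: %d" % forest.predict(sample)[0])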


In [18]:
#Random Forest Classifier
from sklearn.ensemble import RandomForestClassifier
labels=[1,2,3,4,5,6,7,8,9,10,11,12]
target_names=['WALKING','WALKING_UPSTAIRS','WALKING_DOWNSTAIRS','SITTING','STANDING','LAYING','STAND_TO_SIT','SIT_TO_STAND','SIT_TO_LIE','LIE_TO_SIT','STAND_TO_LIE','LIE_TO_STAND']
randForest=RandomForestClassifier()
randForest.fit(x_train,y_train)
yRandForestPredict=randForest.predict(x_test)
clf_rpt=classification_report(y_test,yRandForestPredict,target_names=target_names)
print(clf_rpt)
fRandForest=f1_score(y_test,yRandForestPredict,labels=labels,average=None)
print(fRandForest)
accuracyRandForest=accuracy_score(y_test,yRandForestPredict)
print(accuracyRandForest)

# Compute confusion matrix for Random Forest
cm = confusion_matrix(y_test,yRandForestPredict)
np.set_printoptions(precision=2)

plt.figure()
plot_confusion_matrix(cm,title='Confusion matrix for Random Forest')

# Normalize the confusion matrix by row (i.e by the number of samples
# in each class)
cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

plt.figure()
plot_confusion_matrix(cm_normalized, title='Normalized confusion matrix for Random Forest')

plt.show()
print('Confusion matrix, without normalization')
print(cm)
print('Normalized confusion matrix')
print(cm_normalized)


                    precision    recall  f1-score   support

           WALKING       0.82      0.96      0.89       496
  WALKING_UPSTAIRS       0.86      0.81      0.84       471
WALKING_DOWNSTAIRS       0.94      0.85      0.89       420
           SITTING       0.89      0.91      0.90       508
          STANDING       0.92      0.91      0.92       556
            LAYING       1.00      1.00      1.00       545
      STAND_TO_SIT       0.73      0.70      0.71        23
      SIT_TO_STAND       0.75      0.90      0.82        10
        SIT_TO_LIE       0.53      0.50      0.52        32
        LIE_TO_SIT       0.54      0.52      0.53        25
      STAND_TO_LIE       0.60      0.53      0.57        49
      LIE_TO_STAND       0.56      0.52      0.54        27

       avg / total       0.89      0.89      0.89      3162

[ 0.89  0.84  0.89  0.9   0.92  1.    0.71  0.82  0.52  0.53  0.57  0.54]
0.891524351676
Confusion matrix, without normalization
[[474  15   7   0   0   0   0   0   0   0   0   0]
 [ 75 382  14   0   0   0   0   0   0   0   0   0]
 [ 25  38 357   0   0   0   0   0   0   0   0   0]
 [  0   0   0 464  42   0   2   0   0   0   0   0]
 [  0   0   0  50 505   0   0   0   1   0   0   0]
 [  0   0   0   0   0 543   0   0   0   0   0   2]
 [  0   2   0   2   0   0  16   1   0   0   2   0]
 [  0   0   0   1   0   0   0   9   0   0   0   0]
 [  0   0   0   0   0   0   0   1  16   0  15   0]
 [  0   0   0   1   0   0   0   1   1  13   0   9]
 [  1   4   1   2   0   0   3   0  12   0  26   0]
 [  0   1   0   0   0   0   1   0   0  11   0  14]]
Normalized confusion matrix
[[ 0.96  0.03  0.01  0.    0.    0.    0.    0.    0.    0.    0.    0.  ]
 [ 0.16  0.81  0.03  0.    0.    0.    0.    0.    0.    0.    0.    0.  ]
 [ 0.06  0.09  0.85  0.    0.    0.    0.    0.    0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.91  0.08  0.    0.    0.    0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.09  0.91  0.    0.    0.    0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.    0.    1.    0.    0.    0.    0.    0.    0.  ]
 [ 0.    0.09  0.    0.09  0.    0.    0.7   0.04  0.    0.    0.09  0.  ]
 [ 0.    0.    0.    0.1   0.    0.    0.    0.9   0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.    0.    0.    0.    0.03  0.5   0.    0.47  0.  ]
 [ 0.    0.    0.    0.04  0.    0.    0.    0.04  0.04  0.52  0.    0.36]
 [ 0.02  0.08  0.02  0.04  0.    0.    0.06  0.    0.24  0.    0.53  0.  ]
 [ 0.    0.04  0.    0.    0.    0.    0.04  0.    0.    0.41  0.    0.52]]

k-NN

In pattern recognition, the k-Nearest Neighbors algorithm (or k-NN for short) is a non-parametric method used for classification and regression. In both cases, the input consists of the k closest training examples in the feature space.
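
A minimal NumPy sketch of the classification case (made-up 2-D points, not the HAPT features): compute the distance from the query to every training point, keep the k closest, and take a majority vote of their labels.

import numpy as np

X_toy = np.array([[0.0, 0.0], [0.1, 0.2], [1.0, 1.0], [0.9, 1.1], [1.1, 0.9]])  # made-up training points
y_toy = np.array([0, 0, 1, 1, 1])                                               # their labels
query = np.array([0.8, 1.0])                                                    # point to classify
k = 3

dists = np.linalg.norm(X_toy - query, axis=1)       # Euclidean distance to every training point
nearest = np.argsort(dists)[:k]                     # indices of the k closest points
prediction = np.bincount(y_toy[nearest]).argmax()   # majority vote among their labels
print("k nearest labels: %s -> predicted class: %d" % (y_toy[nearest], prediction))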


In [19]:
#KNeighborsClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
labels=[1,2,3,4,5,6,7,8,9,10,11,12]
target_names=['WALKING','WALKING_UPSTAIRS','WALKING_DOWNSTAIRS','SITTING','STANDING','LAYING','STAND_TO_SIT','SIT_TO_STAND','SIT_TO_LIE','LIE_TO_SIT','STAND_TO_LIE','LIE_TO_STAND']
knn=KNeighborsClassifier(n_neighbors=11)
knn.fit(x_train,y_train)
yKnnPredict=knn.predict(x_test)
clf_rpt=classification_report(y_test,yKnnPredict,target_names=target_names)
print(clf_rpt)
fKnn=f1_score(y_test,yKnnPredict,labels=labels,average=None)
print(fKnn)
accuracyKnn=accuracy_score(y_test,yKnnPredict)
print(accuracyKnn)

# Compute confusion matrix for K Nearest Neighbours
cm = confusion_matrix(y_test,yKnnPredict)
np.set_printoptions(precision=2)

plt.figure()
plot_confusion_matrix(cm,title='Confusion matrix for K Nearest Neighbours')

# Normalize the confusion matrix by row (i.e by the number of samples
# in each class)
cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

plt.figure()
plot_confusion_matrix(cm_normalized, title='Normalized confusion matrix for K Nearest Neighbours')

plt.show()
print('Confusion matrix, without normalization')
print(cm)
print('Normalized confusion matrix')
print(cm_normalized)


                    precision    recall  f1-score   support

           WALKING       0.85      0.98      0.91       496
  WALKING_UPSTAIRS       0.87      0.92      0.89       471
WALKING_DOWNSTAIRS       0.96      0.78      0.86       420
           SITTING       0.92      0.79      0.85       508
          STANDING       0.83      0.94      0.88       556
            LAYING       1.00      0.99      1.00       545
      STAND_TO_SIT       0.89      0.70      0.78        23
      SIT_TO_STAND       1.00      0.90      0.95        10
        SIT_TO_LIE       0.62      0.91      0.73        32
        LIE_TO_SIT       0.69      0.80      0.74        25
      STAND_TO_LIE       0.82      0.47      0.60        49
      LIE_TO_STAND       0.78      0.52      0.62        27

       avg / total       0.90      0.89      0.89      3162

[ 0.91  0.89  0.86  0.85  0.88  1.    0.78  0.95  0.73  0.74  0.6   0.62]
0.890575585073
Confusion matrix, without normalization
[[487   1   8   0   0   0   0   0   0   0   0   0]
 [ 36 431   4   0   0   0   0   0   0   0   0   0]
 [ 47  47 326   0   0   0   0   0   0   0   0   0]
 [  0   4   0 400 104   0   0   0   0   0   0   0]
 [  0   2   0  33 521   0   0   0   0   0   0   0]
 [  0   1   0   2   2 540   0   0   0   0   0   0]
 [  1   4   0   2   0   0  16   0   0   0   0   0]
 [  0   0   0   0   0   0   1   9   0   0   0   0]
 [  0   0   0   0   0   0   0   0  29   0   3   0]
 [  0   0   0   0   0   0   0   0   1  20   0   4]
 [  2   6   0   0   1   0   1   0  16   0  23   0]
 [  1   0   0   0   0   0   0   0   1   9   2  14]]
Normalized confusion matrix
[[ 0.98  0.    0.02  0.    0.    0.    0.    0.    0.    0.    0.    0.  ]
 [ 0.08  0.92  0.01  0.    0.    0.    0.    0.    0.    0.    0.    0.  ]
 [ 0.11  0.11  0.78  0.    0.    0.    0.    0.    0.    0.    0.    0.  ]
 [ 0.    0.01  0.    0.79  0.2   0.    0.    0.    0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.06  0.94  0.    0.    0.    0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.    0.    0.99  0.    0.    0.    0.    0.    0.  ]
 [ 0.04  0.17  0.    0.09  0.    0.    0.7   0.    0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.    0.    0.    0.1   0.9   0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.    0.    0.    0.    0.    0.91  0.    0.09  0.  ]
 [ 0.    0.    0.    0.    0.    0.    0.    0.    0.04  0.8   0.    0.16]
 [ 0.04  0.12  0.    0.    0.02  0.    0.02  0.    0.33  0.    0.47  0.  ]
 [ 0.04  0.    0.    0.    0.    0.    0.    0.    0.04  0.33  0.07  0.52]]

AdaBoost

AdaBoost, short for "Adaptive Boosting", is a machine learning meta-algorithm. It can be used in conjunction with many other types of learning algorithms to improve their performance: the output of the other learning algorithms ('weak learners') is combined into a weighted sum that represents the final output of the boosted classifier. AdaBoost is adaptive in the sense that subsequent weak learners are tweaked in favor of those instances misclassified by previous classifiers. AdaBoost is sensitive to noisy data and outliers; in some problems, however, it can be less susceptible to the overfitting problem than other learning algorithms. The individual learners can be weak, but as long as the performance of each one is slightly better than random guessing (e.g., an error rate smaller than 0.5 for binary classification), the final model can be proven to converge to a strong learner.
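
The loop below is a minimal sketch of that reweighting idea for the binary case (labels in {-1, +1}), with depth-1 decision trees as the weak learners and synthetic data; it is only an illustration, since sklearn's AdaBoostClassifier (used in the next cell) implements the multiclass SAMME variants.

import numpy as np
from sklearn.tree import DecisionTreeClassifier

rng = np.random.RandomState(0)
X_toy = rng.randn(300, 5)
y_toy = np.where(X_toy[:, 0] + 0.5 * X_toy[:, 1] > 0, 1, -1)   # synthetic labels in {-1, +1}

n_rounds = 20
w = np.full(len(y_toy), 1.0 / len(y_toy))   # start from uniform sample weights
learners, alphas = [], []

for _ in range(n_rounds):
    stump = DecisionTreeClassifier(max_depth=1).fit(X_toy, y_toy, sample_weight=w)
    pred = stump.predict(X_toy)
    err = np.sum(w * (pred != y_toy)) / np.sum(w)       # weighted error of this weak learner
    alpha = 0.5 * np.log((1 - err) / max(err, 1e-10))   # its weight in the final vote
    w = w * np.exp(-alpha * y_toy * pred)               # up-weight the misclassified samples
    w = w / w.sum()
    learners.append(stump)
    alphas.append(alpha)

# The boosted prediction is the sign of the weighted sum of weak-learner outputs
score = sum(a * clf.predict(X_toy) for a, clf in zip(alphas, learners))
print("training accuracy of the boosted ensemble: %.3f" % np.mean(np.sign(score) == y_toy))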


In [21]:
#AdaBoostClassifier
import warnings
warnings.filterwarnings('ignore')
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
labels=[1,2,3,4,5,6,7,8,9,10,11,12]
target_names=['WALKING','WALKING_UPSTAIRS','WALKING_DOWNSTAIRS','SITTING','STANDING','LAYING','STAND_TO_SIT','SIT_TO_STAND','SIT_TO_LIE','LIE_TO_SIT','STAND_TO_LIE','LIE_TO_STAND']
adaBoost=AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=2),
    n_estimators=100,
    learning_rate=1)
adaBoost.fit(x_train,y_train)
yAdaBoostPredict=adaBoost.predict(x_test)
clf_rpt=classification_report(y_test,yAdaBoostPredict,target_names=target_names)
print(clf_rpt)
fAdaBoost=f1_score(y_test,yAdaBoostPredict,labels=labels,average=None)
print(fAdaBoost)
accuracyAdaBoost=accuracy_score(y_test,yAdaBoostPredict)
print(accuracyAdaBoost)

# Compute confusion matrix for Adaptive Boosting
cm = confusion_matrix(y_test,yAdaBoostPredict)
np.set_printoptions(precision=2)

plt.figure()
plot_confusion_matrix(cm,title='Confusion matrix for Adaptive Boosting')

# Normalize the confusion matrix by row (i.e by the number of samples
# in each class)
cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

plt.figure()
plot_confusion_matrix(cm_normalized, title='Normalized confusion matrix for Adaptive Boosting')

plt.show()
print('Confusion matrix, without normalization')
print(cm)
print('Normalized confusion matrix')
print(cm_normalized)


                    precision    recall  f1-score   support

           WALKING       0.49      0.31      0.38       496
  WALKING_UPSTAIRS       0.60      0.46      0.52       471
WALKING_DOWNSTAIRS       0.47      0.79      0.59       420
           SITTING       0.83      0.73      0.78       508
          STANDING       0.79      0.87      0.82       556
            LAYING       1.00      0.99      1.00       545
      STAND_TO_SIT       0.06      0.04      0.05        23
      SIT_TO_STAND       0.53      0.90      0.67        10
        SIT_TO_LIE       0.00      0.00      0.00        32
        LIE_TO_SIT       0.60      0.36      0.45        25
      STAND_TO_LIE       0.41      0.80      0.54        49
      LIE_TO_STAND       0.50      0.67      0.57        27

       avg / total       0.69      0.69      0.68      3162

[ 0.38  0.52  0.59  0.78  0.82  1.    0.05  0.67  0.    0.45  0.54  0.57]
0.686907020873
Confusion matrix, without normalization
[[155  95 241   0   0   0   5   0   0   0   0   0]
 [119 215 131   0   0   0   5   0   0   0   1   0]
 [ 41  48 331   0   0   0   0   0   0   0   0   0]
 [  0   0   0 373 128   0   1   0   0   0   5   1]
 [  0   0   1  71 482   0   0   0   0   0   2   0]
 [  0   0   0   0   2 540   0   0   0   0   3   0]
 [  0   0   0   1   0   0   1   0   0   0  19   2]
 [  0   0   0   1   0   0   0   9   0   0   0   0]
 [  0   0   0   4   0   0   0   2   0   0  26   0]
 [  0   0   0   0   0   0   0   1   0   9   0  15]
 [  0   0   0   2   1   0   4   3   0   0  39   0]
 [  0   0   0   0   0   0   0   2   0   6   1  18]]
Normalized confusion matrix
[[ 0.31  0.19  0.49  0.    0.    0.    0.01  0.    0.    0.    0.    0.  ]
 [ 0.25  0.46  0.28  0.    0.    0.    0.01  0.    0.    0.    0.    0.  ]
 [ 0.1   0.11  0.79  0.    0.    0.    0.    0.    0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.73  0.25  0.    0.    0.    0.    0.    0.01  0.  ]
 [ 0.    0.    0.    0.13  0.87  0.    0.    0.    0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.    0.    0.99  0.    0.    0.    0.    0.01  0.  ]
 [ 0.    0.    0.    0.04  0.    0.    0.04  0.    0.    0.    0.83  0.09]
 [ 0.    0.    0.    0.1   0.    0.    0.    0.9   0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.12  0.    0.    0.    0.06  0.    0.    0.81  0.  ]
 [ 0.    0.    0.    0.    0.    0.    0.    0.04  0.    0.36  0.    0.6 ]
 [ 0.    0.    0.    0.04  0.02  0.    0.08  0.06  0.    0.    0.8   0.  ]
 [ 0.    0.    0.    0.    0.    0.    0.    0.07  0.    0.22  0.04  0.67]]

F-Measure

  • F-measure (based on precision and recall) is an estimate of how accurate a classifier is on each class.
  • The F-score is defined as the harmonic mean of precision and recall; a short worked example follows this list.
  • Plot a grouped bar chart of the F-scores of all the classifiers for each activity.
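
As a quick check of that definition, F1 = 2 * precision * recall / (precision + recall); plugging in the WALKING row of the logistic-regression report above (precision 0.93, recall 0.99) reproduces its F-score of roughly 0.96.

precision, recall = 0.93, 0.99                      # the WALKING row of the logistic-regression report
f1 = 2 * precision * recall / (precision + recall)  # harmonic mean of the two
print("F1 = %.2f" % f1)                             # ~0.96, matching the report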

In [46]:
#Grouped bar chart of per-activity F-scores, one bar per classifier
n_activities=12
fig, ax = plt.subplots()
index = np.arange(n_activities)
bar_width = 0.15
opacity = 0.4
error_config = {'ecolor': '0.3'}
rects1 = plt.bar(index+ bar_width, fLogReg, bar_width,
                 alpha=opacity,
                 color='b',
                 label='Logistic Regression')

rects2 = plt.bar(index + 2*bar_width, fRandForest, bar_width,
                 alpha=opacity,
                 color='r',
                 label='Random Forest')

rects3 = plt.bar(index + 3*bar_width, fKnn, bar_width,
                 alpha=opacity,
                 color='y',
                 label='K Nearest Neighbours')

rects4 = plt.bar(index + 4*bar_width, fAdaBoost, bar_width,
                 alpha=opacity,
                 color='g',
                 label='Adaptive Boosting')


plt.xlabel('Activity')
plt.ylabel('F-Measure')
plt.title('F-Scores for Classifiers')
plt.xticks(index +bar_width, ('WALKING','WALKING_UPSTAIRS','WALKING_DOWNSTAIRS','SITTING','STANDING','LAYING','STAND_TO_SIT','SIT_TO_STAND','SIT_TO_LIE','LIE_TO_SIT','STAND_TO_LIE','LIE_TO_STAND'), rotation=70)
plt.tight_layout()
plt.legend(bbox_to_anchor=(1.8,1.2))
plt.show()


Conclusion:

Logistic regression (the blue bars) proves to be the most effective classifier, achieving the highest F-measure for nearly every activity.