In [1]:
import numpy as np
from pystruct.models import ChainCRF
from pystruct.learners import FrankWolfeSSVM, NSlackSSVM,OneSlackSSVM
import argparse
import load
import split
import sys

# Read the dataset through the project-local `load` module.
# The path is relative, so it is resolved against the notebook's working directory.
DATA_PATH = '../data/kasteren/2010/datasets/houseA/data.csv.gz'
data = load.data(DATA_PATH)

def replace(l, label_map):
    """Translate every element of ``l`` through ``label_map``.

    Each element is converted with ``int()`` before it is used as a lookup
    key, so float-valued labels such as ``3.0`` hit the integer key ``3``.

    Parameters:
        l: iterable of values accepted by ``int()``.
        label_map: mapping from integer label to replacement value.

    Returns:
        A new list with one mapped value per element of ``l``, in order.

    Raises:
        KeyError: if ``int(x)`` is not a key of ``label_map``.
    """
    # Build a concrete list rather than returning ``map(...)``: under
    # Python 3 ``map`` yields a lazy, single-use iterator, while Python 2
    # returned a list. A list comprehension gives a list on both.
    return [label_map[int(x)] for x in l]


def relabel(data):
    """Re-encode ``data['activity']`` as consecutive integers starting at 0.

    The distinct activity values (as returned by ``np.unique`` and cast to
    int) are numbered 0, 1, 2, ... in that order, and the column is
    overwritten with the renumbered values produced by ``replace``.

    ``data`` is modified in place and the same object is returned.
    """
    activity = data['activity']

    distinct = np.array(np.unique(activity), dtype=int)
    # original label -> position of that label among the distinct values
    index_of = {label: position for position, label in enumerate(distinct)}

    data['activity'] = replace(activity, index_of)
    return data

In [2]:
# Renumber the 'activity' column to 0..n-1; relabel() also mutates `data` in place.
data = relabel(data)

In [4]:
# Split `data` into train and test frames with the project-local `split`
# module; testSize=0.3 is the requested test share.
trainDf, testDf, trainLens, testLens, testFrac = split.trainTest(
    data, 5400, 5400*2, testSize=0.3)

# Target layout: one array per sequence, collected in a list, e.g.
#   X = [np.array([[f1], [f2], [f3], ...]), ...]   # one feature row per step
#   y = [np.array([a, b, c, ...]), ...]            # one label per step
# The training rows are cut into 200 chunks so that each chunk covers only
# a fraction of a day.
# NOTE(review): the test arrays are cast to uint8 before splitting but the
# training arrays are not -- confirm whether that difference is intended.
train_values = trainDf.values
n_train_columns = trainDf.shape[1]
# Features: every column except the last two.
X_train = np.array_split(train_values[:, :n_train_columns - 2], 200)
# Labels: the last column.
y_train = np.array_split(train_values[:, n_train_columns - 1], 200)

In [5]:
# Chain CRF fitted with a Frank-Wolfe structured SVM.
# Notes from earlier experiments:
#   * NSlackSSVM(model=model, C=.1, max_iter=11) was almost similar to FrankWolfeSSVM
#   * OneSlackSSVM(model=model) doesn't work as well
#   * c=0.2 -> 62.86 % accuracy <==> c=0.1
model = ChainCRF()
ssvm = FrankWolfeSSVM(
    model=model,
    C=0.01,
    max_iter=11)
ssvm.fit(X_train, y_train)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-5-157dafae170b> in <module>()
      8 ssvm.fit(X_train, y_train)
      9 print "Learning complete..."
---> 10 print("Test score with chain CRF: %f" % ssvm.score(X_test, y_test))

NameError: name 'X_test' is not defined
Learning complete...

In [ ]:
# Build the evaluation sequences from the test frame: features are all
# columns except the last two, labels are the last column. Both are cast
# to uint8 and then cut into 30 chunks.
test_values = testDf.values
n_test_columns = testDf.shape[1]
X_test = np.array_split(
    np.array(test_values[:, :n_test_columns - 2], dtype=np.uint8), 30)
y_test = np.array_split(
    np.array(test_values[:, n_test_columns - 1], dtype=np.uint8), 30)

In [ ]:
# Report the fitted model's score on the test sequences.
# Both lines use the parenthesised print form: with a single string argument
# it prints the same text on Python 2 and is valid syntax on Python 3,
# whereas the bare `print "..."` statement is a SyntaxError there.
print("Learning complete...")
print("Test score with chain CRF: %f" % ssvm.score(X_test, y_test))