In [10]:
%load_ext autoreload
%autoreload 2
import h5py
# Load the precomputed HOG descriptors for both classes from the HDF5 store.
print("reading hogs..")
hogs_path = '/media/data/models/daimler_hogs.h5'
with h5py.File(hogs_path, 'r') as hogs_file:
    # Slicing with [:] copies each dataset into memory, so the arrays
    # remain usable after the file is closed at the end of the `with`.
    train_pedestrian_hogs = hogs_file['pedestrians'][:]
    train_non_pedestrian_hogs = hogs_file['non_pedestrians'][:]
print("Done")

# One row per sample, one column per HOG feature.
print("Pedestrian hogs shape:", train_pedestrian_hogs.shape)
print("Non-pedestrian hogs shape:", train_non_pedestrian_hogs.shape)


reading hogs..
Done
Pedestrian hogs shape: (15660, 3240)
Non-pedestrian hogs shape: (26976, 3240)

In [11]:
import numpy as np
from sklearn.model_selection import train_test_split

# Fixed seed so the stratified split (and every metric computed from it)
# is identical across kernel restarts instead of changing on each run.
SPLIT_SEED = 42

print("Formatting data for training...")
n_positive = train_pedestrian_hogs.shape[0]
n_negative = train_non_pedestrian_hogs.shape[0]

# Stack negatives first, then positives; the label vector below must follow
# the same order (0 = non-pedestrian, 1 = pedestrian).
x = np.vstack([train_non_pedestrian_hogs, train_pedestrian_hogs])

# Drop the per-class arrays now that they have been stacked into `x`.
# NOTE: because of these `del`s, re-running this cell requires re-running
# the cell that loads the HOGs first.
del train_pedestrian_hogs
del train_non_pedestrian_hogs
y = np.hstack([np.zeros(n_negative), np.ones(n_positive)])

# Despite its name this is a fraction (0.10 == 10% of the samples), which is
# the form `test_size` expects for float values.
validation_subset_percent = 0.10
x_train, x_val, y_train, y_val = train_test_split(
    x,
    y,
    stratify=y,  # keep the pedestrian / non-pedestrian ratio in both subsets
    test_size=validation_subset_percent,
    random_state=SPLIT_SEED,
)
print("Done")


print("Train labels array shape", y_train.shape)
print("Train hogs array shape", x_train.shape)
print("Val labels array shape", y_val.shape)
print("Val hogs array shape", x_val.shape)


Formatting data for training...
Done
Train labels array shape (38372,)
Train hogs array shape (38372, 3240)
Val labels array shape (4264,)
Val hogs array shape (4264, 3240)

In [9]:
from sklearn.metrics import classification_report
from sklearn import svm

# Train a linear SVM on the HOG descriptors.
# Requires `x_train` / `y_train` from the data-formatting cell, so the
# notebook must be executed top to bottom (running this cell on a fresh
# kernel raises NameError).
#
# C is the inverse regularisation strength, so C=200 regularises weakly.
# random_state pins the solver's internal data shuffling so that repeated
# runs produce the same model (it has no effect when the primal solver is
# used).
clf = svm.LinearSVC(C=200, random_state=0)
print("Training..")
clf.fit(x_train, y_train)
print("Done")


Training..
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-9-5e44bd60b643> in <module>()
      4 clf = svm.LinearSVC(C=200)
      5 print("Training..")
----> 6 clf.fit(x_train, y_train)
      7 print("Done")
      8 

NameError: name 'x_train' is not defined

In [ ]:
# Persist the trained classifier so it can be reloaded without retraining.
model_filepath = 'hog_svm_daimler_model.pkl'
print("Saving model to disk...")

# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed
# in 0.23. Prefer the standalone `joblib` package and fall back to the
# vendored copy only on old scikit-learn installs that lack it.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

joblib.dump(clf, model_filepath)
print("Saved to %s." % model_filepath)

In [ ]:
# Print a per-class classification report for the fitted classifier on both
# splits; comparing the two shows how well the model generalises.
print("Evaluating..")

print("Training set:")
predicted_train = clf.predict(x_train)
train_report = classification_report(y_train, predicted_train)
print(train_report)

print("Validation set:")
predicted_val = clf.predict(x_val)
val_report = classification_report(y_val, predicted_val)
print(val_report)