notebook.community

Edit and run



In [1]:

    
import sys
print('Python: {}'.format(sys.version))
# scipy
import scipy
print('scipy: {}'.format(scipy.__version__))
# numpy
import numpy
print('numpy: {}'.format(numpy.__version__))
# matplotlib
import matplotlib
print('matplotlib: {}'.format(matplotlib.__version__))
# pandas
import pandas
print('pandas: {}'.format(pandas.__version__))
# scikit-learn
import sklearn
print('sklearn: {}'.format(sklearn.__version__))









    



Python: 2.7.12 |Anaconda 4.2.0 (64-bit)| (default, Jun 29 2016, 11:07:13) [MSC v.1500 64 bit (AMD64)]
scipy: 0.18.1
numpy: 1.11.1
matplotlib: 1.5.3
pandas: 0.18.1
sklearn: 0.17.1



In [2]:

    
import pandas
from pandas.tools.plotting import scatter_matrix
import matplotlib.pyplot as plt
from sklearn.cross_validation import train_test_split
from sklearn.cross_validation import cross_val_score
from sklearn.cross_validation import KFold
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

# Following http://machinelearningmastery.com/machine-learning-in-python-step-by-step/



In [4]:

    
names = ['direction', 'pitch', 'roll', 'yaw']
dataset = pandas.read_csv("../rawsample-munged.csv", names=names, header=1)



In [5]:

    
# shape
print(dataset.shape)



In [6]:

    
# head
print(dataset.head(20))









    



   direction  pitch  roll  yaw
0        Top      1     4  -27
1        Top      3    -4  -28
2        Top      3   -15  -28
3        Top      2   -24  -27
4        Top      3   -27  -29
5        Top      3   -29  -30
6        Top      5   -29  -30
7        Top      5   -28  -30
8        Top      5   -27  -29
9      Right      6   -24  -28
10     Right      6   -23  -28
11     Right      5   -22  -27
12     Right      4   -18  -26
13     Right      4   -18  -26
14     Right      4   -17  -25
15     Right      4   -16  -25
16     Right      4   -15  -25
17     Right     -2   -13  -24
18     Right    -25    -2  -20
19    Bottom    -29     3  -19



In [7]:

    
# descriptions
print(dataset.describe())









    



             pitch         roll          yaw
count  1799.000000  1799.000000  1799.000000
mean     11.810450     5.959978   -23.665926
std      27.552805    40.106540    15.837892
min     -44.000000  -164.000000  -132.000000
25%      -9.000000   -17.000000   -34.000000
50%      13.000000     8.000000   -24.000000
75%      37.000000    31.000000   -13.000000
max      67.000000   144.000000    33.000000



In [8]:

    
# class distribution
print(dataset.groupby('direction').size())









    



direction
Bottom    450
Left      450
Right     450
Top       449
dtype: int64



In [9]:

    
# box and whisker plots
dataset.plot(kind='box', subplots=True, layout=(2,2), sharex=False, sharey=False)
plt.show()



In [10]:

    
# histograms
dataset.hist()
plt.show()



In [11]:

    
# scatter plot matrix
scatter_matrix(dataset)
plt.show()



In [13]:

    
array = dataset.values
X = array[:,1:]
Y = array[:,0]

validation_size = 0.20
seed = 7

X_train, X_validation, Y_train, Y_validation = train_test_split(X, Y, test_size=validation_size, random_state=seed)



In [14]:

    
print array[:,1:] # Lables
print array[:,0]









    



[[1L 4L -27L]
 [3L -4L -28L]
 [3L -15L -28L]
 ..., 
 [46L 0L -30L]
 [46L 0L -30L]
 [46L -1L -30L]]
['Top' 'Top' 'Top' ..., 'Left' 'Left' 'Left']



In [15]:

    
# Split-out validation dataset
array = dataset.values
X = array[:,1:]
Y = array[:,0]
validation_size = 0.20
seed = 7
X_train, X_validation, Y_train, Y_validation = train_test_split(X, Y, test_size=validation_size, random_state=seed)



In [16]:

    
# Test options and evaluation metric
seed = 7
scoring = 'accuracy'



In [18]:

    
# Spot Check Algorithms
models = []
models.append(('LR', LogisticRegression()))
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('CART', DecisionTreeClassifier()))
models.append(('NB', GaussianNB()))
models.append(('SVM', SVC()))
# evaluate each model in turn
results = []
names = []
for name, model in models:
    kfold = KFold(n=10, random_state=seed)
    cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)









    



LR: 0.583333 (0.180021)
LDA: 0.472222 (0.335640)
KNN: 0.166667 (0.235702)
CART: 0.388889 (0.078567)
NB: 0.083333 (0.117851)
SVM: 0.083333 (0.117851)



In [7]:

    
from sklearn.externals import joblib
clf = joblib.load('../filename.pkl')



In [17]:

    
target_names = [u'Top', u'Right', u'Bottom', u'Left']



In [24]:

    
target_names[clf.predict([-13,-10,3])]









    Out[24]:





u'Right'



In [ ]: