In [1]:
import numpy as np
import itertools
import sklearn.metrics as metrics
from scipy.stats import zscore
from sklearn.preprocessing import normalize
import sklearn.cross_validation as cv
%load_ext autoreload
%autoreload 2
%aimport utils
from LogisticRegressionClassifier import LogisticRegressionClassifier

In [2]:
%pylab inline


Populating the interactive namespace from numpy and matplotlib
WARNING: pylab import has clobbered these variables: ['normalize']
`%matplotlib` prevents importing * from pylab and numpy

In [3]:
fft_dict, fft_labels, ffts = utils.read_features(feature='fft')
fft_dict


Out[3]:
{'blues': 5,
 'classical': 6,
 'country': 8,
 'disco': 9,
 'hiphop': 7,
 'jazz': 2,
 'metal': 0,
 'pop': 1,
 'reggae': 4,
 'rock': 3}
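
The mapping above goes from genre name to integer label; for reading predictions back out as genre names, the inverse mapping is handy. A minimal sketch (the name `fft_label_names` is just for illustration):

In [ ]:
# Invert genre -> label into label -> genre, e.g. {0: 'metal', 1: 'pop', ...}
fft_label_names = {label: genre for genre, label in fft_dict.items()}
fft_label_names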

In [5]:
lrc_fft = LogisticRegressionClassifier(ffts, fft_labels, fft_dict)
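
The `zscore` import at the top of the notebook suggests the FFT features could also be standardized before training; if the raw magnitudes vary widely in scale, zero-mean/unit-variance columns often help the gradient steps converge. A sketch, assuming the classifier accepts any (n_samples, n_features) array (`ffts_z` and `lrc_fft_z` are illustrative names):

In [ ]:
# Standardize each FFT feature column before handing it to the classifier.
ffts_z = zscore(ffts, axis=0)
lrc_fft_z = LogisticRegressionClassifier(ffts_z, fft_labels, fft_dict)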

In [7]:
lrc_fft.cross_validate(3)


Training cross validation round 0
----------------------------------
Step 0: Error: -1.000000 updating learning rate: 0.001000
Final Step 1000: Error: 0.105710 
 Learn rate: 0.001000
classification report 
----------------------------------
             precision    recall  f1-score   support

      metal       0.61      0.70      0.65        40
        pop       0.53      0.41      0.47        41
       jazz       0.24      0.23      0.23        31
       rock       0.24      0.27      0.25        33
     reggae       0.23      0.29      0.25        28
      blues       0.32      0.28      0.30        32
  classical       0.37      0.91      0.53        23
     hiphop       0.27      0.17      0.21        36
    country       0.26      0.17      0.20        36
      disco       0.38      0.26      0.31        34

avg / total       0.35      0.36      0.34       334

Confusion matrix
----------------------------------
[[28  0  0  2  1  3  1  2  1  2]
 [ 0 17  5  3  5  1  0  4  2  4]
 [ 0  2  7  4  2  1 11  0  3  1]
 [ 5  1  1  9  2  4  1  3  3  4]
 [ 6  2  3  1  8  1  4  2  1  0]
 [ 2  0  4  4  2  9  6  2  3  0]
 [ 0  0  0  2  0  0 21  0  0  0]
 [ 3  5  1  5  8  0  3  6  3  2]
 [ 0  1  4  6  3  6  7  1  6  2]
 [ 2  4  4  2  4  3  3  2  1  9]]
----------------------------------
Training cross validation round 1
----------------------------------
Final Step 1000: Error: 0.104027 
 Learn rate: 0.001000
classification report 
----------------------------------
             precision    recall  f1-score   support

      metal       0.34      0.58      0.43        24
        pop       0.65      0.62      0.63        32
       jazz       0.26      0.28      0.27        32
       rock       0.27      0.17      0.21        35
     reggae       0.26      0.29      0.27        35
      blues       0.28      0.24      0.25        34
  classical       0.46      0.79      0.58        38
     hiphop       0.25      0.14      0.18        37
    country       0.23      0.17      0.19        36
      disco       0.20      0.17      0.18        30

avg / total       0.32      0.34      0.32       333

Confusion matrix
----------------------------------
[[14  0  0  1  4  2  0  0  1  2]
 [ 0 20  4  1  2  0  2  1  0  2]
 [ 0  0  9  1  4  4 10  1  2  1]
 [ 6  1  3  6  2  5  4  2  3  3]
 [ 5  1  1  1 10  4  5  4  3  1]
 [ 4  0  4  3  5  8  2  2  3  3]
 [ 0  1  4  0  2  1 30  0  0  0]
 [ 5  5  1  2  2  1  5  5  5  6]
 [ 1  0  9  3  3  3  6  3  6  2]
 [ 6  3  0  4  5  1  1  2  3  5]]
----------------------------------
Training cross validation round 2
----------------------------------
Final Step 1000: Error: 0.106308 
 Learn rate: 0.001000
classification report 
----------------------------------
             precision    recall  f1-score   support

      metal       0.50      0.56      0.53        36
        pop       0.54      0.48      0.51        27
       jazz       0.33      0.30      0.31        37
       rock       0.19      0.16      0.17        32
     reggae       0.31      0.22      0.25        37
      blues       0.36      0.26      0.31        34
  classical       0.45      0.87      0.59        39
     hiphop       0.17      0.15      0.16        27
    country       0.17      0.21      0.19        28
      disco       0.36      0.22      0.28        36

avg / total       0.34      0.35      0.34       333

Confusion matrix
----------------------------------
[[20  1  1  5  1  2  1  1  2  2]
 [ 0 13  1  2  1  0  1  4  4  1]
 [ 2  2 11  1  2  5 11  2  1  0]
 [ 5  2  0  5  1  2  2  3  8  4]
 [ 0  2  5  1  8  3  8  4  4  2]
 [ 3  0  4  3  2  9  4  2  5  2]
 [ 1  0  3  0  0  0 34  0  1  0]
 [ 6  1  3  0  3  0  7  4  1  2]
 [ 1  1  2  3  5  3  5  1  6  1]
 [ 2  2  3  7  3  1  3  3  4  8]]
----------------------------------
-------------
we love confusion_matrices. here is the average for the whole training run.
[[ 20.66666667   0.33333333   0.33333333   2.66666667   2.           2.33333333
    0.66666667   1.           1.33333333   2.        ]
 [  0.          16.66666667   3.33333333   2.           2.66666667
    0.33333333   1.           3.           2.           2.33333333]
 [  0.66666667   1.33333333   9.           2.           2.66666667
    3.33333333  10.66666667   1.           2.           0.66666667]
 [  5.33333333   1.33333333   1.33333333   6.66666667   1.66666667
    3.66666667   2.33333333   2.66666667   4.66666667   3.66666667]
 [  3.66666667   1.66666667   3.           1.           8.66666667
    2.66666667   5.66666667   3.33333333   2.66666667   1.        ]
 [  3.           0.           4.           3.33333333   3.           8.66666667
    4.           2.           3.66666667   1.66666667]
 [  0.33333333   0.33333333   2.33333333   0.66666667   0.66666667
    0.33333333  28.33333333   0.           0.33333333   0.        ]
 [  4.66666667   3.66666667   1.66666667   2.33333333   4.33333333
    0.33333333   5.           5.           3.           3.33333333]
 [  0.66666667   0.66666667   5.           4.           3.66666667   4.
    6.           1.66666667   6.           1.66666667]
 [  3.33333333   3.           2.33333333   4.33333333   4.           1.66666667
    2.33333333   2.33333333   2.66666667   7.33333333]]
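
The averaged matrix above is in raw counts; dividing each row by its sum turns it into per-genre recall, which is easier to compare across classes with uneven support. A sketch, assuming the averaged matrix is available as a NumPy array (`avg_cm` is a hypothetical name; the cells shown only print it):

In [ ]:
# `avg_cm` is assumed to hold the averaged confusion matrix printed above.
# After row-normalization each row sums to 1 and the diagonal reads as recall.
avg_cm_normalized = avg_cm / avg_cm.sum(axis=1, keepdims=True)
np.round(avg_cm_normalized, 2)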

In [171]:
# `report` is assumed to hold a confusion matrix from an earlier run (it is not defined in the cells shown)
_ = utils.plot_confusion_matrix(report)



In [154]:
def debug_lrc(lrc):
    """Print the shapes of a classifier's arrays to sanity-check their dimensions."""
    print(lrc.X.shape)
    print(lrc.X_test.shape)
    print(lrc.y.shape)
    print(lrc.y_test.shape)
    print(lrc.Delta.shape)
    print(lrc.W.shape)
    print(lrc._X.shape)

In [9]:
mfc_dict, mfc_labels, mfcs = utils.read_features(feature='mfc')

In [10]:
lrc_mfc = LogisticRegressionClassifier(mfcs, mfc_labels, mfc_dict)

In [11]:
lrc_mfc.metrics


Out[11]:
{'train_rounds': 0}

In [14]:
lrc_mfc.cross_validate(3)


Training cross validation round 0
----------------------------------
Step 0: Error: -1.000000 updating learning rate: 0.001000
Final Step 1000: Error: 0.104261 
 Learn rate: 0.001000
classification report 
----------------------------------
             precision    recall  f1-score   support

      metal       0.69      0.67      0.68        33
        pop       0.62      0.87      0.72        30
       jazz       0.49      0.50      0.49        36
       rock       0.33      0.29      0.31        35
      blues       0.50      0.42      0.46        40
     reggae       0.43      0.54      0.48        28
  classical       0.81      0.85      0.83        34
     hiphop       0.38      0.41      0.39        29
    country       0.52      0.31      0.39        42
      disco       0.42      0.48      0.45        27

avg / total       0.52      0.52      0.52       334

Confusion matrix
----------------------------------
[[22  0  0  1  3  0  0  5  0  2]
 [ 0 26  0  0  0  2  0  0  1  1]
 [ 0  1 18  1  2  4  7  1  2  0]
 [ 1  3  2 10  5  3  0  3  0  8]
 [ 5  0  3  5 17  3  0  1  4  2]
 [ 0  1  2  2  1 15  0  6  0  1]
 [ 0  0  3  1  0  1 29  0  0  0]
 [ 2  2  1  0  0  5  0 12  4  3]
 [ 0  6  7  8  4  2  0  1 13  1]
 [ 2  3  1  2  2  0  0  3  1 13]]
----------------------------------
Training cross validation round 1
----------------------------------
Final Step 1000: Error: 0.120645 
 Learn rate: 0.001000
classification report 
----------------------------------
             precision    recall  f1-score   support

      metal       0.76      0.86      0.81        36
        pop       0.68      0.78      0.73        41
       jazz       0.52      0.42      0.46        36
       rock       0.25      0.15      0.19        27
      blues       0.38      0.58      0.46        26
     reggae       0.58      0.51      0.55        35
  classical       0.85      0.91      0.88        32
     hiphop       0.52      0.38      0.44        37
    country       0.47      0.53      0.50        30
      disco       0.34      0.36      0.35        33

avg / total       0.55      0.56      0.55       333

Confusion matrix
----------------------------------
[[31  0  0  0  2  0  0  2  0  1]
 [ 0 32  2  1  0  1  1  1  2  1]
 [ 1  0 15  1  2  3  2  1  5  6]
 [ 0  3  2  4 11  1  0  0  1  5]
 [ 4  0  2  4 15  0  0  0  1  0]
 [ 0  3  1  0  3 18  0  4  2  4]
 [ 0  0  2  0  0  0 29  0  1  0]
 [ 4  3  0  1  0  6  1 14  4  4]
 [ 0  1  5  1  4  0  0  1 16  2]
 [ 1  5  0  4  2  2  1  4  2 12]]
----------------------------------
Training cross validation round 2
----------------------------------
Final Step 1000: Error: 0.118117 
 Learn rate: 0.001000
classification report 
----------------------------------
             precision    recall  f1-score   support

      metal       0.56      0.77      0.65        31
        pop       0.77      0.83      0.80        29
       jazz       0.41      0.54      0.46        28
       rock       0.35      0.29      0.32        38
      blues       0.41      0.44      0.42        34
     reggae       0.50      0.38      0.43        37
  classical       0.90      0.79      0.84        34
     hiphop       0.53      0.53      0.53        34
    country       0.46      0.39      0.42        28
      disco       0.47      0.45      0.46        40

avg / total       0.53      0.53      0.53       333

Confusion matrix
----------------------------------
[[24  0  0  2  2  1  0  1  0  1]
 [ 0 24  0  1  0  0  0  2  0  2]
 [ 1  0 15  0  1  5  1  1  3  1]
 [ 2  0  4 11  6  1  0  2  4  8]
 [ 9  0  0  4 15  1  1  0  4  0]
 [ 0  2  6  0  4 14  1  5  1  4]
 [ 1  0  5  0  0  1 27  0  0  0]
 [ 4  4  1  1  1  2  0 18  0  3]
 [ 0  1  2  3  6  1  0  3 11  1]
 [ 2  0  4  9  2  2  0  2  1 18]]
----------------------------------
-------------
we love confusion_matrices. here is the average for the whole training run.
[[ 25.   0.   0.   1.   2.   0.   0.   2.   0.   1.]
 [  0.  27.   0.   0.   0.   1.   0.   1.   1.   1.]
 [  0.   0.  16.   0.   1.   4.   3.   1.   3.   2.]
 [  1.   2.   2.   8.   7.   1.   0.   1.   1.   7.]
 [  6.   0.   1.   4.  15.   1.   0.   0.   3.   0.]
 [  0.   2.   3.   0.   2.  15.   0.   5.   1.   3.]
 [  0.   0.   3.   0.   0.   0.  28.   0.   0.   0.]
 [  3.   3.   0.   0.   0.   4.   0.  14.   2.   3.]
 [  0.   2.   4.   4.   4.   1.   0.   1.  13.   1.]
 [  1.   2.   1.   5.   2.   1.   0.   3.   1.  14.]]
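
A single summary number for each averaged matrix is overall accuracy: correct predictions sit on the diagonal, so accuracy is the trace over the total count. A sketch, again assuming the two averaged matrices are available as arrays (`avg_cm_fft` and `avg_cm_mfc` are hypothetical names):

In [ ]:
# Hypothetical: avg_cm_fft / avg_cm_mfc hold the averaged matrices printed above
# for the FFT and MFCC runs; the trace/sum ratio is the overall accuracy.
for name, cm in [('fft', avg_cm_fft), ('mfc', avg_cm_mfc)]:
    print(name, np.trace(cm) / cm.sum())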

In [ ]:
lrc_mfc.prediction(lrc_mfc.W, lrc_mfc.X_test)

In [ ]:
lrc_mfc.train(reset=False, eta=0.001)

In [ ]:
lrc_mfc.metrics

In [138]:
mfc_pred, mfc_probas = lrc_mfc.prediction(lrc_mfc.W, mfcs)

In [139]:
print(metrics.classification_report(mfc_labels, mfc_pred))
cm = utils.plot_confusion_matrix(metrics.confusion_matrix(mfc_labels, mfc_pred))


             precision    recall  f1-score   support

          0       0.43      0.55      0.48       100
          1       0.47      0.14      0.22       100
          2       0.38      0.97      0.55       100
          3       0.50      0.06      0.11       100
          4       0.57      0.04      0.07       100
          5       0.49      0.88      0.63       100
          6       0.62      0.15      0.24       100
          7       0.57      0.50      0.53       100
          8       0.29      0.02      0.04       100
          9       0.20      0.55      0.30       100

avg / total       0.45      0.39      0.32      1000
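
The report above indexes classes by their integer labels; passing `target_names` prints genre names instead. A sketch that derives the name order from `mfc_dict` sorted by label index:

In [ ]:
# Order genre names by their integer label so they line up with labels 0..9.
genre_names = [genre for genre, label in sorted(mfc_dict.items(), key=lambda kv: kv[1])]
print(metrics.classification_report(mfc_labels, mfc_pred, target_names=genre_names))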


In [135]:
kf = cv.KFold(10, n_folds=2, shuffle=True)
for i, k in enumerate(kf):
    print(i, list(k))


0 [array([1, 5, 6, 7, 8]), array([0, 2, 3, 4, 9])]
1 [array([0, 2, 3, 4, 9]), array([1, 5, 6, 7, 8])]

In [2]:
import sklearn
sklearn.__version__


Out[2]:
'0.16.0'
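
Version 0.16 still ships the `sklearn.cross_validation` module used throughout this notebook; from 0.18 onward the same helpers live in `sklearn.model_selection`, and the old module was eventually removed. A sketch of the equivalent import on a newer install:

In [ ]:
# On scikit-learn >= 0.18 the cross-validation helpers live in model_selection
# (the newer KFold takes n_splits and is used via kf.split(X)); fall back to the
# 0.16-era module otherwise.
try:
    from sklearn.model_selection import KFold, cross_val_score, cross_val_predict
except ImportError:
    from sklearn.cross_validation import KFold, cross_val_score, cross_val_predict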

In [5]:
from sklearn import datasets
from sklearn import svm
import sklearn.cross_validation as cv

iris = datasets.load_iris()
iris.data.shape, iris.target.shape


Out[5]:
((150, 4), (150,))

In [6]:
clf = svm.SVC(kernel='linear', C=1)
scores = cv.cross_val_score(
    clf, iris.data, iris.target, cv=5)

In [42]:
scores


Out[42]:
array([ 0.96666667,  1.        ,  0.96666667,  0.96666667,  1.        ])

In [43]:
scores.mean()


Out[43]:
0.98000000000000009
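
A common way to report these fold scores (the idiom used in the scikit-learn docs) is the mean accuracy with a two-standard-deviation spread:

In [ ]:
# Summarize the 5 fold scores as mean accuracy +/- 2 standard deviations.
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))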

In [8]:
predicted = cv.cross_val_predict(clf, iris.data, iris.target, cv=10)

In [12]:
print(metrics.classification_report(iris.target, predicted))


             precision    recall  f1-score   support

          0       1.00      1.00      1.00        50
          1       0.98      0.94      0.96        50
          2       0.94      0.98      0.96        50

avg / total       0.97      0.97      0.97       150


In [18]:
kf = cv.KFold(iris.data.shape[0], n_folds=10, shuffle=True)
# KFold yields (train_index, test_index); unpacking it as (test, train) below means each
# model is fit on the single held-out fold (~15 samples) and evaluated on the other nine
# (~135 samples), which is why each confusion matrix printed below sums to ~135.
for test, train in kf:
    X_train, X_test, y_train, y_test = \
            iris.data[train], iris.data[test], iris.target[train], iris.target[test]
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    print(metrics.confusion_matrix(y_test, pred))


[[42  0  0]
 [ 0 26 23]
 [ 0  1 43]]
[[45  0  0]
 [ 0 43  2]
 [ 0  2 43]]
[[45  0  0]
 [ 0 42  4]
 [ 0  0 44]]
[[47  0  0]
 [ 0 43  0]
 [ 0  6 39]]
[[45  0  0]
 [ 0 45  0]
 [ 0  9 36]]
[[43  0  0]
 [ 3 44  0]
 [ 0 13 32]]
[[46  0  0]
 [ 0 37  8]
 [ 0  1 43]]
[[47  0  0]
 [ 0 39  4]
 [ 0  0 45]]
[[45  0  0]
 [ 0 43  0]
 [ 0 19 28]]
[[45  0  0]
 [ 0 43  1]
 [ 0  7 39]]
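
Rather than reading ten separate matrices, the per-fold confusion matrices can be accumulated into one overall matrix, the same idea as the averaged matrices printed by `cross_validate` above. A minimal sketch of the loop with an accumulator (using the conventional train/test unpacking order):

In [ ]:
# Sum the per-fold confusion matrices into a single 3x3 summary for iris.
total_cm = np.zeros((3, 3), dtype=int)
for train, test in cv.KFold(iris.data.shape[0], n_folds=10, shuffle=True):
    clf.fit(iris.data[train], iris.target[train])
    pred = clf.predict(iris.data[test])
    total_cm += metrics.confusion_matrix(iris.target[test], pred)
print(total_cm)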

In [ ]: