In [32]:
# Sanity-check baseline on the validation split: score a constant-zero
# predictor against the binarized labels.
import sklearn.metrics

df = dataset['val_df']
yt = (df['label'] > 0).astype(int)
yt_pred = np.zeros(len(yt))  # all-zeros baseline, float dtype
#yt_pred = np.random.rand(len(yt))
vislab.results.get_pr_curve(yt, yt_pred)
print(sklearn.metrics.average_precision_score(yt, yt_pred))
print(sklearn.metrics.accuracy_score(yt, yt_pred))


0.75
0.5

In [33]:
# Same constant-zero baseline as above, now on the test split.
import sklearn.metrics

df = dataset['test_df']
yt = (df['label'] > 0).astype(int)
yt_pred = np.zeros(len(yt))  # all-zeros baseline, float dtype
#yt_pred = np.random.rand(len(yt))
vislab.results.get_pr_curve(yt, yt_pred)
print(sklearn.metrics.average_precision_score(yt, yt_pred))
print(sklearn.metrics.accuracy_score(yt, yt_pred))


0.542185831257
0.915628337487

In [1]:
# Train a multiclass AVA style classifier with Vowpal Wabbit and report
# train/val/test scores.
%load_ext autoreload
%autoreload 2

import vislab.vw3
import vislab.datasets
import vislab.predict

# NOTE(review): hardcoded absolute paths tie this notebook to one machine;
# consider a configurable data directory.
feat_dirname = '/Users/sergeyk/work/aphrodite/data/feats/ava_style'
feat_names = ['lab_hist']

# Style labels are all columns of the label frame that are not
# underscore-prefixed (underscore columns presumably hold metadata --
# TODO confirm against get_style_df).
label_df = vislab.datasets.ava.get_style_df()
all_styles = [x for x in label_df.columns if not x.startswith('_')]
# Positional args: presumably split fraction .2, a boolean flag, and
# random seed 42 -- TODO confirm against get_multiclass_dataset's signature.
dataset = vislab.predict.get_multiclass_dataset(
            label_df, 'ava_style', 'all_styles', all_styles,
            .2, False, 42)

# fit_and_predict runs the hyperparameter grid shown in the output below
# (l1 x l2 x loss), picks the best setting on the validation score, then
# predicts on all splits.
vw = vislab.vw3.VW('/Users/sergeyk/work/vislab/_temp' + '/vw_ava_style_all_styles')
pred_df, test_score, val_score, train_score = vw.fit_and_predict(
            dataset, feat_names, feat_dirname, force=True)
print test_score, val_score, train_score


/Users/sergeyk/work/vislab/vislab/results.py:71: UserWarning: The sum of true positives and false positives are equal to zero for some labels. Precision is ill defined for those labels [ True]. The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [ True]. 
  pred_df['label'], pred_df['pred_bin'])
/Users/sergeyk/work/vislab/vislab/results.py:71: UserWarning: The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [ True]. 
  pred_df['label'], pred_df['pred_bin'])
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [3]. 
  y_true, y_pred)
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [10]. 
  y_true, y_pred)
/Users/sergeyk/work/vislab/vislab/results.py:71: UserWarning: The sum of true positives and false positives are equal to zero for some labels. Precision is ill defined for those labels [False]. The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [False]. 
  pred_df['label'], pred_df['pred_bin'])
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The sum of true positives and false positives are equal to zero for some labels. Precision is ill defined for those labels [0 4]. The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [ 0  2  4  8  9 10 11 13]. 
  y_true, y_pred)
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The sum of true positives and false positives are equal to zero for some labels. Precision is ill defined for those labels [ 0  4 10 11]. The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [ 0  2  4  7  8  9 10 11]. 
  y_true, y_pred)
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The sum of true positives and false positives are equal to zero for some labels. Precision is ill defined for those labels [ 0  4 10]. The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [ 0  2  3  4  8  9 10 11 13]. 
  y_true, y_pred)
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The sum of true positives and false positives are equal to zero for some labels. Precision is ill defined for those labels [0 4]. The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [ 0  2  3  4  8  9 10 11 13]. 
  y_true, y_pred)
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The sum of true positives and false positives are equal to zero for some labels. Precision is ill defined for those labels [ 0  4 10 11]. The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [ 0  2  4  8 10 11]. 
  y_true, y_pred)
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [ 3 13]. 
  y_true, y_pred)
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The sum of true positives and false positives are equal to zero for some labels. Precision is ill defined for those labels [ 0  7  8 11 13]. The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [ 0  6  7  8  9 10 11 12 13]. 
  y_true, y_pred)
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The sum of true positives and false positives are equal to zero for some labels. Precision is ill defined for those labels [ 0 11 13]. The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [ 0  6  7  8 10 11 12 13]. 
  y_true, y_pred)
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The sum of true positives and false positives are equal to zero for some labels. Precision is ill defined for those labels [ 0  6  7  8 11 13]. The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [ 0  6  7  8 10 11 12 13]. 
  y_true, y_pred)
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The sum of true positives and false positives are equal to zero for some labels. Precision is ill defined for those labels [ 0  6  7  8 11 12 13]. The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [ 0  6  7  8 10 11 12 13]. 
  y_true, y_pred)
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The sum of true positives and false positives are equal to zero for some labels. Precision is ill defined for those labels [ 0  7  8 11 12 13]. The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [ 0  7  8 10 11 12 13]. 
  y_true, y_pred)
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The sum of true positives and false positives are equal to zero for some labels. Precision is ill defined for those labels [ 0  6  8 11 12 13]. The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [ 0  3  6  7  8  9 10 11 12 13]. 
  y_true, y_pred)
Running VW training for 18 param settings, 6 at a time
      l1    l2      loss  num_passes val_score test_score
0      0     0     hinge          25     0.207      0.219
1      0  1e-6     hinge          25     0.198      0.223
2      0  1e-9     hinge          25     0.196      0.226
3   1e-6     0     hinge          25     0.046      0.056
4   1e-6  1e-6     hinge          25     0.048      0.041
5   1e-6  1e-9     hinge          25     0.047      0.054
6   1e-9     0     hinge          25     0.135      0.136
7   1e-9  1e-6     hinge          25     0.178      0.197
8   1e-9  1e-9     hinge          25     0.150      0.146
9      0     0  logistic          25     0.242      0.277
10     0  1e-6  logistic          25     0.243      0.294
11     0  1e-9  logistic          25     0.261      0.280
12  1e-6     0  logistic          25     0.088      0.109
13  1e-6  1e-6  logistic          25     0.084      0.094
14  1e-6  1e-9  logistic          25     0.102      0.086
15  1e-9     0  logistic          25     0.206      0.224
16  1e-9  1e-6  logistic          25     0.213      0.220
17  1e-9  1e-9  logistic          25     0.214      0.240
Best setting: {'loss': 'logistic', 'l2': '1e-9', 'num_passes': 25, 'l1': '0'}
Best score: 0.261
Updating best VW model with validation data
Running VW prediction on all splits.
0.288571428571 0.265780730897 0.352195423624
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [6]. 
  y_true, y_pred)

In [7]:
# Per-class average precision on the test split, shown as a bar chart.
import vislab.results

test_preds = pred_df[pred_df['split'] == 'test']
metrics = vislab.results.multiclass_metrics(test_preds, 'pred')
metrics['binary_metrics_df']['ap'].plot(kind='bar')


Out[7]:
<matplotlib.axes.AxesSubplot at 0x11922b310>

In [100]:
# Join test-split predictions with their ground-truth rows and print/plot
# binary metrics for this (unbalanced) task.
pdf = pred_df[pred_df['split'] == 'test'].join(dataset['test_df'])
_ = vislab.results.binary_metrics(
    pdf, balanced=False, with_print=True, with_plot=True)


------------------------------------------------------------
Classification metrics on the {} full full
ap_sklearn: 0.229261539704
mcc: 0.250574431586
ap: 0.242722363914
       precision    recall  f1-score  support
False   0.950989  0.827141  0.884752     2557
True    0.221831  0.536170  0.313823      235
accuracy: 0.802650429799


In [83]:
# Load the lab_hist feature frame from HDF5 and display its summary.
import pandas as pd

feat_path = '/Users/sergeyk/work/aphrodite/data/feats/ava_style/lab_hist.h5'
feat_df = pd.read_hdf(feat_path, 'df')
feat_df


Out[83]:
<class 'pandas.core.frame.DataFrame'>
Index: 13994 entries, 1187 to 97009
Data columns (total 1 columns):
0    13994  non-null values
dtypes: object(1)

In [14]:
# Sparsify the features: stack the per-image vectors into a matrix,
# zero out near-zero entries, and rebuild the one-column frame.
X = np.vstack([entry[0] for entry in feat_df.values])
X[X < 1e-4] = 0
feat_df = pd.DataFrame([(vec,) for vec in X], feat_df.index)

In [23]:
# Overwrite the HDF5 feature store with the sparsified frame.
out_path = '/Users/sergeyk/work/aphrodite/data/feats/ava_style/lab_hist.h5'
feat_df.to_hdf(out_path, 'df', mode='w')


/Users/sergeyk/anaconda/python.app/Contents/lib/python2.7/site-packages/pandas-0.12.0_856_gd686154-py2.7-macosx-10.5-x86_64.egg/pandas/io/pytables.py:2303: PerformanceWarning: 
your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block0_values] [items->[0]]

  warnings.warn(ws, PerformanceWarning)

In [84]:
# Spot-check a single feature vector: its distinct values and a 2-D view.
feat = feat_df.values[80, 0]
print(np.unique(feat)[:10])
matshow(feat.reshape(49, 16).T)  # 49 * 16 = 784 histogram bins


[  0.           0.04837961   0.30261752   2.90482926   3.40419579
   7.60864496  16.43554497  38.28302383]
Out[84]:
<matplotlib.image.AxesImage at 0x11a432e90>

In [85]:
# Build a binary dataset for a single style label.
import vislab.datasets
import vislab.predict

label_df = vislab.datasets.ava.get_style_df()
# Positional args: presumably split fraction .2, -1 (meaning unclear --
# TODO check get_binary_or_regression_dataset's signature), and seed 42.
dataset = vislab.predict.get_binary_or_regression_dataset(
            label_df, 'ava_style', 'style_Complementary_Colors',
            .2, -1, 42)
# The printed head below shows int64 labels with value -1 for negatives.
print(dataset['train_df'].shape)
print(dataset['train_df']['label'].iloc[:3])


(10966, 2)
image_id
1187       -1
1270       -1
1279       -1
Name: label, dtype: int64

In [86]:
# Align the feature frame to the train/test splits. reindex + dropna keeps
# exactly the split images that have features: reindex does pure label-based
# lookup, filling NaN for split indices missing from feat_df, which dropna
# then removes. This replicates what .ix did here, without .ix's deprecated
# and bug-prone fallback to positional indexing.
feat_df_train = feat_df.reindex(dataset['train_df'].index).dropna()
feat_df_test = feat_df.reindex(dataset['test_df'].index).dropna()

In [87]:
# Build train/test design matrices and binarized label vectors.
SPARSITY_THRESHOLD = 1e-4  # zero out near-zero histogram bins (as in In[14])


def _stack_feats(df):
    """Stack the array-valued first column of `df` into a 2-D matrix,
    zeroing entries below SPARSITY_THRESHOLD."""
    mat = np.vstack([row[0] for row in df.values])
    mat[mat < SPARSITY_THRESHOLD] = 0
    return mat


X = _stack_feats(feat_df_train)
# .loc instead of deprecated .ix: every feat_df_train index label came from
# train_df's own index (see the cell above), so label lookup cannot miss.
y = dataset['train_df']['label'].loc[feat_df_train.index].values > 0
Xt = _stack_feats(feat_df_test)
yt = dataset['test_df']['label'].loc[feat_df_test.index].values > 0

for arr in [X, y, Xt, yt]:
    print(arr.shape)


(10901, 784)
(10901,)
(2792, 784)
(2792,)

In [89]:
# Grid-search an SGD logistic-regression classifier over the l2 penalty
# strength alpha.
import sklearn.linear_model
import sklearn.grid_search

# random_state pins the shuffling so the search result is reproducible
# across runs; 42 matches the seed used for the dataset split above.
clf = sklearn.linear_model.SGDClassifier(
    loss='log', n_iter=20, shuffle=True, random_state=42)
grid_clf = sklearn.grid_search.GridSearchCV(
    clf,
    {'alpha': [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8]},
    n_jobs=1
)
grid_clf.fit(X, y)
print(grid_clf.best_params_)


{'alpha': 0.1}

In [90]:
# Evaluate the tuned classifier on the test split: PR curve and AP.
import vislab.results
import sklearn.metrics

yt_pred = grid_clf.predict_proba(Xt)
pos_scores = yt_pred[:, 1]  # probability of the positive class
vislab.results.get_pr_curve(yt, pos_scores)
print(sklearn.metrics.average_precision_score(yt, pos_scores))


0.26694060153

In [47]:
# Fit Vowpal Wabbit directly on the in-memory matrices.
# NOTE(review): this bare `VW` is __main__.VW (see Out[47]), i.e. a class
# defined in a notebook cell not shown here -- not vislab.vw3.VW used above.
vw = VW('temp_vw_ava_style')
vw.fit(X, y)


Running command
vw -d temp_vw_ava_style/train_data.txt -k --cache_file temp_vw_ava_style/train_cache.vw --noop
Running command
vw --cache_file temp_vw_ava_style/train_cache.vw -f temp_vw_ava_style/model.vw --passes 100 --bit_precision=18 --l1=0 --l2=0 --loss_function=hinge --holdout_off
Out[47]:
<__main__.VW at 0x117b5fa10>

In [44]:
# Score the VW model on the test features; plot the PR curve and print AP.
yt_pred = vw.predict_proba(Xt)
vislab.results.get_pr_curve(yt, yt_pred)
ap = sklearn.metrics.average_precision_score(yt, yt_pred)
print(ap)


Running command
vw -d temp_vw_ava_style/test_data.txt -k -t --cache_file temp_vw_ava_style/test_cache.vw -i temp_vw_ava_style/model.vw -p temp_vw_ava_style/test_pred.txt -r temp_vw_ava_style/test_raw_pred.txt
0.120523937182

In [45]:
# Distribution of the VW test-split scores.
# NOTE(review): the bin edges below span roughly [-0.32, 2.93], i.e. outside
# [0, 1] -- these look like raw margins rather than probabilities; confirm
# what VW.predict_proba actually returns.
plt.hist(yt_pred, bins=50)


Out[45]:
(array([  1.00000000e+00,   3.00000000e+00,   1.00000000e+01,
         1.03000000e+02,   1.35000000e+03,   7.22000000e+02,
         3.40000000e+02,   1.28000000e+02,   6.80000000e+01,
         3.60000000e+01,   1.50000000e+01,   7.00000000e+00,
         1.00000000e+00,   5.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   1.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   2.00000000e+00]),
 array([ -3.23705000e-01,  -2.58582920e-01,  -1.93460840e-01,
        -1.28338760e-01,  -6.32166800e-02,   1.90540000e-03,
         6.70274800e-02,   1.32149560e-01,   1.97271640e-01,
         2.62393720e-01,   3.27515800e-01,   3.92637880e-01,
         4.57759960e-01,   5.22882040e-01,   5.88004120e-01,
         6.53126200e-01,   7.18248280e-01,   7.83370360e-01,
         8.48492440e-01,   9.13614520e-01,   9.78736600e-01,
         1.04385868e+00,   1.10898076e+00,   1.17410284e+00,
         1.23922492e+00,   1.30434700e+00,   1.36946908e+00,
         1.43459116e+00,   1.49971324e+00,   1.56483532e+00,
         1.62995740e+00,   1.69507948e+00,   1.76020156e+00,
         1.82532364e+00,   1.89044572e+00,   1.95556780e+00,
         2.02068988e+00,   2.08581196e+00,   2.15093404e+00,
         2.21605612e+00,   2.28117820e+00,   2.34630028e+00,
         2.41142236e+00,   2.47654444e+00,   2.54166652e+00,
         2.60678860e+00,   2.67191068e+00,   2.73703276e+00,
         2.80215484e+00,   2.86727692e+00,   2.93239900e+00]),
 <a list of 50 Patch objects>)