In [32]:
# Sanity-check baseline on the validation split: score a constant-zero
# predictor against the binarized labels.
import sklearn.metrics

df = dataset['val_df']
yt = (df['label'] > 0).astype(int)
yt_pred = np.zeros(len(yt))  # all-zeros baseline, float dtype
#yt_pred = np.random.rand(len(yt))
vislab.results.get_pr_curve(yt, yt_pred)
print(sklearn.metrics.average_precision_score(yt, yt_pred))
print(sklearn.metrics.accuracy_score(yt, yt_pred))


0.75
0.5

In [33]:
# Same constant-zero baseline as above, now on the test split.
import sklearn.metrics

df = dataset['test_df']
yt = (df['label'] > 0).astype(int)
yt_pred = np.zeros(len(yt))  # all-zeros baseline, float dtype
#yt_pred = np.random.rand(len(yt))
vislab.results.get_pr_curve(yt, yt_pred)
print(sklearn.metrics.average_precision_score(yt, yt_pred))
print(sklearn.metrics.accuracy_score(yt, yt_pred))


0.542185831257
0.915628337487

In [1]:
# Train a multiclass AVA style classifier with Vowpal Wabbit and report
# train/val/test scores.
%load_ext autoreload
%autoreload 2

import vislab.vw3
import vislab.datasets
import vislab.predict

# NOTE(review): hardcoded absolute paths tie this notebook to one machine;
# consider a configurable data directory.
feat_dirname = '/Users/sergeyk/work/aphrodite/data/feats/ava_style'
feat_names = ['lab_hist']

# Style labels are all columns of the label frame that are not
# underscore-prefixed (underscore columns presumably hold metadata --
# TODO confirm against get_style_df).
label_df = vislab.datasets.ava.get_style_df()
all_styles = [x for x in label_df.columns if not x.startswith('_')]
# Positional args: presumably split fraction .2, a boolean flag, and
# random seed 42 -- TODO confirm against get_multiclass_dataset's signature.
dataset = vislab.predict.get_multiclass_dataset(
            label_df, 'ava_style', 'all_styles', all_styles,
            .2, False, 42)

# fit_and_predict runs the hyperparameter grid shown in the output below
# (l1 x l2 x loss), picks the best setting on the validation score, then
# predicts on all splits.
vw = vislab.vw3.VW('/Users/sergeyk/work/vislab/_temp' + '/vw_ava_style_all_styles')
pred_df, test_score, val_score, train_score = vw.fit_and_predict(
            dataset, feat_names, feat_dirname, force=True)
print test_score, val_score, train_score


/Users/sergeyk/work/vislab/vislab/results.py:71: UserWarning: The sum of true positives and false positives are equal to zero for some labels. Precision is ill defined for those labels [ True]. The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [ True]. 
  pred_df['label'], pred_df['pred_bin'])
/Users/sergeyk/work/vislab/vislab/results.py:71: UserWarning: The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [ True]. 
  pred_df['label'], pred_df['pred_bin'])
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [3]. 
  y_true, y_pred)
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [10]. 
  y_true, y_pred)
/Users/sergeyk/work/vislab/vislab/results.py:71: UserWarning: The sum of true positives and false positives are equal to zero for some labels. Precision is ill defined for those labels [False]. The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [False]. 
  pred_df['label'], pred_df['pred_bin'])
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The sum of true positives and false positives are equal to zero for some labels. Precision is ill defined for those labels [0 4]. The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [ 0  2  4  8  9 10 11 13]. 
  y_true, y_pred)
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The sum of true positives and false positives are equal to zero for some labels. Precision is ill defined for those labels [ 0  4 10 11]. The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [ 0  2  4  7  8  9 10 11]. 
  y_true, y_pred)
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The sum of true positives and false positives are equal to zero for some labels. Precision is ill defined for those labels [ 0  4 10]. The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [ 0  2  3  4  8  9 10 11 13]. 
  y_true, y_pred)
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The sum of true positives and false positives are equal to zero for some labels. Precision is ill defined for those labels [0 4]. The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [ 0  2  3  4  8  9 10 11 13]. 
  y_true, y_pred)
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The sum of true positives and false positives are equal to zero for some labels. Precision is ill defined for those labels [ 0  4 10 11]. The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [ 0  2  4  8 10 11]. 
  y_true, y_pred)
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [ 3 13]. 
  y_true, y_pred)
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The sum of true positives and false positives are equal to zero for some labels. Precision is ill defined for those labels [ 0  7  8 11 13]. The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [ 0  6  7  8  9 10 11 12 13]. 
  y_true, y_pred)
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The sum of true positives and false positives are equal to zero for some labels. Precision is ill defined for those labels [ 0 11 13]. The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [ 0  6  7  8 10 11 12 13]. 
  y_true, y_pred)
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The sum of true positives and false positives are equal to zero for some labels. Precision is ill defined for those labels [ 0  6  7  8 11 13]. The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [ 0  6  7  8 10 11 12 13]. 
  y_true, y_pred)
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The sum of true positives and false positives are equal to zero for some labels. Precision is ill defined for those labels [ 0  6  7  8 11 12 13]. The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [ 0  6  7  8 10 11 12 13]. 
  y_true, y_pred)
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The sum of true positives and false positives are equal to zero for some labels. Precision is ill defined for those labels [ 0  7  8 11 12 13]. The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [ 0  7  8 10 11 12 13]. 
  y_true, y_pred)
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The sum of true positives and false positives are equal to zero for some labels. Precision is ill defined for those labels [ 0  6  8 11 12 13]. The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [ 0  3  6  7  8  9 10 11 12 13]. 
  y_true, y_pred)
Running VW training for 18 param settings, 6 at a time
      l1    l2      loss  num_passes val_score test_score
0      0     0     hinge          25     0.207      0.219
1      0  1e-6     hinge          25     0.198      0.223
2      0  1e-9     hinge          25     0.196      0.226
3   1e-6     0     hinge          25     0.046      0.056
4   1e-6  1e-6     hinge          25     0.048      0.041
5   1e-6  1e-9     hinge          25     0.047      0.054
6   1e-9     0     hinge          25     0.135      0.136
7   1e-9  1e-6     hinge          25     0.178      0.197
8   1e-9  1e-9     hinge          25     0.150      0.146
9      0     0  logistic          25     0.242      0.277
10     0  1e-6  logistic          25     0.243      0.294
11     0  1e-9  logistic          25     0.261      0.280
12  1e-6     0  logistic          25     0.088      0.109
13  1e-6  1e-6  logistic          25     0.084      0.094
14  1e-6  1e-9  logistic          25     0.102      0.086
15  1e-9     0  logistic          25     0.206      0.224
16  1e-9  1e-6  logistic          25     0.213      0.220
17  1e-9  1e-9  logistic          25     0.214      0.240
Best setting: {'loss': 'logistic', 'l2': '1e-9', 'num_passes': 25, 'l1': '0'}
Best score: 0.261
Updating best VW model with validation data
Running VW prediction on all splits.
0.288571428571 0.265780730897 0.352195423624
/Users/sergeyk/work/vislab/vislab/results.py:258: UserWarning: The precision and recall are equal to zero for some labels. fbeta_score is ill defined for those labels [6]. 
  y_true, y_pred)

In [7]:
# Per-class average precision on the test split, shown as a bar chart.
import vislab.results

test_preds = pred_df[pred_df['split'] == 'test']
metrics = vislab.results.multiclass_metrics(test_preds, 'pred')
metrics['binary_metrics_df']['ap'].plot(kind='bar')


Out[7]:
<matplotlib.axes.AxesSubplot at 0x11922b310>

In [100]:
# Join test-split predictions with their ground-truth rows and print/plot
# binary metrics for this (unbalanced) task.
pdf = pred_df[pred_df['split'] == 'test'].join(dataset['test_df'])
_ = vislab.results.binary_metrics(
    pdf, balanced=False, with_print=True, with_plot=True)


------------------------------------------------------------
Classification metrics on the {} full full
ap_sklearn: 0.229261539704
mcc: 0.250574431586
ap: 0.242722363914
       precision    recall  f1-score  support
False   0.950989  0.827141  0.884752     2557
True    0.221831  0.536170  0.313823      235
accuracy: 0.802650429799


In [83]:
# Load the lab_hist feature frame from HDF5 and display its summary.
import pandas as pd

feat_path = '/Users/sergeyk/work/aphrodite/data/feats/ava_style/lab_hist.h5'
feat_df = pd.read_hdf(feat_path, 'df')
feat_df


Out[83]:
<class 'pandas.core.frame.DataFrame'>
Index: 13994 entries, 1187 to 97009
Data columns (total 1 columns):
0    13994  non-null values
dtypes: object(1)

In [14]:
# Sparsify the features: stack the per-image vectors into a matrix,
# zero out near-zero entries, and rebuild the one-column frame.
X = np.vstack([entry[0] for entry in feat_df.values])
X[X < 1e-4] = 0
feat_df = pd.DataFrame([(vec,) for vec in X], feat_df.index)

In [23]:
# Overwrite the HDF5 feature store with the sparsified frame.
out_path = '/Users/sergeyk/work/aphrodite/data/feats/ava_style/lab_hist.h5'
feat_df.to_hdf(out_path, 'df', mode='w')


/Users/sergeyk/anaconda/python.app/Contents/lib/python2.7/site-packages/pandas-0.12.0_856_gd686154-py2.7-macosx-10.5-x86_64.egg/pandas/io/pytables.py:2303: PerformanceWarning: 
your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block0_values] [items->[0]]

  warnings.warn(ws, PerformanceWarning)

In [84]:
# Spot-check a single feature vector: its distinct values and a 2-D view.
feat = feat_df.values[80, 0]
print(np.unique(feat)[:10])
matshow(feat.reshape(49, 16).T)  # 49 * 16 = 784 histogram bins


[  0.           0.04837961   0.30261752   2.90482926   3.40419579
   7.60864496  16.43554497  38.28302383]
Out[84]:
<matplotlib.image.AxesImage at 0x11a432e90>

In [85]:
# Build a binary dataset for a single style label.
import vislab.datasets
import vislab.predict

label_df = vislab.datasets.ava.get_style_df()
# Positional args: presumably split fraction .2, -1 (meaning unclear --
# TODO check get_binary_or_regression_dataset's signature), and seed 42.
dataset = vislab.predict.get_binary_or_regression_dataset(
            label_df, 'ava_style', 'style_Complementary_Colors',
            .2, -1, 42)
# The printed head below shows int64 labels with value -1 for negatives.
print(dataset['train_df'].shape)
print(dataset['train_df']['label'].iloc[:3])


(10966, 2)
image_id
1187       -1
1270       -1
1279       -1
Name: label, dtype: int64

In [86]:
# Align the feature frame to the train/test splits. reindex + dropna keeps
# exactly the split images that have features: reindex does pure label-based
# lookup, filling NaN for split indices missing from feat_df, which dropna
# then removes. This replicates what .ix did here, without .ix's deprecated
# and bug-prone fallback to positional indexing.
feat_df_train = feat_df.reindex(dataset['train_df'].index).dropna()
feat_df_test = feat_df.reindex(dataset['test_df'].index).dropna()

In [87]:
# Build train/test design matrices and binarized label vectors.
SPARSITY_THRESHOLD = 1e-4  # zero out near-zero histogram bins (as in In[14])


def _stack_feats(df):
    """Stack the array-valued first column of `df` into a 2-D matrix,
    zeroing entries below SPARSITY_THRESHOLD."""
    mat = np.vstack([row[0] for row in df.values])
    mat[mat < SPARSITY_THRESHOLD] = 0
    return mat


X = _stack_feats(feat_df_train)
# .loc instead of deprecated .ix: every feat_df_train index label came from
# train_df's own index (see the cell above), so label lookup cannot miss.
y = dataset['train_df']['label'].loc[feat_df_train.index].values > 0
Xt = _stack_feats(feat_df_test)
yt = dataset['test_df']['label'].loc[feat_df_test.index].values > 0

for arr in [X, y, Xt, yt]:
    print(arr.shape)


(10901, 784)
(10901,)
(2792, 784)
(2792,)

In [89]:
# Grid-search an SGD logistic-regression classifier over the l2 penalty
# strength alpha.
import sklearn.linear_model
import sklearn.grid_search

# random_state pins the shuffling so the search result is reproducible
# across runs; 42 matches the seed used for the dataset split above.
clf = sklearn.linear_model.SGDClassifier(
    loss='log', n_iter=20, shuffle=True, random_state=42)
grid_clf = sklearn.grid_search.GridSearchCV(
    clf,
    {'alpha': [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8]},
    n_jobs=1
)
grid_clf.fit(X, y)
print(grid_clf.best_params_)


{'alpha': 0.1}

In [90]:
# Evaluate the tuned classifier on the test split: PR curve and AP.
import vislab.results
import sklearn.metrics

yt_pred = grid_clf.predict_proba(Xt)
pos_scores = yt_pred[:, 1]  # probability of the positive class
vislab.results.get_pr_curve(yt, pos_scores)
print(sklearn.metrics.average_precision_score(yt, pos_scores))


0.26694060153

In [47]:
# Fit Vowpal Wabbit directly on the in-memory matrices.
# NOTE(review): this bare `VW` is __main__.VW (see Out[47]), i.e. a class
# defined in a notebook cell not shown here -- not vislab.vw3.VW used above.
vw = VW('temp_vw_ava_style')
vw.fit(X, y)


Running command
vw -d temp_vw_ava_style/train_data.txt -k --cache_file temp_vw_ava_style/train_cache.vw --noop
Running command
vw --cache_file temp_vw_ava_style/train_cache.vw -f temp_vw_ava_style/model.vw --passes 100 --bit_precision=18 --l1=0 --l2=0 --loss_function=hinge --holdout_off
Out[47]:
<__main__.VW at 0x117b5fa10>

In [44]:
# Score the VW model on the test features; plot the PR curve and print AP.
yt_pred = vw.predict_proba(Xt)
vislab.results.get_pr_curve(yt, yt_pred)
ap = sklearn.metrics.average_precision_score(yt, yt_pred)
print(ap)


Running command
vw -d temp_vw_ava_style/test_data.txt -k -t --cache_file temp_vw_ava_style/test_cache.vw -i temp_vw_ava_style/model.vw -p temp_vw_ava_style/test_pred.txt -r temp_vw_ava_style/test_raw_pred.txt
0.120523937182

In [45]:
# Distribution of the VW test-split scores.
# NOTE(review): the bin edges below span roughly [-0.32, 2.93], i.e. outside
# [0, 1] -- these look like raw margins rather than probabilities; confirm
# what VW.predict_proba actually returns.
plt.hist(yt_pred, bins=50)


Out[45]:
(array([  1.00000000e+00,   3.00000000e+00,   1.00000000e+01,
         1.03000000e+02,   1.35000000e+03,   7.22000000e+02,
         3.40000000e+02,   1.28000000e+02,   6.80000000e+01,
         3.60000000e+01,   1.50000000e+01,   7.00000000e+00,
         1.00000000e+00,   5.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   1.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   2.00000000e+00]),
 array([ -3.23705000e-01,  -2.58582920e-01,  -1.93460840e-01,
        -1.28338760e-01,  -6.32166800e-02,   1.90540000e-03,
         6.70274800e-02,   1.32149560e-01,   1.97271640e-01,
         2.62393720e-01,   3.27515800e-01,   3.92637880e-01,
         4.57759960e-01,   5.22882040e-01,   5.88004120e-01,
         6.53126200e-01,   7.18248280e-01,   7.83370360e-01,
         8.48492440e-01,   9.13614520e-01,   9.78736600e-01,
         1.04385868e+00,   1.10898076e+00,   1.17410284e+00,
         1.23922492e+00,   1.30434700e+00,   1.36946908e+00,
         1.43459116e+00,   1.49971324e+00,   1.56483532e+00,
         1.62995740e+00,   1.69507948e+00,   1.76020156e+00,
         1.82532364e+00,   1.89044572e+00,   1.95556780e+00,
         2.02068988e+00,   2.08581196e+00,   2.15093404e+00,
         2.21605612e+00,   2.28117820e+00,   2.34630028e+00,
         2.41142236e+00,   2.47654444e+00,   2.54166652e+00,
         2.60678860e+00,   2.67191068e+00,   2.73703276e+00,
         2.80215484e+00,   2.86727692e+00,   2.93239900e+00]),
 <a list of 50 Patch objects>)