In [1]:
%load_ext autoreload
%autoreload 2
import re

import pandas as pd
import sklearn.metrics

import aphrodite.results
import vislab
import vislab.datasets
import vislab.results
import vislab.util


/Users/karayev/anaconda/lib/python2.7/site-packages/configobj.py:145: DeprecationWarning: The compiler package is deprecated and removed in Python 3.x.
  import compiler

In [2]:
# Fetch the Behance illustration DataFrame. It is handed to
# vislab.results.multiclass_metrics_feat_comparison further down as the
# source of labels (its `source_label_df` argument).
label_df = vislab.datasets.behance.get_illustration_df()

In [4]:
# Inspect the records stored for one prediction experiment. The collection is
# looked up in the 'predict' database of the client returned by
# vislab.util.get_mongodb_client().
# An earlier run used the 'behance_dec28' collection instead.
c = vislab.util.get_mongodb_client()['predict']['behance_illustration_jan15']

# Stale records for the 'noise' feature were previously purged by hand with
# c.remove({'features': 'noise'}); that destructive step is intentionally not
# executed here.

# One row per stored record. list() drains the cursor directly; the former
# list comprehension did the same work but leaked its loop variable `x` into
# the notebook namespace under Python 2.
pd.DataFrame(list(c.find()))


Out[4]:
_id data features num_test num_train num_val quadratic results_name score_test score_val task
0 52d6e08e52e471defe2ff34b behance_illustration_tag_logo [noise] 5047 18623 1128 None data_behance_illustration_tag_logo_features_['... 0.496454 0.490248 clf
1 52d6e3fc52e471defe2ff34c behance_illustration_tag_3d [noise] 14637 9581 580 None data_behance_illustration_tag_3d_features_['no... 0.493103 0.489655 clf

2 rows × 11 columns


In [5]:
# Load the prediction results of one experiment collection into a summary
# frame (results_df) and a panel of predictions (preds_panel).
# An earlier run used the 'behance_dec28' collection instead.
collection_name = 'behance_illustration_jan15'

# NOTE(review): absolute path on the author's machine; point this at the
# local results directory when running elsewhere.
results_dirname = '/Users/sergeyk/work/aphrodite/data/results/'

# NOTE(review): force=True presumably bypasses previously cached results --
# confirm against aphrodite.results.load_pred_results.
results_df, preds_panel = aphrodite.results.load_pred_results(
    collection_name, results_dirname, multiclass=False, force=True)

# Prefix handed to vislab.results.multiclass_metrics_feat_comparison below,
# where prediction column names are formed as pred_prefix + '_' + <label>.
pred_prefix = 'pred'

# Single-argument print(...) prints the same text under the Python 2 print
# statement and the Python 3 print function, so the cell parses on both.
print(preds_panel)
print(preds_panel.minor_axis)


Results in collection behance_illustration_jan15: 2
<class 'pandas.core.panel.Panel'>
Dimensions: 2 (items) x 24798 (major_axis) x 3 (minor_axis)
Items axis: clf behance_illustration_tag_3d to clf behance_illustration_tag_logo
Major_axis axis: behance_100006_530811213617146 to behance_9999621_efb52469e67a838d2496de5c7241100d
Minor_axis axis: noise None vw to split
Index([u'noise None vw', u'label', u'split'], dtype='object')

In [21]:
# Load a previously pickled predictions panel from the 'behance_dec28' run.
# NOTE(review): this rebinds preds_panel, which an earlier cell filled from
# the 'behance_illustration_jan15' collection; every later cell sees the
# dec28 data instead.
# NOTE(review): unpickling can execute arbitrary code -- only load files
# from a trusted source.
dec28_panel_path = '/Users/sergeyk/work/aphrodite/data/results/behance_dec28_preds_panel.pickle'
preds_panel = pd.read_pickle(dec28_panel_path)

In [28]:
# Display the cross-section of preds_panel for the minor-axis entry
# 'decaf_fc6 None vw'; the recorded output is a DataFrame with one column per
# panel item (one 'clf behance_illustration_tag_*' column per tag).
# NOTE(review): that entry is not among the minor-axis labels printed for the
# 'behance_illustration_jan15' panel, so this cell appears to depend on the
# 'behance_dec28' pickle having been loaded into preds_panel first -- confirm.
preds_panel.minor_xs('decaf_fc6 None vw')


/Users/karayev/anaconda/lib/python2.7/site-packages/pandas/core/config.py:570: DeprecationWarning: height has been deprecated.

  warnings.warn(d.msg, DeprecationWarning)
Out[28]:
<class 'pandas.core.frame.DataFrame'>
Index: 24619 entries, behance_100006_530811213617146 to behance_9999621_efb52469e67a838d2496de5c7241100d
Data columns (total 20 columns):
clf behance_illustration_tag_3d            24619  non-null values
clf behance_illustration_tag_animals       24619  non-null values
clf behance_illustration_tag_city          24619  non-null values
clf behance_illustration_tag_fantasy       24619  non-null values
clf behance_illustration_tag_food          24619  non-null values
clf behance_illustration_tag_girl          24619  non-null values
clf behance_illustration_tag_ink           24619  non-null values
clf behance_illustration_tag_lettering     24619  non-null values
clf behance_illustration_tag_logo          24619  non-null values
clf behance_illustration_tag_minimal       24619  non-null values
clf behance_illustration_tag_nature        24619  non-null values
clf behance_illustration_tag_pencil        24619  non-null values
clf behance_illustration_tag_portrait      24619  non-null values
clf behance_illustration_tag_retro         24619  non-null values
clf behance_illustration_tag_skull         24619  non-null values
clf behance_illustration_tag_surreal       24619  non-null values
clf behance_illustration_tag_vector        24619  non-null values
clf behance_illustration_tag_vintage       24619  non-null values
clf behance_illustration_tag_watercolor    24619  non-null values
clf behance_illustration_tag_wood          24619  non-null values
dtypes: object(20)

In [26]:
# Compute multi-class metrics for every entry on the panel's minor axis plus
# one extra entry named 'random' (presumably a random-prediction baseline;
# the callee has a `random_preds` argument -- confirm).
# NOTE(review): the minor axis printed for the 'behance_illustration_jan15'
# panel also holds 'label' and 'split'; verify that those are meant to be
# evaluated as features.
# NOTE(review): the recorded run of this cell failed with
#     KeyError: "['pred_pred'] not in index"
# inside vislab.results.multiclass_metrics, where prediction column names are
# built as pred_prefix + '_' + <label column>. That suggests a label column
# literally named 'pred', or a mismatch between pred_prefix and the panel's
# column naming -- TODO confirm before trusting any output of this cell.
feature_names = preds_panel.minor_axis.tolist()
feature_names.append('random')

mc_metrics = vislab.results.multiclass_metrics_feat_comparison(
    preds_panel,
    label_df,
    pred_prefix,
    features=feature_names,
    balanced=True,
    with_plot=False,
    with_print=True)


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-26-9125c013006b> in <module>()
      1 mc_metrics = vislab.results.multiclass_metrics_feat_comparison(
      2     preds_panel, label_df, pred_prefix, features=preds_panel.minor_axis.tolist() + ['random'],
----> 3     balanced=True, with_plot=False, with_print=True)

/Users/karayev/work/vislab/vislab/results.pyc in multiclass_metrics_feat_comparison(preds_panel, source_label_df, pred_prefix, features, balanced, with_plot, with_print, nice_feat_names)
    140         feat_metrics[feature] = multiclass_metrics(
    141             mc_pred_df, pred_prefix, balanced, random_preds,
--> 142             with_plot, with_print)
    143 
    144     all_metrics = {'feat_metrics': feat_metrics}

/Users/karayev/work/vislab/vislab/results.pyc in multiclass_metrics(mc_pred_df, pred_prefix, balanced, random_preds, with_plot, with_print, min_pos)
    220     good_pred_cols = [pred_prefix + '_' + x for x in good_cols]
    221     label_df = label_df[good_cols]
--> 222     pred_df = pred_df[good_pred_cols]
    223 
    224     label_cols = label_df.columns.tolist()

/Users/karayev/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc in __getitem__(self, key)
   1993         if isinstance(key, (np.ndarray, list)):
   1994             # either boolean or fancy integer index
-> 1995             return self._getitem_array(key)
   1996         elif isinstance(key, DataFrame):
   1997             return self._getitem_frame(key)

/Users/karayev/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc in _getitem_array(self, key)
   2029             return self.take(indexer, axis=0, convert=False)
   2030         else:
-> 2031             indexer = self.ix._convert_to_indexer(key, axis=1)
   2032             return self.take(indexer, axis=1, convert=True)
   2033 

/Users/karayev/anaconda/lib/python2.7/site-packages/pandas/core/indexing.pyc in _convert_to_indexer(self, obj, axis)
    608                 mask = check == -1
    609                 if mask.any():
--> 610                     raise KeyError('%s not in index' % objarr[mask])
    611 
    612                 return indexer

KeyError: "['pred_pred'] not in index"
Only taking 'test' split predictions.
********************noise None vw********************