In [1]:
%load_ext autoreload
%autoreload 2
import os
import sys
import sklearn.metrics
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from collections import defaultdict
import vislab.datasets
import vislab.results
import vislab._results
In [2]:
# Flickr dataset frame; later cells use it as the source of labels and image URLs.
flickr_dataset = vislab.datasets.flickr
label_df = flickr_dataset.get_df()
# Show the underscored style names exposed by the dataset module.
print(flickr_dataset.underscored_style_names)
In [3]:
# Key of the prediction collection analysed in the cells below.
collection_name = 'flickr_mar23'
# Results are kept under <shared_data>/results; the value returned by
# makedirs is what the rest of the notebook uses as the directory path.
shared_data_dirname = vislab.config['paths']['shared_data']
results_dirname = vislab.util.makedirs('{}/results'.format(shared_data_dirname))
In [4]:
# Handle on the 'predict' database collection named by `collection_name`.
mongo_client = vislab.util.get_mongodb_client()
c = mongo_client['predict'][collection_name]
# Disabled cleanup, kept for reference:
# if c.find({'features': 'noise'}).count() > 0:
# c.remove({'features': 'noise'})
# Display every document currently in the collection as a table.
pd.DataFrame(list(c.find()))
Out[4]:
In [5]:
# Load the prediction results for the collection (force=False is passed through
# unchanged; presumably it allows reuse of previously stored results -- confirm).
df, preds_panel = vislab._results.load_pred_results(
    collection_name,
    results_dirname,
    multiclass=True,
    force=False,
)
# Print the panel's shape followed by the labels on each of its three axes.
for panel_attr in ('shape', 'items', 'major_axis', 'minor_axis'):
    print(getattr(preds_panel, panel_attr))
# Prefix handed to the metrics helpers to locate prediction columns.
pred_prefix = 'pred'
In [8]:
# Work with a single setting (one slice along the panel's minor axis)
# and a single ground-truth column.
setting_name = 'caffe_fc6 None vw'
pred_df = preds_panel.minor_xs(setting_name)
gt_col = 'style_Vintage'

# Row masks for the two splits used below.
is_val_row = pred_df['split'] == 'val'
is_test_row = pred_df['split'] == 'test'

# Learn the threshold on the validation rows only...
best_threshold, val_accs = vislab.results.learn_accuracy_threshold(
    pred_df[is_val_row], gt_col)
# ...then score the held-out test rows at that threshold.
test_acc = vislab.results.pred_accuracy_at_threshold(
    pred_df[is_test_row], gt_col, best_threshold, verbose=True)

report_template = "Accuracy on test, at threshold ({:.3f}) learned on val: {:.3f}"
print(report_template.format(best_threshold, test_acc))
In [9]:
# Sanity check: are val preds lining up with test preds?
# Both curves are evaluated on the same explicit threshold grid so that the
# x-axis matches the accuracies being plotted. The `val_accs` from the earlier
# cell came from a call that was not given this grid, so it is not reused here.
thresholds = np.logspace(-2, 0, 20) - 1
_, val_accs_on_grid = vislab.results.learn_accuracy_threshold(
    pred_df[pred_df['split'] == 'val'], gt_col, thresholds)
# `gt_col` is the label column defined with `pred_df`; the previous
# `label_name` was never defined anywhere in the notebook.
_, test_accs = vislab.results.learn_accuracy_threshold(
    pred_df[pred_df['split'] == 'test'], gt_col, thresholds)

fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(thresholds, val_accs_on_grid, 's-', label='val')
ax.plot(thresholds, test_accs, 'o--', label='test')
ax.set_xlabel('threshold')
ax.set_ylabel('accuracy')
plt.legend()
Out[9]:
In [11]:
# Now find the accuracies for all labels and settings.
# The cache file sits in the results directory and is named after the collection.
cache_basename = collection_name + '_thresholds_and_accs.h5'
cache_filename = results_dirname + '/' + cache_basename
threshold_df, acc_df = vislab.results.learn_accuracy_thresholds_for_preds_panel(
    preds_panel,
    cache_filename,
)
In [19]:
# Display the accuracy table ordered by the 'caffe_fc6 None vw' column.
# NOTE(review): DataFrame.sort is the legacy pandas API this notebook is written against.
acc_df.sort('caffe_fc6 None vw')
Out[19]:
In [13]:
# Display the thresholds returned together with acc_df by
# learn_accuracy_thresholds_for_preds_panel.
threshold_df
Out[13]:
In [6]:
# Display names for the settings that appear in tables and figures.
nice_feat_names = dict([
    # ('imagenet None vw', 'Imagenet'),
    ('caffe_fc6 None vw', 'Caffe FC6'),
    ('caffe_fc7 None vw', 'Caffe FC7'),
    ('mc_bit None vw', 'MC binary'),
    ('fusion_flickr_mar23_all,pascal_mc_for_fusion_flickr_mar23 fp vw', 'Late fusion x content'),
    ('random', 'Random'),
])

# Compare every setting in the panel, plus the extra 'random' entry.
features_to_compare = preds_panel.minor_axis.tolist() + ['random']
mc_metrics = vislab.results.multiclass_metrics_feat_comparison(
    preds_panel, label_df, pred_prefix,
    features=features_to_compare,
    nice_feat_names=nice_feat_names,
    balanced=True, with_plot=False, with_print=False)

# Private copy of the AP table so later edits leave mc_metrics untouched.
ap_df = mc_metrics['ap_df'].copy()
In [36]:
# Look at what is stored for one setting, then display its binary-metrics table.
fc6_metrics = mc_metrics['feat_metrics']['caffe_fc6 None vw']
print(fc6_metrics.keys())
fc6_metrics['binary_metrics_df']
Out[36]:
In [8]:
# Confusion table of the late-fusion setting, cast to float for plotting.
fusion_setting = 'fusion_flickr_mar23_all,pascal_mc_for_fusion_flickr_mar23 fp vw'
conf_df = mc_metrics['feat_metrics'][fusion_setting]['conf_df'].astype(float)

# Drop the 'style_' prefix from both axes so the figure labels are shorter.
conf_df.index = [x.replace('style_', '') for x in conf_df.index]
conf_df.columns = [x.replace('style_', '') for x in conf_df.columns]

fig = vislab.dataset_viz.plot_conditional_occurrence(conf_df, sort_by_prior=False, font_size=14)

# Resolve the write-up folder relative to the current user's home directory
# (as the image-download helper in this notebook already does) rather than
# hard-coding one user's absolute path.
conf_fig_filename = os.path.expanduser(
    '~/dropbox_work/aphrodite-writeup/figures/evaluation/flickr_conf.pdf')
fig.savefig(conf_fig_filename, bbox_inches='tight')
In [43]:
# Work on a copy: the index is rewritten below, and doing that on the frame
# stored inside mc_metrics would silently change it for every other cell.
ap_df = mc_metrics['ap_df'].copy()

# Order the columns by descending value in the '_mean' row.
column_order = ap_df.columns[(-ap_df.ix['_mean']).argsort().values]
ap_df.index = [x.replace('style_', '') for x in ap_df.index]
ap_df = ap_df.reindex_axis(column_order, axis=1)

# Resolve the write-up folder relative to the current user's home directory
# rather than hard-coding one user's absolute path.
writeup_dirname = os.path.expanduser('~/dropbox_work/aphrodite-writeup')
ap_df.to_csv(writeup_dirname + '/results/flickr_ap_df.csv')

fig = vislab.results_viz.plot_df_bar(ap_df, fontsize=14)
fig.savefig(writeup_dirname + '/figures/flickr_ap_barplot.pdf', bbox_inches='tight')
In [44]:
# Display the current AP table.
ap_df
Out[44]:
In [39]:
# Leave the 'Random' column out of the LaTeX table. The guard keeps the cell
# re-runnable: an unconditional `del` raises KeyError the second time.
if 'Random' in ap_df.columns:
    del ap_df['Random']


def _format_ap_cell(value):
    """Render one table cell: three decimals, or '-' for a missing value."""
    return '-' if np.isnan(value) else '%.3f' % value


print(ap_df.to_latex(float_format=_format_ap_cell))
In [31]:
from IPython.display import HTML
def top_k_images(df, k=10):
    """Build an HTML strip showing the first ``k`` rows of ``df`` as thumbnails.

    Each row becomes an inline ``<div>`` holding an ``<img>`` whose source is the
    row's ``'image_url'`` value, with the row's index label printed underneath.
    The fragments are joined with single spaces; an empty frame gives ``''``.
    """
    cell_template = (
        '<div style="display: inline-block;">'
        '<img src="{}" width="210px" /><br />{}</div>'
    )
    cells = []
    for _, row in df[:k].iterrows():
        cells.append(cell_template.format(row['image_url'], row.name))
    return ' '.join(cells)
import subprocess
import shlex
import re
def top_images_for_style(df, style, split=None, k=5,
                         figures_dirname='~/work/aphrodite-writeup/figures/flickr_on_flickr/'):
    """Download the top-scoring images for ``style`` and return an HTML preview.

    Rows are optionally restricted to ``split``, sorted by the ``style`` column
    in descending order, and the first ``k`` images are fetched with ``wget``
    into ``<figures_dirname>/<style>/`` as ``0.jpg``, ``1.jpg``, ...  Two
    resized/cropped variants of every ``*.jpg`` in that folder are then written
    to its ``h/`` and ``w/`` subfolders via ``find ... -exec convert``.

    Parameters
    ----------
    df : DataFrame with an ``'image_url'`` column, a ``style`` column and,
        when ``split`` is given, a ``'split'`` column.
    style : name of the column to rank by (also used as the folder name).
    split : if not None, only rows whose ``'split'`` equals this value are used.
    k : number of top images to download and show (default 5, as before).
    figures_dirname : base output folder; ``~`` is expanded.

    Returns
    -------
    str : an ``<h4>`` title followed by the ``top_k_images`` strip.

    Notes
    -----
    Exit codes of the external commands are ignored, so a failed download or
    conversion does not stop the notebook (unchanged best-effort behaviour).
    NOTE(review): ``-depth 1`` is passed to ``find`` exactly as before; this
    looks like BSD/macOS ``find`` syntax, and ``convert`` is presumably
    ImageMagick's -- confirm on the machine used.
    """
    df_ = df if split is None else df[df['split'] == split]
    title = '<h4>{}, results: {}</h4>'.format(style, df_.shape[0])
    # The legacy DataFrame.sort returns a new frame, so the caller's frame is not modified.
    df_ = df_.sort(style, ascending=False)

    # Create <base>/<style>/ plus its w/ and h/ subfolders.
    base_dirname = os.path.expanduser(figures_dirname)
    dirname = vislab.util.makedirs(os.path.join(base_dirname, style) + '/')
    vislab.util.makedirs(os.path.join(base_dirname, style, 'w') + '/')
    vislab.util.makedirs(os.path.join(base_dirname, style, 'h') + '/')

    # Download with an explicit argument list: the URL is passed to wget as a
    # single argument, so quotes or spaces in it cannot split the command.
    for counter, (_, row) in enumerate(df_[:k].iterrows()):
        download_cmd = ['wget', '{}'.format(row['image_url']), '-O', '{}.jpg'.format(counter)]
        subprocess.call(download_cmd, cwd=dirname)

    # The conversions operate on every *.jpg in the folder, so they only need
    # to run once, after all downloads have finished.
    variants = (
        ('h', 'x310', '192x310+0+0'),
        ('w', '500', '500x310+0+0'),
    )
    for subdir, resize_geometry, crop_geometry in variants:
        convert_cmd = [
            'find', '.', '-name', '*.jpg', '-depth', '1',
            '-exec', 'convert', '{}',
            '-resize', resize_geometry,
            '-gravity', 'Center',
            '-crop', crop_geometry,
            '-density', '300',
            '-units', 'PixelsPerInch',
            subdir + '/{}', ';',
        ]
        subprocess.call(convert_cmd, cwd=dirname)

    return title + top_k_images(df_, k=k)
In [33]:
# Attach the image URLs from the label table (aligned on the index) so the
# gallery helper can fetch and render them.
pred_df['image_url'] = label_df['image_url']

# NOTE(review): the first three style names are skipped; the reason is not
# visible in this notebook -- confirm that the slice is intentional.
styles_to_show = vislab.datasets.flickr.underscored_style_names[3:]

# NOTE(review): the heading mentions Pinterest data, while `pred_df` is built
# from the Flickr collection earlier in this notebook -- confirm the wording.
gallery_heading = '<h2>Flickr-learned style on all Pinterest data</h2>'
gallery_body = ' '.join(
    top_images_for_style(pred_df, 'pred_' + style, 'test')
    for style in styles_to_show)
HTML(gallery_heading + gallery_body)
Out[33]:
In [29]:
# Switch to the 'pinterest_80k_on_flickr_mar23' collection. This rebinds both
# `df` and `preds_panel`; force=True is passed through unchanged.
pinterest_collection_name = 'pinterest_80k_on_flickr_mar23'
df, preds_panel = vislab._results.load_pred_results(
    pinterest_collection_name,
    results_dirname,
    multiclass=True,
    force=True,
)
pred_prefix = 'pred'
# List the settings available along the panel's minor axis.
print(preds_panel.minor_axis)
In [31]:
# Get correct labels: overwrite/add every label_df column in each setting's frame.
# The per-setting frame gets its own name (`feat_df`) so that the `df` returned
# by load_pred_results is not clobbered by this loop.
frames_by_feat = {}
for feat in preds_panel.minor_axis:
    feat_df = preds_panel.minor_xs(feat)
    for col in label_df.columns:
        # Column assignment aligns label_df's values on feat_df's index.
        feat_df[col] = label_df[col]
    frames_by_feat[feat] = feat_df

# Rebuild the panel keyed by setting, then swap axes so the settings are back
# on the minor axis, as in the panel that was loaded.
preds_panel = pd.Panel(frames_by_feat).swapaxes('minor', 'items')
In [36]:
# Same multiclass comparison as for the Flickr-trained settings, now run on the
# relabelled Pinterest-trained panel (again with the extra 'random' entry).
pinterest_features = preds_panel.minor_axis.tolist() + ['random']
pinterest_on_flickr_mc_metrics = vislab.results.multiclass_metrics_feat_comparison(
    preds_panel, label_df, pred_prefix,
    features=pinterest_features,
    nice_feat_names=nice_feat_names,
    balanced=True, with_plot=False, with_print=False)
In [37]:
# Display the AP column of the 'caffe_fc6 False vw' setting from the Pinterest-trained metrics.
pinterest_on_flickr_mc_metrics['ap_df']['caffe_fc6 False vw']
Out[37]:
In [115]:
# Add the Pinterest-trained AP next to the Flickr-trained one.
# Assigning the Series directly would align on index labels, and ap_df's index
# may already have had its 'style_' prefix stripped while the Pinterest table
# still carries it -- every style would then come out as NaN. Match rows on the
# prefix-free name instead, which works whichever form ap_df's index is in.
pinterest_ap = pinterest_on_flickr_mc_metrics['ap_df']['caffe_fc6 False vw']
pinterest_ap_by_name = dict(
    (label.replace('style_', ''), value)
    for label, value in zip(pinterest_ap.index, pinterest_ap.values))
ap_df['Caffe FC6, Pinterest-trained'] = [
    pinterest_ap_by_name.get(label.replace('style_', ''), np.nan)
    for label in ap_df.index]

# Absolute gap, gap as a fraction of the Flickr-trained AP, and the ratio of
# Pinterest-trained to Flickr-trained AP.
ap_df['Difference'] = ap_df['Caffe FC6'] - ap_df['Caffe FC6, Pinterest-trained']
ap_df['Difference %'] = ap_df['Difference'] / ap_df['Caffe FC6']
ap_df['%'] = ap_df['Caffe FC6, Pinterest-trained'] / ap_df['Caffe FC6']
ap_df
Out[115]:
In [117]:
# Ratio of Pinterest-trained to Flickr-trained AP per style, ascending.
diff = ap_df.sort('%')['%']
# '_mean' is the aggregate row of the AP table, not a style, and a NaN ratio
# would turn the percentiles below into NaN -- leave both out of the summary.
diff = diff[diff.index != '_mean'].dropna()


def _style_list(ratios):
    """Comma-separated, alphabetically sorted style names of a ratio Series.

    Any 'style_' prefix is removed by name rather than by slicing off six
    characters, so labels that are already prefix-free are left intact.
    """
    return ', '.join(sorted(label.replace('style_', '') for label in ratios.index))


# Top quartile: styles whose Pinterest-trained AP is closest to (or above) the Flickr-trained AP.
diff_top = diff[diff >= np.percentile(diff, 75)]
print('Styles that are most transferrable from Pinterest to Flickr (mean Pinterest-trained AP: {:.1f}% of Flickr-trained AP): {}.'.format(
    diff_top.values.mean() * 100,
    _style_list(diff_top),
))

# Bottom quartile: styles that lose the most when trained on Pinterest.
diff_bottom = diff[diff <= np.percentile(diff, 25)]
print('Styles that are least transferrable from Pinterest to Flickr (mean {:.1f}% of Flickr-trained AP): {}.'.format(
    diff_bottom.values.mean() * 100,
    _style_list(diff_bottom),
))