In [127]:
%load_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from collections import defaultdict
import vislab
import vislab.datasets
import vislab._results
import vislab.results
import sklearn.metrics
pd.options.display.float_format = '{:.2f}'.format
In [128]:
# Name of the prediction-results collection to analyze.
collection_name = 'flickr_mar23'
# Shared results directory (created if missing) and the HDF5 cache of
# per-style decision thresholds and accuracies computed in an earlier run.
results_dirname = vislab.util.makedirs(vislab.config['paths']['shared_data'] + '/results')
cache_filename = '{}/{}_thresholds_and_accs.h5'.format(results_dirname, collection_name)
In [129]:
# Ground-truth style labels collected from Amazon Mechanical Turk workers.
# NOTE(review): hardcoded absolute local path -- this only runs on one
# machine. Consider moving it into vislab.config['paths'] alongside
# 'shared_data' so the notebook is portable.
MTURK_RESULTS_PATH = '/Users/sergeyk/Dropbox/mturk-results/mturk-results.h5'
turk_df = pd.read_hdf(MTURK_RESULTS_PATH, 'df')

# Sanity checks: a sample of one style's columns (confidence, MTurk tag,
# Flickr label), overall shape, and the train/test split counts.
# Single-argument print(...) behaves identically under Python 2 and 3.
print(turk_df[['conf_Bright', 'tagged_Bright', 'style_Bright']].dropna().head())
print(turk_df.shape)
print(turk_df['_split'].value_counts())
In [130]:
# Load per-image style predictions for this collection.
df, preds_panel = vislab._results.load_pred_results(
    collection_name, results_dirname,
    multiclass=True, force=False)
# Keep only the 'caffe_fc6 None vw' predictor's scores, restricted to
# the test split.
preds_df = preds_panel.minor_xs('caffe_fc6 None vw').copy()
preds_df = preds_df[preds_df['split'] == 'test']
print preds_df.shape
# Previously cached per-style accuracies and decision thresholds
# (written to cache_filename by an earlier run).
accs_df = pd.read_hdf(cache_filename, 'acc_df')
threshold_df = pd.read_hdf(cache_filename, 'threshold_df')
# Thresholds for the same predictor selected above.
tdf = threshold_df['caffe_fc6 None vw'].copy()
print accs_df.head()
print accs_df.mean(0)
print threshold_df.head()
In [131]:
styles = list(set(vislab.datasets.flickr.underscored_style_names) - set(['style_Bokeh', 'style_Texture']))
accuracies = defaultdict(dict)
for style in styles:
tag_name = style.replace('style_', 'tagged_')
df_ = turk_df.dropna(subset=[tag_name])
ind = vislab.results.get_balanced_dataset_ind(df_, style)
df_ = df_.iloc[ind]
accuracies['MTurk accuracy, Flickr g.t.'][style] = sklearn.metrics.accuracy_score(
df_[style], df_[tag_name].astype(bool))
for style in styles:
accuracies['Our accuracy, Flickr g.t.'][style] = vislab.results.pred_accuracy_at_threshold(preds_df, style, tdf[style])
for style in styles:
pdf = preds_df.copy()
pdf[style] = turk_df[style.replace('style_', 'tagged_')]
pdf = pdf.dropna()
accuracies['Our accuracy, MTurk g.t.'][style] = vislab.results.pred_accuracy_at_threshold(pdf, style, tdf[style])
acc_df = pd.DataFrame(accuracies) * 100
acc_df.index = [
vislab.datasets.flickr.style_names[vislab.datasets.flickr.underscored_style_names.index(_)]
for _ in acc_df.index
]
print acc_df.mean(0)
acc_df
Out[131]:
In [132]:
# Distribution summary of the three accuracy columns.
acc_df.describe()
Out[132]:
In [133]:
# LaTeX version of the full accuracy table (for the paper).
print acc_df.to_latex()
In [134]:
# Display the styles that experience more than a 5% relative change in
# accuracy when switching from Flickr to MTurk ground truth.
# (acc_df values are already in percent; this column is relative change.)
name = '% change going from Flickr to MTurk g.t.'
acc_df[name] = 100. * (acc_df['Our accuracy, MTurk g.t.'] - acc_df['Our accuracy, Flickr g.t.']) / acc_df['Our accuracy, Flickr g.t.']
# sort_values: DataFrame.sort was deprecated in pandas 0.17 and removed
# in 0.20; sort_values(by=...) is the drop-in replacement.
acc_df = acc_df.sort_values(by=name)
columns = ['Our accuracy, Flickr g.t.', 'Our accuracy, MTurk g.t.', name]
acc_df[acc_df[name].abs() > 5][columns]
Out[134]:
In [119]:
# LaTeX version of the ground-truth-change table above.
# NOTE(review): execution count In[119] is out of order relative to the
# surrounding cells -- re-run the notebook top to bottom before sharing.
print acc_df[acc_df[name].abs() > 5][columns].to_latex()
In [135]:
# Display the styles whose accuracy differs by more than 5 percentage
# points between our classifier and the MTurk workers, both evaluated
# against Flickr ground truth. (acc_df values are in percent, so the
# threshold of 5 below means 5 percentage points.)
name = 'Accuracy diff. between us and MTurk'
acc_df[name] = acc_df['Our accuracy, Flickr g.t.'] - acc_df['MTurk accuracy, Flickr g.t.']
# sort_values: DataFrame.sort was deprecated in pandas 0.17 and removed
# in 0.20; sort_values(by=...) is the drop-in replacement.
acc_df = acc_df.sort_values(by=name)
columns = ['MTurk accuracy, Flickr g.t.', 'Our accuracy, Flickr g.t.', name]
acc_df[acc_df[name].abs() > 5][columns]
Out[135]:
In [136]:
# LaTeX version of the us-vs-MTurk difference table above.
print acc_df[acc_df[name].abs() > 5][columns].to_latex()