In [28]:
%load_ext autoreload
%autoreload 2
import sys
import uuid
import math
import urllib
import urlparse
import time
import numpy as np
from boto.mturk.connection import *
from boto.mturk.question import ExternalQuestion
from boto.mturk.qualification import *
from boto.mturk.price import *
import pandas as pd
import vislab.datasets
In [3]:
%matplotlib inline
import matplotlib.pyplot as plt
In [5]:
answer_df = pd.read_hdf( '/Users/sergeyk/Dropbox/mturk-results/mturk-answers.h5', 'df' )
print len( answer_df )
answer_df.head()
Out[5]:
In [45]:
result_df = pd.read_hdf( '/Users/sergeyk/Dropbox/mturk-results/mturk-results.h5', 'df' )
print len( result_df )
result_df.head()
Out[45]:
In [7]:
style_cols = [name for name in result_df.columns if 'style' in name]
style_cols.remove( 'style_Bokeh' )
style_cols.remove( 'style_Texture' )
print style_cols
In [8]:
# Find the single 'tagged_*' column that actually has answers for an assignment
# (the other tagged columns are all NaN and get dropped).
def assignment_df_tag( df, drop=True ):
    if drop: df = df.dropna( axis=1 )
    return [name for name in df.columns.tolist() if 'tagged' in name][0]

def assignment_series_tag( ser, drop=True ):
    if drop: ser = ser.dropna()
    return [name for name in ser.index.tolist() if 'tagged' in name][0]
In [9]:
test_df = result_df[ result_df['_split'] == 'test' ]
In [10]:
[ ( len( test_df[ test_df[sn] == True ] ), sn ) for sn in style_cols ]
Out[10]:
In [11]:
# total number of unique workers
unique_workers = answer_df.drop_duplicates( cols=['worker_id'] )['worker_id'].tolist()
print len( unique_workers )
In [12]:
answers_per_worker = [ len( answer_df[ answer_df['worker_id'] == worker_id ] )
for worker_id in unique_workers ]
answers_per_worker = pd.DataFrame( answers_per_worker, index=unique_workers, columns=['count'] )
answers_per_worker.sort( columns='count', ascending=False, inplace=True )
In [13]:
answers_per_worker.head(15)
Out[13]:
In [14]:
plt.rcParams['figure.figsize'] = ( 16, 8 )
plt.plot( range( len( answers_per_worker ) ), answers_per_worker['count'] )
plt.ylabel( 'num answers' )
plt.xlabel( 'worker' )
Out[14]:
In [15]:
assignments = answer_df.drop_duplicates( cols=['assignment_id'] )['assignment_id'].tolist()
print len( assignments )
assign_means = []
for assign in assignments:
assign_df = answer_df[ answer_df['assignment_id'] == assign ]
tag_col = assignment_df_tag( assign_df )
assign_means += [ assign_df[ tag_col ].astype( 'int' ).mean() ]
In [ ]:
print len( assign_means )
print assign_means[:10]
In [ ]:
plt.rcParams['figure.figsize'] = ( 8, 6 )
plt.hist( assign_means, bins=10, range=(0,1) )
plt.xlabel( 'percent answered true' )
plt.ylabel( 'number of assignments' )
axs = plt.axis()
axs = [ 0, 1 ] + list( axs[2:] )
plt.axis( axs )
In this plot, we've looked at each assignment individually and computed the percent of its 10 questions that were answered true. The plot is a histogram of those percentages. Across all assignments, we see a reasonably good distribution centered around 50% of the images being of the style, with a slight bias towards less than half.
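As a cross-check, the same per-assignment fractions can be computed with a single groupby. This is just a sketch of the equivalent computation, assuming (as assignment_df_tag does) that each assignment's rows have exactly one non-null 'tagged_*' column; assignment_fraction_true is a hypothetical helper, not used elsewhere.
In [ ]:
def assignment_fraction_true( assign_df ):
    # Fraction of this assignment's questions that were answered True.
    tag_col = assignment_df_tag( assign_df )
    return assign_df[ tag_col ].astype( 'int' ).mean()

groupby_means = answer_df.groupby( 'assignment_id' ).apply( assignment_fraction_true )
# Should agree with the loop above, up to ordering.
print np.allclose( sorted( groupby_means.values ), sorted( assign_means ) )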
In [16]:
assign_allone_df = pd.DataFrame()
for assign in assignments:
    assign_df = answer_df[ answer_df['assignment_id'] == assign ]
    tag_col = assignment_df_tag( assign_df )
    update = False
    # Flag assignments where every question got the same answer (all True or all False).
    if len( assign_df[ assign_df[tag_col] == True ] ) == len( assign_df ):
        update = True
    if len( assign_df[ assign_df[tag_col] == False ] ) == len( assign_df ):
        update = True
    if update:
        assign_allone_df = pd.concat( [ assign_allone_df, assign_df ] )
In [ ]:
assign_allone_df.head()
In [ ]:
assign_allone_df = assign_allone_df.drop_duplicates( cols=['assignment_id'] )
assign_allone_df
In [ ]:
allone_assigns = [ ( row['worker_id'], assignment_series_tag( row ) )
for index, row in assign_allone_df.iterrows() ]
allone_assigns
I'd be interested in any other suggestions on how to validate worker answers.
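One additional check that might help (a sketch only, using one style as an example): compare each worker's answers against the per-image majority vote and look at the workers with the lowest agreement. This assumes, as the cells further down suggest, that result_df is indexed by image id and that each row's 'tagged_*' value is a single worker's answer.
In [ ]:
tn = 'tagged_' + style_cols[0][6:]
votes = result_df.dropna( subset=[tn] )
# Per-image majority vote, broadcast back to every row for that image.
majority = votes.groupby( votes.index )[tn].transform( lambda x: x.mean() >= 0.5 )
# Fraction of each worker's answers that agree with the majority; lowest agreement first.
agreement = ( votes[tn] == majority ).groupby( votes['worker_id'] ).mean()
agreement.order().head( 10 )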
In [17]:
style_means = []
for sn in style_cols:
tn = 'tagged_' + sn[6:]
style_df = answer_df[ answer_df[tn].notnull() ]
assignments = style_df.drop_duplicates( cols=['assignment_id'] )['assignment_id'].tolist()
means = [ style_df[ style_df['assignment_id'] == assign ][tn].astype( 'int' ).mean()
for assign in assignments ]
style_means += [ means ]
In [18]:
print len( style_means )
In [19]:
plt.rcParams['figure.figsize'] = ( 16, 18 )
for i, m in enumerate( style_means ):
    plt.subplot( 6, 3, i+1 )
    plt.title( style_cols[i][6:] )
    plt.ylabel( 'n. assignments' )
    plt.hist( m, bins=10, range=(0,1) )
    axs = plt.axis()
    axs = [ 0, 1 ] + list( axs[2:] )
    plt.axis( axs )
Plots similar to the above all-assignment plot, but broken down by style. We selected all the assignments corresponding to a given style, computed the percent tagged true in each assignment, and created a histogram.
Most styles are pretty well distributed. Long_Exposure, Pastel, Romantic, and Melancholy all have biases toward marking less than 50% true, while several others have slight biases towards marking more than 50% true.
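To put a rough number on those biases, here is a small sketch using the style_means computed above: the mean per-assignment fraction marked true for each style, where values below 0.5 indicate a bias toward marking fewer than half the images true.
In [ ]:
# Mean per-assignment fraction marked true, per style, smallest first.
pd.Series( [ np.mean( m ) for m in style_means ],
           index=[ name[6:] for name in style_cols ] ).order()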
In [ ]:
turker_means = []
for turker in answers_per_worker.index.tolist():
worker_df = answer_df[ answer_df['worker_id'] == turker ]
assignments = worker_df.drop_duplicates( cols=['assignment_id'] )['assignment_id'].tolist()
means = []
for assign in assignments:
assign_df = worker_df[ worker_df['assignment_id'] == assign ]
assign_df = assign_df.dropna( axis=1 )
tag_cols = [name for name in assign_df.columns.tolist() if 'tagged' in name]
means += [ assign_df[ tag_cols[0] ].astype( 'int' ).mean() ]
turker_means += [ means ]
In [ ]:
plt.rcParams['figure.figsize'] = ( 16, 16 )
for i, m in enumerate( turker_means[:15] ):
    plt.subplot( 6, 3, i+1 )
    # Worker ids must come from answers_per_worker.index, which is the count-sorted
    # order that turker_means was built in (unique_workers is in a different order).
    plt.title( answers_per_worker.index[i] + ' : ' + str(len(m)) )
    plt.ylabel( 'n. assignments' )
    plt.hist( m, bins=5, range=(0,1) )
    axs = plt.axis()
    axs = [ 0, 1 ] + list( axs[2:] )
    plt.axis( axs )
Same style of plot, but broken down by turker, showing the top 15 turkers, all of whom completed at least 10 HITs. Again, a strong central distribution.
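A quick sanity check of the "at least 10 HITs" claim above (a sketch that counts distinct assignments per worker, treating each assignment as one completed HIT):
In [ ]:
# Minimum number of distinct assignments among the 15 most prolific workers.
hits_per_worker = answer_df.groupby( 'worker_id' )['assignment_id'].apply( lambda x: len( x.unique() ) )
print hits_per_worker[ answers_per_worker.index[:15] ].min()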
In [20]:
flickr_df = vislab.datasets.flickr.get_df()
In [21]:
sn = style_cols[0]
tn = 'tagged_' + sn[6:]
print sn, tn
In [22]:
print len( result_df )
print len( result_df[ result_df[tn].isnull() ] )
print len( result_df ) - len( result_df[ result_df[tn].isnull() ] )
In [23]:
tag_df = result_df.dropna( subset=[tn] )
In [24]:
tag_df.head(9)[ [ 'assignment_id', 'hit_id', 'worker_id', tn, sn ] ]
Out[24]:
In [25]:
len( tag_df.dropna( subset=[sn] ) )
Out[25]:
In [29]:
pred_list = tag_df[tn].astype(int)
truth_list = tag_df[sn].astype(int)
print len( pred_list ) == len( truth_list )
print np.array( [ p == t for p,t in zip( pred_list.index, truth_list.index )] ).all()
In [30]:
import sklearn.metrics
In [31]:
print sklearn.metrics.accuracy_score( truth_list, pred_list )
print sklearn.metrics.precision_recall_fscore_support( truth_list, pred_list, pos_label=1, average='micro' )
In [43]:
result_df.shape
Out[43]:
In [41]:
acc_l = []
pre_l = []
rec_l = []
len_l = []
cor_l = []
for sn in style_cols:
tn = 'tagged_' + sn[6:]
tag_df = test_df.dropna( subset=[tn] )
pred_list = tag_df[tn].astype(int)
truth_list = tag_df[sn].astype(int)
acc = sklearn.metrics.accuracy_score( truth_list, pred_list )
prec_rec = sklearn.metrics.precision_recall_fscore_support( truth_list, pred_list, pos_label=1, average='micro' )
acc_l += [ acc ]
pre_l += [ prec_rec[0] ]
rec_l += [ prec_rec[1] ]
len_l += [ len( truth_list ) ]
cor_l += [ len( [ t for t in truth_list if t ] ) / float( len( truth_list ) ) ]
d = {
'accuracy': pd.Series( acc_l, index=style_cols ),
'precision': pd.Series( pre_l, index=style_cols ),
'recall': pd.Series( rec_l, index=style_cols ),
'_length': pd.Series( len_l, index=style_cols ),
'_%true': pd.Series( cor_l, index=style_cols ),
}
In [40]:
acc_df = pd.DataFrame( d )
# acc_df.to_hdf('/Users/sergeyk/Dropbox/mturk-results/acc_df.h5', 'df', mode='w')
acc_df
Out[40]:
In [108]:
sn = style_cols[1]
tn = 'tagged_' + sn[6:]
cn = 'conf_' + tn[7:]
print sn, tn, cn
In [109]:
tag_df = result_df.dropna( subset=[tn] )
In [110]:
images = tag_df.index.unique()
print len( images )
print images[:10]
In [111]:
img = images[2]
In [112]:
temp_df = tag_df.ix[ img ]
In [113]:
print len( temp_df )
print len( temp_df[ temp_df[tn] == True ] )
In [114]:
tag_df.ix[img][cn]
Out[114]:
In [115]:
true_imgs = []
for i in range(4):
true_imgs += [[]]
for img in images:
cn = 'conf_' + tn[7:]
true_len = int( tag_df.ix[img][cn] * 3 )
true_imgs[ true_len ] += [ img ]
In [116]:
dis_html = '''
<table>
<tr>
<td width="50px">{}</td>
<td><img src="{}"></td>
<td><img src="{}"></td>
<td><img src="{}"></td>
<td><img src="{}"></td>
</tr>
</table>'''
In [117]:
from IPython.display import HTML
In [118]:
img_urls = tag_df.ix[ true_imgs[2][:4] ]['image_url'].unique()
img_urls
Out[118]:
In [119]:
HTML( dis_html.format( *( ['2 true'] + list( img_urls ) ) ) )
Out[119]:
In [120]:
def ranked_images_for_style( sn ):
    tn = 'tagged_' + sn[6:]
    cn = 'conf_' + tn[7:]
    tag_df = result_df.dropna( subset=[tn] )
    images = tag_df.index.unique()
    # Bucket images by int( conf * 3 ), i.e. roughly how many answers marked the image true.
    true_imgs = []
    for i in range(4):
        true_imgs += [[]]
    for img in images:
        true_len = int( tag_df.ix[img][cn] * 3 )
        true_imgs[ true_len ] += [ img ]
    return true_imgs
In [121]:
def display_ranked_images( sn ):
true_imgs = ranked_images_for_style( sn )
tn = 'tagged_' + sn[6:]
tag_df = result_df.dropna( subset=[tn] )
answer = ''
for i in range( 4 ):
img_urls = tag_df.ix[ true_imgs[i][:4] ]['image_url'].unique()
result = dis_html.format( *( ['{} true'.format(i)] + list( img_urls ) ) )
answer += result
return HTML( answer )
In [122]:
i = 0
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [123]:
i = 1
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [124]:
i = 2
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [125]:
i = 3
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [126]:
i = 4
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [127]:
i = 5
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [128]:
i = 6
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [129]:
i = 7
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [130]:
i = 8
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [131]:
i = 9
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [132]:
i = 10
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [133]:
i = 11
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [134]:
i = 12
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [135]:
i = 13
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [136]:
i = 14
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [137]:
i = 15
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [138]:
i = 16
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [139]:
i = 17
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [ ]: