In [28]:
%load_ext autoreload
%autoreload 2
import sys
import uuid
import math
import urllib
import urlparse
import time
import numpy as np
from boto.mturk.connection import *
from boto.mturk.question import ExternalQuestion
from boto.mturk.qualification import *
from boto.mturk.price import *
import pandas as pd
import vislab.datasets
In [3]:
%matplotlib inline
import matplotlib.pyplot as plt
In [5]:
answer_df = pd.read_hdf( '/Users/sergeyk/Dropbox/mturk-results/mturk-answers.h5', 'df' )
print len( answer_df )
answer_df.head()
Out[5]:
In [45]:
result_df = pd.read_hdf( '/Users/sergeyk/Dropbox/mturk-results/mturk-results.h5', 'df' )
print len( result_df )
result_df.head()
Out[45]:
In [7]:
style_cols = [name for name in result_df.columns if 'style' in name]
style_cols.remove( 'style_Bokeh' )
style_cols.remove( 'style_Texture' )
print style_cols
In [8]:
# Find the single 'tagged_*' column that actually has answers for an assignment
# (the other tagged columns are all NaN and get dropped).
def assignment_df_tag( df, drop=True ):
    if drop: df = df.dropna( axis=1 )
    return [name for name in df.columns.tolist() if 'tagged' in name][0]

def assignment_series_tag( ser, drop=True ):
    if drop: ser = ser.dropna()
    return [name for name in ser.index.tolist() if 'tagged' in name][0]
In [9]:
test_df = result_df[ result_df['_split'] == 'test' ]
In [10]:
[ ( len( test_df[ test_df[sn] == True ] ), sn ) for sn in style_cols ]
Out[10]:
In [11]:
# total number of unique workers
unique_workers = answer_df.drop_duplicates( cols=['worker_id'] )['worker_id'].tolist()
print len( unique_workers )
In [12]:
answers_per_worker = [ len( answer_df[ answer_df['worker_id'] == worker_id ] )
for worker_id in unique_workers ]
answers_per_worker = pd.DataFrame( answers_per_worker, index=unique_workers, columns=['count'] )
answers_per_worker.sort( columns='count', ascending=False, inplace=True )
In [13]:
answers_per_worker.head(15)
Out[13]:
In [14]:
plt.rcParams['figure.figsize'] = ( 16, 8 )
plt.plot( range( len( answers_per_worker ) ), answers_per_worker['count'] )
plt.ylabel( 'num answers' )
plt.xlabel( 'worker' )
Out[14]:
In [15]:
assignments = answer_df.drop_duplicates( cols=['assignment_id'] )['assignment_id'].tolist()
print len( assignments )
assign_means = []
for assign in assignments:
assign_df = answer_df[ answer_df['assignment_id'] == assign ]
tag_col = assignment_df_tag( assign_df )
assign_means += [ assign_df[ tag_col ].astype( 'int' ).mean() ]
In [ ]:
print len( assign_means )
print assign_means[:10]
In [ ]:
plt.rcParams['figure.figsize'] = ( 8, 6 )
plt.hist( assign_means, bins=10, range=(0,1) )
plt.xlabel( 'percent answered true' )
plt.ylabel( 'number of assignments' )
axs = plt.axis()
axs = [ 0, 1 ] + list( axs[2:] )
plt.axis( axs )
In this plot, we've looked at each assignment individually and computed the percent of its 10 questions that were answered true. The plot is a histogram of those percentages. Across all assignments, we see a reasonably good distribution centered around 50% of the images being of the style, with a slight bias towards less than half.
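As a cross-check, the same per-assignment fractions can be computed with a single groupby. This is just a sketch of the equivalent computation, assuming (as assignment_df_tag does) that each assignment's rows have exactly one non-null 'tagged_*' column; assignment_fraction_true is a hypothetical helper, not used elsewhere.
In [ ]:
def assignment_fraction_true( assign_df ):
    # Fraction of this assignment's questions that were answered True.
    tag_col = assignment_df_tag( assign_df )
    return assign_df[ tag_col ].astype( 'int' ).mean()

groupby_means = answer_df.groupby( 'assignment_id' ).apply( assignment_fraction_true )
# Should agree with the loop above, up to ordering.
print np.allclose( sorted( groupby_means.values ), sorted( assign_means ) )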
In [16]:
assign_allone_df = pd.DataFrame()
for assign in assignments:
    assign_df = answer_df[ answer_df['assignment_id'] == assign ]
    tag_col = assignment_df_tag( assign_df )
    update = False
    # Flag assignments where every question got the same answer (all True or all False).
    if len( assign_df[ assign_df[tag_col] == True ] ) == len( assign_df ):
        update = True
    if len( assign_df[ assign_df[tag_col] == False ] ) == len( assign_df ):
        update = True
    if update:
        assign_allone_df = pd.concat( [ assign_allone_df, assign_df ] )
In [ ]:
assign_allone_df.head()
In [ ]:
assign_allone_df = assign_allone_df.drop_duplicates( cols=['assignment_id'] )
assign_allone_df
In [ ]:
allone_assigns = [ ( row['worker_id'], assignment_series_tag( row ) )
for index, row in assign_allone_df.iterrows() ]
allone_assigns
I'd be interested in any other suggestions on how to validate worker answers.
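One additional check that might help (a sketch only, using one style as an example): compare each worker's answers against the per-image majority vote and look at the workers with the lowest agreement. This assumes, as the cells further down suggest, that result_df is indexed by image id and that each row's 'tagged_*' value is a single worker's answer.
In [ ]:
tn = 'tagged_' + style_cols[0][6:]
votes = result_df.dropna( subset=[tn] )
# Per-image majority vote, broadcast back to every row for that image.
majority = votes.groupby( votes.index )[tn].transform( lambda x: x.mean() >= 0.5 )
# Fraction of each worker's answers that agree with the majority; lowest agreement first.
agreement = ( votes[tn] == majority ).groupby( votes['worker_id'] ).mean()
agreement.order().head( 10 )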
In [17]:
style_means = []
for sn in style_cols:
tn = 'tagged_' + sn[6:]
style_df = answer_df[ answer_df[tn].notnull() ]
assignments = style_df.drop_duplicates( cols=['assignment_id'] )['assignment_id'].tolist()
means = [ style_df[ style_df['assignment_id'] == assign ][tn].astype( 'int' ).mean()
for assign in assignments ]
style_means += [ means ]
In [18]:
print len( style_means )
In [19]:
plt.rcParams['figure.figsize'] = ( 16, 18 )
for i, m in enumerate( style_means ):
    plt.subplot( 6, 3, i+1 )
    plt.title( style_cols[i][6:] )
    plt.ylabel( 'n. assignments' )
    plt.hist( m, bins=10, range=(0,1) )
    axs = plt.axis()
    axs = [ 0, 1 ] + list( axs[2:] )
    plt.axis( axs )
Plots similar to the above all-assignment plot, but broken down by style. We selected all the assignments corresponding to a given style, computed the percent tagged true in each assignment, and created a histogram.
Most styles are pretty well distributed. Long_Exposure, Pastel, Romantic, and Melancholy all have biases toward marking less than 50% true, while several others have slight biases towards marking more than 50% true.
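To put a rough number on those biases, here is a small sketch using the style_means computed above: the mean per-assignment fraction marked true for each style, where values below 0.5 indicate a bias toward marking fewer than half the images true.
In [ ]:
# Mean per-assignment fraction marked true, per style, smallest first.
pd.Series( [ np.mean( m ) for m in style_means ],
           index=[ name[6:] for name in style_cols ] ).order()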
In [ ]:
turker_means = []
for turker in answers_per_worker.index.tolist():
worker_df = answer_df[ answer_df['worker_id'] == turker ]
assignments = worker_df.drop_duplicates( cols=['assignment_id'] )['assignment_id'].tolist()
means = []
for assign in assignments:
assign_df = worker_df[ worker_df['assignment_id'] == assign ]
assign_df = assign_df.dropna( axis=1 )
tag_cols = [name for name in assign_df.columns.tolist() if 'tagged' in name]
means += [ assign_df[ tag_cols[0] ].astype( 'int' ).mean() ]
turker_means += [ means ]
In [ ]:
plt.rcParams['figure.figsize'] = ( 16, 16 )
for i, m in enumerate( turker_means[:15] ):
    plt.subplot( 6, 3, i+1 )
    # Worker ids must come from answers_per_worker.index, which is the count-sorted
    # order that turker_means was built in (unique_workers is in a different order).
    plt.title( answers_per_worker.index[i] + ' : ' + str(len(m)) )
    plt.ylabel( 'n. assignments' )
    plt.hist( m, bins=5, range=(0,1) )
    axs = plt.axis()
    axs = [ 0, 1 ] + list( axs[2:] )
    plt.axis( axs )
Same style of plot, but broken down by turker, showing the top 15 turkers, all of whom completed at least 10 HITs. Again, a strong central distribution.
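A quick sanity check of the "at least 10 HITs" claim above (a sketch that counts distinct assignments per worker, treating each assignment as one completed HIT):
In [ ]:
# Minimum number of distinct assignments among the 15 most prolific workers.
hits_per_worker = answer_df.groupby( 'worker_id' )['assignment_id'].apply( lambda x: len( x.unique() ) )
print hits_per_worker[ answers_per_worker.index[:15] ].min()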
In [20]:
flickr_df = vislab.datasets.flickr.get_df()
In [21]:
sn = style_cols[0]
tn = 'tagged_' + sn[6:]
print sn, tn
In [22]:
print len( result_df )
print len( result_df[ result_df[tn].isnull() ] )
print len( result_df ) - len( result_df[ result_df[tn].isnull() ] )
In [23]:
tag_df = result_df.dropna( subset=[tn] )
In [24]:
tag_df.head(9)[ [ 'assignment_id', 'hit_id', 'worker_id', tn, sn ] ]
Out[24]:
In [25]:
len( tag_df.dropna( subset=[sn] ) )
Out[25]:
In [29]:
pred_list = tag_df[tn].astype(int)
truth_list = tag_df[sn].astype(int)
print len( pred_list ) == len( truth_list )
print np.array( [ p == t for p,t in zip( pred_list.index, truth_list.index )] ).all()
In [30]:
import sklearn.metrics
In [31]:
print sklearn.metrics.accuracy_score( truth_list, pred_list )
print sklearn.metrics.precision_recall_fscore_support( truth_list, pred_list, pos_label=1, average='micro' )
In [43]:
result_df.shape
Out[43]:
In [41]:
acc_l = []
pre_l = []
rec_l = []
len_l = []
cor_l = []
for sn in style_cols:
tn = 'tagged_' + sn[6:]
tag_df = test_df.dropna( subset=[tn] )
pred_list = tag_df[tn].astype(int)
truth_list = tag_df[sn].astype(int)
acc = sklearn.metrics.accuracy_score( truth_list, pred_list )
prec_rec = sklearn.metrics.precision_recall_fscore_support( truth_list, pred_list, pos_label=1, average='micro' )
acc_l += [ acc ]
pre_l += [ prec_rec[0] ]
rec_l += [ prec_rec[1] ]
len_l += [ len( truth_list ) ]
cor_l += [ len( [ t for t in truth_list if t ] ) / float( len( truth_list ) ) ]
d = {
'accuracy': pd.Series( acc_l, index=style_cols ),
'precision': pd.Series( pre_l, index=style_cols ),
'recall': pd.Series( rec_l, index=style_cols ),
'_length': pd.Series( len_l, index=style_cols ),
'_%true': pd.Series( cor_l, index=style_cols ),
}
In [40]:
acc_df = pd.DataFrame( d )
# acc_df.to_hdf('/Users/sergeyk/Dropbox/mturk-results/acc_df.h5', 'df', mode='w')
acc_df
Out[40]:
In [108]:
sn = style_cols[1]
tn = 'tagged_' + sn[6:]
cn = 'conf_' + tn[7:]
print sn, tn, cn
In [109]:
tag_df = result_df.dropna( subset=[tn] )
In [110]:
images = tag_df.index.unique()
print len( images )
print images[:10]
In [111]:
img = images[2]
In [112]:
temp_df = tag_df.ix[ img ]
In [113]:
print len( temp_df )
print len( temp_df[ temp_df[tn] == True ] )
In [114]:
tag_df.ix[img][cn]
Out[114]:
In [115]:
true_imgs = []
for i in range(4):
true_imgs += [[]]
for img in images:
cn = 'conf_' + tn[7:]
true_len = int( tag_df.ix[img][cn] * 3 )
true_imgs[ true_len ] += [ img ]
In [116]:
dis_html = '''
<table>
<tr>
<td width="50px">{}</td>
<td><img src="{}"></td>
<td><img src="{}"></td>
<td><img src="{}"></td>
<td><img src="{}"></td>
</tr>
</table>'''
In [117]:
from IPython.display import HTML
In [118]:
img_urls = tag_df.ix[ true_imgs[2][:4] ]['image_url'].unique()
img_urls
Out[118]:
In [119]:
HTML( dis_html.format( *( ['2 true'] + list( img_urls ) ) ) )
Out[119]:
In [120]:
def ranked_images_for_style( sn ):
    tn = 'tagged_' + sn[6:]
    cn = 'conf_' + tn[7:]
    tag_df = result_df.dropna( subset=[tn] )
    images = tag_df.index.unique()
    # Bucket images by int( conf * 3 ), i.e. roughly how many answers marked the image true.
    true_imgs = []
    for i in range(4):
        true_imgs += [[]]
    for img in images:
        true_len = int( tag_df.ix[img][cn] * 3 )
        true_imgs[ true_len ] += [ img ]
    return true_imgs
In [121]:
def display_ranked_images( sn ):
true_imgs = ranked_images_for_style( sn )
tn = 'tagged_' + sn[6:]
tag_df = result_df.dropna( subset=[tn] )
answer = ''
for i in range( 4 ):
img_urls = tag_df.ix[ true_imgs[i][:4] ]['image_url'].unique()
result = dis_html.format( *( ['{} true'.format(i)] + list( img_urls ) ) )
answer += result
return HTML( answer )
In [122]:
i = 0
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [123]:
i = 1
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [124]:
i = 2
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [125]:
i = 3
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [126]:
i = 4
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [127]:
i = 5
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [128]:
i = 6
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [129]:
i = 7
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [130]:
i = 8
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [131]:
i = 9
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [132]:
i = 10
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [133]:
i = 11
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [134]:
i = 12
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [135]:
i = 13
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [136]:
i = 14
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [137]:
i = 15
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [138]:
i = 16
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [139]:
i = 17
sn = style_cols[i]
display( HTML( '''<h1>{}</h1>'''.format(sn) ) )
display( display_ranked_images( sn ) )
In [ ]: