In [1]:
%pylab inline
from common import RobotCamomile
from getpass import getpass
from tortilla.utils import bunchify
import numpy as np
from datetime import datetime
In [7]:
# Client used by every later cell to query the Camomile server at this URL.
# NOTE(review): 'robot_label' is presumably the account to log in as — confirm
# against RobotCamomile in common.py.
client = RobotCamomile('http://api.mediaeval.niderb.fr', 'robot_label')
In [8]:
# Resolve the 'mediaeval.test' corpus, then the two label layers of that
# corpus: the one named '...label.all' and the one named '...label.consensus'.
corpus = client.getCorpusByName('mediaeval.test')
layerNames = ('mediaeval.groundtruth.label.all',
              'mediaeval.groundtruth.label.consensus')
allLayer, consensusLayer = [client.getLayerByName(corpus, layerName)
                            for layerName in layerNames]
In [6]:
# Flatten the per-medium annotation lists of the 'all' layer into one list.
allAnnotations = []
for _medium, mediumAnnotations in client.getAnnotations_iter(allLayer):
    allAnnotations.extend(mediumAnnotations)
In [ ]:
# Annotations of the consensus layer; later cells read .fragment and .data
# from each of them.
consensusAnnotations = client.getAnnotations(consensusLayer)
In [ ]:
# Report how many consensus annotations were fetched (labelled as shots).
print len(consensusAnnotations), ' shots with consensus'
In [ ]:
# Index the consensus by annotated fragment so it can be looked up per shot.
consensus = {}
for consensusAnnotation in consensusAnnotations:
    consensus[consensusAnnotation.fragment] = bunchify(consensusAnnotation.data)
In [ ]:
# Single pass over allAnnotations that fills the per-annotator statistics
# used by the reporting cells below.

# total number of annotations, per annotator
count = {}
# durations of each annotation, per annotator
duration = {}
# date of each annotation, per annotator
date = {}
# total number of annotations for which a consensus was reached, per annotator
countWithConsensus = {}
# total number of annotations for which dontKnow was selected at least once, per annotator
dontKnow = {}
# total number of annotations that differ from consensus, per annotator
bad = {}

for annotation in allAnnotations:

    # skip old annotations (they have no 'log' entry)
    if 'log' not in annotation.data:
        continue

    # which shot was annotated?
    shot = annotation.fragment

    # who annotated this shot?
    annotator = annotation.data.annotator

    # how long did it take to produce this annotation?
    duration.setdefault(annotator, []).append(annotation.data.log.duration)

    # the log does not always contain a 'date' entry
    if 'date' in annotation.data.log:
        date.setdefault(annotator, []).append(
            datetime.strptime(annotation.data.log.date, '%Y-%m-%dT%H:%M:%S.%fZ'))

    # number of annotations by this annotator
    count[annotator] = count.get(annotator, 0) + 1

    # copy the decisions so that the extra '?unknown?' entry added below
    # is not written back into annotation.data
    status = dict(annotation.data.get('known', {}))
    status['?unknown?'] = 'speakingFace' if annotation.data.unknown else 'noFace'

    if 'dontKnow' in status.values():
        dontKnow[annotator] = dontKnow.get(annotator, 0) + 1

    # number of annotations by this annotator for which a consensus was reached
    if shot not in consensus:
        continue
    countWithConsensus[annotator] = countWithConsensus.get(annotator, 0) + 1

    # the consensus may be stored as an empty string: treat it as "nobody"
    k = consensus[shot]
    if k == '':
        k = {}

    # the annotation is bad when its set of speaking faces differs from the consensus
    kSpeakingFace = {personName for personName, decision in k.iteritems()
                     if decision == 'speakingFace'}
    aSpeakingFace = {personName for personName, decision in status.iteritems()
                     if decision == 'speakingFace'}
    if kSpeakingFace != aSpeakingFace:
        bad[annotator] = bad.get(annotator, 0) + 1
In [ ]:
def timeline(date, annotator, m='2015/07/10', M=None):
    """Save an hourly histogram of one annotator's activity as a PNG file.

    Parameters
    ----------
    date : dict
        Maps an annotator to the list of `datetime` of their annotations.
    annotator :
        Key looked up in `date`; also used in the figure title and file name.
    m : str, optional
        First day of the plotted range, formatted as 'YYYY/MM/DD'.
    M : str, optional
        Last day of the plotted range (included), formatted as 'YYYY/MM/DD'.
        Defaults to today.

    The figure is written to '/tmp/<annotator>.png' and then closed.
    Nothing is plotted when `date` holds no entry for `annotator`.
    """
    annotatorDates = date.get(annotator, [])
    if not annotatorDates:
        # nothing to plot: avoids a KeyError for annotators without any date
        return

    allDates = [date2num(d) for d in annotatorDates]

    # convert both bounds to whole matplotlib day numbers; M is pushed to the
    # end of its day so that the last day is fully covered
    m = int(date2num(datetime.strptime(m, '%Y/%m/%d')))
    if M is None:
        M = datetime.strftime(datetime.today(), '%Y/%m/%d')
    M = int(date2num(datetime.strptime(M, '%Y/%m/%d')) + 1)
    nDays = M - m

    figsize(nDays * 5, 2)
    # 24 one-hour bins per day require 24 * nDays + 1 bin edges
    hist(allDates, bins=linspace(m, M, 24 * nDays + 1))
    xlim(m, M)
    ylim(0, 500)
    xticks(range(m, M),
           [datetime.strftime(num2date(d), '%Y/%m/%d') for d in range(m, M)])
    title('{annotator} - {n} annotations'.format(annotator=annotator,
                                                 n=len(annotatorDates)))
    savefig('/tmp/{annotator}.png'.format(annotator=annotator))
    close()
In [ ]:
for annotator in count:
totalNumberOfAnnotations = count[annotator]
estimatedAccuracy = 100. - 100. * bad.get(annotator, 0) / countWithConsensus[annotator]
# estimatedAccuracy = 100. * countWithConsensus[annotator] /count[annotator]
dontKnowRatio = 100. * dontKnow.get(annotator, 0) / count[annotator]
print annotator
print totalNumberOfAnnotations, 'shots'
print 'Consensus', countWithConsensus[annotator]
print 'Quality {accuracy:.2f}%'.format(accuracy=estimatedAccuracy)
print 'Skipped {dontKnow:.1f}%'.format(dontKnow=dontKnowRatio)
timeline(date, annotator)
print
In [ ]:
m='2015/07/10'
M=None
m = int(date2num(datetime.strptime(m, '%Y/%m/%d')))
if M is None:
M = datetime.strftime(datetime.today(), '%Y/%m/%d')
M = int(date2num(datetime.strptime(M, '%Y/%m/%d')) + 1)
for annotator, D in date.iteritems():
allDates = [date2num(d) for d in D]
nByMinute, _ = np.histogram(allDates, bins=linspace(m, M, 24*6*5*(M-m)))
print annotator, ceil(np.sum(nByMinute > 0) / (6.*5)), 'hours'