NE PAS COMMITER CE NOTEBOOK AVEC LES MOTS DE PASSE EN CLAIR


In [1]:
%pylab inline
from common import RobotCamomile
from getpass import getpass
from tortilla.utils import bunchify
import numpy as np
from datetime import datetime


Populating the interactive namespace from numpy and matplotlib

In [7]:
# Open a session on the Camomile server as the 'robot_label' user.
# NOTE(review): the stored cell output shows an interactive password prompt,
# so the password is presumably asked for by RobotCamomile itself -- confirm in common.py.
client = RobotCamomile('http://api.mediaeval.niderb.fr', 'robot_label')


Password for robot_label: ········

In [8]:
# Resolve, by name, the corpus and the two label layers used in the rest of
# the notebook: the 'all' layer and the 'consensus' layer.
corpus = client.getCorpusByName('mediaeval.test')
allLayer = client.getLayerByName(corpus, 'mediaeval.groundtruth.label.all')
consensusLayer = client.getLayerByName(corpus, 'mediaeval.groundtruth.label.consensus')

Load all annotations


In [6]:
# Walk the 'all' layer medium by medium and flatten the per-medium
# annotation lists into one single list.
allAnnotations = []
for medium, annotations in client.getAnnotations_iter(allLayer):
    allAnnotations.extend(annotations)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-6-d0511ec54673> in <module>()
      1 allAnnotations = []
----> 2 for medium, annotations in client.getAnnotations_iter(allLayer):
      3     allAnnotations += annotations

/Users/bredin/Development/MediaEval/robots/common.pyc in getAnnotations_iter(self, layer, returns_id)
    227         for medium in media:
    228             annotations = self.getAnnotations(
--> 229                 layer=layer, medium=medium, returns_id=returns_id)
    230             yield medium, annotations
    231 

/Users/bredin/Python/mediaeval_robots/lib/python2.7/site-packages/camomile/client.pyc in f2(*args, **kwargs)
     44             return f1(*args, **kwargs)
     45         except requests.exceptions.HTTPError as e:
---> 46             error = e.response.json().get('error', None)
     47             if error:
     48                 if e.response.status_code < 500:

/Users/bredin/Python/mediaeval_robots/lib/python2.7/site-packages/requests/models.pyc in json(self, **kwargs)
    817                     # used.
    818                     pass
--> 819         return json.loads(self.text, **kwargs)
    820 
    821     @property

/usr/local/Cellar/python/2.7.10/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/__init__.pyc in loads(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
    336             parse_int is None and parse_float is None and
    337             parse_constant is None and object_pairs_hook is None and not kw):
--> 338         return _default_decoder.decode(s)
    339     if cls is None:
    340         cls = JSONDecoder

/usr/local/Cellar/python/2.7.10/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/decoder.pyc in decode(self, s, _w)
    364 
    365         """
--> 366         obj, end = self.raw_decode(s, idx=_w(s, 0).end())
    367         end = _w(s, end).end()
    368         if end != len(s):

/usr/local/Cellar/python/2.7.10/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/decoder.pyc in raw_decode(self, s, idx)
    382             obj, end = self.scan_once(s, idx)
    383         except StopIteration:
--> 384             raise ValueError("No JSON object could be decoded")
    385         return obj, end

ValueError: No JSON object could be decoded

Load consensus


In [ ]:
# Fetch the annotations of the consensus layer with a single call
# (no medium is specified here).
consensusAnnotations = client.getAnnotations(consensusLayer)

In [ ]:
print len(consensusAnnotations), ' shots with consensus'
# Alternative way of loading the same annotations, medium by medium:
#
#   consensusAnnotations = []
#   for medium, annotations in client.getAnnotations_iter(consensusLayer):
#       consensusAnnotations += annotations

In [ ]:
# Index the consensus annotations by fragment; each value is the
# bunchified data of the corresponding annotation.
consensus = dict(
    (item.fragment, bunchify(item.data))
    for item in consensusAnnotations
)

Stats


In [ ]:
# Per-annotator statistics. Every dictionary below is keyed by annotator.

# total number of annotations
count = {}
# duration of each annotation
duration = {}
# date of each annotation
date = {}
# number of annotations made on a shot for which a consensus exists
countWithConsensus = {}
# number of annotations in which 'dontKnow' was selected at least once
dontKnow = {}
# number of annotations whose 'speakingFace' set differs from the consensus
bad = {}

for annotation in allAnnotations:

    # skip old annotations (the ones whose data has no 'log' entry)
    if 'log' not in annotation.data:
        continue

    # which shot was annotated?
    shot = annotation.fragment

    # who annotated this shot?
    annotator = annotation.data.annotator

    # how long did it take to produce this annotation?
    duration.setdefault(annotator, []).append(annotation.data.log.duration)

    # the log does not always contain a date: only record it when present
    if 'date' in annotation.data.log:
        date.setdefault(annotator, []).append(
            datetime.strptime(annotation.data.log.date, '%Y-%m-%dT%H:%M:%S.%fZ'))

    # number of annotations by this annotator
    count[annotator] = count.get(annotator, 0) + 1

    # decision taken for each person name; work on a copy so that the extra
    # '?unknown?' entry added below is not written back into annotation.data
    status = dict(annotation.data.get('known', {}))
    status['?unknown?'] = 'speakingFace' if annotation.data.unknown else 'noFace'

    if 'dontKnow' in status.values():
        dontKnow[annotator] = dontKnow.get(annotator, 0) + 1

    # the remaining statistics are only computed for shots with a consensus
    if shot not in consensus:
        continue

    # number of annotations by this annotator for which a consensus was reached
    countWithConsensus[annotator] = countWithConsensus.get(annotator, 0) + 1

    # the consensus data can be an empty string instead of a mapping:
    # treat it as a consensus with no entry
    k = consensus[shot]
    if k == '':
        k = {}

    # names marked 'speakingFace' in the consensus and in this annotation
    kSpeakingFace = set(personName for personName, decision in k.items()
                        if decision == 'speakingFace')
    aSpeakingFace = set(personName for personName, decision in status.items()
                        if decision == 'speakingFace')

    if kSpeakingFace != aSpeakingFace:
        # not used by the statistics in this cell; presumably kept so that the
        # last disagreement can be inspected interactively
        missed = kSpeakingFace - aSpeakingFace
        falseAlarm = aSpeakingFace - kSpeakingFace
        bad[annotator] = bad.get(annotator, 0) + 1

In [ ]:
def timeline(date, annotator, m='2015/07/10', M=None):
    """Save an hourly histogram of the annotations of one annotator.

    The figure is written to '/tmp/{annotator}.png' and then closed, so
    nothing is displayed inline.

    Parameters
    ----------
    date : dict
        Maps each annotator to the list of datetimes of their annotations.
    annotator :
        Key of `date` whose annotations are plotted.
    m : str, optional
        First day of the timeline, formatted as 'YYYY/MM/DD'.
    M : str, optional
        Last day of the timeline (included), formatted as 'YYYY/MM/DD'.
        Defaults to the current day.

    Raises
    ------
    KeyError
        If `annotator` is not a key of `date`.
    """

    allDates = [date2num(d) for d in date[annotator]]

    # turn both bounds into whole day numbers; one day is added to the upper
    # bound so that the whole of day M is part of the timeline
    m = int(date2num(datetime.strptime(m, '%Y/%m/%d')))
    if M is None:
        M = datetime.strftime(datetime.today(), '%Y/%m/%d')
    M = int(date2num(datetime.strptime(M, '%Y/%m/%d')) + 1)

    nDays = M - m
    figsize(nDays * 5, 2)

    # one bin per hour: 24 * nDays bins require 24 * nDays + 1 bin edges
    hist(allDates, bins=linspace(m, M, 24 * nDays + 1))
    xlim(m, M)
    ylim(0, 500)

    # one tick per day, labelled with the corresponding date
    days = range(m, M)
    xticks(days, [datetime.strftime(num2date(d), '%Y/%m/%d') for d in days])
    title('{annotator} - {n} annotations'.format(annotator=annotator, n=len(date[annotator])))
    savefig('/tmp/{annotator}.png'.format(annotator=annotator))
    close()

In [ ]:
for annotator in count:
    totalNumberOfAnnotations = count[annotator]    
    estimatedAccuracy = 100. - 100. * bad.get(annotator, 0) / countWithConsensus[annotator]
    # estimatedAccuracy = 100. * countWithConsensus[annotator] /count[annotator] 
    dontKnowRatio = 100. * dontKnow.get(annotator, 0) / count[annotator]
    print annotator
    print totalNumberOfAnnotations, 'shots'
    print 'Consensus', countWithConsensus[annotator]
    print 'Quality {accuracy:.2f}%'.format(accuracy=estimatedAccuracy)
    print 'Skipped {dontKnow:.1f}%'.format(dontKnow=dontKnowRatio)
    timeline(date, annotator)
    print

In [ ]:
m='2015/07/10'
M=None
m = int(date2num(datetime.strptime(m, '%Y/%m/%d')))
if M is None:
   M = datetime.strftime(datetime.today(), '%Y/%m/%d')
M = int(date2num(datetime.strptime(M, '%Y/%m/%d')) + 1)

for annotator, D in date.iteritems():
    allDates = [date2num(d) for d in D]
    nByMinute, _ = np.histogram(allDates, bins=linspace(m, M, 24*6*5*(M-m)))
    print annotator, ceil(np.sum(nByMinute > 0) / (6.*5)), 'hours'