In [45]:
%matplotlib inline
from __future__ import division, print_function
import numpy as np
import SHS_data
def evaluate_query(query, retrieved, correct_uris=None):
    """Evaluate retrieval results for a given query.

    Args:
        query (str): query URI
        retrieved (list): ordered list of top k retrieved documents
            (k can be anything)
        correct_uris (list): list of documents to be found. Set to None
            to look them up by query URI; pass a list to save I/O time.

    Returns:
        dict: dictionary of results with evaluation metrics as keys.
            Currently implemented:
            - ap (average precision)
            - precision at 1
            - recall at 5
    """
    ...
A more elegant formulation of average precision than the one currently used (but not very practical):
precisions = [__precision__(ranks[:i+1], ranks[i]) for i in range(len(ranks))]
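For reference, here is a minimal sketch of the three metrics as described in the docstring. The function name and the computation are illustrative assumptions (relying on `__precision__`-style internals is avoided); this is not the actual `evaluation.py` implementation:
In [ ]:
def _evaluate_sketch(retrieved, correct_uris):
    """Sketch of (ap, precision at 1, recall at 5) for one query."""
    correct = set(correct_uris)
    # relevance flag for each rank in the retrieved list
    rel = [uri in correct for uri in retrieved]
    # average precision: mean of precision@i taken over the relevant ranks
    hits, precisions = 0, []
    for i, is_relevant in enumerate(rel):
        if is_relevant:
            hits += 1
            precisions.append(hits / (i + 1.0))
    ap = sum(precisions) / len(precisions) if precisions else 0.0
    p_at_1 = float(rel[0]) if rel else 0.0  # precision at 1
    r_at_5 = sum(rel[:5]) / float(len(correct)) if correct else 0.0  # recall at 5
    return {'ap': ap, 'precision at 1': p_at_1, 'recall at 5': r_at_5}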
In [126]:
import evaluation
reload(evaluation)  # pick up edits to evaluation.py (builtin in Python 2; importlib.reload in Python 3)
cliques_by_name, cliques_by_uri = SHS_data.read_cliques()
# QUERY
n_test = 1988
test_clique = list(cliques_by_name.keys())[n_test]  # note: dict ordering is arbitrary
print('CLIQUE: ' + test_clique)
# CORRECT URIS
test_uris = cliques_by_name[test_clique]
test_query, test_correct = test_uris[0], test_uris[1:]
print('QUERY: ' + test_query)
print('CORRECT: {}'.format(test_correct))
# INCORRECT URIS
test_incorrect = list(cliques_by_name.values())[n_test + 1]
print('test_incorrect: {}'.format(test_incorrect))
# RETRIEVED (uncomment one scenario; expected (ap, p@1, r@5) given for n_test = 1988)
test_retrieved = test_incorrect + test_correct  # expect (0.25, 0, 1)
# test_retrieved = test_correct + test_incorrect  # expect (1, 1, 1)
# test_retrieved = test_incorrect  # expect (0, 0, 0)
# test_retrieved = test_correct  # expect (1, 1, 1)
# test_retrieved = []  # expect (0, 0, 0)
print('RETRIEVED: {}'.format(test_retrieved))
# RESULT
res = evaluation.evaluate_query(test_query, test_retrieved)
print('RESULT: {}'.format(res))
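The expected triples in the comments above can be checked in one go. A small sanity-check sketch; the result-dict keys ('ap', 'precision at 1', 'recall at 5') are assumed from the docstring, not confirmed against evaluation.py:
In [ ]:
# Run every scenario listed above and compare it against its expected
# (ap, precision at 1, recall at 5) triple.
scenarios = [
    (test_incorrect + test_correct, (0.25, 0, 1)),
    (test_correct + test_incorrect, (1, 1, 1)),
    (test_incorrect, (0, 0, 0)),
    (test_correct, (1, 1, 1)),
    ([], (0, 0, 0)),
]
for retrieved, (exp_ap, exp_p1, exp_r5) in scenarios:
    res = evaluation.evaluate_query(test_query, retrieved)
    assert np.isclose(res['ap'], exp_ap)
    assert np.isclose(res['precision at 1'], exp_p1)
    assert np.isclose(res['recall at 5'], exp_r5)
print('all scenarios match their expected values')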