Vespa - Evaluate query models

Define metrics and evaluate query models

Example setup

Connect to the application (here the public CORD-19 search app) and define a query model that combines an OR match phase with the bm25 rank profile.


In [ ]:
from vespa.application import Vespa
from vespa.query import Query, RankProfile, OR

app = Vespa(url="https://api.cord19.vespa.ai")
query_model = Query(
    match_phase=OR(),
    rank_profile=RankProfile(name="bm25", list_features=True))
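
Before evaluating, it can help to sanity-check the connection with a single query. This is a minimal sketch; the query/query_model call signature and the number_documents_retrieved attribute are assumptions about this pyvespa version.


In [ ]:
# Minimal sanity check (assumed API): run one query through the query model
# and inspect how many documents it matched.
result = app.query(query="Intrauterine virus infections", query_model=query_model)
result.number_documents_retrieved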

Define some labelled data. Each entry holds a query and its relevant documents, identified by id and annotated with a relevance score.


In [ ]:
labelled_data = [
    {
        "query_id": 0, 
        "query": "Intrauterine virus infections and congenital heart disease",
        "relevant_docs": [{"id": 0, "score": 1}, {"id": 3, "score": 1}]
    },
    {
        "query_id": 1, 
        "query": "Clinical and immunologic studies in identical twins discordant for systemic lupus erythematosus",
        "relevant_docs": [{"id": 1, "score": 1}, {"id": 5, "score": 1}]
    }
]
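
In practice the labelled data usually comes from a file rather than being defined inline. A minimal sketch, assuming a hypothetical labelled_data.json with the same structure as above:


In [ ]:
import json

# Hypothetical file containing a list of entries with query_id, query and
# relevant_docs, matching the inline structure above.
with open("labelled_data.json") as f:
    labelled_data = json.load(f)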

Define metrics

MatchRatio measures the fraction of the available documents that each query matches; Recall and ReciprocalRank are computed over the top 10 hits.


In [ ]:
from vespa.evaluation import MatchRatio, Recall, ReciprocalRank

eval_metrics = [MatchRatio(), Recall(at=10), ReciprocalRank(at=10)]
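
To make the metric definitions concrete, here is a plain-Python illustration of reciprocal rank at 10. This is a sketch of the definition, not Vespa's implementation.


In [ ]:
# Reciprocal rank at k: 1/position of the first relevant hit in the top k,
# or a default score if no relevant hit appears there.
def reciprocal_rank(ranked_ids, relevant_ids, at=10, default=0):
    for position, doc_id in enumerate(ranked_ids[:at], start=1):
        if doc_id in relevant_ids:
            return 1 / position
    return default

reciprocal_rank(ranked_ids=[4, 1, 7], relevant_ids={1, 5})  # first relevant hit at position 2 -> 0.5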

Evaluate in batch

The evaluate method runs every labelled query at once; id_field names the document field holding the ids listed in relevant_docs.


In [ ]:
evaluation = app.evaluate(
    labelled_data=labelled_data,
    eval_metrics=eval_metrics,
    query_model=query_model,
    id_field="id",
)
evaluation


Out[ ]:
   query_id  match_ratio_retrieved_docs  match_ratio_docs_available  match_ratio_value  recall_10_value  reciprocal_rank_10_value
0         0                       52526                       58692           0.894943                0                         0
1         1                       54048                       58692           0.920875                0                         0
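
The result comes back as a pandas DataFrame, so it can be aggregated directly, for example taking the mean of each metric across queries:


In [ ]:
# Average each metric over the evaluated queries.
evaluation[["match_ratio_value", "recall_10_value", "reciprocal_rank_10_value"]].mean()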

Evaluate specific query

You can get finer control with the evaluate_query method, which evaluates one query at a time and lets you assemble the per-query results yourself.


In [ ]:
from pandas import concat, DataFrame

evaluation = []
for query_data in labelled_data:
    query_evaluation = app.evaluate_query(
        eval_metrics=eval_metrics,
        query_model=query_model,
        query_id=query_data["query_id"],
        query=query_data["query"],
        id_field="id",
        relevant_docs=query_data["relevant_docs"],
        default_score=0
    )
    evaluation.append(query_evaluation)
evaluation = DataFrame.from_records(evaluation)
evaluation


Out[ ]:
   query_id  match_ratio_retrieved_docs  match_ratio_docs_available  match_ratio_value  recall_10_value  reciprocal_rank_10_value
0         0                       52526                       58692           0.894943                0                         0
1         1                       54048                       58692           0.920875                0                         0
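
The same workflow makes it easy to compare query models. A minimal sketch evaluating a second model side by side with BM25; the "default" rank profile name is an assumption about this application's schema.


In [ ]:
# Evaluate a second query model and line the results up against BM25.
# The "default" rank profile name is an assumption about the schema.
native_model = Query(
    match_phase=OR(),
    rank_profile=RankProfile(name="default", list_features=True))
native_evaluation = app.evaluate(
    labelled_data=labelled_data,
    eval_metrics=eval_metrics,
    query_model=native_model,
    id_field="id",
)
concat([evaluation, native_evaluation], keys=["bm25", "default"])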