In [2]:
import numpy as np
In [3]:
ones = np.array([1,0,0,1])
In [4]:
# flip a binary array: turn 0s into 1s and 1s into 0s
[1 if elem == 0 else 0 for elem in ones]
Out[4]:
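In [ ]:
# equivalent vectorized flip for a 0/1 array (a sketch; produces the same values
# as the list comprehension above, just as an ndarray instead of a list)
1 - ones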
In [5]:
a_set = np.unique(np.array([1,1,0,0,1,0]))
In [19]:
np.unique(np.array([1,1,0,0,1,0]).ravel()) == np.array([0,1])
Out[19]:
In [37]:
valid_elems = [0,1]
In [38]:
elems = np.unique(np.array([1,1.0,0,0.0,1,0]).ravel())
elems_are_valid = [elem in valid_elems for elem in elems]
np.array(elems_are_valid).all()
Out[38]:
In [36]:
# note the parentheses: compare against [0] first, then reduce with .all()
(np.unique(np.array([0,0,0,0,0,0])) == np.array([0])).all()
Out[36]:
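In [ ]:
# same validity check as the cells above in a single expression (a sketch using np.isin;
# `labels` is just an example array, not something defined earlier)
labels = np.array([1, 1.0, 0, 0.0, 1, 0])
np.isin(np.unique(labels), valid_elems).all()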
In [54]:
# original code
def ranking_precision_score(y_true, y_score, k=None):
"""Precision at rank k
Parameters
----------
y_true : array-like, shape = [n_samples]
Ground truth (true relevance labels).
y_score : array-like, shape = [n_samples]
Predicted scores.
k : int
Rank.
Returns
-------
precision @k : float
"""
unique_y = np.unique(y_true)
if len(unique_y) > 2:
raise ValueError("Only supported for two relevance levels.")
# edited by felipe:
# just assume the true value is 1 and false is zero
# otherwise we get errors when, for instance, the label array is [1,1,1,1]
# pos_label = unique_y[1]
pos_label = 1
n_pos = np.sum(y_true == pos_label)
order = np.argsort(y_score)[::-1]
y_true = np.take(y_true, order[:k])
n_relevant = np.sum(y_true == pos_label)
# Divide by min(n_pos, k) such that the best achievable score is always 1.0.
# return float(n_relevant) / min(n_pos, k)
return float(n_relevant) / k
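In [ ]:
# why pos_label is hard-coded above: with an all-positive label array,
# np.unique returns a single element, so the original `pos_label = unique_y[1]`
# would raise an IndexError
unique_y = np.unique(np.array([1, 1, 1, 1]))
print(unique_y)       # [1]
print(len(unique_y))  # 1 -> unique_y[1] is out of bounds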
In [55]:
scores = np.array([
[0.5,0.4,0.1],
[0.0,0.8,0.2]
])
actual = np.array([
[1,0,0],
[1,0,1]
])
print(ranking_precision_score(actual[0], scores[0], k=2))  # top-2 labels [1, 0] -> 1/2 = 0.5
print(ranking_precision_score(actual[1], scores[1], k=2))  # top-2 labels [0, 1] -> 1/2 = 0.5
In [56]:
scores = np.array([
[0.1, 0.4, 0.35, 0.8]
])
actual = np.array([
[0,0,1,1]
])
print(ranking_precision_score(actual[0], scores[0], k=4))  # top-4 labels [1, 0, 1, 0] -> 2/4 = 0.5
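In [ ]:
# what the divisor change in ranking_precision_score means for the cell above
# (illustrative numbers taken from that example): only 2 of the 4 items are positive,
# so dividing by k=4 caps the score at 0.5, while the original min(n_pos, k) divisor gives 1.0
n_relevant, n_pos, k = 2, 2, 4
print(n_relevant / k)              # 0.5 (edited version)
print(n_relevant / min(n_pos, k))  # 1.0 (original version)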
In [ ]: