In [1]:
import numpy as np
In [2]:
np.random.seed(42)
In [3]:
y_true = np.random.randint(0, 2, size=5)
y_true
Out[3]:
In [4]:
# For the sake of argument, let's say the classifier is not
# very smart and always predicts label 1.
# We can mock this behavior by hard-coding the
# prediction labels:
y_pred = np.ones(5, dtype=np.int32)
y_pred
Out[4]:
In [5]:
# A naive implementation of an accuracy metric might
# sum up all occurrences where the predicted class label
# matched the true class label:
np.sum(y_true == y_pred) / len(y_true)
Out[5]:
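In [ ]:
# A small aside, not in the original walkthrough: since True
# counts as 1 and False as 0, taking the mean of the boolean
# array gives the same accuracy in a single step:
np.mean(y_true == y_pred)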
In [6]:
from sklearn import metrics
In [7]:
metrics.accuracy_score(y_true, y_pred)
Out[7]:
In [11]:
# We have a true positive where the true label is a 1
# and we also predicted a 1
truly_a_positive = (y_true == 1)
In [12]:
predicted_a_positive = (y_pred == 1)
In [14]:
true_positive = np.sum(predicted_a_positive * truly_a_positive)
In [15]:
# Similarly, a false positive is where we predicted a 1
# but the ground truth was really a 0
false_positive = np.sum((y_pred == 1) * (y_true == 0))
false_positive
Out[15]:
In [17]:
# Our not-so-smart classifier never predicted 0
# so (y_pred == 0) should never be true
false_negative = np.sum((y_pred == 0) * (y_true == 1))
false_negative
Out[17]:
In [18]:
true_negative = np.sum((y_pred == 0) * (y_true == 0))
true_negative
Out[18]:
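In [ ]:
# As a cross-check (an aside): sklearn can compute all four counts
# at once. For binary labels, metrics.confusion_matrix returns
# [[TN, FP], [FN, TP]], which should match the values above:
metrics.confusion_matrix(y_true, y_pred)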
In [19]:
# Accuracy should be the number of true positives
# plus the number of true negatives (that is, everything
# we got right) divided by the total number
# of data points
accuracy = (true_positive + true_negative) / len(y_true)
accuracy
Out[19]:
In [20]:
# Precision is then given as the number of true positives
# divided by the number of all positive predictions
# (true positives plus false positives)
precision = true_positive / (true_positive + false_positive)
precision
Out[20]:
In [21]:
metrics.precision_score(y_true, y_pred)
Out[21]:
In [22]:
# Finally, recall is given as the fraction of all positives
# that we correctly classified as positives
recall = true_positive / (true_positive + false_negative)
recall
Out[22]:
In [23]:
metrics.recall_score(y_true, y_pred)
Out[23]:
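In [ ]:
# An aside, not part of the original walkthrough: precision and
# recall are commonly combined into the F1 score, their harmonic
# mean, which sklearn also provides:
f1 = 2 * precision * recall / (precision + recall)
f1, metrics.f1_score(y_true, y_pred)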
In [24]:
# Let's create another mock-up dataset
x = np.linspace(0, 10, 100)
In [25]:
# Adding noise
y_true = np.sin(x) + np.random.rand(x.size) - 0.5
In [26]:
# The predicted y values are given as follows:
y_pred = np.sin(x)
In [28]:
import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline
In [29]:
plt.plot(x, y_pred, linewidth=4, label='model')
plt.plot(x, y_true, 'o', label='data')
plt.xlabel('x')
plt.ylabel('y')
plt.legend(loc='lower left')
Out[29]:
In [30]:
mse = np.mean((y_true - y_pred) ** 2)
mse
Out[30]:
In [31]:
metrics.mean_squared_error(y_true, y_pred)
Out[31]:
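In [ ]:
# A brief aside: the MSE is in squared units of y. Taking the
# square root gives the root-mean-square error (RMSE), which is
# in the same units as the data and often easier to interpret:
np.sqrt(mse)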
In [32]:
# Fraction of variance unexplained
fvu = np.var(y_true - y_pred) / np.var(y_true)
fvu
Out[32]:
In [33]:
# Fraction of variance explained
fve = 1.0 - fvu
fve
Out[33]:
In [34]:
metrics.explained_variance_score(y_true, y_pred)
Out[34]:
In [35]:
r2 = 1.0 - mse / np.var(y_true)
r2
Out[35]:
In [36]:
metrics.r2_score(y_true, y_pred)
Out[36]:
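In [ ]:
# A sketch of how the two scores relate (an aside): R^2 and the
# fraction of variance explained differ only when the residuals
# have a nonzero mean, since mse = var(residuals) + mean(residuals)^2:
residuals = y_true - y_pred
np.allclose(r2, fve - np.mean(residuals) ** 2 / np.var(y_true))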
In [37]:
metrics.r2_score(y_true, np.mean(y_true) * np.ones_like(y_true))
Out[37]:
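In [ ]:
# Predicting the mean scores exactly 0.0, as shown above; R^2 has
# no lower bound, so a model that does worse than the mean goes
# negative. For instance, a deliberately bad, sign-flipped model:
metrics.r2_score(y_true, -np.sin(x))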