In [1]:
import numpy as np

In [2]:
np.random.seed(42)

In [3]:
y_true = np.random.randint(0,2, size=5)
y_true


Out[3]:
array([0, 1, 0, 0, 0])

In [4]:
# For the sake of argument, let's say the classifier is not
# very smart, and always predicts label 1.
# We can mock this behavior by hard-coding the 
# prediction labels:

y_pred = np.ones(5, dtype=np.int32)
y_pred


Out[4]:
array([1, 1, 1, 1, 1])

In [5]:
# A naive implementation of an accuracy metric might
# sum up all occurrences where the predicted class label
# matched the true class label:

np.sum(y_true == y_pred) / len(y_true)


Out[5]:
0.2
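
In [ ]:
# (Sketch) Since the boolean array (y_true == y_pred) contains
# True/False values that count as 1/0, taking its mean is an
# equivalent one-liner; it should also evaluate to 0.2:
np.mean(y_true == y_pred)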

In [6]:
from sklearn import metrics

In [7]:
metrics.accuracy_score(y_true, y_pred)


Out[7]:
0.2

In [11]:
# We have a true positive where the true label is a 1
# and we also predicted a 1
truly_a_positive = (y_true == 1)

In [12]:
predicted_a_positive = (y_pred == 1)

In [14]:
true_positive = np.sum(predicted_a_positive *\
                       truly_a_positive)
true_positive


Out[14]:
1

In [15]:
# Similarly, a false positive is where we predicted a 1
# but the ground truth was really a 0
false_positive = np.sum((y_pred == 1) *\
                       (y_true == 0))
false_positive


Out[15]:
4

In [17]:
# Our not-so-smart classifier never predicted 0
# so (y_pred == 0) should never be true
false_negative = np.sum((y_pred == 0) *\
                       (y_true == 1))
false_negative


Out[17]:
0

In [18]:
# And a true negative is where we predicted a 0
# and the true label is also a 0
true_negative = np.sum((y_pred == 0) *\
                      (y_true == 0))
true_negative


Out[18]:
0
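
In [ ]:
# (Sketch) scikit-learn can collect these four counts in a single
# call; metrics.confusion_matrix returns them arranged as
# [[TN, FP], [FN, TP]], so for our data it should read
# [[0, 4], [0, 1]]:
metrics.confusion_matrix(y_true, y_pred)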

In [19]:
# Accuracy should be the number of true positives
# plus the number of true negatives (that is, everything
# we got right) divided by the total number
# of data points
accuracy = (true_positive + true_negative) / len(y_true)
accuracy


Out[19]:
0.2

In [20]:
# Precision is then given as the number of true positives
# divided by the number of all positive predictions (that is,
# true positives plus false positives)
precision = true_positive / (true_positive + false_positive)
precision


Out[20]:
0.2

In [21]:
metrics.precision_score(y_true, y_pred)


Out[21]:
0.2

In [22]:
# Finally, recall is given as the fraction of all positives
# that we correctly classified as positives
recall = true_positive / (true_positive + false_negative)
recall


Out[22]:
1.0

In [23]:
metrics.recall_score(y_true, y_pred)


Out[23]:
1.0

In [24]:
# Let's create another mock-up dataset
x = np.linspace(0, 10, 100)

In [25]:
# Adding noise
y_true = np.sin(x) + np.random.rand(x.size) - 0.5

In [26]:
# The predicted *y* values are given as follows
y_pred = np.sin(x)

In [28]:
import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline

In [29]:
plt.plot(x, y_pred, linewidth=4, label='model')
plt.plot(x, y_true, 'o', label='data')
plt.xlabel('x')
plt.ylabel('y')
plt.legend(loc='lower left')


Out[29]:
<matplotlib.legend.Legend at 0x24c82511cf8>

In [30]:
mse = np.mean((y_true - y_pred) ** 2)
mse


Out[30]:
0.08531839480842378

In [31]:
metrics.mean_squared_error(y_true, y_pred)


Out[31]:
0.08531839480842378
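
In [ ]:
# (Sketch) The MSE is expressed in squared units of y; taking the
# square root gives the root-mean-squared error (RMSE), which is
# on the same scale as the data and often easier to interpret:
np.sqrt(mse)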

In [32]:
# Fraction of variance unexplained
fvu = np.var(y_true - y_pred) / np.var(y_true)
fvu


Out[32]:
0.163970326266295

In [33]:
# Fraction of variance explained
fve = 1.0 - fvu
fve


Out[33]:
0.836029673733705

In [34]:
metrics.explained_variance_score(y_true, y_pred)


Out[34]:
0.836029673733705

In [35]:
r2 = 1.0 - mse / np.var(y_true)
r2


Out[35]:
0.8358169419264746

In [36]:
metrics.r2_score(y_true, y_pred)


Out[36]:
0.8358169419264746
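
In [ ]:
# (Sketch) The small gap between the explained-variance score and
# R^2 comes from the mean of the residuals: with np.var (ddof=0),
# mse equals np.var(residual) + np.mean(residual) ** 2, so the two
# scores coincide only when the residuals average to zero.
residual = y_true - y_pred
np.var(residual) + np.mean(residual) ** 2, mse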

In [37]:
metrics.r2_score(y_true, np.mean(y_true) * np.ones_like(y_true))


Out[37]:
0.0
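
In [ ]:
# (Sketch) R^2 can even become negative when a model does worse
# than always predicting the mean; a deliberately wrong predictor
# such as -y_true should produce a negative score:
metrics.r2_score(y_true, -y_true)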

In [ ]: