Author(s): kozyr@google.com
Before we get started on data, we have to choose our project's performance metric and decide on the statistical testing criteria. We'll reuse the metric code we write here when we get to Step 6 (Training), and we'll use the testing criteria in Step 9 (Testing).
In [0]:
# Required libraries:
import numpy as np
import pandas as pd
import seaborn as sns
In [0]:
# Accuracy metric:
def get_accuracy(truth, predictions, threshold=0.5, roundoff=2):
  """
  Args:
    truth: can be Boolean (False, True), int (0, 1), or float (0, 1)
    predictions: number between 0 and 1, inclusive
    threshold: we convert predictions to 1s if they're at or above this value
    roundoff: report accuracy to how many decimal places?
  Returns:
    accuracy: number correct divided by total predictions
  """
  truth = np.ravel(truth).astype(bool)  # Accept Booleans, ints, or floats.
  predicted = np.ravel(predictions) >= threshold
  accuracy = float(np.mean(predicted == truth))
  return round(accuracy, roundoff)
In [0]:
# Try it out:
acc = get_accuracy(truth=[0, False, 1], predictions=[0.2, 0.7, 0.6])
print('Accuracy is ' + str(acc) + '.')
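If everything is wired up correctly, this should print Accuracy is 0.67: two of the three predictions (0.2 and 0.6) land on the right side of the 0.5 threshold.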
In [0]:
# Our methods will be using cross-entropy loss.
def get_loss(predictions, truth):
  """Mean cross-entropy between predictions in (0, 1) and binary truth labels."""
  predictions, truth = np.ravel(predictions), np.ravel(truth)
  return -np.mean(truth * np.log(predictions) + (1 - truth) * np.log(1 - predictions))
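A quick sanity check on the loss: cross-entropy should sit near zero when confident predictions match the labels and grow large when they're confidently wrong.
In [0]:
# Confident-and-right should score near 0; confident-and-wrong much higher:
print(get_loss(predictions=[0.99, 0.01], truth=[1, 0]))  # ~0.01
print(get_loss(predictions=[0.01, 0.99], truth=[1, 0]))  # ~4.61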
In [0]:
# Simulate some situations:
loss = []
acc = []
for i in range(1000):
  for n in [10, 100, 1000]:
    p = np.random.uniform(0.01, 0.99, (1, 1))
    y = np.random.binomial(1, p, (n, 1))
    x = np.random.uniform(0.01, 0.99, (n, 1))
    acc = np.append(acc, get_accuracy(truth=y, predictions=x, roundoff=6))
    loss = np.append(loss, get_loss(predictions=x, truth=y))
df = pd.DataFrame({'accuracy': acc, 'cross-entropy': loss})
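Before plotting, it's worth a quick numeric check on the relationship between the two metrics. We'd expect a negative correlation: in samples where the random predictions happen to line up with the labels, accuracy goes up and cross-entropy comes down.
In [0]:
# The two metrics should be negatively correlated:
print(df.corr())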
In [0]:
# Visualize with Seaborn
%matplotlib inline
sns.regplot(x="accuracy", y="cross-entropy", data=df)
In [0]:
# Testing setup:
SIGNIFICANCE_LEVEL = 0.05
TARGET_ACCURACY = 0.80
# Hypothesis test we'll use:
from statsmodels.stats.proportion import proportions_ztest
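For intuition, here's a minimal hand-rolled sketch of the same calculation (the helper name manual_proportion_ztest is ours, not part of the course code). It assumes the sample-proportion standard error, which matches proportions_ztest's default variance choice, and returns the one-sided p-value for the 'larger' alternative.
In [0]:
from scipy.stats import norm

def manual_proportion_ztest(x, n, p0):
  """One-sided p-value for H0: p <= p0 vs. H1: p > p0."""
  p_hat = float(x) / n
  se = np.sqrt(p_hat * (1 - p_hat) / n)  # Standard error from the sample proportion.
  z = (p_hat - p0) / se
  return 1 - norm.cdf(z)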
In [0]:
# Using standard notation for a one-sided test of one population proportion:
n = 100  # Example number of predictions
x = 95   # Example number of correct predictions
p_value = proportions_ztest(count=x, nobs=n, value=TARGET_ACCURACY, alternative='larger')[1]
if p_value < SIGNIFICANCE_LEVEL:
  print('Congratulations! Your model is good enough to build. It passes testing. Awesome!')
else:
  print('Too bad. Better luck next project. To try again, you need a pristine test dataset.')
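Note that passing depends on sample size as much as on the accuracy itself. With made-up counts for illustration, the same 90% observed accuracy that clears the 80% target at n = 100 should fail at n = 10, where the test can't rule out luck.
In [0]:
# Same observed accuracy, different strength of evidence:
for n_test, x_test in [(10, 9), (100, 90)]:
  pv = proportions_ztest(count=x_test, nobs=n_test,
                         value=TARGET_ACCURACY, alternative='larger')[1]
  print('n = {:>3}, observed accuracy = {:.2f}, p-value = {:.4f}'.format(
      n_test, float(x_test) / n_test, pv))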