The loss function: log loss (also known as binary cross-entropy).
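For binary labels $y_i \in \{0, 1\}$ and predicted probabilities $p_i$, log loss over $N$ observations is

$$\text{log loss} = -\frac{1}{N}\sum_{i=1}^{N}\left[(1 - y_i)\log(1 - p_i) + y_i\log(p_i)\right]$$

which is exactly what the implementation below computes.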
In [3]:
import numpy as np

def log_loss(actual, predicted):
    """
    Accepts two numpy arrays of the same length;
    ravels them to 1-d if they are not already.
    """
    actual = np.ravel(actual)
    predicted = np.ravel(predicted)
    assert len(actual) == len(predicted), 'Error, size of actual is not the same as size of predicted'
    return -(1/len(actual)) * np.sum((1-actual)*np.log(1-predicted) + actual*np.log(predicted))
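A quick sanity check (illustrative values, not part of the original notebook): confident correct predictions cost almost nothing, while confident wrong ones are heavily penalized.

In [ ]:
log_loss(np.array([1, 0]), np.array([0.99, 0.01]))  # ~0.01: confident and correct
log_loss(np.array([1, 0]), np.array([0.01, 0.99]))  # ~4.61: confident and wrong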
In [38]:
def finite_log_loss(actual, predicted, epsilon=1e-15):
    """
    Accepts two numpy arrays of the same length;
    ravels them to 1-d if they are not already.
    Clips predictions into [epsilon, 1-epsilon] to avoid infinite loss.
    """
    actual = np.ravel(actual)
    predicted = np.ravel(predicted)
    predicted = np.maximum(predicted, epsilon)    # pad below
    predicted = np.minimum(1-epsilon, predicted)  # pad above
    assert len(actual) == len(predicted), 'Error, size of actual is not the same as size of predicted'
    return -(1/len(actual)) * np.sum((1-actual)*np.log(1-predicted) + actual*np.log(predicted))
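The point of the clipping (an illustrative example, not from the original notebook): a prediction of exactly 0 or 1 makes `np.log` return `-inf`, so `log_loss` goes infinite, while `finite_log_loss` caps the penalty at `-np.log(epsilon)`.

In [ ]:
log_loss(np.array([1]), np.array([0.0]))         # inf (np.log(0) warns and returns -inf)
finite_log_loss(np.array([1]), np.array([0.0]))  # ~34.54 = -np.log(1e-15), large but finite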
In [4]:
import matplotlib.pyplot as plt
from scipy.stats import beta
%matplotlib inline
plt.style.use('bmh')

x = np.linspace(0, 1, 100)
ab = [(0.4, 0.6), (1, 0.3), (2.3, 2)]
fig, ax = plt.subplots(1, 3, sharey=True, figsize=(16, 8))
for i in range(len(ax)):
    ax[i].fill_between(x, 0, beta(a=ab[i][0], b=ab[i][1]).pdf(x))
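The three panels show Beta PDFs for the shape pairs above. The U-shaped Beta(0.4, 0.6) on the left, which piles mass near 0 and 1, is the one used below to simulate predicted probabilities.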
In [5]:
from scipy.stats import bernoulli
act = bernoulli(p=0.4).rvs(size=10000, random_state=0)   # simulated binary labels
pred = beta(0.4, 0.6).rvs(size=10000, random_state=0)    # simulated predicted probabilities in (0, 1)
In [6]:
log_loss(act, pred)
Out[6]:
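If any Beta draw rounds to exactly 0 or 1 in floating point, `log_loss` returns `inf`, so the clipped version is the safer call on this data (a natural follow-up, not in the original notebook):

In [ ]:
finite_log_loss(act, pred)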