In [1]:
%%timeit
4+5


10000000 loops, best of 3: 24.6 ns per loop

Part 1

The loss function:
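For binary labels $y_i \in \{0, 1\}$ and predicted probabilities $p_i$, log loss is the negative mean log-likelihood of the labels under the predictions:

$$\text{log loss} = -\frac{1}{N}\sum_{i=1}^{N}\bigl[y_i \log p_i + (1 - y_i)\log(1 - p_i)\bigr]$$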


In [3]:
import numpy as np

def log_loss(actual, predicted):
    """
    Accepts two numpy arrays of the same length;
    inputs are raveled to 1-d if they are not already.
    """
    actual = np.ravel(actual)
    predicted = np.ravel(predicted)
    assert len(actual) == len(predicted), 'Error: actual and predicted must be the same size'
    return -(1/len(actual))*np.sum((1-actual)*np.log(1-predicted) + actual*np.log(predicted))
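
Note that this version returns an infinite loss as soon as a prediction sits at exactly 0 or 1 on the wrong side of the label, since np.log(0) is -inf. A minimal illustration with hypothetical inputs:

In [ ]:
# a maximally confident wrong prediction: the loss diverges
log_loss(np.array([1.0]), np.array([0.0]))  # inf, with a divide-by-zero RuntimeWarning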

In [38]:
def finite_log_loss(actual, predicted, epsilon=1e-15):
    """
    Accepts two numpy arrays of the same length;
    inputs are raveled to 1-d if they are not already.
    Predictions are clipped to [epsilon, 1 - epsilon] to avoid infinite loss.
    """
    actual = np.ravel(actual)
    predicted = np.ravel(predicted)
    predicted = np.maximum(predicted, epsilon)      # clip below
    predicted = np.minimum(1 - epsilon, predicted)  # clip above
    assert len(actual) == len(predicted), 'Error: actual and predicted must be the same size'
    return -(1/len(actual))*np.sum((1-actual)*np.log(1-predicted) + actual*np.log(predicted))
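
With clipping, the same pathological input produces a large but finite penalty of -log(epsilon), about 34.5 for the default epsilon of 1e-15:

In [ ]:
finite_log_loss(np.array([1.0]), np.array([0.0]))  # -log(1e-15) ≈ 34.54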

Part 2


In [4]:
import matplotlib.pyplot as plt
from scipy.stats import beta
%matplotlib inline
plt.style.use('bmh')


x = np.linspace(0, 1, 100)
ab = [(0.4, 0.6), (1, 0.3), (2.3, 2)]  # (a, b) shape parameters for each panel

fig, ax = plt.subplots(1, 3, sharey=True, figsize=(16, 8))
for i, (a, b) in enumerate(ab):
    ax[i].fill_between(x, 0, beta(a=a, b=b).pdf(x))
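
The panels illustrate how the shape parameters place the probability mass: Beta(0.4, 0.6) is U-shaped (both parameters below 1 push mass toward the endpoints), Beta(1, 0.3) piles mass near 1, and Beta(2.3, 2) is unimodal with its mode a bit above 0.5.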


Part 3


In [5]:
from scipy.stats import bernoulli
act = bernoulli(p=0.4).rvs(size=10000, random_state=0)
pred = beta(0.4, 0.6).rvs(size=10000, random_state=0)
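
Beta(0.4, 0.6) has mean a/(a+b) = 0.4, so the simulated probabilities are centered on the same rate as the Bernoulli labels. A quick sanity check:

In [ ]:
beta(0.4, 0.6).mean()  # a / (a + b) = 0.4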

In [6]:
log_loss(act, pred)


Out[6]:
1.4248157210179457
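
The loss is finite because every beta sample on this draw falls strictly inside (0, 1), so no log term diverges; finite_log_loss should agree here, since the 1e-15 clipping never triggers:

In [ ]:
finite_log_loss(act, pred)  # should match log_loss(act, pred) on this draw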
