In [1]:
import matplotlib.pyplot as plt
import tmetrics
import numpy as np
import pandas as pd
import theano
import theano.tensor as T
%matplotlib inline
%config InlineBackend.print_figure_kwargs={'bbox_inches':None}
In [2]:
def plot_loss_function(fn, title='', **kwargs):
    """Plot a tmetrics loss over a sweep of predicted probabilities, for y=1 and y=0."""
    yt = T.ivector('y_true')
    yp = T.fvector('y_predicted')
    true = np.ones((1000,)).astype('int32')
    false = np.zeros((1000,)).astype('int32')
    predicted = np.linspace(1e-6, 1-1e-6, 1000).astype('float32')
    out = fn(yt, yp, **kwargs)
    f = theano.function([yt, yp], out)
    true_losses = f(true, predicted)
    false_losses = f(false, predicted)
    plt.figure(figsize=(12, 8))
    plt.plot(predicted, true_losses, label='ground truth == 1')
    plt.plot(predicted, false_losses, label='ground truth == 0')
    plt.legend(loc='best')
    plt.xlabel('predicted probability')
    plt.ylabel('loss')
    plt.title(title)
    plt.xlim(-.05, abs(predicted.max() * 1.05))
    max_loss = max(true_losses.max(), false_losses.max())
    plt.ylim(max_loss - (max_loss * 1.05), max_loss * 1.05)
    plt.grid(True)
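Before trusting the compiled graph, the binary crossentropy curves can be reproduced with plain NumPy (a minimal sketch, independent of tmetrics), since the loss is -(y*log(p) + (1-y)*log(1-p)):
In [ ]:
p = np.linspace(1e-6, 1-1e-6, 1000)
plt.plot(p, -np.log(p), label='ground truth == 1')    # -log(p) term
plt.plot(p, -np.log(1-p), label='ground truth == 0')  # -log(1-p) term
plt.legend(loc='best'); plt.grid(True)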
In [4]:
plot_loss_function(tmetrics.classification.binary_crossentropy, 'binary crossentropy')
In [5]:
plot_loss_function(tmetrics.classification.binary_hinge_loss, 'binary hinge loss', delta=1)
In [3]:
plot_loss_function(tmetrics.classification.squared_error, 'squared error')
In [81]:
n = T.ivector('n')
m = T.ivector('m')
cubed_error = (n-m) ** 3
In [84]:
cubed_error = lambda n, m: abs(n-m) ** 3
In [85]:
plot_loss_function(cubed_error, 'cubed error')
In [86]:
quadratic_error = lambda n, m: abs(n-m) ** 4  # despite the name, a quartic (power-4) penalty
In [87]:
plot_loss_function(quadratic_error, 'quadratic error')
In [90]:
pow7_error = lambda n, m: abs(n-m) ** 7
In [91]:
plot_loss_function(pow7_error, 'pow7 error')
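A quick side-by-side of the power penalties, sketched in plain NumPy rather than through the compiled graph; higher powers flatten near a correct prediction and steepen near a wrong one:
In [ ]:
p = np.linspace(1e-6, 1-1e-6, 1000)
for k in (2, 3, 4, 7):
    plt.plot(p, np.abs(1 - p) ** k, label='abs(1-p) ** %d' % k)  # ground truth == 1 case
plt.legend(loc='best'); plt.grid(True)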
In [87]:
from scipy.spatial.distance import hamming
In [88]:
b.shape
Out[88]:
In [97]:
b = np.concatenate([b.reshape(1, b.shape[0]) for _ in xrange(10)])
b.shape
In [93]:
hamming?
In [103]:
hamming(b, p)
In [109]:
b = np.random.binomial(n=1, p=.5, size=430)
p = np.concatenate([np.random.binomial(n=1, p=abs(b-x), size=(1, b.shape[0])) for x in np.linspace(.001, .999, 1000)], axis=0)
b = np.concatenate([b.reshape(1, b.shape[0]) for _ in xrange(1000)])
p.shape, b.shape
Out[109]:
In [112]:
plt.plot(np.linspace(.001, .999, 1000), (b != p).mean(axis=-1))
Out[112]:
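(b != p).mean(axis=-1) is the per-row hamming distance; a one-line check against scipy's hamming, which returns the proportion of disagreeing components:
In [ ]:
assert np.isclose(hamming(b[0], p[0]), (b[0] != p[0]).mean())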
In [122]:
jaccard = ((b != p) & ((p != 0) | (b != 0))).sum(axis=1) / np.double(((p != 0) | (b != 0)).sum(axis=1))
In [123]:
plt.plot(jaccard)
Out[123]:
In [127]:
def jaccard(b, p):
    return ((b != p) & ((p != 0) | (b != 0))).sum(axis=-1) / np.double(((p != 0) | (b != 0)).sum(axis=-1))
In [128]:
jaccard(b[0], p[0])
Out[128]:
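This is the same Jaccard-Needham dissimilarity scipy computes for binary vectors (disagreements over positions where either vector is nonzero); a quick agreement check:
In [ ]:
from scipy.spatial.distance import jaccard as scipy_jaccard
assert np.isclose(jaccard(b[0], p[0]), scipy_jaccard(b[0], p[0]))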
In [132]:
np.asarray([([1] * x) + ([0] * (1000-x)) for x in range(1000)])
Out[132]:
In [66]:
def plot_loss_function_2D(fn, title='', **kwargs):
    # for examples that need a matrix of 0/1 predictions
    n = 1000
    yt = T.fmatrix('y_true')
    yp = T.fmatrix('y_predicted')
    out = fn(yt, yp, **kwargs)
    f = theano.function([yt, yp], out, allow_input_downcast=True)
    true = np.asarray([np.random.binomial(n=1, p=x, size=n) for x in np.linspace(1e-6, 1-1e-6, n)])
    false = np.asarray([np.random.binomial(n=1, p=x, size=n) for x in np.linspace(1-1e-6, 1e-6, n)])
    predicted = np.asarray([np.random.binomial(n=1, p=x, size=n) for x in np.linspace(1e-6, 1-1e-6, n)])
    true_losses = f(true, predicted)
    false_losses = f(false, predicted)
    plt.figure(figsize=(12, 8))
    plt.scatter(predicted.mean(axis=1), true_losses, alpha=.2)
    plt.scatter(predicted.mean(axis=1), false_losses, alpha=.2)
    plt.xlabel('(predictions == 1).mean(axis=1)')
    plt.ylabel('loss')
    plt.title(title)
    plt.xlim(-.05, abs(predicted.max() * 1.05))
    plt.grid(True)
In [77]:
def plot_loss_function_2D(fn, title='', **kwargs):
    # for examples that need a matrix of 0/1 predictions
    # note: builds three 10000x10000 arrays, which needs several GB of memory
    n = 10000
    yt = T.fmatrix('y_true')
    yp = T.fmatrix('y_predicted')
    out = fn(yt, yp, **kwargs)
    f = theano.function([yt, yp], out, allow_input_downcast=True)
    true = np.asarray([np.random.binomial(n=1, p=.99, size=n) for _ in xrange(n)])
    false = np.asarray([np.random.binomial(n=1, p=.01, size=n) for _ in xrange(n)])
    predicted = np.asarray([np.random.binomial(n=1, p=x, size=n) for x in np.linspace(1e-6, 1-1e-6, n)])
    true_losses = f(true, predicted)
    false_losses = f(false, predicted)
    plt.figure(figsize=(12, 8))
    plt.scatter(predicted.mean(axis=1), true_losses, alpha=.2)
    plt.scatter(predicted.mean(axis=1), false_losses, alpha=.2)
    plt.xlabel('(predictions == 1).mean(axis=1)')
    plt.ylabel('loss')
    plt.title(title)
    plt.xlim(-.05, abs(predicted.max() * 1.05))
    plt.grid(True)
In [78]:
plot_loss_function_2D(tmetrics.classification.hamming_loss, 'hamming_loss')
In [79]:
plot_loss_function_2D(tmetrics.classification.jaccard_similarity, 'jaccard')
In [80]:
plot_loss_function_2D(tmetrics.classification.kulsinski_similarity, 'kulsinski')
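A rough analytic cross-check of the hamming plot above, assuming the same sampling scheme: with truths ~ Bernoulli(.99) (or .01) and predictions ~ Bernoulli(x), the expected per-row hamming loss is linear in x:
In [ ]:
x = np.linspace(1e-6, 1-1e-6, 100)
plt.plot(x, .99*(1-x) + .01*x, label='truth ~ Bernoulli(.99)')
plt.plot(x, .01*(1-x) + .99*x, label='truth ~ Bernoulli(.01)')
plt.xlabel('prediction probability x'); plt.ylabel('expected hamming loss')
plt.legend(loc='best'); plt.grid(True)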
In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import jaccard, kulsinski
n = 1000
true = np.ones((n, n)).astype('float32')
false = np.zeros((n, n)).astype('float32')
predicted = np.asarray([([0] * x) + ([1] * (n-x)) for x in range(n)])
plt.plot([kulsinski(true[i].astype(np.double), np.random.binomial(n=1, p=p, size=n).astype(np.double)) for i, p in enumerate(np.linspace(.00001, .99999, n))])
In [ ]:
jaccard(np.random.random(10), np.random.binomial(n=1, p=.5, size=10))
In [5]:
def plot_python_probabalistic_loss(fn, title='', upper_bound=.9, lower_bound=.1, n_iter=50, **kwargs):
    # for losses that take a matrix of 0/1 ground truths and a matrix of probabilistic predictions
    # normal_probabilities is defined in a later cell (In [7])
    n = 1000
    y = np.random.binomial(n=1, p=.5, size=(n, n))
    plt.figure(figsize=(12, 8))
    diffs = []
    losses = []
    for diff in np.linspace(lower_bound, upper_bound, n_iter):
        p = abs(y-diff)
        p = normal_probabilities(p)
        loss = fn(y, p, **kwargs)
        diffs.append(diff)
        losses.append(loss)
    plt.plot(diffs, losses)
    plt.xlabel('x for abs(y_true-x)')
    plt.ylabel('loss')
    plt.title(title)
    plt.grid(True)
In [3]:
from scipy.spatial.distance import *
In [8]:
plot_python_probabalistic_loss(canberra, 'canberra')
In [10]:
plot_python_probabalistic_loss(chebyshev, 'chebyshev')
In [11]:
plot_python_probabalistic_loss(cityblock, 'cityblock')
In [33]:
plot_python_probabalistic_loss(huber, 'huber')
In [32]:
from scipy.special import *
from scipy.spatial.distance import *
huber
Out[32]:
In [29]:
def auc(fpr, tpr):
    return np.trapz(tpr, fpr)

def mean_roc_score(y_true, y_predicted):
    fpr, tpr, _ = tmetrics.classification.last_axis_roc_curve(y_true, y_predicted)
    areas = auc(fpr, tpr)
    return 1-areas.mean()
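np.trapz(tpr, fpr) is trapezoid-rule integration of the ROC curve; two hand-built curves (hypothetical, not tmetrics output) pin down the endpoints:
In [ ]:
auc(np.array([0., 0., 1.]), np.array([0., 1., 1.]))  # perfect classifier -> 1.0
auc(np.array([0., 1.]), np.array([0., 1.]))          # chance diagonal -> 0.5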
In [65]:
plot_python_probabalistic_loss(mean_roc_score, 'mean roc_score')
In [10]:
plot_python_probabalistic_loss(f, 'binary_hinge delta=2')
In [118]:
plot_python_probabalistic_loss(lambda y, p: roc_loss(y, p).mean(), 'mean roc, lower bound .4, upper bound .6', lower_bound=.4, upper_bound=.6, n_iter=100)
In [31]:
plot_python_probabalistic_loss(lambda x, y: np.mean(f_xentropy(x, y)), 'binary xentropy')
In [10]:
a = np.random.random((3, 3))
In [56]:
y = np.random.binomial(n=1, p=.5, size=(100, 100))
p = (y + np.random.binomial(n=1, p=.5, size=y.shape)).astype('float32') / 2
y.shape, p.shape
Out[56]:
In [16]:
fpr, tpr, _ = tmetrics.classification.last_axis_roc_curve(y, p)
auc(fpr, tpr)
In [108]:
plot_python_probabalistic_loss(xentropy_roc_loss, 'xentropy roc loss')
In [114]:
plot_python_probabalistic_loss(f_sqe, 'squared_error')
In [32]:
plot_python_probabalistic_loss(lambda y, p: roc_loss(y, p).mean(), 'mean roc', lower_bound=.3, upper_bound=.6)
In [21]:
plot_python_probabalistic_loss(precision_recall, 'precision_recall', lower_bound=.1, upper_bound=.9)
In [81]:
import lasagne
yt = T.imatrix('yt')
yp = T.fmatrix('yp')
binary_hinge = (tmetrics.classification.binary_hinge_loss(yt, yp, delta=0)).mean()
m_hinge = (tmetrics.classification.multiclass_hinge_loss(yt, yp)).mean()
f_hinge = theano.function([yt, yp], m_hinge, allow_input_downcast=True)
In [13]:
yt = T.imatrix('yt')
yp = T.fmatrix('yp')
pre, rec, _ = tmetrics.classification.precision_recall_curves(yt, yp)
pr_scores = tmetrics.classification.auc(rec, pre)
pr_mean_score = 1-pr_scores.mean()
precision_recall = theano.function([yt, yp], pr_mean_score, allow_input_downcast=True)
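A quick smoke test of the compiled scorer, reusing the y and p matrices from In [56] above (the exact value depends on the random draw):
In [ ]:
# 1 - mean area under the per-row precision-recall curves; half-randomized p should score well above 0
precision_recall(y, p)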
In [23]:
binary_xentropy = tmetrics.classification.binary_crossentropy(yt, yp).mean(axis=1)
f_xentropy = theano.function([yt, yp], binary_xentropy, allow_input_downcast=True)
In [113]:
squared_error = tmetrics.classification.squared_error(yt, yp)
f_sqe = theano.function([yt, yp], squared_error.mean(), allow_input_downcast=True)
In [15]:
fpr, tpr, _ = tmetrics.classification.last_axis_roc_curve(yt, yp)
mean_roc = tmetrics.classification.auc(fpr, tpr).mean()
f_roc = theano.function([yt, yp], mean_roc, allow_input_downcast=True)
In [48]:
y.shape, p.shape
Out[48]:
In [61]:
np.nanmean(f(y.astype('float32'), p.astype('float32')))
Out[61]:
In [54]:
y
Out[54]:
In [70]:
p
Out[70]:
In [63]:
nanok = lambda t, p: np.nanmean(f(t, p))
In [79]:
from scipy.special import expit as sigmoid, logit
def random_between_zero_and_one(p=.5, scale=.1):
    # sample in logit space so the draw centers near p, then squash back into (0, 1)
    a = np.random.normal(logit(p), scale)
    return sigmoid(a)
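A quick check that the sampler stays inside (0, 1) with its mean near p:
In [ ]:
samples = np.array([random_between_zero_and_one(p=.7) for _ in xrange(1000)])
samples.min(), samples.max(), samples.mean()  # bounded in (0, 1), mean near .7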
In [17]:
def mean_roc(y_true, y_predicted):
    fpr, tpr, _ = tmetrics.classification.last_axis_roc_curve(y_true, y_predicted)
    return np.trapz(tpr, fpr)

def roc_loss(y_true, y_predicted):
    return 1-mean_roc(y_true, y_predicted)

def scale(a):
    return (a-a.min())/(a.max()-a.min())

def xentropy_roc_loss(y_true, y_predicted):
    losses = np.concatenate([roc_loss(y_true, y_predicted)[:, np.newaxis], scale(f_xentropy(y_true, y_predicted))[:, np.newaxis]], axis=1)
    return losses.mean(axis=1).mean()
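scale is a plain min-max rescale onto [0, 1]:
In [ ]:
scale(np.array([2., 4., 6.]))  # -> array([ 0. ,  0.5,  1. ])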
In [75]:
np.linspace(.001, .999, 9).reshape((3,3))
Out[75]:
In [7]:
def normal_probabilities(p):
    # add gaussian noise, then clip into the open interval (tol, 1-tol)
    tol = 1e-6
    return np.minimum(np.maximum(np.random.normal(p, .1), tol), 1-tol).astype('float32')
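The clipping keeps every noisy probability inside the open interval (tol, 1-tol):
In [ ]:
q = normal_probabilities(np.linspace(0, 1, 10000))
q.min() >= 1e-6, q.max() <= 1 - 1e-6  # -> (True, True)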
In [84]:
np.set_printoptions(suppress=True)
In [85]:
n = 100
y = np.random.binomial(n=1, p=.5, size=(n, n))
plt.figure(figsize=(12, 8))
for diff in np.linspace(1e-6, 1-1e-6, n)[:1]:  # only the first diff, to inspect one noise level
    p = abs(y-diff)
    p = normal_probabilities(p)
    plt.scatter(y, p)
In [91]:
((p < .001) | (p > .999)).sum() / float(p.size)
Out[91]:
In [87]:
p.shape
Out[87]:
In [88]:
100 * 100
Out[88]:
In [96]:
(np.random.random((5, 5)) + np.random.random((5, 5))) / 2
Out[96]: