Testing Notebook

This notebook is for visual tests of the software that is pretty informal. Some of the tests in this notebook have been solidified in their respective python scripts to be run by pytest.



In [3]:

    
%load_ext autoreload
%autoreload 2



In [2]:

    
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set_context('paper',font_scale=1.4)

Utilities



In [1]:

    
import sys
sys.path.insert(0, '/Users/nickc/Documents/skedm')



In [70]:

    
import skedm.utilities as ut

weighted_mean(indices, distances, ytrain )



In [71]:

    
distances = np.array([ [.1,.2,.3,.4],
                 [.1,.2,.3,.4]])
X = np.array([ [1,2,3,4],
                 [1,2,3,4],
                 ])

#by hand
arr = np.array([.1, .2, .3, .4])
num = np.array([1/.1, 1/.2, 1/.3, 1/.4])
denom = np.sum(num)
W = num/denom

mean = np.sum(W*np.array([1,2,3,4]))

gg = ut.weighted_mean(X,distances)

np.testing.assert_array_almost_equal(np.array([mean,mean]),gg,decimal=4)

mi_digitize(X)



In [15]:

    
x = np.around(np.linspace(0,5,10),1)
x









    Out[15]:





array([ 0. ,  0.6,  1.1,  1.7,  2.2,  2.8,  3.3,  3.9,  4.4,  5. ])



In [13]:

    
ut.mi_digitize(x)









    Out[13]:





array([1, 1, 1, 2, 2, 3, 3, 4, 4, 4])

quick_mode_axis1(X)



In [24]:

    
X = np.array([[2, 1, 3, 1, 3, 3],
       [2, 0, 3, 1, 2, 2],
       [3, 1, 1, 0, 2, 2],
       [3, 1, 2, 3, 1, 1]],dtype=int)



In [25]:

    
ut.quick_mode_axis1(X)









    Out[25]:





array([ 3.,  2.,  1.,  1.])

keep_diversity(X)



In [26]:

    
X = np.array([[2, 1, 3, 1, 3, 3],
            [2, 2, 2, 2, 2, 2],
            [3, 1, 1, 0, 2, 2],
            [1, 1, 1, 1, 1, 1]],dtype=int)

ut.keep_diversity(X)









    Out[26]:





array([ True, False,  True, False], dtype=bool)

weighted_mode(a, w)



In [27]:

    
x = [4, 1, 4, 2, 4, 2]
weights = [1, 3, 0.5, 1.5, 1, 2]



In [28]:

    
ut.weighted_mode(x, weights)









    Out[28]:





(array([ 2.]), array([ 3.5]))



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:

edm



In [35]:

    
import skedm as edm



In [37]:

    
X = np.array([
         [ 0.3,  0.6],
         [ 0.2,  1.4],
         [ 1.2,  0.2]])



In [38]:

    
y = X.sum(axis=1,keepdims=True)



In [39]:

    
print(X)
print(y)









    



[[ 0.3  0.6]
 [ 0.2  1.4]
 [ 1.2  0.2]]
[[ 0.9]
 [ 1.6]
 [ 1.4]]

Regression Uniform weighting



In [40]:

    
R = edm.Regression()



In [41]:

    
R.fit(X,y)

dist_calc(X)



In [42]:

    
R.dist_calc(X)



In [43]:

    
#X[1] to X[2] and X[3]
d12 = np.sqrt( (0.3 - 0.2)**2 + (0.6 - 1.4)**2 )
d13 = np.sqrt( (0.3 - 1.2)**2 + (0.6 - 0.2)**2 )

#X[2] to X[1] and X[3]
d21 = np.sqrt( (0.2 - 0.3)**2 + (1.4 - 0.6)**2 )
d23 = np.sqrt( (0.2 - 1.2)**2 + (1.4 - 0.2)**2 )

#X[2] to X[1] and X[3]
d21 = np.sqrt( (0.2 - 0.3)**2 + (1.4 - 0.6)**2 )
d23 = np.sqrt( (0.2 - 1.2)**2 + (1.4 - 0.2)**2 )

#X[3] to X[1] and X[2]
d31 = np.sqrt( (1.2 - 0.3)**2 + (0.2 - 0.6)**2 )
d32= np.sqrt( (1.2 - 0.2)**2 + (0.2 - 1.4)**2 )


print('X1:',d12, d13)
print('X2:',d21, d23)
print('X3:',d31, d32)









    



X1: 0.80622577483 0.98488578018
X2: 0.80622577483 1.56204993518
X3: 0.98488578018 1.56204993518



In [44]:

    
R.dist









    Out[44]:





array([[ 0.        ,  0.80622577,  0.98488578],
       [ 0.        ,  0.80622577,  1.56204994],
       [ 0.        ,  0.98488578,  1.56204994]])



In [45]:

    
R.ind









    Out[45]:





array([[0, 1, 2],
       [1, 0, 2],
       [2, 0, 1]])

predict(Xtest,nn_list)



In [51]:

    
X









    Out[51]:





array([[ 0.3,  0.6],
       [ 0.2,  1.4],
       [ 1.2,  0.2]])



In [46]:

    
y









    Out[46]:





array([[ 0.9],
       [ 1.6],
       [ 1.4]])



In [47]:

    
p = R.predict(X,[1,2,3])

p[0] will be itself



In [49]:

    
p[0]









    Out[49]:





array([[ 0.9],
       [ 1.6],
       [ 1.4]])

p[1] will be an average between itself and its second neighbor



In [52]:

    
p[1]









    Out[52]:





array([[ 1.25],
       [ 1.25],
       [ 1.15]])



In [55]:

    
#first sample
p1_test = np.empty((3,1))
p1_test[0] = (0.9 + 1.6)/2
p1_test[1] = (1.6 + 0.9)/2
p1_test[2] = (1.4 + 0.9)/2
p1_test









    Out[55]:





array([[ 1.25],
       [ 1.25],
       [ 1.15]])

p[2] will be an average of all



In [57]:

    
p[2]









    Out[57]:





array([[ 1.3],
       [ 1.3],
       [ 1.3]])



In [61]:

    
np.array([np.mean(y)]*3).reshape(3,1)









    Out[61]:





array([[ 1.3],
       [ 1.3],
       [ 1.3]])

Regression with distance weighting



In [115]:

    
Xtr = np.array([
         [ 0.3,  0.6],
         [ 0.2,  1.4],
         [ 1.2,  0.2]])
Xte = np.array([
         [ 0.7,  1.6],
         [ 1.3,  0.4],
         ])

ytr = Xtr.sum(axis=1,keepdims=True)
yte = Xte.sum(axis=1,keepdims=True)



In [143]:

    
yte









    Out[143]:





array([[ 2.3],
       [ 1.7]])



In [122]:

    
R = edm.Regression(weights='distance')



In [123]:

    
R.fit(Xtr,ytr)



In [124]:

    
p = R.predict(Xte,[1,2,3])



In [125]:

    
print(R.ind)
print(R.dist)









    



[[1 0 2]
 [2 0 1]]
[[ 0.53851648  1.07703296  1.48660687]
 [ 0.2236068   1.0198039   1.48660687]]



In [126]:

    
print('ytr',ytr)
print('yte',yte)

print('Xtr',Xtr)
print('Xte',Xte)









    



ytr [[ 0.9]
 [ 1.6]
 [ 1.4]]
yte [[ 2.3]
 [ 1.7]]
Xtr [[ 0.3  0.6]
 [ 0.2  1.4]
 [ 1.2  0.2]]
Xte [[ 0.7  1.6]
 [ 1.3  0.4]]

p[0] should return itself



In [127]:

    
p[0]









    Out[127]:





array([[ 1.6],
       [ 1.4]])

p[1] should return a weighted average of the first two



In [129]:

    
R.dist









    Out[129]:





array([[ 0.53851648,  1.07703296,  1.48660687],
       [ 0.2236068 ,  1.0198039 ,  1.48660687]])



In [130]:

    
R.ind









    Out[130]:





array([[1, 0, 2],
       [2, 0, 1]])



In [132]:

    
ytr









    Out[132]:





array([[ 0.9],
       [ 1.6],
       [ 1.4]])



In [137]:

    
W = 1/R.dist[:,0:2]
W/= np.sum(W,axis=1,keepdims=True)



In [138]:

    
W









    Out[138]:





array([[ 0.66666667,  0.33333333],
       [ 0.82016658,  0.17983342]])



In [146]:

    
np.sum(W*ytr[R.ind[:,0:2],0],axis=1).reshape(-1,1)









    Out[146]:





array([[ 1.36666667],
       [ 1.31008329]])



In [128]:

    
p[1]









    Out[128]:





array([[ 1.36666522],
       [ 1.31008072]])

p[2] should return a weighted average of them all



In [216]:

    
p[2]









    Out[216]:





array([[ 3.],
       [ 3.]])

Test uniform classification



In [230]:

    
Xtr = np.array([
         [ 3, 6],
         [ 2, 1],
         [ 1, 3]])
Xte = np.array([
         [ 3,  5],
         [ 2,  2],
         ])
ytr = np.array([[9],
               [3],
               [4]])



In [231]:

    
R = edm.Classification()



In [232]:

    
R.fit(Xtr,ytr)



In [233]:

    
p = R.predict(Xte,[1,2,3])



In [234]:

    
R.dist









    Out[234]:





array([[ 0.5,  1. ,  1. ],
       [ 0.5,  1. ,  1. ]])



In [235]:

    
R.ind









    Out[235]:





array([[0, 1, 2],
       [1, 0, 2]])

p[0] should just be the nearest neighbor



In [236]:

    
np.array([[9.],
         [3.]])
p[0]









    Out[236]:





array([[ 9.],
       [ 3.]])

p[1] should be the mode of the two, but since there is only two, it should actually still be the same thing as the first one.



In [237]:

    
p[1]









    Out[237]:





array([[ 9.],
       [ 3.]])

p[2] should be the mode of the three of them. They are all different, so it will just be the first one.



In [238]:

    
p[2]









    Out[238]:





array([[ 9.],
       [ 3.]])

Test distance classification



In [287]:

    
Xtr = np.array([
         [ 3, 5],
         [ 2, 1],
         [ 1, 3]])
Xte = np.array([
         [ 3,  5],
         [ 2,  2],
         ])
ytr = np.array([[9],
               [3],
               [9]])



In [288]:

    
R = edm.Classification(weights='distance')



In [289]:

    
R.fit(Xtr,ytr)



In [290]:

    
p = R.predict(Xte,[1,2,3])



In [291]:

    
R.dist









    Out[291]:





array([[ 0. ,  1. ,  1. ],
       [ 0.5,  1. ,  1. ]])



In [292]:

    
R.ind









    Out[292]:





array([[0, 1, 2],
       [1, 0, 2]])



In [301]:

    
W = 1/(R.dist+.00001)
W/np.sum(W,axis=1,keepdims=True)
#W









    Out[301]:





array([[  9.99980001e-01,   9.99970001e-06,   9.99970001e-06],
       [  4.99997500e-01,   2.50001250e-01,   2.50001250e-01]])



In [302]:

    
W









    Out[302]:





array([[  1.00000000e+05,   9.99990000e-01,   9.99990000e-01],
       [  1.99996000e+00,   9.99990000e-01,   9.99990000e-01]])



In [303]:

    
W[1,1] + W[1,2]









    Out[303]:





1.9999800001999979



In [305]:

    
W[1,0]









    Out[305]:





1.9999600007999843



In [294]:

    
np.array([[9.],
         [3.]])
p[0]









    Out[294]:





array([[ 9.],
       [ 3.]])



In [295]:

    
p[1]









    Out[295]:





array([[ 9.],
       [ 3.]])



In [296]:

    
p[2]









    Out[296]:





array([[ 9.],
       [ 9.]])



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [306]:

    
ut.quick_mode_axis1(x).shape









    Out[306]:





(2,)



In [184]:

    
np.mean(stats.mode(x,axis=1)[0].ravel() == ut.quick_mode_axis1(x))









    Out[184]:





1.0



In [189]:

    
stats.mode([5,5,5,7,7,4,4,4])









    Out[189]:





ModeResult(mode=array([4]), count=array([3]))



In [188]:

    
ut.quick_mode_axis1(np.array([5,5,5,7,7,4,4,4]).reshape(1,-1))









    Out[188]:





array([ 4.])



In [211]:

    
x = np.array([[5,5,7,7,7,4,4,4],
             [4,4,3,3,3,5,5,5]])



In [212]:

    
sl = x[1,:]
sl









    Out[212]:





array([4, 4, 3, 3, 3, 5, 5, 5])



In [213]:

    
loc = np.bincount(sl)[sl].argmax()
loc









    Out[213]:





2



In [214]:

    
sl[loc]









    Out[214]:





3



In [229]:

    
ut.quick_mode_axis1_keep_nearest_neigh(x)









    Out[229]:





array([ 7.,  3.])



In [215]:

    
X = X.astype(int)
len_x = len(X)
mode = np.zeros(len_x)
for i in range(len_x):
    mode[i] = np.bincount(X[i,:])[X[i,:]].argmax() #reorder before argmax









    



  File "<ipython-input-215-b43e6a109574>", line 6
    return mode
               ^
SyntaxError: 'return' outside function