Testing Notebook

This notebook contains informal, visual tests of the software. Some of the tests here have since been formalized in their respective Python scripts so that they can be run by pytest.


In [3]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
%matplotlib inline
sns.set_context('paper',font_scale=1.4)

Utilities


In [1]:
import sys
# Put a local skedm checkout at the front of the module search path so the
# `skedm` imports in later cells pick it up.
# NOTE(review): this is a hard-coded absolute path under a specific user's home
# directory; presumably it must be adjusted before running on another machine.
sys.path.insert(0, '/Users/nickc/Documents/skedm')

In [70]:
import skedm.utilities as ut

weighted_mean(indices, distances, ytrain )


In [71]:
# Two identical rows of distances, and two identical rows of values to average.
distances = np.array([[.1, .2, .3, .4],
                      [.1, .2, .3, .4]])
X = np.array([[1, 2, 3, 4],
              [1, 2, 3, 4]])

# By hand: inverse-distance weights for one row, normalised to sum to one.
arr = np.array([.1, .2, .3, .4])
num = 1 / arr
denom = np.sum(num)
W = num / denom

# Expected weighted mean of the values 1..4 for a single row; both rows are
# identical, so the same number is expected twice.
mean = np.sum(W * np.array([1, 2, 3, 4]))

gg = ut.weighted_mean(X, distances)

np.testing.assert_array_almost_equal(np.array([mean, mean]), gg, decimal=4)

mi_digitize(X)


In [15]:
x = np.around(np.linspace(0,5,10),1)
x


Out[15]:
array([ 0. ,  0.6,  1.1,  1.7,  2.2,  2.8,  3.3,  3.9,  4.4,  5. ])

In [13]:
ut.mi_digitize(x)


Out[13]:
array([1, 1, 1, 2, 2, 3, 3, 4, 4, 4])

quick_mode_axis1(X)


In [24]:
X = np.array([[2, 1, 3, 1, 3, 3],
       [2, 0, 3, 1, 2, 2],
       [3, 1, 1, 0, 2, 2],
       [3, 1, 2, 3, 1, 1]],dtype=int)

In [25]:
ut.quick_mode_axis1(X)


Out[25]:
array([ 3.,  2.,  1.,  1.])

keep_diversity(X)


In [26]:
X = np.array([[2, 1, 3, 1, 3, 3],
            [2, 2, 2, 2, 2, 2],
            [3, 1, 1, 0, 2, 2],
            [1, 1, 1, 1, 1, 1]],dtype=int)

ut.keep_diversity(X)


Out[26]:
array([ True, False,  True, False], dtype=bool)

weighted_mode(a, w)


In [27]:
x = [4, 1, 4, 2, 4, 2]
weights = [1, 3, 0.5, 1.5, 1, 2]

In [28]:
ut.weighted_mode(x, weights)


Out[28]:
(array([ 2.]), array([ 3.5]))

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:

edm


In [35]:
import skedm as edm

In [37]:
X = np.array([
         [ 0.3,  0.6],
         [ 0.2,  1.4],
         [ 1.2,  0.2]])

In [38]:
y = X.sum(axis=1,keepdims=True)

In [39]:
print(X)
print(y)


[[ 0.3  0.6]
 [ 0.2  1.4]
 [ 1.2  0.2]]
[[ 0.9]
 [ 1.6]
 [ 1.4]]

Regression Uniform weighting


In [40]:
R = edm.Regression()

In [41]:
R.fit(X,y)

dist_calc(X)


In [42]:
R.dist_calc(X)

In [43]:
# Euclidean distances between the three rows of X, written out by hand so they
# can be compared against R.dist. Rows are labelled 1..3 here, i.e. "X1" is
# X[0] in numpy indexing.

# row 1 -> rows 2 and 3
d12 = np.sqrt( (0.3 - 0.2)**2 + (0.6 - 1.4)**2 )
d13 = np.sqrt( (0.3 - 1.2)**2 + (0.6 - 0.2)**2 )

# row 2 -> rows 1 and 3
d21 = np.sqrt( (0.2 - 0.3)**2 + (1.4 - 0.6)**2 )
d23 = np.sqrt( (0.2 - 1.2)**2 + (1.4 - 0.2)**2 )

# row 3 -> rows 1 and 2
d31 = np.sqrt( (1.2 - 0.3)**2 + (0.2 - 0.6)**2 )
d32 = np.sqrt( (1.2 - 0.2)**2 + (0.2 - 1.4)**2 )


print('X1:',d12, d13)
print('X2:',d21, d23)
print('X3:',d31, d32)


X1: 0.80622577483 0.98488578018
X2: 0.80622577483 1.56204993518
X3: 0.98488578018 1.56204993518

In [44]:
R.dist


Out[44]:
array([[ 0.        ,  0.80622577,  0.98488578],
       [ 0.        ,  0.80622577,  1.56204994],
       [ 0.        ,  0.98488578,  1.56204994]])

In [45]:
R.ind


Out[45]:
array([[0, 1, 2],
       [1, 0, 2],
       [2, 0, 1]])

predict(Xtest,nn_list)


In [51]:
X


Out[51]:
array([[ 0.3,  0.6],
       [ 0.2,  1.4],
       [ 1.2,  0.2]])

In [46]:
y


Out[46]:
array([[ 0.9],
       [ 1.6],
       [ 1.4]])

In [47]:
p = R.predict(X,[1,2,3])

p[0] will be itself


In [49]:
p[0]


Out[49]:
array([[ 0.9],
       [ 1.6],
       [ 1.4]])

p[1] will be an average between itself and its second neighbor


In [52]:
p[1]


Out[52]:
array([[ 1.25],
       [ 1.25],
       [ 1.15]])

In [55]:
# Expected two-neighbour predictions, one row per sample: the plain average of
# the sample's own target and the target of its nearest other sample.
p1_test = np.array([
    [(0.9 + 1.6)/2],
    [(1.6 + 0.9)/2],
    [(1.4 + 0.9)/2],
])
p1_test


Out[55]:
array([[ 1.25],
       [ 1.25],
       [ 1.15]])

p[2] will be an average of all


In [57]:
p[2]


Out[57]:
array([[ 1.3],
       [ 1.3],
       [ 1.3]])

In [61]:
np.array([np.mean(y)]*3).reshape(3,1)


Out[61]:
array([[ 1.3],
       [ 1.3],
       [ 1.3]])

Regression with distance weighting


In [115]:
Xtr = np.array([
         [ 0.3,  0.6],
         [ 0.2,  1.4],
         [ 1.2,  0.2]])
Xte = np.array([
         [ 0.7,  1.6],
         [ 1.3,  0.4],
         ])

ytr = Xtr.sum(axis=1,keepdims=True)
yte = Xte.sum(axis=1,keepdims=True)

In [143]:
yte


Out[143]:
array([[ 2.3],
       [ 1.7]])

In [122]:
R = edm.Regression(weights='distance')

In [123]:
R.fit(Xtr,ytr)

In [124]:
p = R.predict(Xte,[1,2,3])

In [125]:
print(R.ind)
print(R.dist)


[[1 0 2]
 [2 0 1]]
[[ 0.53851648  1.07703296  1.48660687]
 [ 0.2236068   1.0198039   1.48660687]]

In [126]:
print('ytr',ytr)
print('yte',yte)

print('Xtr',Xtr)
print('Xte',Xte)


ytr [[ 0.9]
 [ 1.6]
 [ 1.4]]
yte [[ 2.3]
 [ 1.7]]
Xtr [[ 0.3  0.6]
 [ 0.2  1.4]
 [ 1.2  0.2]]
Xte [[ 0.7  1.6]
 [ 1.3  0.4]]

p[0] should return the target of the single nearest training neighbor


In [127]:
p[0]


Out[127]:
array([[ 1.6],
       [ 1.4]])

p[1] should return a weighted average of the first two


In [129]:
R.dist


Out[129]:
array([[ 0.53851648,  1.07703296,  1.48660687],
       [ 0.2236068 ,  1.0198039 ,  1.48660687]])

In [130]:
R.ind


Out[130]:
array([[1, 0, 2],
       [2, 0, 1]])

In [132]:
ytr


Out[132]:
array([[ 0.9],
       [ 1.6],
       [ 1.4]])

In [137]:
W = 1/R.dist[:,0:2]
W/= np.sum(W,axis=1,keepdims=True)

In [138]:
W


Out[138]:
array([[ 0.66666667,  0.33333333],
       [ 0.82016658,  0.17983342]])

In [146]:
np.sum(W*ytr[R.ind[:,0:2],0],axis=1).reshape(-1,1)


Out[146]:
array([[ 1.36666667],
       [ 1.31008329]])

In [128]:
p[1]


Out[128]:
array([[ 1.36666522],
       [ 1.31008072]])

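p[2] should return a weighted average of them all. Note: the output recorded below (In [216]) comes from a later, out-of-order execution and cannot be this weighted average, since every training target lies between 0.9 and 1.6; re-run the notebook from the top to check this case.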


In [216]:
p[2]


Out[216]:
array([[ 3.],
       [ 3.]])

Test uniform classification


In [230]:
Xtr = np.array([
         [ 3, 6],
         [ 2, 1],
         [ 1, 3]])
Xte = np.array([
         [ 3,  5],
         [ 2,  2],
         ])
ytr = np.array([[9],
               [3],
               [4]])

In [231]:
R = edm.Classification()

In [232]:
R.fit(Xtr,ytr)

In [233]:
p = R.predict(Xte,[1,2,3])

In [234]:
R.dist


Out[234]:
array([[ 0.5,  1. ,  1. ],
       [ 0.5,  1. ,  1. ]])

In [235]:
R.ind


Out[235]:
array([[0, 1, 2],
       [1, 0, 2]])

p[0] should just be the nearest neighbor


In [236]:
# Expected: with one neighbour, each test point takes the label of its nearest
# training sample. Compare explicitly instead of leaving the expected array as
# a discarded expression.
expected = np.array([[9.],
                     [3.]])
np.testing.assert_array_equal(p[0], expected)
p[0]


Out[236]:
array([[ 9.],
       [ 3.]])

p[1] should be the mode of the two nearest neighbors, but since there are only two (and they differ), it should still be the same as the first one.


In [237]:
p[1]


Out[237]:
array([[ 9.],
       [ 3.]])

p[2] should be the mode of the three of them. They are all different, so it will just be the first one.


In [238]:
p[2]


Out[238]:
array([[ 9.],
       [ 3.]])

Test distance classification


In [287]:
Xtr = np.array([
         [ 3, 5],
         [ 2, 1],
         [ 1, 3]])
Xte = np.array([
         [ 3,  5],
         [ 2,  2],
         ])
ytr = np.array([[9],
               [3],
               [9]])

In [288]:
R = edm.Classification(weights='distance')

In [289]:
R.fit(Xtr,ytr)

In [290]:
p = R.predict(Xte,[1,2,3])

In [291]:
R.dist


Out[291]:
array([[ 0. ,  1. ,  1. ],
       [ 0.5,  1. ,  1. ]])

In [292]:
R.ind


Out[292]:
array([[0, 1, 2],
       [1, 0, 2]])

In [301]:
W = 1/(R.dist+.00001)
W/np.sum(W,axis=1,keepdims=True)
#W


Out[301]:
array([[  9.99980001e-01,   9.99970001e-06,   9.99970001e-06],
       [  4.99997500e-01,   2.50001250e-01,   2.50001250e-01]])

In [302]:
W


Out[302]:
array([[  1.00000000e+05,   9.99990000e-01,   9.99990000e-01],
       [  1.99996000e+00,   9.99990000e-01,   9.99990000e-01]])

In [303]:
W[1,1] + W[1,2]


Out[303]:
1.9999800001999979

In [305]:
W[1,0]


Out[305]:
1.9999600007999843

In [294]:
# Expected: with one neighbour, each test point takes the label of its nearest
# training sample. Compare explicitly instead of leaving the expected array as
# a discarded expression.
expected = np.array([[9.],
                     [3.]])
np.testing.assert_array_equal(p[0], expected)
p[0]


Out[294]:
array([[ 9.],
       [ 3.]])

In [295]:
p[1]


Out[295]:
array([[ 9.],
       [ 3.]])

In [296]:
p[2]


Out[296]:
array([[ 9.],
       [ 9.]])

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [306]:
ut.quick_mode_axis1(x).shape


Out[306]:
(2,)

In [184]:
np.mean(stats.mode(x,axis=1)[0].ravel() == ut.quick_mode_axis1(x))


Out[184]:
1.0

In [189]:
stats.mode([5,5,5,7,7,4,4,4])


Out[189]:
ModeResult(mode=array([4]), count=array([3]))

In [188]:
ut.quick_mode_axis1(np.array([5,5,5,7,7,4,4,4]).reshape(1,-1))


Out[188]:
array([ 4.])

In [211]:
x = np.array([[5,5,7,7,7,4,4,4],
             [4,4,3,3,3,5,5,5]])

In [212]:
sl = x[1,:]
sl


Out[212]:
array([4, 4, 3, 3, 3, 5, 5, 5])

In [213]:
loc = np.bincount(sl)[sl].argmax()
loc


Out[213]:
2

In [214]:
sl[loc]


Out[214]:
3

In [229]:
ut.quick_mode_axis1_keep_nearest_neigh(x)


Out[229]:
array([ 7.,  3.])

In [215]:
# Row-wise mode of X. On ties, the most frequent value that appears earliest in
# the row is kept.
X = X.astype(int)  # np.bincount only accepts non-negative integers
len_x = len(X)
mode = np.zeros(len_x)
for i in range(len_x):
    row = X[i, :]
    # Occurrence count of every element, laid out in the row's own order, so
    # argmax gives the position of the first most-frequent value.
    loc = np.bincount(row)[row].argmax()
    # Store the value found at that position, not the position itself.
    mode[i] = row[loc]


  File "<ipython-input-215-b43e6a109574>", line 6
    return mode
               ^
SyntaxError: 'return' outside function