Testing Notebook

This notebook holds informal, visual tests of the software. Some of these tests have since been formalized in their respective Python test scripts, which are run by pytest.


In [1]:
%load_ext autoreload
%autoreload 2

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from scipy import stats
%matplotlib inline
sns.set_context('paper',font_scale=1.4)


//anaconda/envs/skedm_env/lib/python3.5/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')
//anaconda/envs/skedm_env/lib/python3.5/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')

Utilities


In [2]:
import sys
# Put the directory two levels above the working directory at the front of the
# module search path. Presumably this makes the local checkout of the package
# importable ahead of any installed copy — confirm.
sys.path.insert(0, '../..')

In [4]:
import skccm.utilities as ut

exp_weight(X)


In [22]:
# Toy input for the exp_weight check: two rows of four ascending values.
X = np.array([[0.1, 0.2, 0.3, 0.4],
              [0.3, 0.3, 0.7, 0.7]])
# The cells that follow refer to this array as `distances`; bind that name here
# so they do not depend on a variable left over from an earlier kernel session.
distances = X

In [23]:
ut.exp_weight(distances)


Out[23]:
array([[ 0.64388159,  0.23689449,  0.08715733,  0.03206659],
       [ 0.39569207,  0.39569207,  0.10430793,  0.10430793]])

In [14]:
# Manual reference: exponentiate the distances (numerator) and take the per-row
# sum of the same exponentials (denominator, kept as a column for broadcasting).
# NOTE(review): top/bot is a plain softmax of +distances. The recorded
# ut.exp_weight output instead matches exp(-d / d_min) normalized per row, so
# the two results are not expected to agree — confirm which form is intended.
top = np.exp(distances)
bot = np.exp(distances).sum(axis=1,keepdims=True)

In [17]:
top/bot


Out[17]:
array([[ 0.21383822,  0.23632778,  0.26118259,  0.28865141],
       [ 0.20065617,  0.20065617,  0.29934383,  0.29934383]])

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:

ccm


In [24]:
import skccm as ccm

In [33]:
# Two small toy arrays (4 samples x 2 features each) passed to CCM below.
X1 = np.array([[0.3, 0.6], [0.2, 1.4], [1.2, 0.2], [0.9, 1.9]])
X2 = np.array([[0.2, 0.3], [0.7, 2.2], [0.8, 1.4], [1.7, 1.7]])

In [34]:
C = ccm.CCM()

In [35]:
C.fit(X1,X2)

In [37]:
sc1, sc2 = C.predict(X1,X2,[3,4])

In [40]:
sc1[0]


Out[40]:
array([[ 0.3       ,  0.6       ],
       [ 0.2       ,  1.4       ],
       [ 1.2       ,  0.2       ],
       [ 0.67985163,  0.72526634]])

In [38]:
# Three 2-feature training samples. They are defined explicitly here because no
# earlier cell creates a 3x2 `X`; without this the section only works with
# leftover kernel state.
X = np.array([[0.3, 0.6],
              [0.2, 1.4],
              [1.2, 0.2]])
# The target is the row sum, kept as a (3, 1) column vector.
y = X.sum(axis=1,keepdims=True)

In [39]:
print(X)
print(y)


[[ 0.3  0.6]
 [ 0.2  1.4]
 [ 1.2  0.2]]
[[ 0.9]
 [ 1.6]
 [ 1.4]]

Regression Uniform weighting


In [40]:
# NOTE(review): `edm` is never imported in the visible part of this notebook
# (only `skccm as ccm` and `skccm.utilities as ut` are). This cell fails on a
# fresh kernel until the intended package is imported in the top cell — confirm
# which package `edm` refers to.
R = edm.Regression()

In [41]:
R.fit(X,y)

dist_calc(X)


In [42]:
R.dist_calc(X)

In [43]:
# Hand-computed Euclidean distances between the three training samples, printed
# so they can be compared by eye with the non-zero columns of R.dist.
pts = np.array([[0.3, 0.6],
                [0.2, 1.4],
                [1.2, 0.2]])


def _euclid(a, b):
    """Return the Euclidean distance between two 1-D points."""
    return np.sqrt(np.sum((a - b) ** 2))


# dIJ is the distance from sample I to sample J, numbered from 1 to match the
# labels in the printed output.
d12, d13 = _euclid(pts[0], pts[1]), _euclid(pts[0], pts[2])
d21, d23 = _euclid(pts[1], pts[0]), _euclid(pts[1], pts[2])
d31, d32 = _euclid(pts[2], pts[0]), _euclid(pts[2], pts[1])

print('X1:',d12, d13)
print('X2:',d21, d23)
print('X3:',d31, d32)


X1: 0.80622577483 0.98488578018
X2: 0.80622577483 1.56204993518
X3: 0.98488578018 1.56204993518

In [44]:
R.dist


Out[44]:
array([[ 0.        ,  0.80622577,  0.98488578],
       [ 0.        ,  0.80622577,  1.56204994],
       [ 0.        ,  0.98488578,  1.56204994]])

In [45]:
R.ind


Out[45]:
array([[0, 1, 2],
       [1, 0, 2],
       [2, 0, 1]])

predict(Xtest,nn_list)


In [51]:
X


Out[51]:
array([[ 0.3,  0.6],
       [ 0.2,  1.4],
       [ 1.2,  0.2]])

In [46]:
y


Out[46]:
array([[ 0.9],
       [ 1.6],
       [ 1.4]])

In [47]:
p = R.predict(X,[1,2,3])

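p[0] uses a single neighbor. Because the test points are the training points, each sample's nearest neighbor is itself, so p[0] should reproduce y.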


In [49]:
p[0]


Out[49]:
array([[ 0.9],
       [ 1.6],
       [ 1.4]])

p[1] will be an average between itself and its second neighbor


In [52]:
p[1]


Out[52]:
array([[ 1.25],
       [ 1.25],
       [ 1.15]])

In [55]:
# Expected two-neighbor predictions: for each sample, the mean of its own
# target and the target of its nearest other sample.
p1_test = np.array([
    [(0.9 + 1.6)/2],
    [(1.6 + 0.9)/2],
    [(1.4 + 0.9)/2],
])
p1_test


Out[55]:
array([[ 1.25],
       [ 1.25],
       [ 1.15]])

p[2] will be an average of all


In [57]:
p[2]


Out[57]:
array([[ 1.3],
       [ 1.3],
       [ 1.3]])

In [61]:
np.array([np.mean(y)]*3).reshape(3,1)


Out[61]:
array([[ 1.3],
       [ 1.3],
       [ 1.3]])

Regression with distance weighting


In [115]:
# Training and test points for the distance-weighted regression check.
Xtr = np.array([[0.3, 0.6], [0.2, 1.4], [1.2, 0.2]])
Xte = np.array([[0.7, 1.6], [1.3, 0.4]])

# Targets are the row sums of the features, kept as column vectors.
ytr, yte = (features.sum(axis=1, keepdims=True) for features in (Xtr, Xte))

In [143]:
yte


Out[143]:
array([[ 2.3],
       [ 1.7]])

In [122]:
R = edm.Regression(weights='distance')

In [123]:
R.fit(Xtr,ytr)

In [124]:
p = R.predict(Xte,[1,2,3])

In [125]:
print(R.ind)
print(R.dist)


[[1 0 2]
 [2 0 1]]
[[ 0.53851648  1.07703296  1.48660687]
 [ 0.2236068   1.0198039   1.48660687]]

In [126]:
# Echo the targets first, then the feature matrices, each prefixed with its name.
for label, values in (('ytr', ytr), ('yte', yte), ('Xtr', Xtr), ('Xte', Xte)):
    print(label, values)


ytr [[ 0.9]
 [ 1.6]
 [ 1.4]]
yte [[ 2.3]
 [ 1.7]]
Xtr [[ 0.3  0.6]
 [ 0.2  1.4]
 [ 1.2  0.2]]
Xte [[ 0.7  1.6]
 [ 1.3  0.4]]

p[0] uses a single neighbor, so it should return the target of each test point's nearest training sample.


In [127]:
p[0]


Out[127]:
array([[ 1.6],
       [ 1.4]])

p[1] should return a weighted average of the first two


In [129]:
R.dist


Out[129]:
array([[ 0.53851648,  1.07703296,  1.48660687],
       [ 0.2236068 ,  1.0198039 ,  1.48660687]])

In [130]:
R.ind


Out[130]:
array([[1, 0, 2],
       [2, 0, 1]])

In [132]:
ytr


Out[132]:
array([[ 0.9],
       [ 1.6],
       [ 1.4]])

In [137]:
# Hand-computed inverse-distance weights for the two nearest neighbors of each
# test point (first two columns of R.dist).
W = 1/R.dist[:,0:2]
# Normalize in place so that each row of weights sums to 1.
W/= np.sum(W,axis=1,keepdims=True)

In [138]:
W


Out[138]:
array([[ 0.66666667,  0.33333333],
       [ 0.82016658,  0.17983342]])

In [146]:
np.sum(W*ytr[R.ind[:,0:2],0],axis=1).reshape(-1,1)


Out[146]:
array([[ 1.36666667],
       [ 1.31008329]])

In [128]:
p[1]


Out[128]:
array([[ 1.36666522],
       [ 1.31008072]])

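p[2] should return a weighted average of all three training targets. (The recorded output below, [[3.], [3.]], comes from a later, out-of-order run (In [216]) and cannot be a weighted average of targets that lie between 0.9 and 1.6; re-run this cell to check the real value.)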


In [216]:
p[2]


Out[216]:
array([[ 3.],
       [ 3.]])

Test uniform classification


In [230]:
# Integer-valued toy data for the uniform-weight classification check.
Xtr = np.array([[3, 6], [2, 1], [1, 3]])
Xte = np.array([[3, 5], [2, 2]])
# One class label per training row, stored as a column vector; all three differ.
ytr = np.array([[9], [3], [4]])

In [231]:
R = edm.Classification()

In [232]:
R.fit(Xtr,ytr)

In [233]:
p = R.predict(Xte,[1,2,3])

In [234]:
R.dist


Out[234]:
array([[ 0.5,  1. ,  1. ],
       [ 0.5,  1. ,  1. ]])

In [235]:
R.ind


Out[235]:
array([[0, 1, 2],
       [1, 0, 2]])

p[0] should just be the nearest neighbor


In [236]:
# With a single neighbor, the prediction is just that neighbor's label.
expected_p0 = np.array([[9.],
                        [3.]])
# Compare against the expected labels explicitly instead of leaving the
# expected array as a bare expression whose value is discarded.
np.testing.assert_array_equal(p[0], expected_p0)
p[0]


Out[236]:
array([[ 9.],
       [ 3.]])

p[1] should be the mode of the two nearest neighbors. Since there are only two and their labels differ, there is no majority, so the result should be the same as p[0].


In [237]:
p[1]


Out[237]:
array([[ 9.],
       [ 3.]])

p[2] should be the mode of the three of them. They are all different, so it will just be the first one.


In [238]:
p[2]


Out[238]:
array([[ 9.],
       [ 3.]])

Test distance classification


In [287]:
# Integer-valued toy data for the distance-weighted classification check.
# The first training row coincides with the first test row.
Xtr = np.array([[3, 5], [2, 1], [1, 3]])
Xte = np.array([[3, 5], [2, 2]])
# Labels as a column vector; the first and third training rows share label 9.
ytr = np.array([[9], [3], [9]])

In [288]:
R = edm.Classification(weights='distance')

In [289]:
R.fit(Xtr,ytr)

In [290]:
p = R.predict(Xte,[1,2,3])

In [291]:
R.dist


Out[291]:
array([[ 0. ,  1. ,  1. ],
       [ 0.5,  1. ,  1. ]])

In [292]:
R.ind


Out[292]:
array([[0, 1, 2],
       [1, 0, 2]])

In [301]:
# Unnormalized inverse-distance weights; the small offset keeps a zero distance
# from producing a division by zero.
W = np.reciprocal(R.dist + .00001)
# Display the row-normalized weights without overwriting W.
W/W.sum(axis=1, keepdims=True)


Out[301]:
array([[  9.99980001e-01,   9.99970001e-06,   9.99970001e-06],
       [  4.99997500e-01,   2.50001250e-01,   2.50001250e-01]])

In [302]:
W


Out[302]:
array([[  1.00000000e+05,   9.99990000e-01,   9.99990000e-01],
       [  1.99996000e+00,   9.99990000e-01,   9.99990000e-01]])

In [303]:
W[1,1] + W[1,2]


Out[303]:
1.9999800001999979

In [305]:
W[1,0]


Out[305]:
1.9999600007999843

In [294]:
# With a single neighbor, the prediction is just that neighbor's label.
expected_p0 = np.array([[9.],
                        [3.]])
# Compare against the expected labels explicitly instead of leaving the
# expected array as a bare expression whose value is discarded.
np.testing.assert_array_equal(p[0], expected_p0)
p[0]


Out[294]:
array([[ 9.],
       [ 3.]])

In [295]:
p[1]


Out[295]:
array([[ 9.],
       [ 3.]])

In [296]:
p[2]


Out[296]:
array([[ 9.],
       [ 9.]])

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [306]:
ut.quick_mode_axis1(x).shape


Out[306]:
(2,)

In [184]:
np.mean(stats.mode(x,axis=1)[0].ravel() == ut.quick_mode_axis1(x))


Out[184]:
1.0

In [189]:
stats.mode([5,5,5,7,7,4,4,4])


Out[189]:
ModeResult(mode=array([4]), count=array([3]))

In [188]:
ut.quick_mode_axis1(np.array([5,5,5,7,7,4,4,4]).reshape(1,-1))


Out[188]:
array([ 4.])

In [211]:
# Two rows of small integer labels for the mode checks. Each row has two values
# tied at three occurrences (7 and 4 in the first row, 3 and 5 in the second).
x = np.array([
    [5, 5, 7, 7, 7, 4, 4, 4],
    [4, 4, 3, 3, 3, 5, 5, 5],
])

In [212]:
sl = x[1,:]
sl


Out[212]:
array([4, 4, 3, 3, 3, 5, 5, 5])

In [213]:
loc = np.bincount(sl)[sl].argmax()
loc


Out[213]:
2

In [214]:
sl[loc]


Out[214]:
3

In [229]:
ut.quick_mode_axis1_keep_nearest_neigh(x)


Out[229]:
array([ 7.,  3.])

In [215]:
def quick_mode_axis1_keep_nearest_neigh(X):
    """Row-wise mode of a 2-D array of non-negative integer labels.

    When several values tie for the highest count in a row, the one that
    appears first (left-most) in that row is returned.

    Parameters
    ----------
    X : array-like, shape (n_rows, n_cols)
        Values are cast to int before counting. The caller's array is not
        modified.

    Returns
    -------
    mode : ndarray of float, shape (n_rows,)
        The selected value for each row.

    Raises
    ------
    ValueError
        If a row is empty, or contains negative values (raised by
        ``np.bincount``).
    """
    labels = np.asarray(X).astype(int)
    mode = np.zeros(len(labels))
    for i, row in enumerate(labels):
        # Replace each entry by its count within the row, so argmax returns the
        # position of the first entry that has the highest count.
        first_best = np.bincount(row)[row].argmax()
        # Store the value at that position, not the position itself.
        mode[i] = row[first_best]
    return mode