Baayen, R. H. and Milin, P. and Filipovic Durdevic, D. and Hendrix, P. and Marelli, M. 2011. "An amorphous model for morphological processing in visual comprehension based on naive discriminative learning." Psychological Review 118:438-482.



In [1]:

    
import pandas as pd
import pandas.rpy.common as com
import numpy as np
from sklearn.feature_extraction import DictVectorizer

%load_ext autoreload
%autoreload 2

%load_ext rmagic

%precision 2
pd.set_option('display.precision', 3)



In [2]:

    
%%R
library(ndl)









    





This is ndl version 0.2.16. 
For an overview of the package, type 'help("ndl.package")'.



In [3]:

    
# Fetch the `plurals` example data set through the pandas/R bridge.
data = com.load_data('plurals')
# Cues: the individual letters of each word form.
data['Cues'] = data['WordForm'].map(list)
# Outcomes: the underscore-separated labels split into a list, e.g. [hand, PLURAL].
data['Outcomes'] = data['Outcomes'].map(lambda label: label.split('_'))
data









    Out[3]:






  
    
      
      WordForm
      Frequency
      Outcomes
      Cues
    
  
  
    
      1 
        hand
        10
          [hand, NIL]
          [h, a, n, d]
    
    
      2 
       hands
        20
       [hand, PLURAL]
       [h, a, n, d, s]
    
    
      3 
        land
         8
          [land, NIL]
          [l, a, n, d]
    
    
      4 
       lands
         3
       [land, PLURAL]
       [l, a, n, d, s]
    
    
      5 
         and
        35
           [and, NIL]
             [a, n, d]
    
    
      6 
         sad
        18
           [sad, NIL]
             [s, a, d]
    
    
      7 
          as
        35
            [as, NIL]
                [a, s]
    
    
      8 
         lad
       102
           [lad, NIL]
             [l, a, d]
    
    
      9 
         lad
        54
        [lad, PLURAL]
             [l, a, d]
    
    
      10
        lass
       134
          [lass, NIL]
          [l, a, s, s]
    
  

10 rows × 4 columns

Appendix

The first step is to construct the co-occurrence matrix $C$ (eq. 37), where $C_{ij}$ is the frequency with which cue $i$ co-occurs with cue $j$ ($C_{ii}$ is the marginal frequency of cue $i$).



In [4]:

    
# Cue matrix D: one row per word form, one column per distinct letter.
# A letter that is present in the word gets the word's frequency, all other
# entries are 0. Repeated letters (e.g. the two s's in "lass") count once,
# because dict keys are unique.
cues = DictVectorizer(dtype=float, sparse=False)
# Go through `.values` for the column-vector reshape: indexing a Series with
# [:, np.newaxis] is rejected by newer pandas releases, while the underlying
# NumPy array supports it everywhere. A list (not a generator) is passed to
# fit_transform, matching how the outcome matrix is built.
D = cues.fit_transform([dict.fromkeys(c, True) for c in data.Cues]) * data.Frequency.values[:, np.newaxis]
D









    Out[4]:





array([[  10.,   10.,   10.,    0.,   10.,    0.],
       [  20.,   20.,   20.,    0.,   20.,   20.],
       [   8.,    8.,    0.,    8.,    8.,    0.],
       [   3.,    3.,    0.,    3.,    3.,    3.],
       [  35.,   35.,    0.,    0.,   35.,    0.],
       [  18.,   18.,    0.,    0.,    0.,   18.],
       [  35.,    0.,    0.,    0.,    0.,   35.],
       [ 102.,  102.,    0.,  102.,    0.,    0.],
       [  54.,   54.,    0.,   54.,    0.,    0.],
       [ 134.,    0.,    0.,  134.,    0.,  134.]])



In [5]:

    
# Column labels of D: the distinct cues (letters) collected by the vectorizer.
cues.get_feature_names()









    Out[5]:





['a', 'd', 'h', 'l', 'n', 's']

Now sum up to get $C$:



In [6]:

    
# Build the square cue-by-cue co-occurrence matrix C.
n = len(cues.get_feature_names())
C = np.zeros((n, n))
for word_row in D:
    # Indices of the cues present in this word form.
    present = np.flatnonzero(word_row)
    # Each present cue i co-occurs with every cue j of the same word, weighted
    # by the word's frequency (which is what the row of D holds).
    C[present] += word_row
C









    Out[6]:





array([[ 419.,  250.,   30.,  301.,   76.,  210.],
       [ 250.,  250.,   30.,  167.,   76.,   41.],
       [  30.,   30.,   30.,    0.,   30.,   20.],
       [ 301.,  167.,    0.,  301.,   11.,  137.],
       [  76.,   76.,   30.,   11.,   76.,   23.],
       [ 210.,   41.,   20.,  137.,   23.,  210.]])

Then we normalize to get $C'$, the conditional probability matrix (eqs. 38 and 39), where: $$C'_{ij}=p(j|i)=\frac{C_{ij}}{\sum_kC_{ik}}$$



In [7]:

    
# Z holds the row totals of C; it is reused below to normalize O.
Z = np.sum(C, axis=1)
# Divide every row of C by its own total so each row sums to 1.
C1 = C / Z.reshape(-1, 1)
C1









    Out[7]:





array([[ 0.33,  0.19,  0.02,  0.23,  0.06,  0.16],
       [ 0.31,  0.31,  0.04,  0.21,  0.09,  0.05],
       [ 0.21,  0.21,  0.21,  0.  ,  0.21,  0.14],
       [ 0.33,  0.18,  0.  ,  0.33,  0.01,  0.15],
       [ 0.26,  0.26,  0.1 ,  0.04,  0.26,  0.08],
       [ 0.33,  0.06,  0.03,  0.21,  0.04,  0.33]])

Next, the outcome matrix $O$, where $O_{ij}$ is the frequency with which cue $i$ co-occurs with outcome $j$:



In [8]:

    
# Outcome indicator matrix X: one row per word form, one column per outcome
# label. An outcome that applies to the word gets the word's frequency, all
# other entries are 0.
out = DictVectorizer(dtype=float, sparse=False)
# Go through `.values` for the column-vector reshape: indexing a Series with
# [:, np.newaxis] is rejected by newer pandas releases, while the underlying
# NumPy array supports it everywhere.
X = out.fit_transform([dict.fromkeys(c, True) for c in data.Outcomes]) * data.Frequency.values[:, np.newaxis]
X









    Out[8]:





array([[  10.,    0.,    0.,    0.,   10.,    0.,    0.,    0.,    0.],
       [   0.,   20.,    0.,    0.,   20.,    0.,    0.,    0.,    0.],
       [   8.,    0.,    0.,    0.,    0.,    0.,    8.,    0.,    0.],
       [   0.,    3.,    0.,    0.,    0.,    0.,    3.,    0.,    0.],
       [  35.,    0.,   35.,    0.,    0.,    0.,    0.,    0.,    0.],
       [  18.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,   18.],
       [  35.,    0.,    0.,   35.,    0.,    0.,    0.,    0.,    0.],
       [ 102.,    0.,    0.,    0.,    0.,  102.,    0.,    0.,    0.],
       [   0.,   54.,    0.,    0.,    0.,   54.,    0.,    0.,    0.],
       [ 134.,    0.,    0.,    0.,    0.,    0.,    0.,  134.,    0.]])



In [9]:

    
# Column labels of X: the distinct outcome labels collected by the vectorizer.
out.get_feature_names()









    Out[9]:





['NIL', 'PLURAL', 'and', 'as', 'hand', 'lad', 'land', 'lass', 'sad']



In [10]:

    
# Cue-by-outcome matrix O: O[i, j] sums the frequencies of the word forms in
# which cue i is present and outcome j applies.
O = np.zeros((len(cues.get_feature_names()), len(out.get_feature_names())))
# Walk the cue rows (D) and outcome rows (X) of each word form in lockstep;
# zip works on both Python 2 and 3, unlike xrange.
for cue_row, outcome_row in zip(D, X):
    # Indices of the cues present in this word form.
    present = np.flatnonzero(cue_row)
    # Credit the word's (frequency-weighted) outcomes to each of its cues.
    O[present] += outcome_row
O









    Out[10]:





array([[ 342.,   77.,   35.,   35.,   30.,  156.,   11.,  134.,   18.],
       [ 173.,   77.,   35.,    0.,   30.,  156.,   11.,    0.,   18.],
       [  10.,   20.,    0.,    0.,   30.,    0.,    0.,    0.,    0.],
       [ 244.,   57.,    0.,    0.,    0.,  156.,   11.,  134.,    0.],
       [  53.,   23.,   35.,    0.,   30.,    0.,   11.,    0.,    0.],
       [ 187.,   23.,    0.,   35.,   20.,    0.,    3.,  134.,   18.]])

As above, we renormalize $O$ to get the conditional outcome matrix $O'$, where: $$O'_{ij}=p(o_j|c_i)=\frac{p(c_i,o_j)}{p(c_i)}=\frac{O_{ij}}{\sum_kC_{ik}}$$



In [11]:

    
# Scale each cue's row of O by that cue's row total from C (stored in Z).
O1 = O / Z.reshape(-1, 1)
O1









    Out[11]:





array([[ 0.27,  0.06,  0.03,  0.03,  0.02,  0.12,  0.01,  0.1 ,  0.01],
       [ 0.21,  0.09,  0.04,  0.  ,  0.04,  0.19,  0.01,  0.  ,  0.02],
       [ 0.07,  0.14,  0.  ,  0.  ,  0.21,  0.  ,  0.  ,  0.  ,  0.  ],
       [ 0.27,  0.06,  0.  ,  0.  ,  0.  ,  0.17,  0.01,  0.15,  0.  ],
       [ 0.18,  0.08,  0.12,  0.  ,  0.1 ,  0.  ,  0.04,  0.  ,  0.  ],
       [ 0.29,  0.04,  0.  ,  0.05,  0.03,  0.  ,  0.  ,  0.21,  0.03]])

Finally, we find the weight matrix W by solving equation (47): $C'W=O'$



In [12]:

    
# Solve C1 . W = O1 for W directly; np.linalg.solve needs C1 to be square and
# non-singular.
np.linalg.solve(C1,O1)









    Out[12]:





array([[  1.45e+00,  -4.49e-01,   3.75e-01,   1.03e+00,   2.57e-16,
          4.09e-01,  -3.75e-01,  -3.41e-02,  -4.09e-01],
       [ -5.31e-01,   5.31e-01,  -1.62e-01,  -4.44e-01,  -1.98e-16,
          3.95e-01,   1.62e-01,  -5.56e-01,   6.05e-01],
       [ -4.91e-01,   4.91e-01,  -6.89e-01,   5.35e-02,   1.00e+00,
          3.65e-01,  -3.11e-01,  -5.35e-02,  -3.65e-01],
       [ -2.23e-01,   2.23e-01,  -2.15e-01,  -6.20e-01,  -5.38e-17,
          1.65e-01,   2.15e-01,   6.20e-01,  -1.65e-01],
       [  8.83e-02,  -8.83e-02,   6.12e-01,  -4.20e-01,  -4.31e-17,
         -8.08e-01,   3.88e-01,   4.20e-01,  -1.92e-01],
       [ -2.72e-01,   2.72e-01,  -2.05e-01,  -3.35e-01,  -2.03e-16,
         -5.41e-01,   2.05e-01,   3.35e-01,   5.41e-01]])

Alternatively, find the weight matrix $W$ using the pseudoinverse $C'^{+}$ of $C'$, as in equation (48): $W=C'^{+}O'$. This has the advantage of working even when $C'$ is singular.



In [13]:

    
# Weight matrix via the Moore-Penrose pseudoinverse of the normalized
# co-occurrence matrix, multiplied into the normalized outcome matrix.
W = np.dot(np.linalg.pinv(C1), O1)
W









    Out[13]:





array([[  1.45e+00,  -4.49e-01,   3.75e-01,   1.03e+00,  -4.88e-15,
          4.09e-01,  -3.75e-01,  -3.41e-02,  -4.09e-01],
       [ -5.31e-01,   5.31e-01,  -1.62e-01,  -4.44e-01,   2.44e-15,
          3.95e-01,   1.62e-01,  -5.56e-01,   6.05e-01],
       [ -4.91e-01,   4.91e-01,  -6.89e-01,   5.35e-02,   1.00e+00,
          3.65e-01,  -3.11e-01,  -5.35e-02,  -3.65e-01],
       [ -2.23e-01,   2.23e-01,  -2.15e-01,  -6.20e-01,   2.80e-15,
          1.65e-01,   2.15e-01,   6.20e-01,  -1.65e-01],
       [  8.83e-02,  -8.83e-02,   6.12e-01,  -4.20e-01,   7.77e-16,
         -8.08e-01,   3.88e-01,   4.20e-01,  -1.92e-01],
       [ -2.72e-01,   2.72e-01,  -2.05e-01,  -3.35e-01,   3.00e-15,
         -5.41e-01,   2.05e-01,   3.35e-01,   5.41e-01]])



In [17]:

    
# Show W with readable labels: rows are cues, columns are outcomes.
pd.DataFrame(
    W,
    index=cues.get_feature_names(),
    columns=out.get_feature_names()
)









    Out[17]:






  
    
      
      NIL
      PLURAL
      and
      as
      hand
      lad
      land
      lass
      sad
    
  
  
    
      a
       1.45
      -0.45
       0.38
       1.03
      -4.88e-15
       0.41
      -0.38
      -0.03
      -0.41
    
    
      d
      -0.53
       0.53
      -0.16
      -0.44
       2.44e-15
       0.39
       0.16
      -0.56
       0.61
    
    
      h
      -0.49
       0.49
      -0.69
       0.05
       1.00e+00
       0.36
      -0.31
      -0.05
      -0.36
    
    
      l
      -0.22
       0.22
      -0.21
      -0.62
       2.80e-15
       0.17
       0.21
       0.62
      -0.17
    
    
      n
       0.09
      -0.09
       0.61
      -0.42
       7.77e-16
      -0.81
       0.39
       0.42
      -0.19
    
    
      s
      -0.27
       0.27
      -0.21
      -0.34
       3.00e-15
      -0.54
       0.21
       0.34
       0.54
    
  

6 rows × 9 columns

Compute activations. Let $u$ be a vector of cues that are active for a given input. For example, for the input hands, we have:



In [20]:

    
# Column vector with a 1 for every cue (letter) present in "hands", in the
# same cue order the vectorizer was fitted with.
u = cues.transform([dict.fromkeys('hands', True)]).T
u









    Out[20]:





array([[ 1.],
       [ 1.],
       [ 1.],
       [ 0.],
       [ 1.],
       [ 1.]])

Given $u$, the activation $a_j$ of a meaning $j$ is the sum of the weights from the active cues to that meaning: $$a_j=\sum_i u_iW_{ij},\qquad\text{i.e. } a=W^Tu$$



In [21]:

    
# Activation of every outcome: add up the weights of the active cues.
np.dot(W.T, u)









    Out[21]:





array([[ 0.24],
       [ 0.76],
       [-0.07],
       [-0.11],
       [ 1.  ],
       [-0.18],
       [ 0.07],
       [ 0.11],
       [ 0.18]])



In [22]:

    
# Same activations, labelled by outcome name.
pd.DataFrame(np.dot(W.T, u), index=out.get_feature_names())









    Out[22]:






  
    
      
      0
    
  
  
    
      NIL
       0.24
    
    
      PLURAL
       0.76
    
    
      and
      -0.07
    
    
      as
      -0.11
    
    
      hand
       1.00
    
    
      lad
      -0.18
    
    
      land
       0.07
    
    
      lass
       0.11
    
    
      sad
       0.18
    
  

9 rows × 1 columns



In [27]:

    
targets = ['hands', 'hand']
# Encode each target word as a column of cue indicators, push all columns
# through W at once, and label the result (rows: outcomes, columns: targets).
pd.DataFrame(
    np.dot(W.T, cues.transform([dict.fromkeys(t, True) for t in targets]).T),
    index=out.get_feature_names(),
    columns=targets
)









    Out[27]:






  
    
      
      hands
      hand
    
  
  
    
      NIL
       0.24
       0.51
    
    
      PLURAL
       0.76
       0.49
    
    
      and
      -0.07
       0.14
    
    
      as
      -0.11
       0.22
    
    
      hand
       1.00
       1.00
    
    
      lad
      -0.18
       0.36
    
    
      land
       0.07
      -0.14
    
    
      lass
       0.11
      -0.22
    
    
      sad
       0.18
      -0.36
    
  

9 rows × 2 columns

The same thing, but packaged up in a function:



In [28]:

    
from ndl import *



In [29]:

    
# `ndl` comes from the star-import of the local `ndl` module, whose source is
# not shown here. Presumably it wraps the steps above; the table it returns
# for `data` matches the labelled weight matrix W computed by hand.
ndl(data)









    Out[29]:






  
    
      
      NIL
      PLURAL
      and
      as
      hand
      lad
      land
      lass
      sad
    
  
  
    
      a
       1.45
      -0.45
       0.38
       1.03
      -4.88e-15
       0.41
      -0.38
      -0.03
      -0.41
    
    
      d
      -0.53
       0.53
      -0.16
      -0.44
       2.44e-15
       0.39
       0.16
      -0.56
       0.61
    
    
      h
      -0.49
       0.49
      -0.69
       0.05
       1.00e+00
       0.36
      -0.31
      -0.05
      -0.36
    
    
      l
      -0.22
       0.22
      -0.21
      -0.62
       2.80e-15
       0.17
       0.21
       0.62
      -0.17
    
    
      n
       0.09
      -0.09
       0.61
      -0.42
       7.77e-16
      -0.81
       0.39
       0.42
      -0.19
    
    
      s
      -0.27
       0.27
      -0.21
      -0.34
       3.00e-15
      -0.54
       0.21
       0.34
       0.54
    
  

6 rows × 9 columns



In [ ]:

	WordForm	Frequency	Outcomes	Cues
1	hand	10	[hand, NIL]	[h, a, n, d]
2	hands	20	[hand, PLURAL]	[h, a, n, d, s]
3	land	8	[land, NIL]	[l, a, n, d]
4	lands	3	[land, PLURAL]	[l, a, n, d, s]
5	and	35	[and, NIL]	[a, n, d]
6	sad	18	[sad, NIL]	[s, a, d]
7	as	35	[as, NIL]	[a, s]
8	lad	102	[lad, NIL]	[l, a, d]
9	lad	54	[lad, PLURAL]	[l, a, d]
10	lass	134	[lass, NIL]	[l, a, s, s]

	NIL	PLURAL	and	as	hand	lad	land	lass	sad
a	1.45	-0.45	0.38	1.03	-4.88e-15	0.41	-0.38	-0.03	-0.41
d	-0.53	0.53	-0.16	-0.44	2.44e-15	0.39	0.16	-0.56	0.61
h	-0.49	0.49	-0.69	0.05	1.00e+00	0.36	-0.31	-0.05	-0.36
l	-0.22	0.22	-0.21	-0.62	2.80e-15	0.17	0.21	0.62	-0.17
n	0.09	-0.09	0.61	-0.42	7.77e-16	-0.81	0.39	0.42	-0.19
s	-0.27	0.27	-0.21	-0.34	3.00e-15	-0.54	0.21	0.34	0.54

	0
NIL	0.24
PLURAL	0.76
and	-0.07
as	-0.11
hand	1.00
lad	-0.18
land	0.07
lass	0.11
sad	0.18

	hands	hand
NIL	0.24	0.51
PLURAL	0.76	0.49
and	-0.07	0.14
as	-0.11	0.22
hand	1.00	1.00
lad	-0.18	0.36
land	0.07	-0.14
lass	0.11	-0.22
sad	0.18	-0.36