In [1]:
entities = {'self', 'addressee', 'other'}
There are 17 possible markers if no distinction is made between two entities of the same type and three or more entities of the same type.
Also, what about the problem of mis-identifying the cue as "self" rather than "addressee" (kids calling themselves "you")?
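The figure of 17 can be sanity-checked directly before the fuller enumeration below: each marker is a nonempty multiset over the three entities, capped at one "self" and two each of "addressee" and "other" (no number distinction beyond singular vs. plural), which gives 2 × 3 × 3 − 1 = 17.

```python
from itertools import combinations_with_replacement

# Each marker is a nonempty multiset drawn from {self, addressee, other},
# capped at one 'self' and at two each of 'addressee' and 'other'.
entities = ['self', 'addressee', 'other']
markers = [c for n in range(1, 6)
           for c in combinations_with_replacement(entities, n)
           if c.count('self') <= 1
           and c.count('addressee') <= 2
           and c.count('other') <= 2]
print(len(markers))  # 17
```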
In [2]:
from itertools import combinations, combinations_with_replacement
referents = []
for i in range(1, len(entities) * 2):
    for combo in combinations_with_replacement(entities, i):
        # choral we is impossible
        if combo.count('self') > 1:
            continue
        # only singular vs. plural
        if combo.count('addressee') > 2:
            continue
        if combo.count('other') > 2:
            continue
        # compound cues: every subset of two or more entities
        referent = list(combo)
        for j in range(2, len(combo) + 1):
            for compound in combinations(combo, j):
                if compound not in referent:
                    referent.append(compound)
        referents.append(referent)
In [3]:
len(referents)
Out[3]:
In [4]:
referents
Out[4]:
Spoken English collapses these to 6 possibilities: I, you, s/he, we, you guys, they
In [5]:
def english(referents):
    # first-person
    if 'self' in referents:
        if 'addressee' in referents:  # inclusive we
            # doesn't matter who else is being referred to
            return 'we'
        if 'other' in referents:  # exclusive we
            # doesn't matter who else is being referred to
            return 'we'
        return 'I'
    # second-person, if the speaker isn't included
    elif 'addressee' in referents:
        if referents.count('addressee') > 1:  # inclusive you
            return 'you guys'
        if 'other' in referents:  # exclusive you
            return 'you guys'
        return 'you'
    # third-person, if the addressee isn't included either
    elif 'other' in referents:
        if referents.count('other') > 1:
            return 'they'
        return 's/he'
In [6]:
english(['self', 'addressee'])
Out[6]:
In [7]:
english(['self', 'other'])
Out[7]:
In [8]:
english(['addressee', 'other'])
Out[8]:
In [9]:
english(['addressee', 'addressee']) # also ('addressee', 'addressee') compound
Out[9]:
In [10]:
import pandas
data = pandas.DataFrame()
data['Cues'] = referents
data['Outcomes'] = [english(referent) for referent in referents]
data
Out[10]:
Assume that the distribution of referent sets is uniform, which is probably not true.
In [11]:
import numpy
def sampler(p):
    def uniform():
        # draw a referent-set index uniformly from 0..p-1
        return numpy.random.choice(p)
    return uniform
referent_sampler = sampler(len(data))
In [12]:
import ndl
def activation(W):
    return pandas.DataFrame([ndl.activation(c, W) for c in data.Cues], index=data.index)
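The `ndl` module used here is external; as a rough sketch of the Rescorla–Wagner update that `ndl.rw` presumably implements (the function name, `beta`, and `lambda_` below are illustrative assumptions, not the module's actual API): on each trial, every present cue's link to the observed outcome is nudged toward λ, and its links to absent outcomes are nudged toward 0, in proportion to the prediction error.

```python
import numpy as np

# Minimal Rescorla-Wagner sketch (assumed behavior of ndl.rw; the real
# module may differ). W is a cues x outcomes weight matrix.
def rw_sketch(trials, cue_index, outcome_index, beta=0.1, lambda_=1.0):
    W = np.zeros((len(cue_index), len(outcome_index)))
    for cues, outcome in trials:
        rows = [cue_index[c] for c in cues]
        target = np.zeros(len(outcome_index))
        target[outcome_index[outcome]] = lambda_
        activation = W[rows].sum(axis=0)         # summed support from present cues
        W[rows] += beta * (target - activation)  # same delta for every present cue
    return W

cue_index = {'self': 0, 'addressee': 1, 'other': 2}
outcome_index = {'I': 0, 'we': 1}
W = rw_sketch([(['self'], 'I'), (['self', 'other'], 'we')] * 50,
              cue_index, outcome_index)
```

After these alternating trials, the "other" cue supports "we" more strongly than "I", since it only ever co-occurs with "we".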
In [13]:
W = ndl.rw(data, M=100, distribution=referent_sampler)
A = activation(W)
A
Out[13]:
In [14]:
pandas.DataFrame([data['Outcomes'], A.idxmax(1), A.idxmax(1) == data['Outcomes']],
                 index=['Truth', 'Prediction', 'Accurate?']).T
Out[14]:
With 100 trials, the learner gets a lot of them right, but only by predicting 'you guys' all of the time, or 'we' whenever self is a referent, since those two outcomes cover most of the referent sets.
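That ceiling can be made concrete with a quick tally. The helper below is a self-contained re-statement of the `english()` mapping above (condensed, not the notebook's own code), enumerating the 17 referent multisets directly: always answering 'we' when self is present and 'you guys' otherwise gets 13 of the 17 set types right, about 0.76 under the uniform-distribution assumption.

```python
from itertools import product
from collections import Counter

# Condensed re-statement of the english() mapping for a standalone tally.
def english_label(referent):
    if 'self' in referent:
        return 'we' if len(referent) > 1 else 'I'
    if 'addressee' in referent:
        if referent.count('addressee') > 1 or 'other' in referent:
            return 'you guys'
        return 'you'
    return 'they' if referent.count('other') > 1 else 's/he'

# The 17 referent multisets: self 0-1, addressee 0-2, other 0-2, nonempty.
sets = [['self'] * s + ['addressee'] * a + ['other'] * o
        for s, a, o in product(range(2), range(3), range(3)) if s + a + o > 0]
counts = Counter(english_label(r) for r in sets)
print(counts['we'], counts['you guys'])                 # 8 5
print((counts['we'] + counts['you guys']) / len(sets))  # 13/17, about 0.76
```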
In [15]:
import sim
In [16]:
english_learning = sim.Simulation(english, data, referent_sampler, 2000)
In [17]:
import matplotlib.pyplot as plt
%matplotlib inline
In [18]:
trajectory = [english_learning.accuracy(i) for i in range(1, english_learning.MAX_M)]
plt.plot(range(1, len(trajectory) + 1), trajectory, '-')
plt.xlabel('Trial Number')
Out[18]:
In [19]:
%load_ext rpy2.ipython
%Rpush trajectory
In [20]:
%%R
trajectory = data.frame(trial=1:length(trajectory), learned=trajectory)
library('ggplot2')
ggplot(trajectory, aes(trial, learned)) +
geom_point(alpha=0.25) +
stat_smooth() +
coord_cartesian(ylim=c(0,1))
In [ ]: