notebook.community

Edit and run



In [1]:

    
# Core numerical / plotting stack.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Automatically reload edited modules before each cell runs.
%load_ext autoreload
%autoreload 2

# Render matplotlib figures inline in the notebook.
%matplotlib inline

# Display settings: 2-digit float reprs in IPython, 3-digit precision in pandas.
%precision 2
pd.set_option('display.precision', 3)

# Project-local modules. `sim` runs the experiments below and `ztnbinom.rvs`
# is the sampler used to generate counts; `ndl` is not referenced by any cell
# visible in this notebook.
# NOTE(review): ztnbinom is presumably a zero-truncated negative binomial - confirm in zt.py.
import ndl,sim
from zt import ztnbinom

Set up the compute cluster and initialize its environment. (Make sure the cluster has the right versions of all the files, especially sim.py!)



In [14]:

    
# NOTE(review): IPython.parallel was split out into the separate `ipyparallel`
# package as of IPython 4.0, so this import only works on older IPython
# installs - confirm the target environment.
from IPython.parallel import Client

# Connect to the cluster registered under the 'home' profile.
rc = Client(profile='home')
# Direct view on the cluster, switched to blocking mode.
dview = rc.direct_view()
dview.block = True
# Load-balanced view, also blocking; this is the `view` passed to
# sim.experiment in the cells below.
lview = rc.load_balanced_view()
lview.block = True
# Show the ids of the connected engines.
rc.ids









    Out[14]:





[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]



In [15]:

    
%%px

import sys
sys.path = ['/home1/malouf/learning'] + sys.path
import sim
#from sim import Simulation

Create data that's distributed like the corpus counts in Ramscar et al.'s PNAS paper (see the Input Modeling notebook for details).



In [4]:

    
def cues(N):
    """Draw one random cue list together with its coded outcome.

    A cardinality ``card`` is sampled with ``ztnbinom.rvs(3, .6)``. The cue
    list is the integers ``0 .. card-1`` followed by the string
    ``'exactly<card>'``.

    Parameters
    ----------
    N : unused
        Accepted but never read by the body.

    Returns
    -------
    list
        ``[feats, codeFunc(card)]``.

    NOTE(review): ``codeFunc`` is not defined in any cell visible in this
    notebook, so calling this function raises NameError unless it is defined
    elsewhere - confirm.
    NOTE(review): the numeric cues here are 0-based (``range(card)``) whereas
    the DataFrame cells in this notebook build them 1-based
    (``range(1, i+1)``) - confirm which is intended.
    """
    card = ztnbinom.rvs(3, .6)
    # list(...) keeps the concatenation valid where range() is lazy (Python 3);
    # on Python 2 the resulting list is unchanged.
    feats = list(range(card)) + ['exactly%d' % card]
    return [feats, codeFunc(card)]



In [5]:

    
# Draw 10,000 values from the sampler and tally how often each value occurs.
# range() is used instead of the Python-2-only xrange so the cell also runs
# on Python 3; the draws themselves are unaffected.
# NOTE(review): no random seed is set, so the tallies differ on every run.
ns = [ztnbinom.rvs(3, .6) for _ in range(10000)]

# data[k-1] counts the draws equal to k.
# NOTE(review): assumes every draw is an integer >= 1; a draw of 0 would be
# tallied into the last slot via index -1 - confirm against zt.py.
data = np.zeros(max(ns))
for size in ns:
    data[size - 1] += 1
data









    Out[5]:





array([  3.31e+03,   2.57e+03,   1.77e+03,   1.08e+03,   6.19e+02,
         3.13e+02,   1.65e+02,   7.60e+01,   5.30e+01,   2.00e+01,
         7.00e+00,   3.00e+00,   2.00e+00,   1.00e+00,   1.00e+00])



In [18]:

    
# Turn the raw tallies into a table with one row per value, indexed 1..len(data).
data = pd.DataFrame(data, columns=['Frequency'], index=range(1, len(data) + 1))
# Cues for row n: the integers 1..n followed by the marker 'exactly<n>'.
# list(...) is required because a lazy range() (Python 3) cannot be
# concatenated with a list; on Python 2 the result is unchanged.
data['Cues'] = [list(range(1, n + 1)) + ['exactly%d' % n] for n in data.index]
data['Number'] = data.index
data









    Out[18]:






  
    
      
      Frequency
      Cues
      Number
    
  
  
    
      1 
       3313
                                           [1, exactly1]
        1
    
    
      2 
       2570
                                        [1, 2, exactly2]
        2
    
    
      3 
       1773
                                     [1, 2, 3, exactly3]
        3
    
    
      4 
       1084
                                  [1, 2, 3, 4, exactly4]
        4
    
    
      5 
        619
                               [1, 2, 3, 4, 5, exactly5]
        5
    
    
      6 
        313
                            [1, 2, 3, 4, 5, 6, exactly6]
        6
    
    
      7 
        165
                         [1, 2, 3, 4, 5, 6, 7, exactly7]
        7
    
    
      8 
         76
                      [1, 2, 3, 4, 5, 6, 7, 8, exactly8]
        8
    
    
      9 
         53
                   [1, 2, 3, 4, 5, 6, 7, 8, 9, exactly9]
        9
    
    
      10
         20
              [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, exactly10]
       10
    
    
      11
          7
          [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, exactly11]
       11
    
    
      12
          3
       [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, exactl...
       12
    
    
      13
          2
       [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, ex...
       13
    
    
      14
          1
       [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...
       14
    
    
      15
          1
       [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...
       15
    
  

15 rows × 3 columns



In [19]:

    
%%time
# Run the experiment on the `data` table, handing sim.experiment the
# load-balanced cluster view.
# NOTE(review): the meaning of P=200 is defined in sim.py, which is not shown
# here - confirm.
r = sim.experiment(data, P=200, view=lview)









    



CPU times: user 43.2 s, sys: 12.6 s, total: 55.8 s
Wall time: 3min 56s



In [20]:

    
# Pass the first run's results to sim.all_results (presumably a summary or
# plot of the run - no output was captured in this export).
sim.all_results(r)

Now add a `background` cue that is present in every row (basically an intercept).



In [8]:

    
# Prepend a constant 'background' cue to every row's cue list.
# - The loop variable is deliberately not called `cues`: on Python 2 a list
#   comprehension leaks its variable into the enclosing scope, which would
#   overwrite the cues() function defined earlier in this notebook.
# - Rows whose list already starts with 'background' are left untouched, so
#   re-running the cell does not stack up duplicate background cues.
data['Cues'] = [
    row_cues if row_cues[:1] == ['background'] else ['background'] + row_cues
    for row_cues in data['Cues']
]
data









    Out[8]:






  
    
      
      Frequency
      Cues
      Number
      Outcomes
    
  
  
    
      1 
       3313
                               [background, 1, exactly1]
        1
       notdu
    
    
      2 
       2570
                            [background, 1, 2, exactly2]
        2
          du
    
    
      3 
       1773
                         [background, 1, 2, 3, exactly3]
        3
       notdu
    
    
      4 
       1084
                      [background, 1, 2, 3, 4, exactly4]
        4
       notdu
    
    
      5 
        619
                   [background, 1, 2, 3, 4, 5, exactly5]
        5
       notdu
    
    
      6 
        313
                [background, 1, 2, 3, 4, 5, 6, exactly6]
        6
       notdu
    
    
      7 
        165
             [background, 1, 2, 3, 4, 5, 6, 7, exactly7]
        7
       notdu
    
    
      8 
         76
          [background, 1, 2, 3, 4, 5, 6, 7, 8, exactly8]
        8
       notdu
    
    
      9 
         53
       [background, 1, 2, 3, 4, 5, 6, 7, 8, 9, exactly9]
        9
       notdu
    
    
      10
         20
       [background, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, ex...
       10
       notdu
    
    
      11
          7
       [background, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11...
       11
       notdu
    
    
      12
          3
       [background, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11...
       12
       notdu
    
    
      13
          2
       [background, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11...
       13
       notdu
    
    
      14
          1
       [background, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11...
       14
       notdu
    
    
      15
          1
       [background, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11...
       15
       notdu
    
  

15 rows × 4 columns



In [9]:

    
%%time
# Repeat the experiment on `data`, whose cue lists now include the
# 'background' cue, with the same P and cluster view as the first run, then
# pass the results to sim.all_results.
r2 = sim.experiment(data, P=200, view=lview)
sim.all_results(r2)









    



CPU times: user 47.1 s, sys: 11.5 s, total: 58.7 s
Wall time: 4min 12s



In [16]:

    
# Build a second frequency table from the same sampler with its second
# parameter changed from .6 to .45.
# range() replaces the Python-2-only xrange, and list(range(...)) replaces the
# bare range(...) + list concatenation, so the cell also runs on Python 3.
# NOTE(review): no random seed is set, so the tallies differ on every run.
ns = [ztnbinom.rvs(3, .45) for _ in range(10000)]

# data2[k-1] counts the draws equal to k.
# NOTE(review): assumes every draw is an integer >= 1 - confirm against zt.py.
data2 = np.zeros(max(ns))
for size in ns:
    data2[size - 1] += 1

# One row per value, indexed 1..len(data2); cues are 1..n plus 'exactly<n>'.
data2 = pd.DataFrame(data2, columns=['Frequency'], index=range(1, len(data2) + 1))
data2['Cues'] = [list(range(1, n + 1)) + ['exactly%d' % n] for n in data2.index]
data2['Number'] = data2.index
data2









    Out[16]:






  
    
      
      Frequency
      Cues
      Number
    
  
  
    
      1 
       1653
                                           [1, exactly1]
        1
    
    
      2 
       1877
                                        [1, 2, exactly2]
        2
    
    
      3 
       1637
                                     [1, 2, 3, exactly3]
        3
    
    
      4 
       1363
                                  [1, 2, 3, 4, exactly4]
        4
    
    
      5 
       1035
                               [1, 2, 3, 4, 5, exactly5]
        5
    
    
      6 
        793
                            [1, 2, 3, 4, 5, 6, exactly6]
        6
    
    
      7 
        559
                         [1, 2, 3, 4, 5, 6, 7, exactly7]
        7
    
    
      8 
        361
                      [1, 2, 3, 4, 5, 6, 7, 8, exactly8]
        8
    
    
      9 
        265
                   [1, 2, 3, 4, 5, 6, 7, 8, 9, exactly9]
        9
    
    
      10
        149
              [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, exactly10]
       10
    
    
      11
        108
          [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, exactly11]
       11
    
    
      12
         79
       [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, exactl...
       12
    
    
      13
         47
       [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, ex...
       13
    
    
      14
         36
       [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...
       14
    
    
      15
         14
       [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...
       15
    
    
      16
         10
       [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...
       16
    
    
      17
          5
       [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...
       17
    
    
      18
          5
       [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...
       18
    
    
      19
          0
       [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...
       19
    
    
      20
          3
       [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...
       20
    
    
      21
          1
       [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...
       21
    
  

21 rows × 3 columns



In [17]:

    
%%time
r3 = sim.experiment(data, P=200, view=lview)
sim.all_results(r3)









    



CPU times: user 43 s, sys: 12.6 s, total: 55.5 s
Wall time: 3min 59s



In [ ]:

	Frequency	Cues	Number
1	3313	[1, exactly1]	1
2	2570	[1, 2, exactly2]	2
3	1773	[1, 2, 3, exactly3]	3
4	1084	[1, 2, 3, 4, exactly4]	4
5	619	[1, 2, 3, 4, 5, exactly5]	5
6	313	[1, 2, 3, 4, 5, 6, exactly6]	6
7	165	[1, 2, 3, 4, 5, 6, 7, exactly7]	7
8	76	[1, 2, 3, 4, 5, 6, 7, 8, exactly8]	8
9	53	[1, 2, 3, 4, 5, 6, 7, 8, 9, exactly9]	9
10	20	[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, exactly10]	10
11	7	[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, exactly11]	11
12	3	[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, exactl...	12
13	2	[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, ex...	13
14	1	[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...	14
15	1	[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...	15

	Frequency	Cues	Number	Outcomes
1	3313	[background, 1, exactly1]	1	notdu
2	2570	[background, 1, 2, exactly2]	2	du
3	1773	[background, 1, 2, 3, exactly3]	3	notdu
4	1084	[background, 1, 2, 3, 4, exactly4]	4	notdu
5	619	[background, 1, 2, 3, 4, 5, exactly5]	5	notdu
6	313	[background, 1, 2, 3, 4, 5, 6, exactly6]	6	notdu
7	165	[background, 1, 2, 3, 4, 5, 6, 7, exactly7]	7	notdu
8	76	[background, 1, 2, 3, 4, 5, 6, 7, 8, exactly8]	8	notdu
9	53	[background, 1, 2, 3, 4, 5, 6, 7, 8, 9, exactly9]	9	notdu
10	20	[background, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, ex...	10	notdu
11	7	[background, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11...	11	notdu
12	3	[background, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11...	12	notdu
13	2	[background, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11...	13	notdu
14	1	[background, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11...	14	notdu
15	1	[background, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11...	15	notdu

	Frequency	Cues	Number
1	1653	[1, exactly1]	1
2	1877	[1, 2, exactly2]	2
3	1637	[1, 2, 3, exactly3]	3
4	1363	[1, 2, 3, 4, exactly4]	4
5	1035	[1, 2, 3, 4, 5, exactly5]	5
6	793	[1, 2, 3, 4, 5, 6, exactly6]	6
7	559	[1, 2, 3, 4, 5, 6, 7, exactly7]	7
8	361	[1, 2, 3, 4, 5, 6, 7, 8, exactly8]	8
9	265	[1, 2, 3, 4, 5, 6, 7, 8, 9, exactly9]	9
10	149	[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, exactly10]	10
11	108	[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, exactly11]	11
12	79	[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, exactl...	12
13	47	[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, ex...	13
14	36	[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...	14
15	14	[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...	15
16	10	[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...	16
17	5	[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...	17
18	5	[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...	18
19	0	[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...	19
20	3	[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...	20
21	1	[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...	21