In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2
%matplotlib inline
%precision 2
pd.set_option('display.precision', 3)
import ndl, sim
from zt import ztnbinom
Set up the compute cluster and initialize its environment. (Make sure every engine has the right versions of the files, especially sim.py!)
In [14]:
from IPython.parallel import Client
rc = Client(profile='home')
dview = rc.direct_view()
dview.block = True
lview = rc.load_balanced_view()
lview.block = True
rc.ids
Out[14]:
In [15]:
%%px
import sys
sys.path = ['/home1/malouf/learning'] + sys.path
import sim
#from sim import Simulation
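Optionally, since the note above stresses having the right sim.py on every engine, each engine can be asked where it imported sim from (an extra check, not part of the original run):

%px sim.__file__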
Create data that's distributed like the corpus counts in Ramscar et al.'s PNAS paper (see the Input Modeling notebook for details).
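The zt module itself is not shown in this notebook. For reference, a zero-truncated negative binomial sampler can be sketched on top of scipy.stats.nbinom by redrawing zeros until the support starts at 1 (a minimal stand-in, assuming zt's ztnbinom shares scipy's (n, p) parameterization):

import numpy as np
from scipy.stats import nbinom

def ztnbinom_rvs(n, p, size=1):
    # Sample from a negative binomial, then redraw any zeros so the
    # result is supported on 1, 2, 3, ... (zero truncation).
    draws = nbinom.rvs(n, p, size=size)
    while np.any(draws == 0):
        zeros = draws == 0
        draws[zeros] = nbinom.rvs(n, p, size=zeros.sum())
    return draws[0] if size == 1 else draws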
In [4]:
def cues(N):
    card = ztnbinom.rvs(3, .6)
    feats = range(card) + ['exactly%d' % card]
    # codeFunc is not defined in this notebook; it is presumably
    # provided elsewhere (e.g., in the Input Modeling notebook).
    return [feats, codeFunc(card)]
In [5]:
ns = [ztnbinom.rvs(3, .6) for i in xrange(10000)]
data = np.zeros(max(ns))
for i in ns:
    data[i-1] += 1
data
Out[5]:
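As an optional sanity check (not in the original run), the empirical counts can be compared with the theoretical zero-truncated pmf, which is just the negative binomial pmf renormalized over k >= 1:

from scipy.stats import nbinom

k = np.arange(1, len(data) + 1)
pmf = nbinom.pmf(k, 3, .6) / (1 - nbinom.pmf(0, 3, .6))
expected = 10000 * pmf  # compare against the observed counts in data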
In [18]:
data = pd.DataFrame(data, columns=['Frequency'], index=range(1, len(data)+1))
data['Cues'] = [range(1, i+1) + ['exactly%d' % i] for i in data.index]
data['Number'] = data.index
data
Out[18]:
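Each row pairs a number's count cues with a discrete 'exactly' cue; for example:

data['Cues'][3]   # -> [1, 2, 3, 'exactly3']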
In [19]:
%%time
r = sim.experiment(data, P=200, view=lview)
In [20]:
sim.all_results(r)
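sim and ndl are local modules whose internals are not shown here. As a rough sketch of the kind of learning rule an NDL-style simulation applies, here is a minimal Rescorla-Wagner update (illustrative only; the names and parameters are assumptions, not sim.experiment's actual API):

from collections import defaultdict

def rw_update(W, cues, outcomes, all_outcomes, alpha=0.1, beta=0.1, lam=1.0):
    # One Rescorla-Wagner trial: the prediction for each outcome is the
    # summed weight of the cues present on this trial; each present cue's
    # weight moves toward lambda for outcomes that occur and toward 0 for
    # outcomes that do not.
    for o in all_outcomes:
        target = lam if o in outcomes else 0.0
        pred = sum(W[(c, o)] for c in cues)
        for c in cues:
            W[(c, o)] += alpha * beta * (target - pred)
    return W

W = defaultdict(float)
W = rw_update(W, cues=[1, 2, 'exactly2'], outcomes=[2],
              all_outcomes=[1, 2, 3])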
Now add a background feature that is present on every trial (basically an intercept).
In [8]:
data['Cues'] = [['background'] + cues for cues in data['Cues']]
data
Out[8]:
In [9]:
%%time
r2 = sim.experiment(data, P=200, view=lview)
sim.all_results(r2)
In [16]:
ns = [ztnbinom.rvs(3, .45) for i in xrange(10000)]
data2 = np.zeros(max(ns))
for i in ns:
    data2[i-1] += 1
data2 = pd.DataFrame(data2, columns=['Frequency'], index=range(1, len(data2)+1))
data2['Cues'] = [range(1, i+1) + ['exactly%d' % i] for i in data2.index]
data2['Number'] = data2.index
data2
Out[16]:
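The only change here is p = .45 instead of .6, which shifts probability mass toward larger set sizes; before truncation the negative binomial mean n(1 - p)/p rises from 2.0 to about 3.67:

from scipy.stats import nbinom
nbinom.mean(3, .6), nbinom.mean(3, .45)   # -> (2.0, 3.67)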
In [17]:
%%time
r3 = sim.experiment(data2, P=200, view=lview)
sim.all_results(r3)