notebook.community

Edit and run



In [1]:

    
# Populate the interactive namespace from numpy and matplotlib and render
# figures inline. Later cells call `scatter` and `plot` as bare names, so they
# depend on this cell having been run first.
%pylab inline









    



Populating the interactive namespace from numpy and matplotlib



In [2]:

    
import logging
# Root logger set to DEBUG for the setup cells; the sampling cell further down
# switches it to INFO before its loop.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
import numpy as np
import trees
# NOTE(review): star import. Leaf, Inverse, GaussianLikelihoodModel,
# InteractiveDirichletDiffusionTree and MetropolisHastingsSampler are used
# below without being defined in this notebook, so they presumably come from
# here -- confirm before narrowing this import.
from trees.ddt import *
import mpld3
import seaborn as sns
sns.set_style('white')
from tqdm import tqdm
from sklearn.decomposition import PCA
import networkx as nx



In [18]:

    
# Load the 'zoo' data, project it onto two PCA components, add Gaussian jitter
# and keep at most MAX_POINTS rows (with their labels).
SEED = 0  # fixed so the jitter below is the same on every Restart & Run All
np.random.seed(SEED)
X, y = trees.data.load('zoo')
pca = PCA(n_components=2)
X = pca.fit_transform(X)
X += np.random.normal(scale=.15, size=X.shape)
# Named MAX_POINTS instead of `max`: assigning to `max` would shadow the
# built-in / pylab `max` function for every later cell in the session.
MAX_POINTS = 200
X = X[:MAX_POINTS]
y = y[:MAX_POINTS]
N, D = X.shape
scatter(*X.T);
# Interactor over the truncated data, backed by the database at ../zoo.db.
interactor = trees.interact.Interactor(X, y, trees.interact.Database('../zoo.db'))
constraints = set()
def plot_tree(ddt):
    """Scatter the data in `X` and overlay the internal nodes of `ddt`.

    Starting from `ddt.root`, every node that is not a `Leaf` is drawn as a
    green marker at `node.state` and joined to each of its children with a
    faint green line. The marker size starts at 40 and is divided by two at
    each level further down the tree. Leaves get no marker of their own.
    """
    def _draw(node, size=40):
        # Pre-order walk: marker for this node, then (edge, subtree) per child.
        if not isinstance(node, Leaf):
            scatter(*node.state, color='g', alpha=0.5, s=size)
            for kid in node.children:
                plot(*zip(node.state, kid.state), color='g', alpha=0.2)
                _draw(kid, size=size/2)

    scatter(*X.T)
    _draw(ddt.root)
def generate_constraint(a, b, c):
    """Return the positions of labels `a`, `b` and `c` in `y` as a 3-tuple.

    The tuple keeps the argument order. A label that `y.index` cannot find
    propagates that call's error (ValueError when `y` is a list).
    """
    positions = [y.index(label) for label in (a, b, c)]
    return tuple(positions)
# NOTE(review): what follows is a bare string literal, not an assignment, so
# this cell never defines ALL_CONSTRAINTS; the string only ends up as the
# cell's displayed output. The later `for constraint in ALL_CONSTRAINTS` cell
# will raise NameError unless the name is defined somewhere not shown here.
"""
ALL_CONSTRAINTS = {
    generate_constraint('killer+whale', 'antelope', 'beaver'),
    generate_constraint('killer+whale', 'antelope', 'grizzly+bear'),
    generate_constraint('killer+whale', 'antelope', 'dalmatian'),
    generate_constraint('killer+whale', 'dalmatian', 'beaver'),
    generate_constraint('killer+whale', 'grizzly+bear', 'beaver'),
    generate_constraint('antelope', 'dalmatian', 'beaver'),
    generate_constraint('antelope', 'grizzly+bear', 'beaver'),
    generate_constraint('dalmatian', 'grizzly+bear', 'killer+whale'),
    generate_constraint('dalmatian', 'grizzly+bear', 'antelope'),
    generate_constraint('dalmatian', 'grizzly+bear', 'beaver'),
}
"""









    Out[18]:





"\nALL_CONSTRAINTS = {\n    generate_constraint('killer+whale', 'antelope', 'beaver'),\n    generate_constraint('killer+whale', 'antelope', 'grizzly+bear'),\n    generate_constraint('killer+whale', 'antelope', 'dalmatian'),\n    generate_constraint('killer+whale', 'dalmatian', 'beaver'),\n    generate_constraint('killer+whale', 'grizzly+bear', 'beaver'),\n    generate_constraint('antelope', 'dalmatian', 'beaver'),\n    generate_constraint('antelope', 'grizzly+bear', 'beaver'),\n    generate_constraint('dalmatian', 'grizzly+bear', 'killer+whale'),\n    generate_constraint('dalmatian', 'grizzly+bear', 'antelope'),\n    generate_constraint('dalmatian', 'grizzly+bear', 'beaver'),\n}\n"



In [35]:

    
# Build the Inverse(c=1) object and the compiled Gaussian likelihood model,
# create the interactive tree from the first N_TRAIN_CONSTRAINTS recorded
# interactions, initialise a Metropolis-Hastings sampler on X and display the
# starting tree.
N_TRAIN_CONSTRAINTS = 200
df = Inverse(c=1)
lm = GaussianLikelihoodModel(
    sigma=np.cov(X.T) / 8.0,
    sigma0=np.eye(D) / 2.0,
    mu0=X.mean(axis=0),
).compile()
ddt = InteractiveDirichletDiffusionTree(
    df, lm, constraints=interactor.interactions[:N_TRAIN_CONSTRAINTS])
sampler = MetropolisHastingsSampler(ddt, X)
sampler.initialize_assignments()
#assert sampler.ddt.verify_constraints(ALL_CONSTRAINTS)
mpld3.display(sampler.ddt.plot_mpld3(y))









    



INFO:root:Compiling <Inverse> object...
INFO:root:Done compiling <Inverse> object.
INFO:root:Compiling <GaussianLikelihoodModel> object...
INFO:root:Done compiling <GaussianLikelihoodModel> object.






    Out[35]:



In [36]:

    
# Run 1000 sampler steps. Before each step, record the tree's constraint score
# divided by the number of recorded interactions; plot the resulting trace.
logger.setLevel(logging.INFO)
scores = []
for i in tqdm(xrange(1000)):
    recorded = interactor.interactions
    raw_score = sampler.ddt.score_constraints(recorded)
    scores.append(raw_score / float(len(recorded)))
    #assert sampler.ddt.verify_constraints(interactor.interactions)
    sampler.sample()
plot(scores)









    









    Out[36]:





[<matplotlib.lines.Line2D at 0x7f4622465e90>]



In [28]:

    
# Number of interactions currently held by the interactor.
len(interactor.interactions)









    Out[28]:





214



In [ ]:

    
# Print every constraint for which the current tree's verify_constraint is falsy.
# NOTE(review): ALL_CONSTRAINTS only appears inside a string literal in the
# data-loading cell, so this loop needs it defined elsewhere to run.
for constraint in ALL_CONSTRAINTS:
    if sampler.ddt.verify_constraint(constraint):
        continue
    print(constraint)



In [ ]:

    
# For arguments 2, 0 and 4, take element [1][0] of what point_index returns.
# NOTE(review): the structure of that return value is not visible here -- confirm.
list(sampler.ddt.point_index(i)[1][0] for i in (2, 0, 4))



In [7]:

    
# NOTE(review): `old_ddt` is not assigned in any cell shown in this notebook;
# on a fresh kernel this raises NameError unless it is defined elsewhere.
mpld3.display(old_ddt.plot_mpld3(y))









    Out[7]:



In [ ]:

    
# Display the sampler's current tree through mpld3, passing the labels y.
mpld3.display(sampler.ddt.plot_mpld3(y))



In [37]:

    
# Presumably the share of recorded interactions handed to the tree as
# constraints: 200 (the `[:200]` slice in the model cell) out of 214 (the
# interaction count shown earlier) -- confirm.
200 / 214.0









    Out[37]:





0.9345794392523364



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:

    
# Position of the label 'beaver' in y.
y.index('beaver')



In [ ]:

    
# First entry of y.
y[0]



In [ ]:



In [ ]:



In [ ]: