In [16]:
import nltk

In [27]:
tree = nltk.Tree.parse('(NP (Adj old) (NP (N men) (Conj and) (N women)))')

In [28]:
tree


Out[28]:
Tree('NP', [Tree('Adj', ['old']), Tree('NP', [Tree('N', ['men']), Tree('Conj', ['and']), Tree('N', ['women'])])])

In [9]:
tree2=nltk.Tree.parse('(S(NP(DT The)(NN boy))(VP(VP(VBD saw)(NP(DT a)(NN girl)))(PP(IN with)(NP(DT a)(NN telescope)))))')

In [10]:
tree2


Out[10]:
Tree('S', [Tree('NP', [Tree('DT', ['The']), Tree('NN', ['boy'])]), Tree('VP', [Tree('VP', [Tree('VBD', ['saw']), Tree('NP', [Tree('DT', ['a']), Tree('NN', ['girl'])])]), Tree('PP', [Tree('IN', ['with']), Tree('NP', [Tree('DT', ['a']), Tree('NN', ['telescope'])])])])])

In [60]:
# draw() hands control to the Tk main loop (TreeView(...).mainloop() in the
# traceback below), so this cell blocks while that loop runs; the recorded
# run was stopped with a KeyboardInterrupt.
tree2.draw()


---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-60-b29728a74b03> in <module>()
----> 1 tree2.draw()

/usr/lib/python2.7/site-packages/nltk/tree.pyc in draw(self)
    648         """
    649         from nltk.draw.tree import draw_trees
--> 650         draw_trees(self)
    651 
    652     def __repr__(self):

/usr/lib/python2.7/site-packages/nltk/draw/tree.pyc in draw_trees(*trees)
    862     :rtype: None
    863     """
--> 864     TreeView(*trees).mainloop()
    865     return
    866 

/usr/lib/python2.7/site-packages/nltk/draw/tree.pyc in mainloop(self, *args, **kwargs)
    853         """
    854         if in_idle(): return
--> 855         self._top.mainloop(*args, **kwargs)
    856 
    857 def draw_trees(*trees):

/usr/lib/python2.7/lib-tk/Tkinter.pyc in mainloop(self, n)
   1066     def mainloop(self, n=0):
   1067         """Call the mainloop of Tk."""
-> 1068         self.tk.mainloop(n)
   1069     def quit(self):
   1070         """Quit the Tcl interpreter. All widgets will be destroyed."""

KeyboardInterrupt: 

In [18]:
tr=open("train.txt",'r')

In [19]:
# Read every line of train.txt (one bracketed tree per line, newline kept),
# then release the file handle, which is not needed once the lines are in memory.
trains=tr.readlines()
tr.close()

In [20]:
trains


Out[20]:
['(S(NP(DT The)(NN boy))(VP(VP(VBD saw)(NP(DT a)(NN girl)))(PP(IN with)(NP(DT a)(NN telescope)))))\n',
 '(S(NP(DT The)(NN girl))(VP(VBD saw)(NP(NP(DT a)(NN boy))(PP(IN with)(NP(DT a)(NN telescope))))))']

In [21]:
# Parse each bracketed line read from train.txt into an nltk.Tree;
# the result is an indexable list of trees.
trainTrees = list(map(nltk.Tree.parse, trains))

In [24]:
trainTrees[1]


Out[24]:
Tree('S', [Tree('NP', [Tree('DT', ['The']), Tree('NN', ['girl'])]), Tree('VP', [Tree('VBD', ['saw']), Tree('NP', [Tree('NP', [Tree('DT', ['a']), Tree('NN', ['boy'])]), Tree('PP', [Tree('IN', ['with']), Tree('NP', [Tree('DT', ['a']), Tree('NN', ['telescope'])])])])])])

In [42]:
# NOTE(review): this call is made on `tree`, but the following cell inspects
# `tree2` (whose repr is unchanged) — confirm which tree was meant.
tree.chomsky_normal_form()

In [43]:
tree2


Out[43]:
Tree('S', [Tree('NP', [Tree('DT', ['The']), Tree('NN', ['boy'])]), Tree('VP', [Tree('VP', [Tree('VBD', ['saw']), Tree('NP', [Tree('DT', ['a']), Tree('NN', ['girl'])])]), Tree('PP', [Tree('IN', ['with']), Tree('NP', [Tree('DT', ['a']), Tree('NN', ['telescope'])])])])])

In [92]:
tree2[0][0][0]


Out[92]:
'The'

In [76]:
# Walk the immediate subtrees of tree2: remember each subtree's label and
# report how many children it has.
children = []
for child in tree2:
    children += [child.node]
    print(len(child))

# Show the collected labels as a list, then as the tuple form that can be
# used as a dictionary key (displayed as the cell's value).
print(children)
tuple(children)


2
2
['NP', 'VP']
Out[76]:
('NP', 'VP')

In [96]:
# Production counts shared by default across traverse() calls:
#   {parent label: {tuple of child labels / words: number of occurrences}}
countDict={}
def traverse(t, counts=None):
    """Count the productions (parent -> children) used in tree ``t``.

    Every tree node contributes one production whose left-hand side is the
    node's label and whose right-hand side is a tuple holding, for each
    child, the child's label (if the child is itself a tree) or the child
    itself (if it is a leaf word).

    Parameters
    ----------
    t : tree node or leaf
        A tree-like object exposing a ``node`` label and iterating over its
        children, or a leaf (anything without a ``node`` attribute).
    counts : dict, optional
        Nested dictionary to update in place. Defaults to the module-level
        ``countDict`` so existing ``traverse(tree)`` calls keep working.

    Side effects
    ------------
    Updates ``counts`` in place and prints every leaf that is visited.
    """
    if counts is None:
        counts = countDict

    try:
        node = t.node
    except AttributeError:
        # Leaves carry no ``node`` label: echo the word and stop descending.
        print(t)
        return

    # Classify each child on its own. Using the number of leaves under ``t``
    # as a stand-in for "children are words" breaks on unary chains such as
    # (NP (NN boy)) and on nodes that directly hold several words.
    key = tuple(getattr(child, 'node', child) for child in t)

    productions = counts.setdefault(node, {})
    productions[key] = productions.get(key, 0) + 1

    for child in t:
        traverse(child, counts)
        
traverse(tree2)


The
boy
saw
a
girl
with
a
telescope

In [97]:
countDict


Out[97]:
{'DT': {('The',): 1, ('a',): 2},
 'IN': {('with',): 1},
 'NN': {('boy',): 1, ('girl',): 1, ('telescope',): 1},
 'NP': {('DT', 'NN'): 3},
 'PP': {('IN', 'NP'): 1},
 'S': {('NP', 'VP'): 1},
 'VBD': {('saw',): 1},
 'VP': {('VBD', 'NP'): 1, ('VP', 'PP'): 1}}

In [2]:
a=tuple(['PP','A','B'])

In [4]:
print a


('PP', 'A', 'B')

In [6]:
from collections import defaultdict

In [7]:
pi = defaultdict(float)

In [8]:
pi[1]


Out[8]:
0.0

In [10]:
for i in xrange(1,3):
    print i


1
2

In [11]:
tuple('boy')


Out[11]:
('b', 'o', 'y')

In [12]:
p=['S', ['NP', ['NP', ['DT', 'a'], ['NN', 'boy']], ['PP', ['IN', 'with'], ['NP', ['DT', 'a'], ['NN', 'telescope']]]], ['VP', ['VBD', 'saw'], ['NP', ['DT', 'a'], ['NN', 'girl']]]]

In [13]:
p


Out[13]:
['S',
 ['NP',
  ['NP', ['DT', 'a'], ['NN', 'boy']],
  ['PP', ['IN', 'with'], ['NP', ['DT', 'a'], ['NN', 'telescope']]]],
 ['VP', ['VBD', 'saw'], ['NP', ['DT', 'a'], ['NN', 'girl']]]]

In [18]:
# Raises TypeError ("expected string or buffer"): Tree.parse tokenises its
# argument with a regular expression, so it needs a bracketed string, while
# `p` is a nested Python list.
nltk.Tree.parse(p)


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-18-ff084aa36a03> in <module>()
----> 1 nltk.Tree.parse(p)

/usr/lib/python2.7/site-packages/nltk/tree.pyc in parse(cls, s, brackets, parse_node, parse_leaf, node_pattern, leaf_pattern, remove_empty_top_bracketing)
    571         # Walk through each token, updating a stack of trees.
    572         stack = [(None, [])] # list of (node, children) tuples
--> 573         for match in token_re.finditer(s):
    574             token = match.group()
    575             # Beginning of a tree/subtree

TypeError: expected string or buffer

In [21]:
textTree=str(p)

In [32]:
textTree


Out[32]:
'(S (NP (NP (DT a) (NN boy)) (PP (IN with) (NP (DT a) (NN telescope)))) (VP (VBD saw) (NP (DT a) (NN girl))))'

In [23]:
textTree=textTree.replace('[','(')

In [25]:
textTree=textTree.replace(']',')')

In [28]:
textTree=textTree.replace('\'','')

In [31]:
textTree=textTree.replace(',','')

In [33]:
foo = ['a', 'b', 'c', 'd', 'e']

In [34]:
import random

In [36]:
random.sample(foo,3)


Out[36]:
['e', 'b', 'c']

In [ ]: