In [16]:
import nltk
In [27]:
# Small example tree built from its bracketed string form: an NP whose inner NP
# has three children (N, Conj, N), i.e. one node that is not binary.
tree = nltk.Tree.parse('(NP (Adj old) (NP (N men) (Conj and) (N women)))')
In [28]:
tree
Out[28]:
In [9]:
# Example sentence tree in bracketed form; in this analysis the PP
# "with a telescope" is attached to the VP rather than to the object NP.
tree2=nltk.Tree.parse('(S(NP(DT The)(NN boy))(VP(VP(VBD saw)(NP(DT a)(NN girl)))(PP(IN with)(NP(DT a)(NN telescope)))))')
In [10]:
tree2
Out[10]:
In [60]:
tree2.draw()
In [18]:
# Read every line of the training file into a list (each line is later handed
# to nltk.Tree.parse, so presumably one bracketed tree per line).
# The `with` block guarantees the file handle is closed even if reading fails;
# the original left it open for the lifetime of the kernel.
with open("train.txt",'r') as tr:
    trains=tr.readlines()
In [20]:
trains
Out[20]:
In [21]:
# Parse each training line into a tree, keeping the order of the file.
trainTrees=[nltk.Tree.parse(line) for line in trains]
In [24]:
trainTrees[1]
Out[24]:
In [42]:
# Binarise `tree`. The return value is discarded, so this relies on the method
# rewriting the tree object itself — NOTE(review): confirm against the installed
# NLTK version. The following cell displays `tree2`, not `tree`; verify that is
# the tree you meant to inspect.
tree.chomsky_normal_form()
In [43]:
tree2
Out[43]:
In [92]:
tree2[0][0][0]
Out[92]:
In [76]:
# Report how many children each top-level constituent of tree2 has.
for child in tree2:
    print(len(child))
# Labels of those constituents, in tree order.
children=[child.node for child in tree2]
print(children)
# A tuple of labels is hashable, so it can serve as a dictionary key.
tuple(children)
Out[76]:
In [96]:
# Production counts collected by traverse():
#   {parent label: {tuple of child labels / tokens: number of occurrences}}
countDict={}
def traverse(t, counts=None):
    """Count the productions (parent -> children) used in tree ``t``.

    Every node that has a ``.node`` label contributes one production. The
    right-hand side is a tuple with one entry per child: the child's label
    when the child is itself a labelled subtree, or the child itself when it
    is a leaf token.

    Parameters
    ----------
    t : tree node or leaf
        A labelled, iterable tree node (anything exposing ``.node``), or a
        leaf token. Leaves head no production and are skipped.
    counts : dict, optional
        Dictionary to accumulate into. Defaults to the module-level
        ``countDict`` so existing ``traverse(tree)`` calls keep working.

    Returns
    -------
    None
        Results are accumulated in ``counts`` as a side effect.
    """
    if counts is None:
        counts = countDict
    try:
        node = t.node
    except AttributeError:
        # A leaf (plain token) has no label and no children to record.
        return
    # Decide leaf-vs-subtree for each child individually. Testing the number
    # of leaves under the parent instead would misfile unary chains such as
    # (NP (NN boy)) and fail on nodes mixing tokens and subtrees.
    children = []
    for child in t:
        try:
            children.append(child.node)
        except AttributeError:
            children.append(child)
    key = tuple(children)
    rules = counts.setdefault(node, {})
    rules[key] = rules.get(key, 0) + 1
    # Recurse so that every production below this node is counted as well.
    for child in t:
        traverse(child, counts)
traverse(tree2)
In [97]:
countDict
Out[97]:
In [2]:
a=tuple(['PP','A','B'])
In [4]:
print a
In [6]:
from collections import defaultdict
In [7]:
pi = defaultdict(float)
In [8]:
pi[1]
Out[8]:
In [10]:
for i in xrange(1,3):
print i
In [11]:
tuple('boy')
Out[11]:
In [12]:
p=['S', ['NP', ['NP', ['DT', 'a'], ['NN', 'boy']], ['PP', ['IN', 'with'], ['NP', ['DT', 'a'], ['NN', 'telescope']]]], ['VP', ['VBD', 'saw'], ['NP', ['DT', 'a'], ['NN', 'girl']]]]
In [13]:
p
Out[13]:
In [18]:
# NOTE(review): `p` is a nested Python list, not a bracketed string. Tree.parse
# presumably rejects it (no output was recorded for this cell) — confirm, and
# parse a bracketed string built from `p` instead.
nltk.Tree.parse(p)
In [21]:
def list_to_bracketed(node):
    """Serialise a nested-list parse tree to bracketed notation.

    A list or tuple ``[label, child, ...]`` becomes ``"(label child ...)"``;
    anything else is treated as a label or token and emitted via ``str()``.
    Walking the structure directly keeps tokens that contain commas, quotes
    or square brackets intact, which rewriting ``str(p)`` with a chain of
    ``replace`` calls would corrupt.
    """
    if isinstance(node, (list, tuple)):
        return '(' + ' '.join(list_to_bracketed(child) for child in node) + ')'
    return str(node)

# Bracketed string form of the nested list `p`.
textTree=list_to_bracketed(p)
textTree
In [33]:
foo = ['a', 'b', 'c', 'd', 'e']
In [34]:
import random
In [36]:
# Draw 3 elements of foo without replacement. No seed is set in the visible
# cells, so the selection differs from run to run.
random.sample(foo,3)
Out[36]:
In [ ]: