In [1]:
import sys
import string
import time
import getopt
import random
import math
import pathfinders

In [2]:
# Trigram continuations whose first context word is "<s>":
#   ("<s>", w2) -> [(w3, logprob), ...]
start = dict()
# All other trigram continuations: (w1, w2) -> [(w3, logprob), ...]
lm = dict()
# Bigram continuations: w1 -> [(w2, logprob), ...]
seGram = dict()

In [4]:
def load_arpa_ngrams(path):
    """Read the 2-gram and 3-gram sections of an ARPA-style language model.

    Lines are parsed as whitespace-separated fields: the first field is the
    log-probability, the following fields are the words of the n-gram.
    Reading stops at the ``\\end\\`` marker; a blank line closes the current
    section.  Sections other than ``\\2-grams:`` and ``\\3-grams:`` are
    skipped.

    Parameters:
        path: path of the model file to read.

    Returns:
        A tuple ``(sentence_start, trigrams, bigrams)`` of dictionaries:
          * sentence_start: ("<s>", w2) -> [(w3, logprob), ...]
          * trigrams:       (w1, w2)    -> [(w3, logprob), ...] for w1 != "<s>"
          * bigrams:        w1          -> [(w2, logprob), ...]

    Raises:
        ValueError: if an n-gram line has fewer fields than its section needs
            or its first field is not a number.
        IOError/OSError: if the file cannot be opened.
    """
    sentence_start = {}
    trigrams = {}
    bigrams = {}
    order = None  # 2 or 3 while inside that section, otherwise None

    # `with` guarantees the handle is closed even if parsing raises.
    with open(path, 'r') as lmfile:
        for lineno, raw_line in enumerate(lmfile, 1):
            line = raw_line.rstrip()
            if line[:5] == '\\end\\':
                break
            if line == "":
                order = None
                continue
            # Section headers are recognised before any data parsing so a
            # header can never be mistaken for an n-gram line.
            if line[:1] == '\\':
                if line[:9] == '\\2-grams:':
                    order = 2
                elif line[:9] == '\\3-grams:':
                    order = 3
                else:
                    order = None
                continue
            if order is None:
                continue

            fields = line.split()
            if len(fields) < order + 1:
                raise ValueError(
                    "%s:%d: expected at least %d fields in a %d-gram line, got %r"
                    % (path, lineno, order + 1, order, line))
            logprob = float(fields[0])

            if order == 2:
                # Sentence-initial bigrams ("<s>" context) are stored in the
                # same table as every other bigram.
                bigrams.setdefault(fields[1], []).append((fields[2], logprob))
            else:
                context = (fields[1], fields[2])
                if fields[1] == "<s>":
                    table = sentence_start
                else:
                    table = trigrams
                table.setdefault(context, []).append((fields[3], logprob))

    return sentence_start, trigrams, bigrams


# Rebinding (instead of appending to the existing dictionaries) keeps this
# cell idempotent: running it twice no longer duplicates every entry.
start, lm, seGram = load_arpa_ngrams('model26.lm')

In [5]:
nusents = 4      # number of sentences to generate
printprob = 1    # truthy: also print the log-probability of every sampled word
a = 4            # not used in this cell; kept in case another cell reads it
wi = "wii"       # not used in this cell; kept in case another cell reads it

# list() so the views can be indexed on Python 3 as well as Python 2.
start_opt_list = list(start.keys())
keys_list = list(lm.keys())
article_list = list(seGram.keys())
nextWord = list(seGram.values())

# Show up to two bigram contexts with their continuations as a sanity check.
for i in range(min(2, len(article_list))):
    print(article_list[i])
    print(nextWord[i])


def _logprob_to_prob(logprob):
    """Convert a stored log-probability to a linear probability.

    ARPA language-model files store base-10 logarithms, so the inverse is
    10 ** x (not e ** x).  Values at or below -89 are treated as zero.
    NOTE(review): the -89 cutoff is inherited from the previous code;
    presumably it catches "log zero" sentinels such as -99 - confirm.
    """
    if logprob > -89:
        return 10.0 ** logprob
    return 0.0


def _sample_next(candidates):
    """Draw one (word, logprob) pair from `candidates` by probability.

    `candidates` is a non-empty list of (word, logprob) pairs.  A uniform
    random threshold is compared with the running sum of probabilities; the
    first pair that pushes the sum past the threshold is returned.  If the
    probabilities sum to less than the threshold, the last pair is returned.
    """
    threshold = random.random()
    accumulated = 0.0
    word, logprob = candidates[-1]
    for word, logprob in candidates:
        prob = _logprob_to_prob(logprob)
        if threshold < prob + accumulated:
            break
        accumulated += prob
    return word, logprob


def generate_sentence(w1, w2):
    """Generate one sentence that starts with the words `w1 w2`.

    The first continuation is drawn uniformly from lm[(w1, w2)] (its stored
    probabilities are not used for that draw); every later word is sampled
    by probability from the trigram table `lm`.  Generation stops when
    "</s>" is drawn, or when the current two-word context has no entry in
    `lm`.

    Returns:
        (words, logprobs): the generated words without "</s>", and the
        log-probability recorded for every draw (including the "</s>" draw).

    Raises:
        KeyError: if (w1, w2) is not a context in `lm`.
    """
    if (w1, w2) not in lm:
        raise KeyError("seed context %r not found in the trigram model"
                       % ((w1, w2),))
    (w3, prob) = random.choice(lm[(w1, w2)])
    words = [w1, w2]
    logprobs = []
    while True:
        logprobs.append(prob)  # prob = p(w3 | w1, w2)
        if w3 == "</s>":
            break
        words.append(w3)
        key = (w2, w3)
        w2 = w3
        if key not in lm:
            # Dead end: no trigram continues this context, so stop here
            # instead of raising KeyError in the middle of a sentence.
            break
        sys.stderr.write("Keys:  %s\n" % (key,))
        (w3, prob) = _sample_next(lm[key])
    return words, logprobs


# Seed the generator once from the operating system / current time.
random.seed()

for _ in range(nusents):
    gen_sent, gen_logprob = generate_sentence("a", "girl")
    ls = " ".join(gen_sent)
    print(ls)
    if printprob:
        print(" ".join(["%f" % logprob for logprob in gen_logprob]))
        print(' ')


fawn
[('</s>', -0.8005955), ('colored', -0.8005955)]
wrought-iron
[('bannister', -0.8005955), ('chair', -0.8005955)]
a girl looking at Big Ben
-2.083694 -0.375664 -4.135746 -0.238397 -0.945642
 
a girl exchange a fork-full of cake
-3.483783 -0.617495 -1.094617 -0.617495 -0.617495 -0.581282
 
a girl chasing a ball
-3.483783 -0.617495 -0.698970 -0.331415
 
a girl covering her face
-3.483783 -0.144286 -0.301030 -0.205612
 
Keys:  ('girl', 'looking')
Keys:  ('looking', 'at')
Keys:  ('at', 'Big')
Keys:  ('Big', 'Ben')
Keys:  ('girl', 'exchange')
Keys:  ('exchange', 'a')
Keys:  ('a', 'fork-full')
Keys:  ('fork-full', 'of')
Keys:  ('of', 'cake')
Keys:  ('girl', 'chasing')
Keys:  ('chasing', 'a')
Keys:  ('a', 'ball')
Keys:  ('girl', 'covering')
Keys:  ('covering', 'her')
Keys:  ('her', 'face')

In [ ]:


In [ ]: