In [1]:
import sys
import string
import time
import getopt
import random
import math
import pathfinders
In [2]:
# N-gram lookup tables; they start empty and are filled by the model-loading cell.
#   seGram : first word of a 2-gram          -> list of (next word, log-prob)
#   lm     : (first, second) word of a 3-gram -> list of (next word, log-prob)
#   start  : same layout as lm, but only for 3-grams whose first word is "<s>"
start, lm, seGram = dict(), dict(), dict()
In [4]:
# Load the 2-gram and 3-gram sections of the ARPA-style model file into memory.
#
# Tables built here (every value is a list of (next_word, log_prob) pairs):
#   seGram[w1]       <- each 2-gram line  "log_prob w1 w2 ..."
#   start[(w1, w2)]  <- each 3-gram line  "log_prob w1 w2 w3 ..." with w1 == "<s>"
#   lm[(w1, w2)]     <- every other 3-gram line
#
# The tables are rebuilt from scratch so that re-running this cell does not
# append a second copy of every entry.
start = {}
lm = {}
seGram = {}

# N-gram order of the section currently being read (2 or 3).
# None means "outside any section we load", so lines are ignored.
section = None

# `with` guarantees the file is closed even if a malformed line raises.
with open('model26.lm', 'r') as lmfile:
    for lineno, line in enumerate(lmfile, 1):
        line = line.rstrip()

        if line.startswith('\\end\\'):
            break

        # A blank line terminates the current section.
        if line == "":
            section = None
            continue

        # Section markers are recognised *before* any field parsing, so a
        # header that is not preceded by a blank line cannot be mistaken for
        # an n-gram entry.
        if line.startswith('\\'):
            if line.startswith('\\2-grams:'):
                section = 2
            elif line.startswith('\\3-grams:'):
                section = 3
            else:
                section = None  # some other section (e.g. 1-grams): skip it
            continue

        if section is None:
            continue

        fields = line.split()
        # An order-n entry needs the log-prob plus n words; anything after
        # those fields is ignored.
        if len(fields) < section + 1:
            raise ValueError(
                "model26.lm line %d: expected at least %d fields in the "
                "%d-gram section, got %r" % (lineno, section + 1, section, line))

        logprob = float(fields[0])
        if section == 2:
            # Bigrams are stored the same way whether or not w1 is "<s>".
            seGram.setdefault(fields[1], []).append((fields[2], logprob))
        else:
            # Sentence-initial trigrams go to `start`, the rest to `lm`.
            table = start if fields[1] == "<s>" else lm
            table.setdefault((fields[1], fields[2]), []).append((fields[3], logprob))
In [5]:
# Generate `nusents` sentences by walking the trigram table `lm`.
# Every sentence starts from the fixed two-word context ("a", "girl") and is
# extended one word at a time until "</s>" is drawn or the table has no entry
# for the current two-word context.
nusents = 4      # sentences still to generate; counted down to 0 by the loop below
printprob = 1    # truthy: also print the log-probability of each generated word
a = 4            # not read anywhere in this cell; kept in case another cell uses it

# Materialise the dict views as lists so they can be indexed on Python 3 too.
start_opt_list = list(start.keys())
keys_list = lm_keys = list(lm.keys())
article_list = list(seGram.keys())
nextWord = list(seGram.values())

# Preview up to two bigram entries (fewer if the table is smaller).
for i in range(min(2, len(article_list))):
    print(article_list[i])
    print(nextWord[i])

# Seed the generator once from system entropy; re-seeding per sentence adds nothing.
random.seed()

seed_context = ("a", "girl")
if seed_context not in lm:
    raise KeyError("no 3-gram continuation for seed context %r" % (seed_context,))

while nusents > 0:
    w1, w2 = seed_context
    wi = "wii"  # not read anywhere in this cell; kept in case another cell uses it

    # The first continuation is drawn uniformly from the candidates; only the
    # later words are sampled according to their stored scores.
    next_word_list = lm[(w1, w2)]
    (w3, prob) = random.choice(next_word_list)

    gen_sent = [w1, w2]
    gen_logprob = []
    while 1:
        gen_sent.append(w3)
        gen_logprob.append(prob)  # prob = stored log p(w3 | previous two words)

        if w3 == "</s>":
            gen_sent.pop()  # the end-of-sentence marker is not printed
            break

        key = (w2, w3)
        w2 = w3
        if key not in lm:
            # Without this exit w3 would never change again and the loop would
            # append the same word forever.
            sys.stderr.write("No continuation for %s; ending sentence\n" % (key,))
            break
        sys.stderr.write("Keys:  %s\n" % (key,))

        # Walk the candidates, accumulating probability mass, and stop at the
        # first one whose cumulative mass exceeds a uniform draw. If the mass
        # never reaches the draw, the last candidate is kept.
        prob_out = random.random()
        accumulate = 0.0
        for (lm_w, lm_logprob) in lm[key]:
            # NOTE(review): math.exp treats the score as a natural log, but
            # ARPA-format models conventionally store log10 - confirm which
            # base model26.lm uses.
            # Scores at or below -89 count as probability 0; presumably this
            # guards a "log of zero" sentinel such as -99 - confirm.
            if lm_logprob > -89:
                lm_prob = math.exp(lm_logprob)
            else:
                lm_prob = 0.0
            w3 = lm_w
            prob = lm_logprob
            if prob_out < (lm_prob + accumulate):
                break
            accumulate += lm_prob

    ls = " ".join(gen_sent)
    print(ls)
    if printprob:
        print(" ".join(["%f" % logprob for logprob in gen_logprob]))
    print(' ')
    nusents -= 1
In [ ]:
In [ ]: