In [3]:
from default import *
In [15]:
# Load the counts in data/count_1w.txt into a Pdist and hand it to the segmenter.
Pw = Pdist(data=datafile("data/count_1w.txt"))
segmenter = Segment(Pw) # note that the default solution for this homework ignores the unigram counts

# Segment the dev input one line at a time; each line's segments are joined
# with single spaces and collected in output_full (one string per input line).
output_full = []
# Pass the encoding explicitly so decoding does not depend on the platform locale.
# NOTE(review): assumes data/input/dev.txt is UTF-8 encoded — confirm.
with open("data/input/dev.txt", encoding="utf-8") as f:
    for line in f:
        output = " ".join(segmenter.segment(line.strip()))
        output_full.append(output)

print("\n".join(output_full[:3])) # print out the first three lines of output as a sanity check
In [6]:
from zhsegment_check import fscore
# sys.stderr is used below; import sys here instead of relying on
# `from default import *` happening to expose it.
import sys

# Read the reference segmentation as a list of whitespace-stripped lines.
# Pass the encoding explicitly so decoding does not depend on the platform locale.
# NOTE(review): assumes data/reference/dev.out is UTF-8 encoded — confirm.
with open('data/reference/dev.out', 'r', encoding='utf-8') as refh:
    ref_data = [x.strip() for x in refh.read().splitlines()]

# Score output_full against the reference and report it on stderr,
# formatted to two decimal places.
tally = fscore(ref_data, output_full)
print("score: {:.2f}".format(tally), file=sys.stderr)