In [1]:
# This is a test
from __future__ import division
corpus = 'data/delorme.com_shu.pages_89.txt'
import math
import os
import sys
from config import CONFIG
from tests import tests1, tests2
from normalizer import Normalizer
from lib.CharacterIndex import CharacterIndex
from lib.NaiveTokenizer import NaiveTokenizer
from lib.TextStreamer import TextStreamer
from lib.Tools import (
FreqDist,
tokenizer,
splitter
)
from collections import (
Counter,
defaultdict as deft
)
def freq_ratio(maxim, minim, log_base=2.0):
    """Tell whether ``maxim`` outweighs ``minim`` by a log-scaled margin.

    The test passes only when ``maxim`` is at least 10 and the ratio
    ``maxim / minim`` reaches the square of ``log(maxim, log_base)``.

    Args:
        maxim: the larger value (a frequency, going by the function name).
        minim: the value it is compared against; may be 0.
        log_base: base of the logarithm used to derive the threshold.

    Returns:
        True when both conditions hold, False otherwise.
    """
    if maxim < 10:
        return False
    if minim == 0:
        # Dividing by a zero count used to raise ZeroDivisionError; the
        # ratio is unbounded here, so it clears any finite threshold.
        return True
    multiplier = math.log(maxim, log_base)
    # print maxim, minim, maxim / minim, multiplier, (multiplier * multiplier)
    # True division is intended: the file enables it through
    # `from __future__ import division`.
    return maxim / minim >= multiplier * multiplier
def get_name(template):
    """Return the first ``template % i`` (i = 1, 2, 3, ...) that is not an existing path.

    Args:
        template: a %-format string taking one integer, e.g. ``'run.%d.txt'``.

    Returns:
        The formatted name with the lowest counter for which
        ``os.path.exists`` is false.
    """
    counter = 1
    candidate = template % counter
    # Keep bumping the counter until the formatted name is unused.
    while os.path.exists(candidate):
        counter += 1
        candidate = template % counter
    return candidate
# Evaluation driver: for every configuration in CONFIG, build a character
# n-gram index over the corpus tokens plus the test pairs, look up each
# misspelling, take the candidate that is most frequent in the corpus as the
# hypothesis, and log the misses together with summary figures.

# Nothing in this first part reads the configuration, so it runs once
# instead of once per configuration.
# list() keeps the concatenation valid where items() returns a view.
tests = list(tests1.items()) + list(tests2.items())

# Collect input from large text file:
streamer = TextStreamer(corpus)
dump = []
for doc in streamer:
    for sent in splitter(doc):
        dump += tokenizer(sent)
freq_dist = Counter(dump)

template = 'logs/test.%d.txt'

for C in CONFIG:
    # Map all character n-grams to words, and all words to their
    # character n-grams
    index = CharacterIndex(dump + tests, min_r=C['sim_thres'])
    index.build()

    hits = 0
    empty = 0

    # `with` closes the log even if a lookup raises midway; the handle is
    # named `log` so the builtin `file` is no longer shadowed.
    with open(get_name(template), 'wb') as log:
        # Header: one key=value line per configuration entry.
        log.write('\n'.join(['%s=%s' % (str(x), str(y)) for x, y in C.items()]) + '\n')

        for correct, error in tests:
            similars = index[error]
            if not similars:
                # No candidate at all for this misspelling.
                empty += 1
                continue

            # Hypothesis = candidate with the highest corpus frequency.
            # max() returns the first maximal item, which is the same item a
            # stable descending sort would put at position 0.
            best = max(similars, key=lambda x: freq_dist[x[0]])[0]
            if best == correct:
                hits += 1
                # print error, '>', correct, hits, hits / float(len(tests) - empty)
            else:
                log.write('error="%s" hypothesis="%s" human="%s"\n' % (error, best, correct))

        # Guard both denominators: they are 0 when every lookup came back
        # empty (or there were no tests), which used to raise
        # ZeroDivisionError after the whole run had finished.
        answered = len(tests) - empty
        precision = hits / float(answered) if answered else 0.0
        attempted = hits + empty
        recall = hits / float(attempted) if attempted else 0.0

        summary = 'hits = "%s" precision = "%s" recall = "%s" \n' % (hits, precision, recall)
        log.write(summary)

    # Single pre-formatted argument: emits the same text as the original
    # `print C, summary` statement and also parses as a function call.
    print('%s %s' % (C, summary))
[('locally', 0.9230769230769231)]
[('locally', 0.9230769230769231)]
[('useful', 0.9230769230769231)]
[('useful', 0.9230769230769231)]
[('triangular', 0.9523809523809523)]
[('triangular', 0.9523809523809523)]
[('sources', 0.9230769230769231), ('sorce', 0.9090909090909091)]
[('sources', 0.9230769230769231), ('sorce', 0.9090909090909091)]
[('committee', 0.9411764705882353)]
[('committee', 0.9411764705882353)]
[('transportability', 0.9375)]
[('transportability', 0.9375)]
[('diagrammatically', 0.9696969696969697)]
[('diagrammatically', 0.9696969696969697)]
[('management', 0.9473684210526315)]
[('management', 0.9473684210526315)]
[('singular', 0.9411764705882353)]
[('singular', 0.9411764705882353)]
[('initial', 0.9230769230769231)]
[('initial', 0.9230769230769231)]
[('pronunciation', 0.9629629629629629)]
[('pronunciation', 0.9629629629629629)]
[('totally', 0.9230769230769231)]
[('totally', 0.9230769230769231)]
[('centrally', 0.9411764705882353)]
[('centrally', 0.9411764705882353)]
[('someone', 0.9230769230769231)]
[('someone', 0.9230769230769231)]
[('families', 0.9333333333333333), ('failes', 0.9230769230769231)]
[('families', 0.9333333333333333), ('failes', 0.9230769230769231)]
[('february', 0.9333333333333333)]
[('february', 0.9333333333333333)]
[('choises', 0.9230769230769231), ('chose', 0.9090909090909091)]
[('chose', 0.9090909090909091), ('choises', 0.9230769230769231)]
[('basically', 0.9411764705882353)]
[('basically', 0.9411764705882353)]
[('descided', 0.9333333333333333), ('descides', 0.9333333333333333), ('decide', 0.9230769230769231)]
[('decide', 0.9230769230769231), ('descided', 0.9333333333333333), ('descides', 0.9333333333333333)]
[('paerticulaur', 0.9565217391304348), ('particular', 0.9523809523809523), ('particulary', 0.9090909090909091)]
[('particular', 0.9523809523809523), ('particulary', 0.9090909090909091), ('paerticulaur', 0.9565217391304348)]
[('considerable', 0.9166666666666666)]
[('considerable', 0.9166666666666666)]
[('account', 0.9230769230769231), ('acounts', 0.9230769230769231)]
[('account', 0.9230769230769231), ('acounts', 0.9230769230769231)]
[('unfortunately', 0.96)]
[('unfortunately', 0.96)]
[('variable', 0.9333333333333333)]
[('variable', 0.9333333333333333)]
[('whether', 0.9230769230769231)]
[('whether', 0.9230769230769231)]
[('levals', 0.9090909090909091)]
[('levals', 0.9090909090909091)]
[('transferred', 0.9)]
[('transferred', 0.9)]
[('benifits', 0.9333333333333333)]
[('benifits', 0.9333333333333333)]
[('addressable', 0.9523809523809523)]
[('addressable', 0.9523809523809523)]
[('hierarchal', 0.9)]
[('hierarchal', 0.9)]
[('descide', 0.9333333333333333), ('decided', 0.9333333333333333)]
[('decided', 0.9333333333333333), ('descide', 0.9333333333333333)]
[('choosing', 0.9333333333333333)]
[('choosing', 0.9333333333333333)]
[('further', 0.9230769230769231)]
[('further', 0.9230769230769231)]
[('questionnaire', 0.96), ('questionare', 0.9565217391304348)]
[('questionnaire', 0.96), ('questionare', 0.9565217391304348)]
[('different', 0.9411764705882353)]
[('different', 0.9411764705882353)]
[('clerical', 0.9411764705882353)]
[('clerical', 0.9411764705882353)]
[('monitoring', 0.9)]
[('monitoring', 0.9)]
[('position', 0.9411764705882353)]
[('position', 0.9411764705882353)]
[('perhaps', 0.9333333333333333)]
[('perhaps', 0.9333333333333333)]
[('personnel', 0.9473684210526315)]
[('personnel', 0.9473684210526315)]
[('seperated', 0.9411764705882353)]
[('seperated', 0.9411764705882353)]
[('arrangment', 0.9473684210526315), ('arrangement', 0.9)]
[('arrangment', 0.9473684210526315), ('arrangement', 0.9)]
[('access', 0.9090909090909091)]
[('access', 0.9090909090909091)]
[('various', 0.9333333333333333)]
[('various', 0.9333333333333333)]
[('between', 0.9333333333333333), ('beeteen', 0.9333333333333333)]
[('between', 0.9333333333333333), ('beeteen', 0.9333333333333333)]
[('availble', 0.9333333333333333)]
[('availble', 0.9333333333333333)]
[('accessing', 0.9411764705882353)]
[('accessing', 0.9411764705882353)]
[('description', 0.9090909090909091)]
[('description', 0.9090909090909091)]
[('variant', 0.9333333333333333)]
[('variant', 0.9333333333333333)]
[('southern', 0.9333333333333333)]
[('southern', 0.9333333333333333)]
[('familes', 0.9230769230769231), ('fails', 0.9090909090909091)]
[('fails', 0.9090909090909091), ('familes', 0.9230769230769231)]
[('visitors', 0.9333333333333333), ('vistor', 0.9230769230769231)]
[('visitors', 0.9333333333333333), ('vistor', 0.9230769230769231)]
[('completely', 0.9473684210526315)]
[('completely', 0.9473684210526315)]
[('leval', 0.9090909090909091)]
[('leval', 0.9090909090909091)]
[('beginning', 0.9411764705882353)]
[('beginning', 0.9411764705882353)]
[('volantary', 0.9411764705882353)]
[('volantary', 0.9411764705882353)]
[('defenitions', 0.9523809523809523), ('definition', 0.9)]
[('definition', 0.9), ('defenitions', 0.9523809523809523)]
[('voting', 0.9230769230769231)]
[('voting', 0.9230769230769231)]
[('benifit', 0.9333333333333333)]
[('benifit', 0.9333333333333333)]
[('planned', 0.9230769230769231)]
[('planned', 0.9230769230769231)]
[('defenition', 0.9523809523809523), ('definitions', 0.9090909090909091)]
[('definitions', 0.9090909090909091), ('defenition', 0.9523809523809523)]
[('forbidden', 0.9411764705882353)]
[('forbidden', 0.9411764705882353)]
[('comments', 0.9333333333333333)]
[('comments', 0.9333333333333333)]
[('descisions descision', 0.926829268292683)]
[('descisions descision', 0.926829268292683)]
[('supposedly', 0.9)]
[('supposedly', 0.9)]
[('embellishing', 0.9565217391304348)]
[('embellishing', 0.9565217391304348)]
[('technique', 0.9411764705882353), ('tecniques', 0.9411764705882353)]
[('technique', 0.9411764705882353), ('tecniques', 0.9411764705882353)]
[('perminant', 0.9)]
[('perminant', 0.9)]
[('confirmation', 0.9166666666666666)]
[('confirmation', 0.9166666666666666)]
[('appointment', 0.9523809523809523), ('appointments', 0.9090909090909091)]
[('appointment', 0.9523809523809523), ('appointments', 0.9090909090909091)]
[('progression', 0.9523809523809523)]
[('progression', 0.9523809523809523)]
[('accompanying', 0.9090909090909091)]
[('accompanying', 0.9090909090909091)]
[('applicable', 0.9473684210526315)]
[('applicable', 0.9473684210526315)]
[('regained', 0.9333333333333333)]
[('regained', 0.9333333333333333)]
[('guidelines', 0.9473684210526315)]
[('guidelines', 0.9473684210526315)]
[('titles', 0.9230769230769231)]
[('titles', 0.9230769230769231)]
[('unavailable', 0.9523809523809523)]
[('unavailable', 0.9523809523809523)]
[('advantageous', 0.9565217391304348)]
[('advantageous', 0.9565217391304348)]
[('appeal', 0.9090909090909091)]
[('appeal', 0.9090909090909091)]
[('consisting', 0.9523809523809523)]
[('consisting', 0.9523809523809523)]
[('separation', 0.9)]
[('separation', 0.9)]
[('search', 0.9090909090909091)]
[('search', 0.9090909090909091)]
[('resulting', 0.9411764705882353)]
[('resulting', 0.9411764705882353)]
[('suggestion', 0.9473684210526315)]
[('suggestion', 0.9473684210526315)]
[('opinion', 0.9333333333333333)]
[('opinion', 0.9333333333333333)]
[('cancellation', 0.9565217391304348)]
[('cancellation', 0.9565217391304348)]
[('composed', 0.9411764705882353)]
[('composed', 0.9411764705882353)]
[('useful', 0.9090909090909091)]
[('useful', 0.9090909090909091)]
[('humour', 0.9090909090909091)]
[('humour', 0.9090909090909091)]
[('would', 0.9090909090909091)]
[('would', 0.9090909090909091)]
[('examination', 0.9090909090909091)]
[('examination', 0.9090909090909091)]
[('therefore', 0.9411764705882353)]
[('therefore', 0.9411764705882353)]
[('recommend', 0.9411764705882353)]
[('recommend', 0.9411764705882353)]
[('seperate', 0.9411764705882353)]
[('seperate', 0.9411764705882353)]
[('particulaur', 0.9565217391304348), ('particular', 0.9090909090909091)]
[('particular', 0.9090909090909091), ('particulaur', 0.9565217391304348)]
[('pivoting', 0.9333333333333333)]
[('pivoting', 0.9333333333333333)]
[('announcing', 0.9473684210526315)]
[('announcing', 0.9473684210526315)]
[('arrangements', 0.9166666666666666)]
[('arrangements', 0.9166666666666666)]
[('proportions', 0.9523809523809523)]
[('proportions', 0.9523809523809523)]
[('accept', 0.9090909090909091)]
[('accept', 0.9090909090909091)]
[('dependence', 0.9)]
[('dependence', 0.9)]
[('unequalled', 0.9473684210526315)]
[('unequalled', 0.9473684210526315)]
[('numbers', 0.9333333333333333)]
[('numbers', 0.9333333333333333)]
[('conversely', 0.9473684210526315)]
[('conversely', 0.9473684210526315)]
[('provide', 0.9230769230769231)]
[('provide', 0.9230769230769231)]
[('arrangement', 0.9523809523809523), ('arragment', 0.9473684210526315), ('arrangements', 0.9090909090909091)]
[('arrangements', 0.9090909090909091), ('arrangement', 0.9523809523809523), ('arragment', 0.9473684210526315)]
[('responsibilities', 0.967741935483871)]
[('responsibilities', 0.967741935483871)]
[('fourth', 0.9090909090909091)]
[('fourth', 0.9090909090909091)]
[('inconceivable', 0.9230769230769231)]
[('inconceivable', 0.9230769230769231)]
[('register', 0.9333333333333333)]
[('register', 0.9333333333333333)]
[('supervision', 0.9523809523809523)]
[('supervision', 0.9523809523809523)]
[('encompassing', 0.9565217391304348)]
[('encompassing', 0.9565217391304348)]
[('negligible', 0.9)]
[('negligible', 0.9)]
[('operations', 0.9473684210526315)]
[('operations', 0.9473684210526315)]
[('executed', 0.9411764705882353)]
[('executed', 0.9411764705882353)]
[('interpretation', 0.9285714285714286)]
[('interpretation', 0.9285714285714286)]
[('years', 0.9090909090909091)]
[('years', 0.9090909090909091)]
[('committee', 0.9411764705882353)]
[('committee', 0.9411764705882353)]
[('before', 0.9090909090909091)]
[('before', 0.9090909090909091)]
[('interesting', 0.9523809523809523)]
[('interesting', 0.9523809523809523)]
[('perminantly', 0.9)]
[('perminantly', 0.9)]
[('choose', 0.9090909090909091), ('choise', 0.9090909090909091)]
[('choose', 0.9090909090909091), ('choise', 0.9090909090909091)]
[('correspondence', 0.9285714285714286)]
[('correspondence', 0.9285714285714286)]
[('eventually', 0.9473684210526315)]
[('eventually', 0.9473684210526315)]
[('desperately', 0.9)]
[('desperately', 0.9)]
[('university', 0.9473684210526315)]
[('university', 0.9473684210526315)]
[('adjournment', 0.9523809523809523)]
[('adjournment', 0.9523809523809523)]
[('stopped', 0.9230769230769231)]
[('stopped', 0.9230769230769231)]
[('adequately', 0.9)]
[('adequately', 0.9)]
[('proffits', 0.9333333333333333), ('profit', 0.9230769230769231)]
[('profit', 0.9230769230769231), ('proffits', 0.9333333333333333)]
[('encourage', 0.9411764705882353)]
[('encourage', 0.9411764705882353)]
[('collate', 0.9230769230769231)]
[('collate', 0.9230769230769231)]
[('proviso', 0.9333333333333333)]
[('proviso', 0.9333333333333333)]
[('approached', 0.9473684210526315)]
[('approached', 0.9473684210526315)]
[('difficulty', 0.9473684210526315), ('dificult', 0.9411764705882353)]
[('difficulty', 0.9473684210526315), ('dificult', 0.9411764705882353)]
[('appointments', 0.9565217391304348), ('appointment', 0.9090909090909091)]
[('appointment', 0.9090909090909091), ('appointments', 0.9565217391304348)]
[('conditioning', 0.9565217391304348)]
[('conditioning', 0.9565217391304348)]
[('beginning', 0.9411764705882353)]
[('beginning', 0.9411764705882353)]
[('universally', 0.9523809523809523)]
[('universally', 0.9523809523809523)]
[('unresolved', 0.9)]
[('unresolved', 0.9)]
[('length', 0.9090909090909091)]
[('length', 0.9090909090909091)]
[('system', 0.9090909090909091)]
[('system', 0.9090909090909091)]
[('approximately', 0.9166666666666666)]
[('approximately', 0.9166666666666666)]
[('repetitive', 0.9)]
[('repetitive', 0.9)]
[('exactly', 0.9333333333333333)]
[('exactly', 0.9333333333333333)]
[('immediate', 0.9411764705882353)]
[('immediate', 0.9411764705882353)]
[('appreciation', 0.9565217391304348)]
[('appreciation', 0.9565217391304348)]
[('eliminated', 0.9473684210526315)]
[('eliminated', 0.9473684210526315)]
[('believe', 0.9230769230769231)]
[('believe', 0.9230769230769231)]
[('appreciated', 0.9523809523809523)]
[('appreciated', 0.9523809523809523)]
[('readjusted', 0.9473684210526315)]
[('readjusted', 0.9473684210526315)]
[('false', 0.9090909090909091)]
[('false', 0.9090909090909091)]
[('interrogating', 0.9230769230769231)]
[('interrogating', 0.9230769230769231)]
[('academically', 0.9090909090909091)]
[('academically', 0.9090909090909091)]
[('traditionally', 0.96)]
[('traditionally', 0.96)]
[('studying', 0.9333333333333333)]
[('studying', 0.9333333333333333)]
[('majority', 0.9333333333333333)]
[('majority', 0.9333333333333333)]
[('aggravating', 0.9523809523809523)]
[('aggravating', 0.9523809523809523)]
[('transactions', 0.9565217391304348)]
[('transactions', 0.9565217391304348)]
[('arguing', 0.9333333333333333)]
[('arguing', 0.9333333333333333)]
[('later', 0.9090909090909091)]
[('later', 0.9090909090909091)]
[('senior', 0.9230769230769231)]
[('senior', 0.9230769230769231)]
[('dragged', 0.9230769230769231)]
[('dragged', 0.9230769230769231)]
[('atmosphere', 0.9473684210526315)]
[('atmosphere', 0.9473684210526315)]
[('drastically', 0.9523809523809523)]
[('drastically', 0.9523809523809523)]
[('particularly', 0.9565217391304348), ('particular', 0.9523809523809523), ('particulaur', 0.9090909090909091)]
[('particular', 0.9523809523809523), ('particularly', 0.9565217391304348), ('particulaur', 0.9090909090909091)]
[('visitor', 0.9230769230769231), ('vistors', 0.9230769230769231)]
[('visitor', 0.9230769230769231), ('vistors', 0.9230769230769231)]
[('session', 0.9230769230769231)]
[('session', 0.9230769230769231)]
[('continually', 0.9)]
[('continually', 0.9)]
[('beetween', 0.9333333333333333)]
[('beetween', 0.9333333333333333)]
[('overall', 0.9230769230769231)]
[('overall', 0.9230769230769231)]
[('timing', 0.9230769230769231)]
[('timing', 0.9230769230769231)]
[('econometric', 0.9523809523809523)]
[('econometric', 0.9523809523809523)]
[('descide', 0.9333333333333333), ('decides', 0.9333333333333333)]
[('descide', 0.9333333333333333), ('decides', 0.9333333333333333)]
[('intelligence', 0.9565217391304348)]
[('intelligence', 0.9565217391304348)]
[('apologies', 0.9473684210526315)]
[('apologies', 0.9473684210526315)]
[('techniques', 0.9473684210526315), ('tecnique', 0.9411764705882353)]
[('techniques', 0.9473684210526315), ('tecnique', 0.9411764705882353)]
[('volantry', 0.9411764705882353)]
[('volantry', 0.9411764705882353)]
[('forecast', 0.9333333333333333)]
[('forecast', 0.9333333333333333)]
[('weapons', 0.9230769230769231)]
[('weapons', 0.9230769230769231)]
[('approach', 0.9333333333333333)]
[('approach', 0.9333333333333333)]
[('available', 0.9411764705882353), ('avaible', 0.9333333333333333)]
[('available', 0.9411764705882353), ('avaible', 0.9333333333333333)]
[('recently', 0.9411764705882353)]
[('recently', 0.9411764705882353)]
[('ability', 0.9230769230769231)]
[('ability', 0.9230769230769231)]
[('agencies', 0.9333333333333333)]
[('agencies', 0.9333333333333333)]
[('however', 0.9333333333333333)]
[('however', 0.9333333333333333)]
[('suggested', 0.9411764705882353)]
[('suggested', 0.9411764705882353)]
[('annual', 0.9090909090909091)]
[('annual', 0.9090909090909091)]
[('according', 0.9411764705882353)]
[('according', 0.9411764705882353)]
[('table', 0.9090909090909091)]
[('table', 0.9090909090909091)]
[('conference', 0.9)]
[('conference', 0.9)]
[('union', 0.9090909090909091)]
[('union', 0.9090909090909091)]
[('interest', 0.9333333333333333)]
[('interest', 0.9333333333333333)]
[('refered', 0.9333333333333333)]
[('refered', 0.9333333333333333)]
[('sufficient', 0.9473684210526315)]
[('sufficient', 0.9473684210526315)]
[('representative', 0.9285714285714286)]
[('representative', 0.9285714285714286)]
[('apologised', 0.9523809523809523)]
[('apologised', 0.9523809523809523)]
[('choise', 0.9230769230769231)]
[('choise', 0.9230769230769231)]
[('shortened', 0.9411764705882353)]
[('shortened', 0.9411764705882353)]
[('manually', 0.9333333333333333)]
[('manually', 0.9333333333333333)]
[('excessively', 0.9523809523809523)]
[('excessively', 0.9523809523809523)]
[('develop', 0.9333333333333333)]
[('develop', 0.9333333333333333)]
[('credit', 0.9230769230769231)]
[('credit', 0.9230769230769231)]
[('government', 0.9473684210526315)]
[('government', 0.9473684210526315)]
[('orientated', 0.9473684210526315)]
[('orientated', 0.9473684210526315)]
[('widely', 0.9090909090909091)]
[('widely', 0.9090909090909091)]
[('difficult', 0.9411764705882353), ('dificulty', 0.9411764705882353)]
[('difficult', 0.9411764705882353), ('dificulty', 0.9411764705882353)]
[('investigated', 0.9166666666666666)]
[('investigated', 0.9166666666666666)]
[('nationally', 0.9473684210526315)]
[('nationally', 0.9473684210526315)]
[('moving', 0.9230769230769231)]
[('moving', 0.9230769230769231)]
[('equalled', 0.9333333333333333)]
[('equalled', 0.9333333333333333)]
[('financially', 0.9523809523809523)]
[('financially', 0.9523809523809523)]
[('functionally', 0.9565217391304348)]
[('functionally', 0.9565217391304348)]
[('announcement', 0.9565217391304348)]
[('announcement', 0.9565217391304348)]
[('progresses', 0.9523809523809523)]
[('progresses', 0.9523809523809523)]
[('except', 0.9230769230769231)]
[('except', 0.9230769230769231)]
[('recommending', 0.9565217391304348)]
[('recommending', 0.9565217391304348)]
[('mathematically', 0.9629629629629629)]
[('mathematically', 0.9629629629629629)]
[('sorces', 0.9090909090909091), ('source', 0.9090909090909091)]
[('source', 0.9090909090909091), ('sorces', 0.9090909090909091)]
[('combine', 0.9333333333333333)]
[('combine', 0.9333333333333333)]
[('resolved', 0.9333333333333333)]
[('resolved', 0.9333333333333333)]
[('demands', 0.9333333333333333)]
[('demands', 0.9333333333333333)]
[('unequivocally', 0.96)]
[('unequivocally', 0.96)]
[('accepted', 0.9333333333333333)]
[('accepted', 0.9333333333333333)]
[('projects', 0.9411764705882353)]
[('projects', 0.9411764705882353)]
[('journalism', 0.9473684210526315)]
[('journalism', 0.9473684210526315)]
[('output', 0.9230769230769231)]
[('output', 0.9230769230769231)]
[('security', 0.9333333333333333)]
[('security', 0.9333333333333333)]
[('essential', 0.9411764705882353)]
[('essential', 0.9411764705882353)]
[('requested', 0.9411764705882353)]
[('requested', 0.9411764705882353)]
[('supplementary', 0.96)]
[('supplementary', 0.96)]
[('questionaire', 0.9565217391304348), ('questionnaire', 0.9166666666666666)]
[('questionnaire', 0.9166666666666666), ('questionaire', 0.9565217391304348)]
[('employment', 0.9)]
[('employment', 0.9)]
[('proceeding', 0.9473684210526315)]
[('proceeding', 0.9473684210526315)]
[('deciscions descisions', 0.926829268292683)]
[('deciscions descisions', 0.926829268292683)]
[('discretion', 0.9)]
[('discretion', 0.9)]
[('reaching', 0.9333333333333333)]
[('reaching', 0.9333333333333333)]
[('expansion', 0.9411764705882353)]
[('expansion', 0.9411764705882353)]
[('although', 0.9333333333333333)]
[('although', 0.9333333333333333)]
[('analysing', 0.9411764705882353)]
[('analysing', 0.9411764705882353)]
[('comparison', 0.9523809523809523)]
[('comparison', 0.9523809523809523)]
[('months', 0.9230769230769231)]
[('months', 0.9230769230769231)]
[('hierarchal', 0.9)]
[('hierarchal', 0.9)]
[('misleading', 0.9523809523809523)]
[('misleading', 0.9523809523809523)]
[('commit', 0.9090909090909091)]
[('commit', 0.9090909090909091)]
[('within', 0.9230769230769231)]
[('within', 0.9230769230769231)]
[('accounts', 0.9333333333333333), ('acount', 0.9230769230769231)]
[('accounts', 0.9333333333333333), ('acount', 0.9230769230769231)]
[('primarily', 0.9411764705882353)]
[('primarily', 0.9411764705882353)]
[('operator', 0.9333333333333333)]
[('operator', 0.9333333333333333)]
[('accumulated', 0.9523809523809523)]
[('accumulated', 0.9523809523809523)]
[('summarys', 0.9333333333333333)]
[('summarys', 0.9333333333333333)]
[('understandable', 0.9629629629629629)]
[('understandable', 0.9629629629629629)]
[('consist', 0.9333333333333333)]
[('consist', 0.9333333333333333)]
[('declarations', 0.96)]
[('declarations', 0.96)]
[('associated', 0.9)]
[('associated', 0.9)]
[('accessibility', 0.9230769230769231)]
[('accessibility', 0.9230769230769231)]
[('examine', 0.9230769230769231)]
[('examine', 0.9230769230769231)]
[('annoying', 0.9333333333333333)]
[('annoying', 0.9333333333333333)]
[('accessing', 0.9411764705882353)]
[('accessing', 0.9411764705882353)]
[('ideally', 0.9230769230769231)]
[('ideally', 0.9230769230769231)]
[('personnel', 0.9411764705882353)]
[('personnel', 0.9411764705882353)]
[('whereas', 0.9230769230769231)]
[('whereas', 0.9230769230769231)]
[('gaining', 0.9230769230769231)]
[('gaining', 0.9230769230769231)]
[('explaining', 0.9473684210526315)]
[('explaining', 0.9473684210526315)]
[('students', 0.9333333333333333)]
[('students', 0.9333333333333333)]
[('prepared', 0.9411764705882353)]
[('prepared', 0.9411764705882353)]
[('generated', 0.9473684210526315)]
[('generated', 0.9473684210526315)]
[('graphically', 0.9523809523809523)]
[('graphically', 0.9523809523809523)]
[('suited', 0.9090909090909091)]
[('suited', 0.9090909090909091)]
[('controlled', 0.9473684210526315)]
[('controlled', 0.9473684210526315)]
[('required', 0.9411764705882353)]
[('required', 0.9411764705882353)]
[('profits', 0.9333333333333333), ('proffit', 0.9333333333333333)]
[('profits', 0.9333333333333333), ('proffit', 0.9333333333333333)]
{'min_Count': 10, 'nb_sent': 1000, 'freq_ratio': 10, 'chgram': 2, 'sim_thres': 0.9} hits = "250" precision = "0.919117647059" recall = "0.51867219917"
[('locally', 0.9230769230769231)]
[('locally', 0.9230769230769231)]
[('useful', 0.9230769230769231)]
[('useful', 0.9230769230769231)]
[('triangular', 0.9523809523809523)]
[('triangular', 0.9523809523809523)]
[('arrangeing', 0.8888888888888888)]
[('arrangeing', 0.8888888888888888)]
[('sources', 0.9230769230769231), ('sorce', 0.9090909090909091)]
[('sources', 0.9230769230769231), ('sorce', 0.9090909090909091)]
[('committee', 0.9411764705882353), ('committees', 0.8888888888888888)]
[('committee', 0.9411764705882353), ('committees', 0.8888888888888888)]
[('transportability', 0.9375)]
[('transportability', 0.9375)]
[('minuscule', 0.8888888888888888)]
[('minuscule', 0.8888888888888888)]
[('diagrammatically', 0.9696969696969697)]
[('diagrammatically', 0.9696969696969697)]
[('management', 0.9473684210526315)]
[('management', 0.9473684210526315)]
[('singular', 0.9411764705882353)]
[('singular', 0.9411764705882353)]
[('extreemly', 0.8888888888888888), ('extremely', 0.8888888888888888)]
[('extremely', 0.8888888888888888), ('extreemly', 0.8888888888888888)]
[('initial', 0.9230769230769231)]
[('initial', 0.9230769230769231)]
[('pronunciation', 0.9629629629629629)]
[('pronunciation', 0.9629629629629629)]
[('totally', 0.9230769230769231)]
[('totally', 0.9230769230769231)]
[('centrally', 0.9411764705882353)]
[('centrally', 0.9411764705882353)]
[('meant', 0.8888888888888888)]
[('meant', 0.8888888888888888)]
[('someone', 0.9230769230769231)]
[('someone', 0.9230769230769231)]
[('families', 0.9333333333333333), ('failes', 0.9230769230769231)]
[('families', 0.9333333333333333), ('failes', 0.9230769230769231)]
[('their', 0.8888888888888888), ('there', 0.8888888888888888), ('thear', 0.8888888888888888)]
[('their', 0.8888888888888888), ('there', 0.8888888888888888), ('thear', 0.8888888888888888)]
[('february', 0.9333333333333333)]
[('february', 0.9333333333333333)]
[('choises', 0.9230769230769231), ('chose', 0.9090909090909091)]
[('chose', 0.9090909090909091), ('choises', 0.9230769230769231)]
[('basically', 0.9411764705882353)]
[('basically', 0.9411764705882353)]
[('descided', 0.9333333333333333), ('descides', 0.9333333333333333), ('decide', 0.9230769230769231)]
[('decide', 0.9230769230769231), ('descided', 0.9333333333333333), ('descides', 0.9333333333333333)]
[('paerticulaur', 0.9565217391304348), ('particular', 0.9523809523809523), ('particulary', 0.9090909090909091)]
[('particular', 0.9523809523809523), ('particulary', 0.9090909090909091), ('paerticulaur', 0.9565217391304348)]
[('considerable', 0.9166666666666666)]
[('considerable', 0.9166666666666666)]
[('account', 0.9230769230769231), ('acounts', 0.9230769230769231)]
[('account', 0.9230769230769231), ('acounts', 0.9230769230769231)]
[('unfortunately', 0.96)]
[('unfortunately', 0.96)]
[('variable', 0.9333333333333333)]
[('variable', 0.9333333333333333)]
[('whether', 0.9230769230769231)]
[('whether', 0.9230769230769231)]
[('levals', 0.9090909090909091)]
[('levals', 0.9090909090909091)]
[('transferred', 0.9)]
[('transferred', 0.9)]
[('benifits', 0.9333333333333333)]
[('benifits', 0.9333333333333333)]
[('addressable', 0.9523809523809523)]
[('addressable', 0.9523809523809523)]
[('hierarchal', 0.9)]
[('hierarchal', 0.9)]
[('descide', 0.9333333333333333), ('decided', 0.9333333333333333)]
[('decided', 0.9333333333333333), ('descide', 0.9333333333333333)]
[('choosing', 0.9333333333333333)]
[('choosing', 0.9333333333333333)]
[('further', 0.9230769230769231)]
[('further', 0.9230769230769231)]
[('questionnaire', 0.96), ('questionare', 0.9565217391304348)]
[('questionnaire', 0.96), ('questionare', 0.9565217391304348)]
[('different', 0.9411764705882353)]
[('different', 0.9411764705882353)]
[('clerical', 0.9411764705882353)]
[('clerical', 0.9411764705882353)]
[('monitoring', 0.9)]
[('monitoring', 0.9)]
[('position', 0.9411764705882353)]
[('position', 0.9411764705882353)]
[('perhaps', 0.9333333333333333)]
[('perhaps', 0.9333333333333333)]
[('personnel', 0.9473684210526315), ('personel', 0.8888888888888888)]
[('personnel', 0.9473684210526315), ('personel', 0.8888888888888888)]
[('seperated', 0.9411764705882353)]
[('seperated', 0.9411764705882353)]
[('arrangment', 0.9473684210526315), ('arrangement', 0.9)]
[('arrangment', 0.9473684210526315), ('arrangement', 0.9)]
[('access', 0.9090909090909091)]
[('access', 0.9090909090909091)]
[('various', 0.9333333333333333)]
[('various', 0.9333333333333333)]
[('between', 0.9333333333333333), ('beeteen', 0.9333333333333333)]
[('between', 0.9333333333333333), ('beeteen', 0.9333333333333333)]
[('standardizing', 0.88)]
[('standardizing', 0.88)]
[('availble', 0.9333333333333333)]
[('availble', 0.9333333333333333)]
[('accessing', 0.9411764705882353)]
[('accessing', 0.9411764705882353)]
[('description', 0.9090909090909091)]
[('description', 0.9090909090909091)]
[('variant', 0.9333333333333333)]
[('variant', 0.9333333333333333)]
[('southern', 0.9333333333333333)]
[('southern', 0.9333333333333333)]
[('familes', 0.9230769230769231), ('fails', 0.9090909090909091)]
[('fails', 0.9090909090909091), ('familes', 0.9230769230769231)]
[('visitors', 0.9333333333333333), ('vistor', 0.9230769230769231)]
[('visitors', 0.9333333333333333), ('vistor', 0.9230769230769231)]
[('completely', 0.9473684210526315)]
[('completely', 0.9473684210526315)]
[('leval', 0.9090909090909091)]
[('leval', 0.9090909090909091)]
[('beginning', 0.9411764705882353)]
[('beginning', 0.9411764705882353)]
[('volantary', 0.9411764705882353)]
[('volantary', 0.9411764705882353)]
[('defenitions', 0.9523809523809523), ('definition', 0.9)]
[('definition', 0.9), ('defenitions', 0.9523809523809523)]
[('voting', 0.9230769230769231)]
[('voting', 0.9230769230769231)]
[('benifit', 0.9333333333333333)]
[('benifit', 0.9333333333333333)]
[('auxiliary', 0.8888888888888888)]
[('auxiliary', 0.8888888888888888)]
[('planned', 0.9230769230769231)]
[('planned', 0.9230769230769231)]
[('defenition', 0.9523809523809523), ('definitions', 0.9090909090909091)]
[('definitions', 0.9090909090909091), ('defenition', 0.9523809523809523)]
[('forbidden', 0.9411764705882353)]
[('forbidden', 0.9411764705882353)]
[('comments', 0.9333333333333333)]
[('comments', 0.9333333333333333)]
[('descisions descision', 0.926829268292683)]
[('descisions descision', 0.926829268292683)]
[('supposedly', 0.9)]
[('supposedly', 0.9)]
[('embellishing', 0.9565217391304348)]
[('embellishing', 0.9565217391304348)]
[('technique', 0.9411764705882353), ('tecniques', 0.9411764705882353), ('techniques', 0.8888888888888888)]
[('techniques', 0.8888888888888888), ('technique', 0.9411764705882353), ('tecniques', 0.9411764705882353)]
[('perminant', 0.9)]
[('perminant', 0.9)]
[('confirmation', 0.9166666666666666)]
[('confirmation', 0.9166666666666666)]
[('appointment', 0.9523809523809523), ('appointments', 0.9090909090909091)]
[('appointment', 0.9523809523809523), ('appointments', 0.9090909090909091)]
[('progression', 0.9523809523809523)]
[('progression', 0.9523809523809523)]
[('accompanying', 0.9090909090909091)]
[('accompanying', 0.9090909090909091)]
[('applicable', 0.9473684210526315)]
[('applicable', 0.9473684210526315)]
[('regained', 0.9333333333333333)]
[('regained', 0.9333333333333333)]
[('guidelines', 0.9473684210526315)]
[('guidelines', 0.9473684210526315)]
[('titles', 0.9230769230769231)]
[('titles', 0.9230769230769231)]
[('unavailable', 0.9523809523809523), ('availble', 0.8888888888888888)]
[('unavailable', 0.9523809523809523), ('availble', 0.8888888888888888)]
[('advantageous', 0.9565217391304348)]
[('advantageous', 0.9565217391304348)]
[('brief', 0.8888888888888888)]
[('brief', 0.8888888888888888)]
[('appeal', 0.9090909090909091)]
[('appeal', 0.9090909090909091)]
[('consisting', 0.9523809523809523)]
[('consisting', 0.9523809523809523)]
[('separation', 0.9)]
[('separation', 0.9)]
[('search', 0.9090909090909091)]
[('search', 0.9090909090909091)]
[('resulting', 0.9411764705882353)]
[('resulting', 0.9411764705882353)]
[('suggestion', 0.9473684210526315)]
[('suggestion', 0.9473684210526315)]
[('opinion', 0.9333333333333333)]
[('opinion', 0.9333333333333333)]
[('cancellation', 0.9565217391304348)]
[('cancellation', 0.9565217391304348)]
[('composed', 0.9411764705882353)]
[('composed', 0.9411764705882353)]
[('useful', 0.9090909090909091)]
[('useful', 0.9090909090909091)]
[('humour', 0.9090909090909091)]
[('humour', 0.9090909090909091)]
[('anomalies', 0.8888888888888888)]
[('anomalies', 0.8888888888888888)]
[('would', 0.9090909090909091)]
[('would', 0.9090909090909091)]
[('examination', 0.9090909090909091)]
[('examination', 0.9090909090909091)]
[('therefore', 0.9411764705882353)]
[('therefore', 0.9411764705882353)]
[('recommend', 0.9411764705882353)]
[('recommend', 0.9411764705882353)]
[('seperate', 0.9411764705882353), ('separated', 0.8888888888888888)]
[('separated', 0.8888888888888888), ('seperate', 0.9411764705882353)]
[('particulaur', 0.9565217391304348), ('particular', 0.9090909090909091)]
[('particular', 0.9090909090909091), ('particulaur', 0.9565217391304348)]
[('pivoting', 0.9333333333333333)]
[('pivoting', 0.9333333333333333)]
[('announcing', 0.9473684210526315)]
[('announcing', 0.9473684210526315)]
[('arrangements', 0.9166666666666666)]
[('arrangements', 0.9166666666666666)]
[('proportions', 0.9523809523809523)]
[('proportions', 0.9523809523809523)]
[('accept', 0.9090909090909091)]
[('accept', 0.9090909090909091)]
[('dependence', 0.9)]
[('dependence', 0.9)]
[('unequalled', 0.9473684210526315)]
[('unequalled', 0.9473684210526315)]
[('numbers', 0.9333333333333333)]
[('numbers', 0.9333333333333333)]
[('conversely', 0.9473684210526315)]
[('conversely', 0.9473684210526315)]
[('provide', 0.9230769230769231)]
[('provide', 0.9230769230769231)]
[('arrangement', 0.9523809523809523), ('arragment', 0.9473684210526315), ('arrangements', 0.9090909090909091)]
[('arrangements', 0.9090909090909091), ('arrangement', 0.9523809523809523), ('arragment', 0.9473684210526315)]
[('responsibilities', 0.967741935483871)]
[('responsibilities', 0.967741935483871)]
[('fourth', 0.9090909090909091)]
[('fourth', 0.9090909090909091)]
[('inconceivable', 0.9230769230769231)]
[('inconceivable', 0.9230769230769231)]
[('data', 0.8888888888888888)]
[('data', 0.8888888888888888)]
[('register', 0.9333333333333333)]
[('register', 0.9333333333333333)]
[('supervision', 0.9523809523809523)]
[('supervision', 0.9523809523809523)]
[('encompassing', 0.9565217391304348)]
[('encompassing', 0.9565217391304348)]
[('negligible', 0.9)]
[('negligible', 0.9)]
[('allow', 0.8888888888888888)]
[('allow', 0.8888888888888888)]
[('operations', 0.9473684210526315)]
[('operations', 0.9473684210526315)]
[('executed', 0.9411764705882353)]
[('executed', 0.9411764705882353)]
[('interpretation', 0.9285714285714286)]
[('interpretation', 0.9285714285714286)]
[('years', 0.9090909090909091)]
[('years', 0.9090909090909091)]
[('committee', 0.9411764705882353), ('committees', 0.8888888888888888)]
[('committee', 0.9411764705882353), ('committees', 0.8888888888888888)]
[('before', 0.9090909090909091)]
[('before', 0.9090909090909091)]
[('interesting', 0.9523809523809523)]
[('interesting', 0.9523809523809523)]
[('perminantly', 0.9)]
[('perminantly', 0.9)]
[('choose', 0.9090909090909091), ('choise', 0.9090909090909091)]
[('choose', 0.9090909090909091), ('choise', 0.9090909090909091)]
[('virtually', 0.8888888888888888)]
[('virtually', 0.8888888888888888)]
[('correspondence', 0.9285714285714286)]
[('correspondence', 0.9285714285714286)]
[('eventually', 0.9473684210526315)]
[('eventually', 0.9473684210526315)]
[('desperately', 0.9)]
[('desperately', 0.9)]
[('university', 0.9473684210526315)]
[('university', 0.9473684210526315)]
[('adjournment', 0.9523809523809523)]
[('adjournment', 0.9523809523809523)]
[('possibilities', 0.88)]
[('possibilities', 0.88)]
[('stopped', 0.9230769230769231)]
[('stopped', 0.9230769230769231)]
[('meens', 0.8888888888888888)]
[('meens', 0.8888888888888888)]
[('adequately', 0.9)]
[('adequately', 0.9)]
[('shown', 0.8888888888888888)]
[('shown', 0.8888888888888888)]
[('proffits', 0.9333333333333333), ('profit', 0.9230769230769231)]
[('profit', 0.9230769230769231), ('proffits', 0.9333333333333333)]
[('encourage', 0.9411764705882353)]
[('encourage', 0.9411764705882353)]
[('collate', 0.9230769230769231)]
[('collate', 0.9230769230769231)]
[('proviso', 0.9333333333333333)]
[('proviso', 0.9333333333333333)]
[('approached', 0.9473684210526315)]
[('approached', 0.9473684210526315)]
[('difficulty', 0.9473684210526315), ('dificult', 0.9411764705882353), ('difficult', 0.8888888888888888)]
[('difficult', 0.8888888888888888), ('difficulty', 0.9473684210526315), ('dificult', 0.9411764705882353)]
[('appointments', 0.9565217391304348), ('appointment', 0.9090909090909091)]
[('appointment', 0.9090909090909091), ('appointments', 0.9565217391304348)]
[('base', 0.8888888888888888)]
[('base', 0.8888888888888888)]
[('conditioning', 0.9565217391304348)]
[('conditioning', 0.9565217391304348)]
[('beginning', 0.9411764705882353)]
[('beginning', 0.9411764705882353)]
[('universally', 0.9523809523809523)]
[('universally', 0.9523809523809523)]
[('unresolved', 0.9)]
[('unresolved', 0.9)]
[('length', 0.9090909090909091)]
[('length', 0.9090909090909091)]
[('exponentially', 0.88)]
[('exponentially', 0.88)]
[('system', 0.9090909090909091)]
[('system', 0.9090909090909091)]
[('approximately', 0.9166666666666666)]
[('approximately', 0.9166666666666666)]
[('their', 0.8888888888888888), ('there', 0.8888888888888888), ('thear', 0.8888888888888888)]
[('their', 0.8888888888888888), ('there', 0.8888888888888888), ('thear', 0.8888888888888888)]
[('repetitive', 0.9)]
[('repetitive', 0.9)]
[('exactly', 0.9333333333333333)]
[('exactly', 0.9333333333333333)]
[('immediate', 0.9411764705882353)]
[('immediate', 0.9411764705882353)]
[('appreciation', 0.9565217391304348)]
[('appreciation', 0.9565217391304348)]
[('eliminated', 0.9473684210526315)]
[('eliminated', 0.9473684210526315)]
[('believe', 0.9230769230769231)]
[('believe', 0.9230769230769231)]
[('appreciated', 0.9523809523809523)]
[('appreciated', 0.9523809523809523)]
[('readjusted', 0.9473684210526315)]
[('readjusted', 0.9473684210526315)]
[('false', 0.9090909090909091)]
[('false', 0.9090909090909091)]
[('seen', 0.8888888888888888)]
[('seen', 0.8888888888888888)]
[('interrogating', 0.9230769230769231)]
[('interrogating', 0.9230769230769231)]
[('academically', 0.9090909090909091)]
[('academically', 0.9090909090909091)]
[('traditionally', 0.96)]
[('traditionally', 0.96)]
[('studying', 0.9333333333333333)]
[('studying', 0.9333333333333333)]
[('majority', 0.9333333333333333)]
[('majority', 0.9333333333333333)]
[('aggravating', 0.9523809523809523)]
[('aggravating', 0.9523809523809523)]
[('transactions', 0.9565217391304348)]
[('transactions', 0.9565217391304348)]
[('arguing', 0.9333333333333333)]
[('arguing', 0.9333333333333333)]
[('extremely', 0.8888888888888888), ('extreamly', 0.8888888888888888)]
[('extremely', 0.8888888888888888), ('extreamly', 0.8888888888888888)]
[('later', 0.9090909090909091)]
[('later', 0.9090909090909091)]
[('senior', 0.9230769230769231)]
[('senior', 0.9230769230769231)]
[('dragged', 0.9230769230769231)]
[('dragged', 0.9230769230769231)]
[('atmosphere', 0.9473684210526315)]
[('atmosphere', 0.9473684210526315)]
[('drastically', 0.9523809523809523)]
[('drastically', 0.9523809523809523)]
[('particularly', 0.9565217391304348), ('particular', 0.9523809523809523), ('particulaur', 0.9090909090909091)]
[('particular', 0.9523809523809523), ('particularly', 0.9565217391304348), ('particulaur', 0.9090909090909091)]
[('visitor', 0.9230769230769231), ('vistors', 0.9230769230769231)]
[('visitor', 0.9230769230769231), ('vistors', 0.9230769230769231)]
[('session', 0.9230769230769231)]
[('session', 0.9230769230769231)]
[('continually', 0.9)]
[('continually', 0.9)]
[('busy', 0.8888888888888888)]
[('busy', 0.8888888888888888)]
[('meen', 0.8888888888888888)]
[('meen', 0.8888888888888888)]
[('beetween', 0.9333333333333333)]
[('beetween', 0.9333333333333333)]
[('overall', 0.9230769230769231)]
[('overall', 0.9230769230769231)]
[('timing', 0.9230769230769231)]
[('timing', 0.9230769230769231)]
[('econometric', 0.9523809523809523)]
[('econometric', 0.9523809523809523)]
[('erroneous', 0.8888888888888888)]
[('erroneous', 0.8888888888888888)]
[('descide', 0.9333333333333333), ('decides', 0.9333333333333333)]
[('descide', 0.9333333333333333), ('decides', 0.9333333333333333)]
[('intelligence', 0.9565217391304348)]
[('intelligence', 0.9565217391304348)]
[('apologies', 0.9473684210526315)]
[('apologies', 0.9473684210526315)]
[('techniques', 0.9473684210526315), ('tecnique', 0.9411764705882353), ('technique', 0.8888888888888888)]
[('techniques', 0.9473684210526315), ('technique', 0.8888888888888888), ('tecnique', 0.9411764705882353)]
[('volantry', 0.9411764705882353), ('voluntary', 0.8888888888888888)]
[('voluntary', 0.8888888888888888), ('volantry', 0.9411764705882353)]
[('currently', 0.8888888888888888)]
[('currently', 0.8888888888888888)]
[('forecast', 0.9333333333333333)]
[('forecast', 0.9333333333333333)]
[('weapons', 0.9230769230769231)]
[('weapons', 0.9230769230769231)]
[('approach', 0.9333333333333333)]
[('approach', 0.9333333333333333)]
[('available', 0.9411764705882353), ('avaible', 0.9333333333333333), ('unavailble', 0.8888888888888888)]
[('available', 0.9411764705882353), ('avaible', 0.9333333333333333), ('unavailble', 0.8888888888888888)]
[('recently', 0.9411764705882353)]
[('recently', 0.9411764705882353)]
[('ability', 0.9230769230769231)]
[('ability', 0.9230769230769231)]
[('component', 0.8888888888888888)]
[('component', 0.8888888888888888)]
[('agencies', 0.9333333333333333)]
[('agencies', 0.9333333333333333)]
[('however', 0.9333333333333333)]
[('however', 0.9333333333333333)]
[('suggested', 0.9411764705882353)]
[('suggested', 0.9411764705882353)]
[('annual', 0.9090909090909091)]
[('annual', 0.9090909090909091)]
[('according', 0.9411764705882353)]
[('according', 0.9411764705882353)]
[('table', 0.9090909090909091)]
[('table', 0.9090909090909091)]
[('throughout', 0.8888888888888888)]
[('throughout', 0.8888888888888888)]
[('conference', 0.9)]
[('conference', 0.9)]
[('union', 0.9090909090909091)]
[('union', 0.9090909090909091)]
[('interest', 0.9333333333333333)]
[('interest', 0.9333333333333333)]
[('refered', 0.9333333333333333)]
[('refered', 0.9333333333333333)]
[('sufficient', 0.9473684210526315)]
[('sufficient', 0.9473684210526315)]
[('adaptable', 0.8888888888888888)]
[('adaptable', 0.8888888888888888)]
[('representative', 0.9285714285714286)]
[('representative', 0.9285714285714286)]
[('apologised', 0.9523809523809523)]
[('apologised', 0.9523809523809523)]
[('choise', 0.9230769230769231)]
[('choise', 0.9230769230769231)]
[('procedure', 0.8888888888888888)]
[('procedure', 0.8888888888888888)]
[('shortened', 0.9411764705882353)]
[('shortened', 0.9411764705882353)]
[('manually', 0.9333333333333333)]
[('manually', 0.9333333333333333)]
[('disappointing', 0.88)]
[('disappointing', 0.88)]
[('excessively', 0.9523809523809523)]
[('excessively', 0.9523809523809523)]
[('containing', 0.8888888888888888)]
[('containing', 0.8888888888888888)]
[('develop', 0.9333333333333333)]
[('develop', 0.9333333333333333)]
[('credit', 0.9230769230769231)]
[('credit', 0.9230769230769231)]
[('government', 0.9473684210526315)]
[('government', 0.9473684210526315)]
[('orientated', 0.9473684210526315)]
[('orientated', 0.9473684210526315)]
[('widely', 0.9090909090909091)]
[('widely', 0.9090909090909091)]
[('difficult', 0.9411764705882353), ('dificulty', 0.9411764705882353), ('difficulty', 0.8888888888888888)]
[('difficult', 0.9411764705882353), ('difficulty', 0.8888888888888888), ('dificulty', 0.9411764705882353)]
[('investigated', 0.9166666666666666)]
[('investigated', 0.9166666666666666)]
[('conceived', 0.8888888888888888)]
[('conceived', 0.8888888888888888)]
[('nationally', 0.9473684210526315)]
[('nationally', 0.9473684210526315)]
[('moving', 0.9230769230769231)]
[('moving', 0.9230769230769231)]
[('equalled', 0.9333333333333333)]
[('equalled', 0.9333333333333333)]
[('corporate', 0.8888888888888888)]
[('corporate', 0.8888888888888888)]
[('enormously', 0.8888888888888888)]
[('enormously', 0.8888888888888888)]
[('financially', 0.9523809523809523)]
[('financially', 0.9523809523809523)]
[('functionally', 0.9565217391304348)]
[('functionally', 0.9565217391304348)]
[('discipline', 0.8888888888888888)]
[('discipline', 0.8888888888888888)]
[('announcement', 0.9565217391304348)]
[('announcement', 0.9565217391304348)]
[('progresses', 0.9523809523809523)]
[('progresses', 0.9523809523809523)]
[('except', 0.9230769230769231)]
[('except', 0.9230769230769231)]
[('recommending', 0.9565217391304348)]
[('recommending', 0.9565217391304348)]
[('mathematically', 0.9629629629629629)]
[('mathematically', 0.9629629629629629)]
[('sorces', 0.9090909090909091), ('source', 0.9090909090909091)]
[('source', 0.9090909090909091), ('sorces', 0.9090909090909091)]
[('combine', 0.9333333333333333)]
[('combine', 0.9333333333333333)]
[('input', 0.8888888888888888)]
[('input', 0.8888888888888888)]
[('resolved', 0.9333333333333333)]
[('resolved', 0.9333333333333333)]
[('demands', 0.9333333333333333)]
[('demands', 0.9333333333333333)]
[('unequivocally', 0.96)]
[('unequivocally', 0.96)]
[('suffering', 0.8888888888888888)]
[('suffering', 0.8888888888888888)]
[('accepted', 0.9333333333333333)]
[('accepted', 0.9333333333333333)]
[('projects', 0.9411764705882353)]
[('projects', 0.9411764705882353)]
[('journalism', 0.9473684210526315)]
[('journalism', 0.9473684210526315)]
[('output', 0.9230769230769231)]
[('output', 0.9230769230769231)]
[('security', 0.9333333333333333)]
[('security', 0.9333333333333333)]
[('essential', 0.9411764705882353)]
[('essential', 0.9411764705882353)]
[('requested', 0.9411764705882353)]
[('requested', 0.9411764705882353)]
[('supplementary', 0.96)]
[('supplementary', 0.96)]
[('questionaire', 0.9565217391304348), ('questionnaire', 0.9166666666666666)]
[('questionnaire', 0.9166666666666666), ('questionaire', 0.9565217391304348)]
[('employment', 0.9)]
[('employment', 0.9)]
[('proceeding', 0.9473684210526315)]
[('proceeding', 0.9473684210526315)]
[('deciscions descisions', 0.926829268292683)]
[('deciscions descisions', 0.926829268292683)]
[('discretion', 0.9)]
[('discretion', 0.9)]
[('reaching', 0.9333333333333333)]
[('reaching', 0.9333333333333333)]
[('expansion', 0.9411764705882353)]
[('expansion', 0.9411764705882353)]
[('although', 0.9333333333333333)]
[('although', 0.9333333333333333)]
[('analysing', 0.9411764705882353)]
[('analysing', 0.9411764705882353)]
[('comparison', 0.9523809523809523)]
[('comparison', 0.9523809523809523)]
[('months', 0.9230769230769231)]
[('months', 0.9230769230769231)]
[('hierarchal', 0.9)]
[('hierarchal', 0.9)]
[('misleading', 0.9523809523809523)]
[('misleading', 0.9523809523809523)]
[('commit', 0.9090909090909091)]
[('commit', 0.9090909090909091)]
[('auguments', 0.8888888888888888)]
[('auguments', 0.8888888888888888)]
[('within', 0.9230769230769231)]
[('within', 0.9230769230769231)]
[('accounts', 0.9333333333333333), ('acount', 0.9230769230769231)]
[('accounts', 0.9333333333333333), ('acount', 0.9230769230769231)]
[('primarily', 0.9411764705882353)]
[('primarily', 0.9411764705882353)]
[('operator', 0.9333333333333333)]
[('operator', 0.9333333333333333)]
[('accumulated', 0.9523809523809523)]
[('accumulated', 0.9523809523809523)]
[('ther', 0.8888888888888888)]
[('ther', 0.8888888888888888)]
[('summarys', 0.9333333333333333)]
[('summarys', 0.9333333333333333)]
[('understandable', 0.9629629629629629)]
[('understandable', 0.9629629629629629)]
[('safeguard', 0.8888888888888888)]
[('safeguard', 0.8888888888888888)]
[('consist', 0.9333333333333333)]
[('consist', 0.9333333333333333)]
[('declarations', 0.96)]
[('declarations', 0.96)]
[('associated', 0.9)]
[('associated', 0.9)]
[('accessibility', 0.9230769230769231)]
[('accessibility', 0.9230769230769231)]
[('examine', 0.9230769230769231)]
[('examine', 0.9230769230769231)]
[('annoying', 0.9333333333333333)]
[('annoying', 0.9333333333333333)]
[('accessing', 0.9411764705882353)]
[('accessing', 0.9411764705882353)]
[('ideally', 0.9230769230769231)]
[('ideally', 0.9230769230769231)]
[('personnel', 0.9411764705882353), ('personnell', 0.8888888888888888)]
[('personnel', 0.9411764705882353), ('personnell', 0.8888888888888888)]
[('whereas', 0.9230769230769231)]
[('whereas', 0.9230769230769231)]
[('geographically', 0.8888888888888888)]
[('geographically', 0.8888888888888888)]
[('gaining', 0.9230769230769231)]
[('gaining', 0.9230769230769231)]
[('explaining', 0.9473684210526315)]
[('explaining', 0.9473684210526315)]
[('students', 0.9333333333333333)]
[('students', 0.9333333333333333)]
[('prepared', 0.9411764705882353)]
[('prepared', 0.9411764705882353)]
[('generated', 0.9473684210526315)]
[('generated', 0.9473684210526315)]
[('graphically', 0.9523809523809523)]
[('graphically', 0.9523809523809523)]
[('suited', 0.9090909090909091)]
[('suited', 0.9090909090909091)]
[('controlled', 0.9473684210526315)]
[('controlled', 0.9473684210526315)]
[('required', 0.9411764705882353)]
[('required', 0.9411764705882353)]
[('profits', 0.9333333333333333), ('proffit', 0.9333333333333333)]
[('profits', 0.9333333333333333), ('proffit', 0.9333333333333333)]
{'min_Count': 10, 'nb_sent': 1000, 'freq_ratio': 10, 'chgram': 2, 'sim_thres': 0.88} hits = "286" precision = "0.916666666667" recall = "0.598326359833"
[('locally', 0.9230769230769231)]
[('locally', 0.9230769230769231)]
[('useful', 0.9230769230769231)]
[('useful', 0.9230769230769231)]
[('consider', 0.875)]
[('consider', 0.875)]
[('triangular', 0.9523809523809523)]
[('triangular', 0.9523809523809523)]
[('hierarchy', 0.875)]
[('hierarchy', 0.875)]
[('arrangeing', 0.8888888888888888)]
[('arrangeing', 0.8888888888888888)]
[('sources', 0.9230769230769231), ('sorce', 0.9090909090909091)]
[('sources', 0.9230769230769231), ('sorce', 0.9090909090909091)]
[('committee', 0.9411764705882353), ('committees', 0.8888888888888888), ('committe', 0.875)]
[('committee', 0.9411764705882353), ('committees', 0.8888888888888888), ('committe', 0.875)]
[('transportability', 0.9375)]
[('transportability', 0.9375)]
[('minuscule', 0.8888888888888888)]
[('minuscule', 0.8888888888888888)]
[('diagrammatically', 0.9696969696969697)]
[('diagrammatically', 0.9696969696969697)]
[('management', 0.9473684210526315)]
[('management', 0.9473684210526315)]
[('singular', 0.9411764705882353)]
[('singular', 0.9411764705882353)]
[('extreemly', 0.8888888888888888), ('extremely', 0.8888888888888888)]
[('extremely', 0.8888888888888888), ('extreemly', 0.8888888888888888)]
[('initial', 0.9230769230769231), ('initials', 0.8571428571428571)]
[('initial', 0.9230769230769231), ('initials', 0.8571428571428571)]
[('pronunciation', 0.9629629629629629)]
[('pronunciation', 0.9629629629629629)]
[('totally', 0.9230769230769231)]
[('totally', 0.9230769230769231)]
[('centrally', 0.9411764705882353)]
[('centrally', 0.9411764705882353)]
[('meant', 0.8888888888888888)]
[('meant', 0.8888888888888888)]
[('someone', 0.9230769230769231)]
[('someone', 0.9230769230769231)]
[('families', 0.9333333333333333), ('failes', 0.9230769230769231), ('familer', 0.8571428571428571), ('familys', 0.8571428571428571)]
[('families', 0.9333333333333333), ('failes', 0.9230769230769231), ('familer', 0.8571428571428571), ('familys', 0.8571428571428571)]
[('their', 0.8888888888888888), ('there', 0.8888888888888888), ('thear', 0.8888888888888888), ('the', 0.8571428571428571)]
[('the', 0.8571428571428571), ('their', 0.8888888888888888), ('there', 0.8888888888888888), ('thear', 0.8888888888888888)]
[('february', 0.9333333333333333)]
[('february', 0.9333333333333333)]
[('extended', 0.875)]
[('extended', 0.875)]
[('choises', 0.9230769230769231), ('chose', 0.9090909090909091)]
[('chose', 0.9090909090909091), ('choises', 0.9230769230769231)]
[('basically', 0.9411764705882353)]
[('basically', 0.9411764705882353)]
[('descided', 0.9333333333333333), ('descides', 0.9333333333333333), ('decide', 0.9230769230769231), ('decides', 0.8571428571428571), ('decided', 0.8571428571428571)]
[('decided', 0.8571428571428571), ('decide', 0.9230769230769231), ('descided', 0.9333333333333333), ('descides', 0.9333333333333333), ('decides', 0.8571428571428571)]
[('paerticulaur', 0.9565217391304348), ('particular', 0.9523809523809523), ('particulary', 0.9090909090909091), ('particularly', 0.8695652173913043)]
[('particular', 0.9523809523809523), ('particularly', 0.8695652173913043), ('particulary', 0.9090909090909091), ('paerticulaur', 0.9565217391304348)]
[('considerable', 0.9166666666666666)]
[('considerable', 0.9166666666666666)]
[('articles', 0.875)]
[('articles', 0.875)]
[('account', 0.9230769230769231), ('acounts', 0.9230769230769231), ('accounts', 0.8571428571428571)]
[('account', 0.9230769230769231), ('acounts', 0.9230769230769231), ('accounts', 0.8571428571428571)]
[('unfortunately', 0.96)]
[('unfortunately', 0.96)]
[('variable', 0.9333333333333333)]
[('variable', 0.9333333333333333)]
[('whether', 0.9230769230769231)]
[('whether', 0.9230769230769231)]
[('levals', 0.9090909090909091)]
[('levals', 0.9090909090909091)]
[('transferred', 0.9)]
[('transferred', 0.9)]
[('receive', 0.8571428571428571)]
[('receive', 0.8571428571428571)]
[('benifits', 0.9333333333333333), ('benefit', 0.8571428571428571)]
[('benefit', 0.8571428571428571), ('benifits', 0.9333333333333333)]
[('addressable', 0.9523809523809523)]
[('addressable', 0.9523809523809523)]
[('cake', 0.8571428571428571)]
[('cake', 0.8571428571428571)]
[('compare', 0.8571428571428571)]
[('compare', 0.8571428571428571)]
[('certain', 0.8571428571428571)]
[('certain', 0.8571428571428571)]
[('hierarchal', 0.9)]
[('hierarchal', 0.9)]
[('descide', 0.9333333333333333), ('decided', 0.9333333333333333), ('descides', 0.875), ('decide', 0.8571428571428571)]
[('decided', 0.9333333333333333), ('decide', 0.8571428571428571), ('descide', 0.9333333333333333), ('descides', 0.875)]
[('choosing', 0.9333333333333333)]
[('choosing', 0.9333333333333333)]
[('further', 0.9230769230769231)]
[('further', 0.9230769230769231)]
[('questionnaire', 0.96), ('questionare', 0.9565217391304348)]
[('questionnaire', 0.96), ('questionare', 0.9565217391304348)]
[('different', 0.9411764705882353)]
[('different', 0.9411764705882353)]
[('clerical', 0.9411764705882353)]
[('clerical', 0.9411764705882353)]
[('monitoring', 0.9)]
[('monitoring', 0.9)]
[('position', 0.9411764705882353)]
[('position', 0.9411764705882353)]
[('perhaps', 0.9333333333333333)]
[('perhaps', 0.9333333333333333)]
[('personnel', 0.9473684210526315), ('personel', 0.8888888888888888)]
[('personnel', 0.9473684210526315), ('personel', 0.8888888888888888)]
[('seperated', 0.9411764705882353), ('separate', 0.875), ('seporate', 0.875)]
[('separate', 0.875), ('seperated', 0.9411764705882353), ('seporate', 0.875)]
[('arrangment', 0.9473684210526315), ('arrangement', 0.9), ('arrangements', 0.8571428571428571)]
[('arrangements', 0.8571428571428571), ('arrangment', 0.9473684210526315), ('arrangement', 0.9)]
[('access', 0.9090909090909091)]
[('access', 0.9090909090909091)]
[('various', 0.9333333333333333)]
[('various', 0.9333333333333333)]
[('between', 0.9333333333333333), ('beeteen', 0.9333333333333333)]
[('between', 0.9333333333333333), ('beeteen', 0.9333333333333333)]
[('standardizing', 0.88)]
[('standardizing', 0.88)]
[('availble', 0.9333333333333333), ('available', 0.875)]
[('available', 0.875), ('availble', 0.9333333333333333)]
[('accessing', 0.9411764705882353)]
[('accessing', 0.9411764705882353)]
[('description', 0.9090909090909091), ('discretion', 0.8571428571428571)]
[('description', 0.9090909090909091), ('discretion', 0.8571428571428571)]
[('variant', 0.9333333333333333)]
[('variant', 0.9333333333333333)]
[('southern', 0.9333333333333333)]
[('southern', 0.9333333333333333)]
[('familes', 0.9230769230769231), ('fails', 0.9090909090909091), ('families', 0.8571428571428571)]
[('families', 0.8571428571428571), ('fails', 0.9090909090909091), ('familes', 0.9230769230769231)]
[('possible', 0.875)]
[('possible', 0.875)]
[('visitors', 0.9333333333333333), ('vistor', 0.9230769230769231), ('visitor', 0.8571428571428571)]
[('visitors', 0.9333333333333333), ('vistor', 0.9230769230769231), ('visitor', 0.8571428571428571)]
[('completely', 0.9473684210526315)]
[('completely', 0.9473684210526315)]
[('leval', 0.9090909090909091)]
[('leval', 0.9090909090909091)]
[('experiences', 0.8571428571428571)]
[('experiences', 0.8571428571428571)]
[('beginning', 0.9411764705882353)]
[('beginning', 0.9411764705882353)]
[('volantary', 0.9411764705882353)]
[('volantary', 0.9411764705882353)]
[('defenitions', 0.9523809523809523), ('definition', 0.9), ('definitions', 0.8571428571428571)]
[('definition', 0.9), ('definitions', 0.8571428571428571), ('defenitions', 0.9523809523809523)]
[('voting', 0.9230769230769231)]
[('voting', 0.9230769230769231)]
[('benifit', 0.9333333333333333), ('benefits', 0.875)]
[('benifit', 0.9333333333333333), ('benefits', 0.875)]
[('auxiliary', 0.8888888888888888)]
[('auxiliary', 0.8888888888888888)]
[('planned', 0.9230769230769231)]
[('planned', 0.9230769230769231)]
[('defenition', 0.9523809523809523), ('definitions', 0.9090909090909091), ('definition', 0.8571428571428571)]
[('definition', 0.8571428571428571), ('definitions', 0.9090909090909091), ('defenition', 0.9523809523809523)]
[('forbidden', 0.9411764705882353)]
[('forbidden', 0.9411764705882353)]
[('comments', 0.9333333333333333)]
[('comments', 0.9333333333333333)]
[('descisions descision', 0.926829268292683)]
[('descisions descision', 0.926829268292683)]
[('supposedly', 0.9)]
[('supposedly', 0.9)]
[('embellishing', 0.9565217391304348)]
[('embellishing', 0.9565217391304348)]
[('technique', 0.9411764705882353), ('tecniques', 0.9411764705882353), ('techniques', 0.8888888888888888)]
[('techniques', 0.8888888888888888), ('technique', 0.9411764705882353), ('tecniques', 0.9411764705882353)]
[('perminant', 0.9)]
[('perminant', 0.9)]
[('confirmation', 0.9166666666666666)]
[('confirmation', 0.9166666666666666)]
[('appointment', 0.9523809523809523), ('appointments', 0.9090909090909091), ('apointments', 0.8571428571428571)]
[('appointment', 0.9523809523809523), ('appointments', 0.9090909090909091), ('apointments', 0.8571428571428571)]
[('progression', 0.9523809523809523)]
[('progression', 0.9523809523809523)]
[('accompanying', 0.9090909090909091)]
[('accompanying', 0.9090909090909091)]
[('applicable', 0.9473684210526315)]
[('applicable', 0.9473684210526315)]
[('regained', 0.9333333333333333)]
[('regained', 0.9333333333333333)]
[('guidelines', 0.9473684210526315)]
[('guidelines', 0.9473684210526315)]
[('surrounding', 0.8571428571428571)]
[('surrounding', 0.8571428571428571)]
[('titles', 0.9230769230769231)]
[('titles', 0.9230769230769231)]
[('unavailable', 0.9523809523809523), ('availble', 0.8888888888888888)]
[('unavailable', 0.9523809523809523), ('availble', 0.8888888888888888)]
[('advantageous', 0.9565217391304348)]
[('advantageous', 0.9565217391304348)]
[('brief', 0.8888888888888888)]
[('brief', 0.8888888888888888)]
[('appeal', 0.9090909090909091)]
[('appeal', 0.9090909090909091)]
[('consisting', 0.9523809523809523)]
[('consisting', 0.9523809523809523)]
[('separation', 0.9)]
[('separation', 0.9)]
[('search', 0.9090909090909091)]
[('search', 0.9090909090909091)]
[('receive', 0.8571428571428571)]
[('receive', 0.8571428571428571)]
[('emploied', 0.875)]
[('emploied', 0.875)]
[('resulting', 0.9411764705882353)]
[('resulting', 0.9411764705882353)]
[('suggestion', 0.9473684210526315)]
[('suggestion', 0.9473684210526315)]
[('opinion', 0.9333333333333333)]
[('opinion', 0.9333333333333333)]
[('cancellation', 0.9565217391304348)]
[('cancellation', 0.9565217391304348)]
[('composed', 0.9411764705882353)]
[('composed', 0.9411764705882353)]
[('useful', 0.9090909090909091)]
[('useful', 0.9090909090909091)]
[('humour', 0.9090909090909091)]
[('humour', 0.9090909090909091)]
[('anomalies', 0.8888888888888888)]
[('anomalies', 0.8888888888888888)]
[('would', 0.9090909090909091)]
[('would', 0.9090909090909091)]
[('examination', 0.9090909090909091)]
[('examination', 0.9090909090909091)]
[('therefore', 0.9411764705882353)]
[('therefore', 0.9411764705882353)]
[('recommend', 0.9411764705882353)]
[('recommend', 0.9411764705882353)]
[('seperate', 0.9411764705882353), ('separated', 0.8888888888888888)]
[('separated', 0.8888888888888888), ('seperate', 0.9411764705882353)]
[('apparent', 0.875)]
[('apparent', 0.875)]
[('occurred', 0.875)]
[('occurred', 0.875)]
[('particulaur', 0.9565217391304348), ('particular', 0.9090909090909091), ('particulary', 0.8695652173913043)]
[('particular', 0.9090909090909091), ('particulary', 0.8695652173913043), ('particulaur', 0.9565217391304348)]
[('pivoting', 0.9333333333333333)]
[('pivoting', 0.9333333333333333)]
[('announcing', 0.9473684210526315)]
[('announcing', 0.9473684210526315)]
[('arrangements', 0.9166666666666666), ('arrangement', 0.8695652173913043)]
[('arrangements', 0.9166666666666666), ('arrangement', 0.8695652173913043)]
[('proportions', 0.9523809523809523)]
[('proportions', 0.9523809523809523)]
[('accept', 0.9090909090909091)]
[('accept', 0.9090909090909091)]
[('dependence', 0.9)]
[('dependence', 0.9)]
[('unequalled', 0.9473684210526315), ('equaled', 0.875)]
[('unequalled', 0.9473684210526315), ('equaled', 0.875)]
[('numbers', 0.9333333333333333)]
[('numbers', 0.9333333333333333)]
[('conversely', 0.9473684210526315)]
[('conversely', 0.9473684210526315)]
[('provide', 0.9230769230769231)]
[('provide', 0.9230769230769231)]
[('arrangement', 0.9523809523809523), ('arragment', 0.9473684210526315), ('arrangements', 0.9090909090909091)]
[('arrangements', 0.9090909090909091), ('arrangement', 0.9523809523809523), ('arragment', 0.9473684210526315)]
[('responsibilities', 0.967741935483871)]
[('responsibilities', 0.967741935483871)]
[('fourth', 0.9090909090909091)]
[('fourth', 0.9090909090909091)]
[('ordinary', 0.875)]
[('ordinary', 0.875)]
[('inconceivable', 0.9230769230769231)]
[('inconceivable', 0.9230769230769231)]
[('data', 0.8888888888888888)]
[('data', 0.8888888888888888)]
[('register', 0.9333333333333333)]
[('register', 0.9333333333333333)]
[('supervision', 0.9523809523809523)]
[('supervision', 0.9523809523809523)]
[('encompassing', 0.9565217391304348)]
[('encompassing', 0.9565217391304348)]
[('negligible', 0.9)]
[('negligible', 0.9)]
[('allow', 0.8888888888888888)]
[('allow', 0.8888888888888888)]
[('operations', 0.9473684210526315)]
[('operations', 0.9473684210526315)]
[('executed', 0.9411764705882353)]
[('executed', 0.9411764705882353)]
[('interpretation', 0.9285714285714286)]
[('interpretation', 0.9285714285714286)]
[('years', 0.9090909090909091)]
[('years', 0.9090909090909091)]
[('throuout', 0.8571428571428571)]
[('throuout', 0.8571428571428571)]
[('committee', 0.9411764705882353), ('committees', 0.8888888888888888), ('comittee', 0.875), ('commit', 0.8571428571428571)]
[('committee', 0.9411764705882353), ('committees', 0.8888888888888888), ('comittee', 0.875), ('commit', 0.8571428571428571)]
[('before', 0.9090909090909091)]
[('before', 0.9090909090909091)]
[('interesting', 0.9523809523809523)]
[('interesting', 0.9523809523809523)]
[('perminantly', 0.9)]
[('perminantly', 0.9)]
[('choose', 0.9090909090909091), ('choise', 0.9090909090909091)]
[('choose', 0.9090909090909091), ('choise', 0.9090909090909091)]
[('virtually', 0.8888888888888888)]
[('virtually', 0.8888888888888888)]
[('correspondence', 0.9285714285714286)]
[('correspondence', 0.9285714285714286)]
[('eventually', 0.9473684210526315)]
[('eventually', 0.9473684210526315)]
[('desperately', 0.9)]
[('desperately', 0.9)]
[('university', 0.9473684210526315)]
[('university', 0.9473684210526315)]
[('adjournment', 0.9523809523809523)]
[('adjournment', 0.9523809523809523)]
[('possibilities', 0.88)]
[('possibilities', 0.88)]
[('stopped', 0.9230769230769231)]
[('stopped', 0.9230769230769231)]
[('meens', 0.8888888888888888)]
[('meens', 0.8888888888888888)]
[('adequately', 0.9)]
[('adequately', 0.9)]
[('shown', 0.8888888888888888)]
[('shown', 0.8888888888888888)]
[('matrix', 0.8571428571428571)]
[('matrix', 0.8571428571428571)]
[('proffits', 0.9333333333333333), ('profit', 0.9230769230769231), ('profits', 0.8571428571428571)]
[('profit', 0.9230769230769231), ('proffits', 0.9333333333333333), ('profits', 0.8571428571428571)]
[('encourage', 0.9411764705882353)]
[('encourage', 0.9411764705882353)]
[('collate', 0.9230769230769231)]
[('collate', 0.9230769230769231)]
[('proviso', 0.9333333333333333)]
[('proviso', 0.9333333333333333)]
[('approached', 0.9473684210526315), ('aproach', 0.875)]
[('approached', 0.9473684210526315), ('aproach', 0.875)]
[('difficulty', 0.9473684210526315), ('dificult', 0.9411764705882353), ('difficult', 0.8888888888888888)]
[('difficult', 0.8888888888888888), ('difficulty', 0.9473684210526315), ('dificult', 0.9411764705882353)]
[('appointments', 0.9565217391304348), ('appointment', 0.9090909090909091), ('appoitment', 0.8571428571428571)]
[('appointment', 0.9090909090909091), ('appointments', 0.9565217391304348), ('appoitment', 0.8571428571428571)]
[('base', 0.8888888888888888)]
[('base', 0.8888888888888888)]
[('conditioning', 0.9565217391304348), ('containing', 0.8571428571428571)]
[('containing', 0.8571428571428571), ('conditioning', 0.9565217391304348)]
[('earliest', 0.875)]
[('earliest', 0.875)]
[('beginning', 0.9411764705882353)]
[('beginning', 0.9411764705882353)]
[('universally', 0.9523809523809523)]
[('universally', 0.9523809523809523)]
[('unresolved', 0.9)]
[('unresolved', 0.9)]
[('length', 0.9090909090909091)]
[('length', 0.9090909090909091)]
[('exponentially', 0.88)]
[('exponentially', 0.88)]
[('system', 0.9090909090909091)]
[('system', 0.9090909090909091)]
[('approximately', 0.9166666666666666)]
[('approximately', 0.9166666666666666)]
[('their', 0.8888888888888888), ('there', 0.8888888888888888), ('thear', 0.8888888888888888), ('the', 0.8571428571428571)]
[('the', 0.8571428571428571), ('their', 0.8888888888888888), ('there', 0.8888888888888888), ('thear', 0.8888888888888888)]
[('speaking', 0.875)]
[('speaking', 0.875)]
[('repetitive', 0.9)]
[('repetitive', 0.9)]
[('exactly', 0.9333333333333333)]
[('exactly', 0.9333333333333333)]
[('immediate', 0.9411764705882353)]
[('immediate', 0.9411764705882353)]
[('appreciation', 0.9565217391304348)]
[('appreciation', 0.9565217391304348)]
[('eliminated', 0.9473684210526315)]
[('eliminated', 0.9473684210526315)]
[('believe', 0.9230769230769231)]
[('believe', 0.9230769230769231)]
[('appreciated', 0.9523809523809523)]
[('appreciated', 0.9523809523809523)]
[('readjusted', 0.9473684210526315)]
[('readjusted', 0.9473684210526315)]
[('feeling', 0.8571428571428571)]
[('feeling', 0.8571428571428571)]
[('false', 0.9090909090909091)]
[('false', 0.9090909090909091)]
[('seen', 0.8888888888888888)]
[('seen', 0.8888888888888888)]
[('interrogating', 0.9230769230769231)]
[('interrogating', 0.9230769230769231)]
[('academically', 0.9090909090909091)]
[('academically', 0.9090909090909091)]
[('traditionally', 0.96)]
[('traditionally', 0.96)]
[('studying', 0.9333333333333333)]
[('studying', 0.9333333333333333)]
[('majority', 0.9333333333333333)]
[('majority', 0.9333333333333333)]
[('aggravating', 0.9523809523809523)]
[('aggravating', 0.9523809523809523)]
[('transactions', 0.9565217391304348)]
[('transactions', 0.9565217391304348)]
[('arguing', 0.9333333333333333)]
[('arguing', 0.9333333333333333)]
[('sheets', 0.8571428571428571)]
[('sheets', 0.8571428571428571)]
[('extremely', 0.8888888888888888), ('extreamly', 0.8888888888888888)]
[('extremely', 0.8888888888888888), ('extreamly', 0.8888888888888888)]
[('later', 0.9090909090909091)]
[('later', 0.9090909090909091)]
[('senior', 0.9230769230769231)]
[('senior', 0.9230769230769231)]
[('dragged', 0.9230769230769231)]
[('dragged', 0.9230769230769231)]
[('atmosphere', 0.9473684210526315)]
[('atmosphere', 0.9473684210526315)]
[('drastically', 0.9523809523809523)]
[('drastically', 0.9523809523809523)]
[('particularly', 0.9565217391304348), ('particular', 0.9523809523809523), ('particulaur', 0.9090909090909091), ('paerticulaur', 0.8695652173913043)]
[('particular', 0.9523809523809523), ('particularly', 0.9565217391304348), ('particulaur', 0.9090909090909091), ('paerticulaur', 0.8695652173913043)]
[('visitor', 0.9230769230769231), ('vistors', 0.9230769230769231), ('visitors', 0.8571428571428571)]
[('visitors', 0.8571428571428571), ('visitor', 0.9230769230769231), ('vistors', 0.9230769230769231)]
[('session', 0.9230769230769231)]
[('session', 0.9230769230769231)]
[('continually', 0.9)]
[('continually', 0.9)]
[('availability', 0.8571428571428571)]
[('availability', 0.8571428571428571)]
[('busy', 0.8888888888888888)]
[('busy', 0.8888888888888888)]
[('employed', 0.875), ('emploies', 0.875)]
[('employed', 0.875), ('emploies', 0.875)]
[('adequate', 0.875)]
[('adequate', 0.875)]
[('meen', 0.8888888888888888)]
[('meen', 0.8888888888888888)]
[('familes', 0.8571428571428571)]
[('familes', 0.8571428571428571)]
[('beetween', 0.9333333333333333), ('between', 0.8571428571428571)]
[('between', 0.8571428571428571), ('beetween', 0.9333333333333333)]
[('overall', 0.9230769230769231)]
[('overall', 0.9230769230769231)]
[('timing', 0.9230769230769231)]
[('timing', 0.9230769230769231)]
[('econometric', 0.9523809523809523)]
[('econometric', 0.9523809523809523)]
[('erroneous', 0.8888888888888888)]
[('erroneous', 0.8888888888888888)]
[('descide', 0.9333333333333333), ('decides', 0.9333333333333333), ('descided', 0.875), ('decide', 0.8571428571428571)]
[('decide', 0.8571428571428571), ('descide', 0.9333333333333333), ('decides', 0.9333333333333333), ('descided', 0.875)]
[('intelligence', 0.9565217391304348)]
[('intelligence', 0.9565217391304348)]
[('are', 0.8571428571428571)]
[('are', 0.8571428571428571)]
[('apologies', 0.9473684210526315), ('appologised', 0.8571428571428571)]
[('apologies', 0.9473684210526315), ('appologised', 0.8571428571428571)]
[('techniques', 0.9473684210526315), ('tecnique', 0.9411764705882353), ('technique', 0.8888888888888888)]
[('techniques', 0.9473684210526315), ('technique', 0.8888888888888888), ('tecnique', 0.9411764705882353)]
[('volantry', 0.9411764705882353), ('voluntary', 0.8888888888888888)]
[('voluntary', 0.8888888888888888), ('volantry', 0.9411764705882353)]
[('currently', 0.8888888888888888)]
[('currently', 0.8888888888888888)]
[('forecast', 0.9333333333333333)]
[('forecast', 0.9333333333333333)]
[('weapons', 0.9230769230769231)]
[('weapons', 0.9230769230769231)]
[('neither', 0.8571428571428571)]
[('neither', 0.8571428571428571)]
[('approach', 0.9333333333333333), ('aproached', 0.875)]
[('approach', 0.9333333333333333), ('aproached', 0.875)]
[('available', 0.9411764705882353), ('avaible', 0.9333333333333333), ('unavailble', 0.8888888888888888)]
[('available', 0.9411764705882353), ('avaible', 0.9333333333333333), ('unavailble', 0.8888888888888888)]
[('recently', 0.9411764705882353)]
[('recently', 0.9411764705882353)]
[('ability', 0.9230769230769231)]
[('ability', 0.9230769230769231)]
[('component', 0.8888888888888888)]
[('component', 0.8888888888888888)]
[('agencies', 0.9333333333333333)]
[('agencies', 0.9333333333333333)]
[('however', 0.9333333333333333)]
[('however', 0.9333333333333333)]
[('suggested', 0.9411764705882353)]
[('suggested', 0.9411764705882353)]
[('annual', 0.9090909090909091)]
[('annual', 0.9090909090909091)]
[('according', 0.9411764705882353)]
[('according', 0.9411764705882353)]
[('expense', 0.8571428571428571)]
[('expense', 0.8571428571428571)]
[('table', 0.9090909090909091)]
[('table', 0.9090909090909091)]
[('throughout', 0.8888888888888888), ('throut', 0.8571428571428571)]
[('throughout', 0.8888888888888888), ('throut', 0.8571428571428571)]
[('conference', 0.9)]
[('conference', 0.9)]
[('sensible', 0.875)]
[('sensible', 0.875)]
[('union', 0.9090909090909091)]
[('union', 0.9090909090909091)]
[('interest', 0.9333333333333333)]
[('interest', 0.9333333333333333)]
[('flexible', 0.875)]
[('flexible', 0.875)]
[('refered', 0.9333333333333333)]
[('refered', 0.9333333333333333)]
[('familes', 0.8571428571428571)]
[('familes', 0.8571428571428571)]
[('sufficient', 0.9473684210526315)]
[('sufficient', 0.9473684210526315)]
[('adaptable', 0.8888888888888888)]
[('adaptable', 0.8888888888888888)]
[('representative', 0.9285714285714286)]
[('representative', 0.9285714285714286)]
[('applied', 0.8571428571428571)]
[('applied', 0.8571428571428571)]
[('apologised', 0.9523809523809523), ('appologies', 0.8571428571428571)]
[('apologised', 0.9523809523809523), ('appologies', 0.8571428571428571)]
[('choise', 0.9230769230769231), ('choices', 0.8571428571428571)]
[('choices', 0.8571428571428571), ('choise', 0.9230769230769231)]
[('will', 0.8571428571428571)]
[('will', 0.8571428571428571)]
[('procedure', 0.8888888888888888)]
[('procedure', 0.8888888888888888)]
[('shortened', 0.9411764705882353)]
[('shortened', 0.9411764705882353)]
[('manually', 0.9333333333333333)]
[('manually', 0.9333333333333333)]
[('disappointing', 0.88)]
[('disappointing', 0.88)]
[('excessively', 0.9523809523809523)]
[('excessively', 0.9523809523809523)]
[('containing', 0.8888888888888888)]
[('containing', 0.8888888888888888)]
[('develop', 0.9333333333333333)]
[('develop', 0.9333333333333333)]
[('credit', 0.9230769230769231)]
[('credit', 0.9230769230769231)]
[('government', 0.9473684210526315)]
[('government', 0.9473684210526315)]
[('orientated', 0.9473684210526315)]
[('orientated', 0.9473684210526315)]
[('widely', 0.9090909090909091)]
[('widely', 0.9090909090909091)]
[('difficult', 0.9411764705882353), ('dificulty', 0.9411764705882353), ('difficulty', 0.8888888888888888)]
[('difficult', 0.9411764705882353), ('difficulty', 0.8888888888888888), ('dificulty', 0.9411764705882353)]
[('investigated', 0.9166666666666666)]
[('investigated', 0.9166666666666666)]
[('conceived', 0.8888888888888888)]
[('conceived', 0.8888888888888888)]
[('nationally', 0.9473684210526315)]
[('nationally', 0.9473684210526315)]
[('moving', 0.9230769230769231)]
[('moving', 0.9230769230769231)]
[('equalled', 0.9333333333333333), ('unequaled', 0.875)]
[('equalled', 0.9333333333333333), ('unequaled', 0.875)]
[('scrutinized', 0.8695652173913043)]
[('scrutinized', 0.8695652173913043)]
[('patterns', 0.875)]
[('patterns', 0.875)]
[('qualities', 0.875)]
[('qualities', 0.875)]
[('corporate', 0.8888888888888888)]
[('corporate', 0.8888888888888888)]
[('enormously', 0.8888888888888888)]
[('enormously', 0.8888888888888888)]
[('financially', 0.9523809523809523)]
[('financially', 0.9523809523809523)]
[('functionally', 0.9565217391304348)]
[('functionally', 0.9565217391304348)]
[('discipline', 0.8888888888888888)]
[('discipline', 0.8888888888888888)]
[('announcement', 0.9565217391304348)]
[('announcement', 0.9565217391304348)]
[('progresses', 0.9523809523809523)]
[('progresses', 0.9523809523809523)]
[('except', 0.9230769230769231)]
[('except', 0.9230769230769231)]
[('recommending', 0.9565217391304348)]
[('recommending', 0.9565217391304348)]
[('mathematically', 0.9629629629629629)]
[('mathematically', 0.9629629629629629)]
[('sorces', 0.9090909090909091), ('source', 0.9090909090909091)]
[('source', 0.9090909090909091), ('sorces', 0.9090909090909091)]
[('combine', 0.9333333333333333)]
[('combine', 0.9333333333333333)]
[('input', 0.8888888888888888)]
[('input', 0.8888888888888888)]
[('resolved', 0.9333333333333333)]
[('resolved', 0.9333333333333333)]
[('demands', 0.9333333333333333)]
[('demands', 0.9333333333333333)]
[('unequivocally', 0.96)]
[('unequivocally', 0.96)]
[('suffering', 0.8888888888888888)]
[('suffering', 0.8888888888888888)]
[('accepted', 0.9333333333333333)]
[('accepted', 0.9333333333333333)]
[('projects', 0.9411764705882353)]
[('projects', 0.9411764705882353)]
[('journalism', 0.9473684210526315)]
[('journalism', 0.9473684210526315)]
[('output', 0.9230769230769231)]
[('output', 0.9230769230769231)]
[('security', 0.9333333333333333)]
[('security', 0.9333333333333333)]
[('essential', 0.9411764705882353)]
[('essential', 0.9411764705882353)]
[('requested', 0.9411764705882353)]
[('requested', 0.9411764705882353)]
[('supplementary', 0.96)]
[('supplementary', 0.96)]
[('questionaire', 0.9565217391304348), ('questionnaire', 0.9166666666666666)]
[('questionnaire', 0.9166666666666666), ('questionaire', 0.9565217391304348)]
[('employment', 0.9)]
[('employment', 0.9)]
[('proceeding', 0.9473684210526315)]
[('proceeding', 0.9473684210526315)]
[('deciscions descisions', 0.926829268292683)]
[('deciscions descisions', 0.926829268292683)]
[('per', 0.8571428571428571)]
[('per', 0.8571428571428571)]
[('discretion', 0.9)]
[('discretion', 0.9)]
[('reaching', 0.9333333333333333)]
[('reaching', 0.9333333333333333)]
[('analysed', 0.875)]
[('analysed', 0.875)]
[('expansion', 0.9411764705882353)]
[('expansion', 0.9411764705882353)]
[('although', 0.9333333333333333), ('through', 0.8571428571428571)]
[('through', 0.8571428571428571), ('although', 0.9333333333333333)]
[('subtract', 0.875)]
[('subtract', 0.875)]
[('analysing', 0.9411764705882353)]
[('analysing', 0.9411764705882353)]
[('comparison', 0.9523809523809523)]
[('comparison', 0.9523809523809523)]
[('months', 0.9230769230769231)]
[('months', 0.9230769230769231)]
[('hierarchal', 0.9)]
[('hierarchal', 0.9)]
[('misleading', 0.9523809523809523)]
[('misleading', 0.9523809523809523)]
[('commit', 0.9090909090909091)]
[('commit', 0.9090909090909091)]
[('auguments', 0.8888888888888888)]
[('auguments', 0.8888888888888888)]
[('within', 0.9230769230769231)]
[('within', 0.9230769230769231)]
[('accounts', 0.9333333333333333), ('acount', 0.9230769230769231), ('account', 0.8571428571428571)]
[('account', 0.8571428571428571), ('accounts', 0.9333333333333333), ('acount', 0.9230769230769231)]
[('primarily', 0.9411764705882353)]
[('primarily', 0.9411764705882353)]
[('operator', 0.9333333333333333)]
[('operator', 0.9333333333333333)]
[('accumulated', 0.9523809523809523)]
[('accumulated', 0.9523809523809523)]
[('segment', 0.8571428571428571)]
[('segment', 0.8571428571428571)]
[('ther', 0.8888888888888888)]
[('ther', 0.8888888888888888)]
[('summarys', 0.9333333333333333)]
[('summarys', 0.9333333333333333)]
[('understandable', 0.9629629629629629)]
[('understandable', 0.9629629629629629)]
[('safeguard', 0.8888888888888888)]
[('safeguard', 0.8888888888888888)]
[('consist', 0.9333333333333333)]
[('consist', 0.9333333333333333)]
[('declarations', 0.96)]
[('declarations', 0.96)]
[('associated', 0.9)]
[('associated', 0.9)]
[('accessibility', 0.9230769230769231)]
[('accessibility', 0.9230769230769231)]
[('examine', 0.9230769230769231)]
[('examine', 0.9230769230769231)]
[('politics', 0.875)]
[('politics', 0.875)]
[('annoying', 0.9333333333333333)]
[('annoying', 0.9333333333333333)]
[('accessing', 0.9411764705882353)]
[('accessing', 0.9411764705882353)]
[('ideally', 0.9230769230769231)]
[('ideally', 0.9230769230769231)]
[('simular', 0.8571428571428571)]
[('simular', 0.8571428571428571)]
[('personnel', 0.9411764705882353), ('personnell', 0.8888888888888888)]
[('personnel', 0.9411764705882353), ('personnell', 0.8888888888888888)]
[('whereas', 0.9230769230769231)]
[('whereas', 0.9230769230769231)]
[('when', 0.8571428571428571)]
[('when', 0.8571428571428571)]
[('geographically', 0.8888888888888888), ('graphicaly', 0.8695652173913043)]
[('geographically', 0.8888888888888888), ('graphicaly', 0.8695652173913043)]
[('gaining', 0.9230769230769231)]
[('gaining', 0.9230769230769231)]
[('explaining', 0.9473684210526315)]
[('explaining', 0.9473684210526315)]
[('separate', 0.875), ('seperate', 0.875)]
[('separate', 0.875), ('seperate', 0.875)]
[('students', 0.9333333333333333)]
[('students', 0.9333333333333333)]
[('prepared', 0.9411764705882353)]
[('prepared', 0.9411764705882353)]
[('generated', 0.9473684210526315)]
[('generated', 0.9473684210526315)]
[('graphically', 0.9523809523809523), ('goegraphicaly', 0.8695652173913043)]
[('graphically', 0.9523809523809523), ('goegraphicaly', 0.8695652173913043)]
[('suited', 0.9090909090909091)]
[('suited', 0.9090909090909091)]
[('building', 0.875)]
[('building', 0.875)]
[('controlled', 0.9473684210526315)]
[('controlled', 0.9473684210526315)]
[('required', 0.9411764705882353)]
[('required', 0.9411764705882353)]
[('together', 0.875)]
[('together', 0.875)]
[('profits', 0.9333333333333333), ('proffit', 0.9333333333333333), ('profit', 0.8571428571428571)]
[('profit', 0.8571428571428571), ('profits', 0.9333333333333333), ('proffit', 0.9333333333333333)]
{'min_Count': 10, 'nb_sent': 10000, 'freq_ratio': 10, 'chgram': 2, 'sim_thres': 0.85} hits = "325" precision = "0.905292479109" recall = "0.691489361702"
[('locally', 0.9230769230769231)]
[('locally', 0.9230769230769231)]
[('useful', 0.9230769230769231)]
[('useful', 0.9230769230769231)]
[('consider', 0.875)]
[('consider', 0.875)]
[('triangular', 0.9523809523809523)]
[('triangular', 0.9523809523809523)]
[('hierarchy', 0.875)]
[('hierarchy', 0.875)]
[('arrangeing', 0.8888888888888888)]
[('arrangeing', 0.8888888888888888)]
[('sources', 0.9230769230769231), ('sorce', 0.9090909090909091)]
[('sources', 0.9230769230769231), ('sorce', 0.9090909090909091)]
[('committee', 0.9411764705882353), ('committees', 0.8888888888888888), ('committe', 0.875)]
[('committee', 0.9411764705882353), ('committees', 0.8888888888888888), ('committe', 0.875)]
[('transportability', 0.9375)]
[('transportability', 0.9375)]
[('minuscule', 0.8888888888888888)]
[('minuscule', 0.8888888888888888)]
[('diagrammatically', 0.9696969696969697)]
[('diagrammatically', 0.9696969696969697)]
[('management', 0.9473684210526315)]
[('management', 0.9473684210526315)]
[('singular', 0.9411764705882353)]
[('singular', 0.9411764705882353)]
[('extreemly', 0.8888888888888888), ('extremely', 0.8888888888888888)]
[('extremely', 0.8888888888888888), ('extreemly', 0.8888888888888888)]
[('initial', 0.9230769230769231), ('initials', 0.8571428571428571)]
[('initial', 0.9230769230769231), ('initials', 0.8571428571428571)]
[('pronunciation', 0.9629629629629629)]
[('pronunciation', 0.9629629629629629)]
[('totally', 0.9230769230769231)]
[('totally', 0.9230769230769231)]
[('centrally', 0.9411764705882353)]
[('centrally', 0.9411764705882353)]
[('meant', 0.8888888888888888)]
[('meant', 0.8888888888888888)]
[('someone', 0.9230769230769231)]
[('someone', 0.9230769230769231)]
[('families', 0.9333333333333333), ('failes', 0.9230769230769231), ('familer', 0.8571428571428571), ('familys', 0.8571428571428571)]
[('families', 0.9333333333333333), ('failes', 0.9230769230769231), ('familer', 0.8571428571428571), ('familys', 0.8571428571428571)]
[('their', 0.8888888888888888), ('there', 0.8888888888888888), ('thear', 0.8888888888888888), ('the', 0.8571428571428571)]
[('the', 0.8571428571428571), ('their', 0.8888888888888888), ('there', 0.8888888888888888), ('thear', 0.8888888888888888)]
[('february', 0.9333333333333333)]
[('february', 0.9333333333333333)]
[('extended', 0.875)]
[('extended', 0.875)]
[('choises', 0.9230769230769231), ('chose', 0.9090909090909091)]
[('chose', 0.9090909090909091), ('choises', 0.9230769230769231)]
[('basically', 0.9411764705882353)]
[('basically', 0.9411764705882353)]
[('descided', 0.9333333333333333), ('descides', 0.9333333333333333), ('decide', 0.9230769230769231), ('decides', 0.8571428571428571), ('decided', 0.8571428571428571)]
[('decided', 0.8571428571428571), ('decide', 0.9230769230769231), ('descided', 0.9333333333333333), ('descides', 0.9333333333333333), ('decides', 0.8571428571428571)]
[('paerticulaur', 0.9565217391304348), ('particular', 0.9523809523809523), ('particulary', 0.9090909090909091), ('particularly', 0.8695652173913043)]
[('particular', 0.9523809523809523), ('particularly', 0.8695652173913043), ('particulary', 0.9090909090909091), ('paerticulaur', 0.9565217391304348)]
[('considerable', 0.9166666666666666)]
[('considerable', 0.9166666666666666)]
[('articles', 0.875)]
[('articles', 0.875)]
[('account', 0.9230769230769231), ('acounts', 0.9230769230769231), ('accounts', 0.8571428571428571)]
[('account', 0.9230769230769231), ('acounts', 0.9230769230769231), ('accounts', 0.8571428571428571)]
[('unfortunately', 0.96)]
[('unfortunately', 0.96)]
[('variable', 0.9333333333333333)]
[('variable', 0.9333333333333333)]
[('whether', 0.9230769230769231)]
[('whether', 0.9230769230769231)]
[('levals', 0.9090909090909091)]
[('levals', 0.9090909090909091)]
[('transferred', 0.9)]
[('transferred', 0.9)]
[('receive', 0.8571428571428571)]
[('receive', 0.8571428571428571)]
[('benifits', 0.9333333333333333), ('benefit', 0.8571428571428571)]
[('benefit', 0.8571428571428571), ('benifits', 0.9333333333333333)]
[('addressable', 0.9523809523809523)]
[('addressable', 0.9523809523809523)]
[('cake', 0.8571428571428571)]
[('cake', 0.8571428571428571)]
[('compare', 0.8571428571428571)]
[('compare', 0.8571428571428571)]
[('certain', 0.8571428571428571)]
[('certain', 0.8571428571428571)]
[('hierarchal', 0.9)]
[('hierarchal', 0.9)]
[('descide', 0.9333333333333333), ('decided', 0.9333333333333333), ('descides', 0.875), ('decide', 0.8571428571428571)]
[('decided', 0.9333333333333333), ('decide', 0.8571428571428571), ('descide', 0.9333333333333333), ('descides', 0.875)]
[('choosing', 0.9333333333333333)]
[('choosing', 0.9333333333333333)]
[('further', 0.9230769230769231)]
[('further', 0.9230769230769231)]
[('questionnaire', 0.96), ('questionare', 0.9565217391304348)]
[('questionnaire', 0.96), ('questionare', 0.9565217391304348)]
[('different', 0.9411764705882353)]
[('different', 0.9411764705882353)]
[('clerical', 0.9411764705882353)]
[('clerical', 0.9411764705882353)]
[('monitoring', 0.9)]
[('monitoring', 0.9)]
[('position', 0.9411764705882353)]
[('position', 0.9411764705882353)]
[('perhaps', 0.9333333333333333)]
[('perhaps', 0.9333333333333333)]
[('personnel', 0.9473684210526315), ('personel', 0.8888888888888888)]
[('personnel', 0.9473684210526315), ('personel', 0.8888888888888888)]
[('seperated', 0.9411764705882353), ('separate', 0.875), ('seporate', 0.875)]
[('separate', 0.875), ('seperated', 0.9411764705882353), ('seporate', 0.875)]
[('arrangment', 0.9473684210526315), ('arrangement', 0.9), ('arrangements', 0.8571428571428571)]
[('arrangements', 0.8571428571428571), ('arrangment', 0.9473684210526315), ('arrangement', 0.9)]
[('access', 0.9090909090909091)]
[('access', 0.9090909090909091)]
[('various', 0.9333333333333333)]
[('various', 0.9333333333333333)]
[('between', 0.9333333333333333), ('beeteen', 0.9333333333333333)]
[('between', 0.9333333333333333), ('beeteen', 0.9333333333333333)]
[('standardizing', 0.88)]
[('standardizing', 0.88)]
[('availble', 0.9333333333333333), ('available', 0.875)]
[('available', 0.875), ('availble', 0.9333333333333333)]
[('accessing', 0.9411764705882353)]
[('accessing', 0.9411764705882353)]
[('description', 0.9090909090909091), ('discretion', 0.8571428571428571)]
[('description', 0.9090909090909091), ('discretion', 0.8571428571428571)]
[('variant', 0.9333333333333333)]
[('variant', 0.9333333333333333)]
[('southern', 0.9333333333333333)]
[('southern', 0.9333333333333333)]
[('familes', 0.9230769230769231), ('fails', 0.9090909090909091), ('families', 0.8571428571428571)]
[('families', 0.8571428571428571), ('fails', 0.9090909090909091), ('familes', 0.9230769230769231)]
[('possible', 0.875)]
[('possible', 0.875)]
[('visitors', 0.9333333333333333), ('vistor', 0.9230769230769231), ('visitor', 0.8571428571428571)]
[('visitors', 0.9333333333333333), ('vistor', 0.9230769230769231), ('visitor', 0.8571428571428571)]
[('completely', 0.9473684210526315)]
[('completely', 0.9473684210526315)]
[('leval', 0.9090909090909091)]
[('leval', 0.9090909090909091)]
[('experiences', 0.8571428571428571)]
[('experiences', 0.8571428571428571)]
[('beginning', 0.9411764705882353)]
[('beginning', 0.9411764705882353)]
[('volantary', 0.9411764705882353)]
[('volantary', 0.9411764705882353)]
[('defenitions', 0.9523809523809523), ('definition', 0.9), ('definitions', 0.8571428571428571)]
[('definition', 0.9), ('definitions', 0.8571428571428571), ('defenitions', 0.9523809523809523)]
[('voting', 0.9230769230769231)]
[('voting', 0.9230769230769231)]
[('benifit', 0.9333333333333333), ('benefits', 0.875)]
[('benifit', 0.9333333333333333), ('benefits', 0.875)]
[('auxiliary', 0.8888888888888888)]
[('auxiliary', 0.8888888888888888)]
[('planned', 0.9230769230769231)]
[('planned', 0.9230769230769231)]
[('defenition', 0.9523809523809523), ('definitions', 0.9090909090909091), ('definition', 0.8571428571428571)]
[('definition', 0.8571428571428571), ('definitions', 0.9090909090909091), ('defenition', 0.9523809523809523)]
[('forbidden', 0.9411764705882353)]
[('forbidden', 0.9411764705882353)]
[('comments', 0.9333333333333333)]
[('comments', 0.9333333333333333)]
[('descisions descision', 0.926829268292683)]
[('descisions descision', 0.926829268292683)]
[('supposedly', 0.9)]
[('supposedly', 0.9)]
[('embellishing', 0.9565217391304348)]
[('embellishing', 0.9565217391304348)]
[('technique', 0.9411764705882353), ('tecniques', 0.9411764705882353), ('techniques', 0.8888888888888888)]
[('techniques', 0.8888888888888888), ('technique', 0.9411764705882353), ('tecniques', 0.9411764705882353)]
[('perminant', 0.9)]
[('perminant', 0.9)]
[('confirmation', 0.9166666666666666)]
[('confirmation', 0.9166666666666666)]
[('appointment', 0.9523809523809523), ('appointments', 0.9090909090909091), ('apointments', 0.8571428571428571)]
[('appointment', 0.9523809523809523), ('appointments', 0.9090909090909091), ('apointments', 0.8571428571428571)]
[('progression', 0.9523809523809523)]
[('progression', 0.9523809523809523)]
[('accompanying', 0.9090909090909091)]
[('accompanying', 0.9090909090909091)]
[('applicable', 0.9473684210526315)]
[('applicable', 0.9473684210526315)]
[('regained', 0.9333333333333333)]
[('regained', 0.9333333333333333)]
[('guidelines', 0.9473684210526315)]
[('guidelines', 0.9473684210526315)]
[('surrounding', 0.8571428571428571)]
[('surrounding', 0.8571428571428571)]
[('titles', 0.9230769230769231)]
[('titles', 0.9230769230769231)]
[('unavailable', 0.9523809523809523), ('availble', 0.8888888888888888)]
[('unavailable', 0.9523809523809523), ('availble', 0.8888888888888888)]
[('advantageous', 0.9565217391304348)]
[('advantageous', 0.9565217391304348)]
[('brief', 0.8888888888888888)]
[('brief', 0.8888888888888888)]
[('appeal', 0.9090909090909091)]
[('appeal', 0.9090909090909091)]
[('consisting', 0.9523809523809523)]
[('consisting', 0.9523809523809523)]
[('separation', 0.9)]
[('separation', 0.9)]
[('search', 0.9090909090909091)]
[('search', 0.9090909090909091)]
[('receive', 0.8571428571428571)]
[('receive', 0.8571428571428571)]
[('emploied', 0.875)]
[('emploied', 0.875)]
[('resulting', 0.9411764705882353)]
[('resulting', 0.9411764705882353)]
[('suggestion', 0.9473684210526315)]
[('suggestion', 0.9473684210526315)]
[('opinion', 0.9333333333333333)]
[('opinion', 0.9333333333333333)]
[('cancellation', 0.9565217391304348)]
[('cancellation', 0.9565217391304348)]
[('composed', 0.9411764705882353)]
[('composed', 0.9411764705882353)]
[('useful', 0.9090909090909091)]
[('useful', 0.9090909090909091)]
[('humour', 0.9090909090909091)]
[('humour', 0.9090909090909091)]
[('anomalies', 0.8888888888888888)]
[('anomalies', 0.8888888888888888)]
[('would', 0.9090909090909091)]
[('would', 0.9090909090909091)]
[('examination', 0.9090909090909091)]
[('examination', 0.9090909090909091)]
[('therefore', 0.9411764705882353)]
[('therefore', 0.9411764705882353)]
[('recommend', 0.9411764705882353)]
[('recommend', 0.9411764705882353)]
[('seperate', 0.9411764705882353), ('separated', 0.8888888888888888)]
[('separated', 0.8888888888888888), ('seperate', 0.9411764705882353)]
[('apparent', 0.875)]
[('apparent', 0.875)]
[('occurred', 0.875)]
[('occurred', 0.875)]
[('particulaur', 0.9565217391304348), ('particular', 0.9090909090909091), ('particulary', 0.8695652173913043)]
[('particular', 0.9090909090909091), ('particulary', 0.8695652173913043), ('particulaur', 0.9565217391304348)]
[('pivoting', 0.9333333333333333)]
[('pivoting', 0.9333333333333333)]
[('announcing', 0.9473684210526315)]
[('announcing', 0.9473684210526315)]
[('arrangements', 0.9166666666666666), ('arrangement', 0.8695652173913043)]
[('arrangements', 0.9166666666666666), ('arrangement', 0.8695652173913043)]
[('proportions', 0.9523809523809523)]
[('proportions', 0.9523809523809523)]
[('accept', 0.9090909090909091)]
[('accept', 0.9090909090909091)]
[('dependence', 0.9)]
[('dependence', 0.9)]
[('unequalled', 0.9473684210526315), ('equaled', 0.875)]
[('unequalled', 0.9473684210526315), ('equaled', 0.875)]
[('numbers', 0.9333333333333333)]
[('numbers', 0.9333333333333333)]
[('conversely', 0.9473684210526315)]
[('conversely', 0.9473684210526315)]
[('provide', 0.9230769230769231)]
[('provide', 0.9230769230769231)]
[('arrangement', 0.9523809523809523), ('arragment', 0.9473684210526315), ('arrangements', 0.9090909090909091)]
[('arrangements', 0.9090909090909091), ('arrangement', 0.9523809523809523), ('arragment', 0.9473684210526315)]
[('responsibilities', 0.967741935483871)]
[('responsibilities', 0.967741935483871)]
[('fourth', 0.9090909090909091)]
[('fourth', 0.9090909090909091)]
[('ordinary', 0.875)]
[('ordinary', 0.875)]
[('inconceivable', 0.9230769230769231)]
[('inconceivable', 0.9230769230769231)]
[('data', 0.8888888888888888)]
[('data', 0.8888888888888888)]
[('register', 0.9333333333333333)]
[('register', 0.9333333333333333)]
[('supervision', 0.9523809523809523)]
[('supervision', 0.9523809523809523)]
[('encompassing', 0.9565217391304348)]
[('encompassing', 0.9565217391304348)]
[('negligible', 0.9)]
[('negligible', 0.9)]
[('allow', 0.8888888888888888)]
[('allow', 0.8888888888888888)]
[('operations', 0.9473684210526315)]
[('operations', 0.9473684210526315)]
[('executed', 0.9411764705882353)]
[('executed', 0.9411764705882353)]
[('interpretation', 0.9285714285714286)]
[('interpretation', 0.9285714285714286)]
[('years', 0.9090909090909091)]
[('years', 0.9090909090909091)]
[('throuout', 0.8571428571428571)]
[('throuout', 0.8571428571428571)]
[('committee', 0.9411764705882353), ('committees', 0.8888888888888888), ('comittee', 0.875), ('commit', 0.8571428571428571)]
[('committee', 0.9411764705882353), ('committees', 0.8888888888888888), ('comittee', 0.875), ('commit', 0.8571428571428571)]
[('before', 0.9090909090909091)]
[('before', 0.9090909090909091)]
[('interesting', 0.9523809523809523)]
[('interesting', 0.9523809523809523)]
[('perminantly', 0.9)]
[('perminantly', 0.9)]
[('choose', 0.9090909090909091), ('choise', 0.9090909090909091)]
[('choose', 0.9090909090909091), ('choise', 0.9090909090909091)]
[('virtually', 0.8888888888888888)]
[('virtually', 0.8888888888888888)]
[('correspondence', 0.9285714285714286)]
[('correspondence', 0.9285714285714286)]
[('eventually', 0.9473684210526315)]
[('eventually', 0.9473684210526315)]
[('desperately', 0.9)]
[('desperately', 0.9)]
[('university', 0.9473684210526315)]
[('university', 0.9473684210526315)]
[('adjournment', 0.9523809523809523)]
[('adjournment', 0.9523809523809523)]
[('possibilities', 0.88)]
[('possibilities', 0.88)]
[('stopped', 0.9230769230769231)]
[('stopped', 0.9230769230769231)]
[('meens', 0.8888888888888888)]
[('meens', 0.8888888888888888)]
[('adequately', 0.9)]
[('adequately', 0.9)]
[('shown', 0.8888888888888888)]
[('shown', 0.8888888888888888)]
[('matrix', 0.8571428571428571)]
[('matrix', 0.8571428571428571)]
[('proffits', 0.9333333333333333), ('profit', 0.9230769230769231), ('profits', 0.8571428571428571)]
[('profit', 0.9230769230769231), ('proffits', 0.9333333333333333), ('profits', 0.8571428571428571)]
[('encourage', 0.9411764705882353)]
[('encourage', 0.9411764705882353)]
[('collate', 0.9230769230769231)]
[('collate', 0.9230769230769231)]
[('proviso', 0.9333333333333333)]
[('proviso', 0.9333333333333333)]
[('approached', 0.9473684210526315), ('aproach', 0.875)]
[('approached', 0.9473684210526315), ('aproach', 0.875)]
[('difficulty', 0.9473684210526315), ('dificult', 0.9411764705882353), ('difficult', 0.8888888888888888)]
[('difficult', 0.8888888888888888), ('difficulty', 0.9473684210526315), ('dificult', 0.9411764705882353)]
[('appointments', 0.9565217391304348), ('appointment', 0.9090909090909091), ('appoitment', 0.8571428571428571)]
[('appointment', 0.9090909090909091), ('appointments', 0.9565217391304348), ('appoitment', 0.8571428571428571)]
[('base', 0.8888888888888888)]
[('base', 0.8888888888888888)]
[('conditioning', 0.9565217391304348), ('containing', 0.8571428571428571)]
[('containing', 0.8571428571428571), ('conditioning', 0.9565217391304348)]
[('earliest', 0.875)]
[('earliest', 0.875)]
[('beginning', 0.9411764705882353)]
[('beginning', 0.9411764705882353)]
[('universally', 0.9523809523809523)]
[('universally', 0.9523809523809523)]
[('unresolved', 0.9)]
[('unresolved', 0.9)]
[('length', 0.9090909090909091)]
[('length', 0.9090909090909091)]
[('exponentially', 0.88)]
[('exponentially', 0.88)]
[('system', 0.9090909090909091)]
[('system', 0.9090909090909091)]
[('approximately', 0.9166666666666666)]
[('approximately', 0.9166666666666666)]
[('their', 0.8888888888888888), ('there', 0.8888888888888888), ('thear', 0.8888888888888888), ('the', 0.8571428571428571)]
[('the', 0.8571428571428571), ('their', 0.8888888888888888), ('there', 0.8888888888888888), ('thear', 0.8888888888888888)]
[('speaking', 0.875)]
[('speaking', 0.875)]
[('repetitive', 0.9)]
[('repetitive', 0.9)]
[('exactly', 0.9333333333333333)]
[('exactly', 0.9333333333333333)]
[('immediate', 0.9411764705882353)]
[('immediate', 0.9411764705882353)]
[('appreciation', 0.9565217391304348)]
[('appreciation', 0.9565217391304348)]
[('eliminated', 0.9473684210526315)]
[('eliminated', 0.9473684210526315)]
[('believe', 0.9230769230769231)]
[('believe', 0.9230769230769231)]
[('appreciated', 0.9523809523809523)]
[('appreciated', 0.9523809523809523)]
[('readjusted', 0.9473684210526315)]
[('readjusted', 0.9473684210526315)]
[('feeling', 0.8571428571428571)]
[('feeling', 0.8571428571428571)]
[('false', 0.9090909090909091)]
[('false', 0.9090909090909091)]
[('seen', 0.8888888888888888)]
[('seen', 0.8888888888888888)]
[('interrogating', 0.9230769230769231)]
[('interrogating', 0.9230769230769231)]
[('academically', 0.9090909090909091)]
[('academically', 0.9090909090909091)]
[('traditionally', 0.96)]
[('traditionally', 0.96)]
[('studying', 0.9333333333333333)]
[('studying', 0.9333333333333333)]
[('majority', 0.9333333333333333)]
[('majority', 0.9333333333333333)]
[('aggravating', 0.9523809523809523)]
[('aggravating', 0.9523809523809523)]
[('transactions', 0.9565217391304348)]
[('transactions', 0.9565217391304348)]
[('arguing', 0.9333333333333333)]
[('arguing', 0.9333333333333333)]
[('sheets', 0.8571428571428571)]
[('sheets', 0.8571428571428571)]
[('extremely', 0.8888888888888888), ('extreamly', 0.8888888888888888)]
[('extremely', 0.8888888888888888), ('extreamly', 0.8888888888888888)]
[('later', 0.9090909090909091)]
[('later', 0.9090909090909091)]
[('senior', 0.9230769230769231)]
[('senior', 0.9230769230769231)]
[('dragged', 0.9230769230769231)]
[('dragged', 0.9230769230769231)]
[('atmosphere', 0.9473684210526315)]
[('atmosphere', 0.9473684210526315)]
[('drastically', 0.9523809523809523)]
[('drastically', 0.9523809523809523)]
[('particularly', 0.9565217391304348), ('particular', 0.9523809523809523), ('particulaur', 0.9090909090909091), ('paerticulaur', 0.8695652173913043)]
[('particular', 0.9523809523809523), ('particularly', 0.9565217391304348), ('particulaur', 0.9090909090909091), ('paerticulaur', 0.8695652173913043)]
[('visitor', 0.9230769230769231), ('vistors', 0.9230769230769231), ('visitors', 0.8571428571428571)]
[('visitors', 0.8571428571428571), ('visitor', 0.9230769230769231), ('vistors', 0.9230769230769231)]
[('session', 0.9230769230769231)]
[('session', 0.9230769230769231)]
[('continually', 0.9)]
[('continually', 0.9)]
[('availability', 0.8571428571428571)]
[('availability', 0.8571428571428571)]
[('busy', 0.8888888888888888)]
[('busy', 0.8888888888888888)]
[('employed', 0.875), ('emploies', 0.875)]
[('employed', 0.875), ('emploies', 0.875)]
[('adequate', 0.875)]
[('adequate', 0.875)]
[('meen', 0.8888888888888888)]
[('meen', 0.8888888888888888)]
[('familes', 0.8571428571428571)]
[('familes', 0.8571428571428571)]
[('beetween', 0.9333333333333333), ('between', 0.8571428571428571)]
[('between', 0.8571428571428571), ('beetween', 0.9333333333333333)]
[('overall', 0.9230769230769231)]
[('overall', 0.9230769230769231)]
[('timing', 0.9230769230769231)]
[('timing', 0.9230769230769231)]
[('econometric', 0.9523809523809523)]
[('econometric', 0.9523809523809523)]
[('erroneous', 0.8888888888888888)]
[('erroneous', 0.8888888888888888)]
[('descide', 0.9333333333333333), ('decides', 0.9333333333333333), ('descided', 0.875), ('decide', 0.8571428571428571)]
[('decide', 0.8571428571428571), ('descide', 0.9333333333333333), ('decides', 0.9333333333333333), ('descided', 0.875)]
[('intelligence', 0.9565217391304348)]
[('intelligence', 0.9565217391304348)]
[('are', 0.8571428571428571)]
[('are', 0.8571428571428571)]
[('apologies', 0.9473684210526315), ('appologised', 0.8571428571428571)]
[('apologies', 0.9473684210526315), ('appologised', 0.8571428571428571)]
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
<ipython-input-1-c14e663f6d97> in <module>()
78
79 for correct, error in tests:
---> 80 similars = index[error]
81 if not similars:
82 empty += 1
/Users/jordi/Laboratorio/Lviv Summer School/spellchecker/lib/CharacterIndex.pyc in __getitem__(self, w)
83
84 def __getitem__(self, w):
---> 85 return self(w)
86
87
/Users/jordi/Laboratorio/Lviv Summer School/spellchecker/lib/CharacterIndex.pyc in __call__(self, w, n)
65 hypotheses = []
66 for c, _ in candidates:
---> 67 r = fuzzmatch(None, w, c).ratio()
68 if r < self.min_r:
69 continue
/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/difflib.pyc in __init__(self, isjunk, a, b, autojunk)
219 self.a = self.b = None
220 self.autojunk = autojunk
--> 221 self.set_seqs(a, b)
222
223 def set_seqs(self, a, b):
/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/difflib.pyc in set_seqs(self, a, b)
231
232 self.set_seq1(a)
--> 233 self.set_seq2(b)
234
235 def set_seq1(self, a):
/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/difflib.pyc in set_seq2(self, b)
285 self.matching_blocks = self.opcodes = None
286 self.fullbcount = None
--> 287 self.__chain_b()
288
289 # For each element x in b, set b2j[x] to a list of the indices in
/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/difflib.pyc in __chain_b(self)
318 self.b2j = b2j = {}
319
--> 320 for i, elt in enumerate(b):
321 indices = b2j.setdefault(elt, [])
322 indices.append(i)
KeyboardInterrupt:
In [ ]:
Content source: JordiCarreraVentura/spellchecker
Similar notebooks: