Temporary tests


In [31]:
# Import the parser module, then reload() it so that edits made to
# graphparser since the first import take effect without a kernel restart.
import graphparser
reload(graphparser)
# One GraphParser per settings file: Devanagari and Urdu.
nagarip = graphparser.GraphParser('settings/devanagari.yaml')
urdup = graphparser.GraphParser('settings/urdu.yaml')

In [32]:
# Short alias for the onmatch rules of the Devanagari parser.
omr = nagarip.onmatch_rules

In [33]:
# How many onmatch rules the Devanagari parser holds.
len(omr)


Out[33]:
44

In [61]:
def onmatch_rules_token_matrix(self, omr):
    '''
    Index onmatch rules by (current token, previous token).

    Each rule in ``omr`` is unpacked as ``(match_rules, production)``, where
    ``match_rules[0]`` holds the class(es) required of the previous token and
    ``match_rules[1]`` the class(es) required of the current token. Only the
    first class of each side is consulted.

    Parameters:
        self: object exposing ``tokens``, a mapping of token -> collection
            of class names that token belongs to.
        omr:  iterable of onmatch rules as described above.

    Returns:
        dict of dicts ``ttm`` such that ``ttm[curr][prev]`` is the list of
        rules (in their original order) applicable when ``curr`` follows
        ``prev``. Every token pair has an entry; pairs with no rule map to
        an empty list.
    '''
    tokens = self.tokens
    ttm = {t: {tc: [] for tc in tokens} for t in tokens}
    # .items() is used instead of .iteritems() so the helper runs unchanged
    # on both Python 2 and Python 3 dictionaries.
    for t, t_classes in tokens.items():
        for o in omr:
            match_rules, _prod = o
            if match_rules[1][0] not in t_classes:
                continue
            # The previous-token class does not depend on t2; look it up once.
            prev_class = match_rules[0][0]
            for t2, t2_classes in tokens.items():
                if prev_class in t2_classes:
                    # Store the single matching rule, not the whole rule list.
                    ttm[t][t2].append(o)
    return ttm
# Build the token-by-token matrix from the Devanagari parser's onmatch rules.
ttm=onmatch_rules_token_matrix(nagarip, nagarip.onmatch_rules)

In [64]:
# ttm['k'] is the inner dict keyed by token, so this counts tokens, not rules.
len(ttm['k'])


Out[64]:
85

In [ ]:
def onmatch_rules_by_token(self, omr):
    '''
    Group onmatch rules by the tokens their current-match class applies to.

    Each rule in ``omr`` is unpacked as ``(match_rules, production)``; the
    first class in ``match_rules[1]`` (the current-token side) is compared
    against every token's classes.

    Parameters:
        self: object exposing ``tokens``, a mapping of token -> collection
            of class names that token belongs to.
        omr:  iterable of onmatch rules as described above.

    Returns:
        defaultdict(list) mapping token -> rules (in their original order)
        whose current class the token belongs to. Tokens with no matching
        rule have no key until they are first accessed.
    '''
    # Imported locally so the cell does not rely on an import made elsewhere.
    from collections import defaultdict

    t_om = defaultdict(list)
    # .items() works on both Python 2 and Python 3 dictionaries.
    for t, t_classes in self.tokens.items():
        for o in omr:
            match_rules, _prod = o
            curr_class = match_rules[1]
            if curr_class[0] in t_classes:
                t_om[t].append(o)
    return t_om

# NOTE(review): binds the same attribute already held by `omr`; `mr` is not
# read by any cell visible here -- confirm whether it is still needed.
mr = nagarip.onmatch_rules


In [59]:
# Inspect one rule; shape is ((previous classes, current classes), production).
nagarip.onmatch_rules[0]


Out[59]:
((['z_consonant'], ['z_consonant']), u'\u094d')

In [ ]: