In [11]:
from collatex import * 

# Three pre-tokenized witnesses (A, B, C). Each token is a dict whose "t" key
# holds the token text; in witnesses B and C the token "black" additionally
# carries a "tag" property ("emph" and "bold" respectively).
json_input = {
    "witnesses": [
        {
            "id": "A",
            "tokens": [{"t": "the"}, {"t": "black"}, {"t": "dog"}],
        },
        {
            "id": "B",
            "tokens": [{"t": "the"}, {"t": "black", "tag": "emph"}, {"t": "dingo"}],
        },
        {
            "id": "C",
            "tokens": [{"t": "the"}, {"t": "black", "tag": "bold"}, {"t": "dingo"}],
        },
    ]
}

# Baseline collation: no properties_filter is passed; the "html2" output
# format is requested.
collate_pretokenized_json(
    json_input,
    output="html2",
)


A B C
the the the
black black black
dog dingo dingo

In [14]:
def match_properties(token1_data, token2_data):
    """Decide whether two token dicts count as a match.

    Both arguments are read with ``.get`` using the keys ``"t"`` and ``"tag"``.
    The tokens match when their ``"t"`` values are equal and their ``"tag"``
    values are either equal (two missing tags compare equal as ``None``) or
    are the pair ``'emph'`` / ``'bold'`` in either order.

    Returns ``True`` on a match and ``False`` otherwise.
    """
    first_tag = token1_data.get("tag")
    second_tag = token2_data.get("tag")

    # Tokens with different text never match, whatever their tags are.
    if token1_data.get("t") != token2_data.get("t"):
        return False

    if first_tag == second_tag:
        return True

    # 'emph' and 'bold' are treated as interchangeable tags.
    return (first_tag, second_tag) in (("emph", "bold"), ("bold", "emph"))

In [4]:
# Sanity check: two untagged tokens with the same text should match.
# The tokens are passed as dicts because match_properties reads its
# arguments with .get(); bare strings would raise AttributeError.
match_properties({"t": "dingo"}, {"t": "dingo"})


Out[4]:
True

In [16]:
# Same text, but the tag "i" is neither equal to "emph" nor part of the
# emph/bold pair, so no match is expected.
match_properties(
    {"tag": "i", "t": "black"},
    {"t": "black", "tag": "emph"},
)


Out[16]:
False

In [12]:
# Collate the same witnesses again, this time passing match_properties as
# the properties_filter.
collate_pretokenized_json(
    json_input,
    properties_filter=match_properties,
    output="html2",
)


A B C
the the the
black black black
dog dingo dingo

In [13]:
# Unfiltered collation repeated (no properties_filter), presumably for
# comparison with the filtered run.
collate_pretokenized_json(
    json_input,
    output="html2",
)


A B C
the the the
black black black
dog dingo dingo

In [ ]: