In [11]:
from collatex import *
# Pre-tokenized input: three witnesses (A, B, C) of three tokens each.
# Every token carries its text under "t"; B and C additionally mark
# "black" with a "tag" property ("emph" and "bold" respectively).
json_input = {
    "witnesses": [
        {"id": "A", "tokens": [{"t": "the"}, {"t": "black"}, {"t": "dog"}]},
        {"id": "B", "tokens": [{"t": "the"}, {"t": "black", "tag": "emph"}, {"t": "dingo"}]},
        {"id": "C", "tokens": [{"t": "the"}, {"t": "black", "tag": "bold"}, {"t": "dingo"}]},
    ]
}

# Collate without any properties filter; the last expression is the cell output.
collate_pretokenized_json(json_input, output="html2")
In [14]:
def match_properties(token1_data, token2_data):
    """Decide whether two tokens may be treated as a match.

    Two tokens match when their text (the "t" entry) is equal and their
    "tag" entries are either equal (including both being absent) or are
    the pair "emph"/"bold" in either order.

    Parameters
    ----------
    token1_data, token2_data : dict or str
        Token data read via ``.get("t")`` and ``.get("tag")``. A bare
        string is also accepted and is treated as token text with no tag,
        so that calls such as ``match_properties("dingo", "dingo")`` do
        not fail with AttributeError.

    Returns
    -------
    bool
        True if the tokens match under the rules above, otherwise False.
    """
    def _text_and_tag(token_data):
        # A plain string has no .get(); treat it as untagged token text.
        if isinstance(token_data, str):
            return token_data, None
        return token_data.get("t"), token_data.get("tag")

    text1, tag1 = _text_and_tag(token1_data)
    text2, tag2 = _text_and_tag(token2_data)

    # Different text never matches, whatever the tags say.
    if text1 != text2:
        return False
    if tag1 == tag2:
        return True
    # "emph" and "bold" are considered interchangeable, in either order.
    # A tuple lookup (rather than a set) keeps plain == comparison semantics.
    return (tag1, tag2) in (("emph", "bold"), ("bold", "emph"))
In [4]:
# Tokens are passed as dicts because match_properties reads them with .get();
# bare strings have no .get() method.
match_properties({"t": "dingo"}, {"t": "dingo"})
Out[4]:
In [16]:
# Same text on both sides, but the tags differ ("i" versus "emph").
match_properties(
    {"tag": "i", "t": "black"},
    {"t": "black", "tag": "emph"},
)
Out[16]:
In [12]:
# Collate the same input, this time passing match_properties as the
# properties_filter.
collate_pretokenized_json(
    json_input,
    properties_filter=match_properties,
    output="html2",
)
In [13]:
# Collate once more with no properties_filter, for comparison with the
# filtered call.
collate_pretokenized_json(
    json_input,
    output="html2",
)
In [ ]: