In [1]:
# Babelfy disambiguation
In [16]:
import os

# Babelfy API key. Read from the BABELFY_API_KEY environment variable so the
# secret is never stored in the notebook; this is None when the variable is unset.
api_key = os.environ.get('BABELFY_API_KEY')
In [90]:
import requests
class UniqueChars(list):
    """List of ``(start, end)`` character ranges that keeps only the longest
    range among those sharing a start or an end offset.
    """

    def append(self, value):
        """Add ``value`` unless a range sharing an endpoint is at least as long.

        Args:
            value: two-item sequence ``(start, end)``.

        Returns:
            None. When ``value`` is longer than every stored range it shares a
            start or end with, it takes the position of the first such range
            and the remaining ones are removed.
        """
        s, e = value
        length = e - s

        # Indices of stored ranges that share a start or an end with `value`.
        clashes = [
            idx for idx, (start, end) in enumerate(self)
            if start == s or end == e
        ]
        if not clashes:
            super().append(value)
            return None

        # An existing range that is at least as long wins; drop `value`.
        if any(length <= self[idx][1] - self[idx][0] for idx in clashes):
            return None

        # `value` beats every clashing range: replace the first one in place and
        # remove the others (highest index first so earlier indices stay valid).
        self[clashes[0]] = value
        for idx in reversed(clashes[1:]):
            del self[idx]
        return None
def disambiguate(text, api_key, thesaurus=None):
    """Tag the disambiguated fragments of ``text`` with a per-word sense index.

    Sends ``text`` to the Babelfy ``disambiguate`` endpoint, and for every
    returned fragment retained by ``UniqueChars`` replaces that fragment with
    ``<word>_<sense index>`` (spaces turned into underscores, lower-cased).
    The sense index is the position of the fragment's ``babelSynsetID`` in
    ``thesaurus[word]``.

    Args:
        text: sentence to disambiguate.
        api_key: Babelfy API key, sent as the ``key`` query parameter.
        thesaurus: optional dict mapping a word to the list of sense ids seen
            for it so far. It is updated in place. A fresh dict is created
            when omitted.

    Returns:
        Tuple ``(out_text, thesaurus)``.

    Raises:
        requests.HTTPError: propagated from ``raise_for_status()``.
    """
    # A `{}` default would be shared (and mutated) across calls.
    if thesaurus is None:
        thesaurus = {}

    # Ingoing text
    url = 'https://babelfy.io/v1/disambiguate'
    params = {'text': text,
              'lang': 'EN',
              'key': api_key}
    headers = {'Accept-encoding': 'gzip'}
    r = requests.get(url, params=params, headers=headers)
    r.raise_for_status()
    data = r.json()

    unique_chars = UniqueChars()
    for row in data:
        unique_chars.append(sorted(row['charFragment'].values()))

    # Outgoing text: first assign sense indices in row order ...
    candidates = []
    for row in data:
        sense = row['babelSynsetID']
        char_range = sorted(row['charFragment'].values())
        if char_range not in unique_chars:
            continue
        start, end = char_range
        word = text[start:end + 1]  # the end offset is used as inclusive
        print(word, sense)
        senses = thesaurus.setdefault(word, [])
        if sense not in senses:
            senses.append(sense)
        new_word = word.replace(" ", "_") + "_" + str(senses.index(sense))
        candidates.append((start, end, new_word.lower()))

    # ... then pick non-overlapping spans, longest first (the sort is stable,
    # so equally long spans keep row order).
    chosen = []
    for start, end, tag in sorted(candidates, key=lambda c: c[0] - c[1]):
        if all(end < s or e < start for s, e, _ in chosen):
            chosen.append((start, end, tag))

    # Splice by character offsets, right to left so earlier offsets stay valid.
    # A plain str.replace() would also rewrite other occurrences of the word,
    # including ones inside tokens that were already tagged.
    out_text = text
    for start, end, tag in sorted(chosen, reverse=True):
        out_text = out_text[:start] + tag + out_text[end + 1:]
    return out_text, thesaurus
# Shared word -> senses mapping, threaded through every disambiguate() call.
thesaurus = {}
input_sentences = [
    "I clean windows",
    "I like to program computers on Windows machines",
    "I am cleaner than him",
    "I program on Windows computers",
]

# Tag each sentence in turn, carrying the thesaurus from one call to the next.
output_sentences = []
for sentence in input_sentences:
    result, thesaurus = disambiguate(
        sentence,
        api_key=api_key,
        thesaurus=thesaurus,
    )
    output_sentences.append(result)

print(output_sentences)
In [82]:
# Display the tagged sentences collected by the disambiguation loop.
output_sentences
Out[82]:
In [47]:
# Display the word -> sense-id list mapping built up by disambiguate().
thesaurus
Out[47]:
In [54]:
# NOTE(review): the only visible assignment of `data` is the local variable
# inside disambiguate(); no module-level `data` is defined in the cells shown,
# so this presumably relies on leftover kernel state - confirm or remove.
data
Out[54]:
In [62]:
a = ["a","a"]
# list has no .replace(); apply str.replace to each element instead.
# This builds a new list and leaves `a` itself unchanged.
[item.replace("a","b") for item in a]
In [61]:
# Display the scratch list `a`.
a
Out[61]:
In [ ]: