In [1]:
#Babyl disambiguation

In [16]:
api_key='2c568dae-79a3-483e-b220-538cf4140531'

In [90]:
import requests

class UniqueChars(list):
    def append(self,value):
        s,e = value
        length = (e - s)
        for idx,(start,end) in enumerate(self):
            _length = (end - start)
            if (start == s) | (end == e):
                if length > _length:
                    self[idx] = value
                return None                
        super().append(value)

def disambiguate(text,api_key,thesaurus={}):
    # Ingoing text
    url = 'https://babelfy.io/v1/disambiguate'
    params = {'text' : text,
              'lang' : 'EN',
              'key'  : api_key}
    headers = {'Accept-encoding':'gzip'}
    r = requests.get(service_url,params=params,headers=headers)
    r.raise_for_status()
    data = r.json()

    unique_chars = UniqueChars()
    for row in data:
        char_range = sorted(row['charFragment'].values())
        unique_chars.append(char_range)

    # Outgoing text
    out_text = text
    for row in data:
        sense = row['babelSynsetID']        
        char_range = sorted(row['charFragment'].values())
        if char_range not in unique_chars:
            continue        
        word = text[char_range[0]:char_range[1]+1]
        print(word,sense)
        if not word in thesaurus:
            thesaurus[word] = []
        if sense not in thesaurus[word]:
            thesaurus[word].append(sense)
        sense_idx = thesaurus[word].index(sense)
        
        new_word = word.replace(" ","_")+"_"+str(sense_idx)
        out_text = out_text.replace(word,new_word.lower())
    return out_text,thesaurus

thesaurus = {}
input_sentences = ["I clean windows","I like to program computers on Windows machines","I am cleaner than him","I program on Windows computers"]
output_sentences = []
for sentence in input_sentences:
    result,thesaurus = disambiguate(sentence,api_key=api_key,thesaurus=thesaurus)
    output_sentences.append(result)
    
print(output_sentences)


clean bn:00085059v
windows bn:00081285n
like bn:00090362v
program bn:00064646n
computers bn:00021464n
Windows bn:00081285n
machines bn:00052556n
program bn:00064646n
Windows computers bn:00081305n
['I clean_0 windows_0', 'I like_0 to program_0 computers_0 on windows_0 machines_0', 'I am cleaner than him', 'I program_0 on windows_computers_0']

In [82]:
output_sentences


Out[82]:
['I am a windows_0 cleaner', 'I like_0 windows_computers_0']

In [47]:
thesaurus


Out[47]:
{'computer': ['bn:00021464n'], 'windows': ['bn:00081285n']}

In [54]:
data


Out[54]:
[{'BabelNetURL': 'http://babelnet.org/rdf/s00081285n',
  'DBpediaURL': 'http://dbpedia.org/resource/Window',
  'babelSynsetID': 'bn:00081285n',
  'charFragment': {'end': 13, 'start': 7},
  'coherenceScore': 0.0,
  'globalScore': 0.0,
  'score': 0.0,
  'source': 'MCS',
  'tokenFragment': {'end': 3, 'start': 3}}]

In [62]:
a = ["a","a"]
a.replace("a","b")


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-62-4f0548e209d4> in <module>()
      1 a = ["a","a"]
----> 2 a.replace("a","b")

AttributeError: 'list' object has no attribute 'replace'

In [61]:
a


Out[61]:
['a']

In [ ]: