In [1]:
import sys
sys.path.append('./../')
%load_ext autoreload
%autoreload 2
In [2]:
from ontology import get_ontology

# Load the Disease Ontology and build lookup tables in both directions
# (term name -> DOID and DOID -> term name) in a single pass over the terms.
ontology = get_ontology('../data/doid.obo')
name2doid = {}
doid2name = {}
for term in ontology.get_terms():
    name2doid[term.name] = term.id
    doid2name[term.id] = term.name
In [ ]:
In [3]:
import numpy as np
import re
In [3]:
import wiki

# Collect wiki links for every ontology term and show one example.
lst = wiki.get_links_from_ontology(ontology)
# Fixed: Python 2 `print` statement -> Python 3 print() call (the notebook
# already uses the function form elsewhere).
print(r'example:{:}'.format(repr(lst[10])))
In [4]:
# Fetch the raw HTML for one of the ontology-derived wiki links.
page = wiki.get_html(lst[101])
# Preview only the first 1000 characters (bare expression -> cell output).
page[:1000]
Out[4]:
In [6]:
import fuzzywuzzy.process as fuzzy_process
from fuzzywuzzy import fuzz
In [17]:
# Fuzzy-match a short query against all ontology term names.
string = "ventricular arrhythmia"
# Fixed: np.sort cannot consume a dict_keys view on Python 3 — materialize
# the keys as a list first.  (Also: print statement -> print() call.)
names = np.sort(list(name2doid.keys()))
print(fuzzy_process.extractOne(string, names, scorer=fuzz.token_set_ratio))
In [139]:
# Fuzzy-match a full article title; partial_ratio scores the best matching
# substring, which suits long titles containing a short disease name.
string = "Complete remission of hairy cell leukemia variant (HCL-v) complicated by red cell aplasia post treatment with rituximab."
# Fixed: Python 2 `print` statement -> Python 3 print() call.
print(fuzzy_process.extractOne(string, names, scorer=fuzz.partial_ratio))
In [ ]:
In [18]:
# Top wiki section headers returned for the query.
query = "ventricular arrhythmia"
top = wiki.get_top_headers(query)
# Bare expression -> shown as cell output.
top
Out[18]:
In [20]:
# Match each of the top wiki headers against the ontology term names.
# Fixed: loop-body indentation (lost in the notebook export) and Python 2
# `print` statement -> print() call.
for header in top:
    results = fuzzy_process.extractOne(header, names, scorer=fuzz.token_set_ratio)
    print(results)
In [ ]:
In [59]:
# Fixed: `wikipedia` was used here but never imported anywhere in the
# notebook, so this cell raised NameError on a fresh kernel.
import wikipedia

page = wikipedia.WikipediaPage(title='Cell_proliferation')
# Bare expression -> the page summary is shown as cell output.
page.summary
Out[59]:
In [ ]:
# Ontology names consisting of more than three space-separated tokens
# (str.split(' ') is equivalent to re.split(' ', ...) for a plain-space pattern).
[name for name in names if len(name.split(' ')) > 3]
In [49]:
import pubmed

# Fetch PubMed titles for an abbreviation-style query and fuzzy-match each
# title against the ontology term names.
query = 'hcl-v'
titles = pubmed.get(query)
titles_len = [len(title) for title in titles]  # reused later for the mean-length cell
# Fixed: loop-body indentation (lost in the notebook export) and the two
# Python 2 print statements (the first print already used the function form).
for i, string in enumerate(titles):
    print("%d) %s" % (i + 1, string))
    print(fuzzy_process.extractOne(string, names, scorer=fuzz.partial_ratio))
    print()  # blank separator line (was a bare Python 2 `print`)
def find_synonym(s_ref, s):
    """Recover the expanded form of an abbreviation from a title string.

    Given a title like ``'Long Form Name (ABBR) ...'`` and the abbreviation
    ``s``, return the long form preceding ``(s)``.  Heuristic: the expansion
    starts at the n-th uppercase letter (counting backwards from the
    parenthesis), where n is the number of uppercase letters in ``s``.

    Parameters
    ----------
    s_ref : str -- reference string (e.g. an article title).
    s : str -- candidate abbreviation to expand.

    Returns
    -------
    str or None -- the expansion, or None when ``(s)`` is absent, ``s`` has
    no uppercase letters, or too few uppercase letters precede the paren.
    """
    # Fixed: this cell's def/if/return were collapsed onto one unparseable
    # line by the notebook export; also guard the two crash cases below.
    last = s_ref.find('(' + s + ')')
    if last == -1:
        return None
    n_upper = sum(1 for c in s if c.isupper())
    if n_upper == 0:
        # Original indexed with [-0] == [0], silently picking the FIRST
        # uppercase char (or raising IndexError) for lowercase abbreviations.
        return None
    upper_positions = [i for i, c in enumerate(s_ref[:last]) if c.isupper()]
    if len(upper_positions) < n_upper:
        return None  # original raised IndexError here
    first = upper_positions[-n_upper]
    # rstrip() instead of the original `last - 1`, which assumed exactly one
    # space before the '(' and otherwise chopped a real character.
    return s_ref[first:last].rstrip()

print(find_synonym('Wolff-Parkinson-White syndrome (WPW) and athletes: Darwin at play?', 'WPW'))
In [27]:
import utils

# Exercise the library version of find_synonym on an uppercase and a
# lowercase abbreviation.  Fixed: Python 2 print statements -> print() calls.
print(utils.find_synonym('Wolff-Parkinson-White syndrome (WPW) and athletes: Darwin at play?', 'WPW'))
print(utils.find_synonym('Complete remission of hairy cell leukemia variant (HCL-v)...', 'hcl-v'))
In [29]:
# Try the asymmetric string distance helper.
s_ref = 'artery disease'
s = 'nonartery'
# Fixed: Python 2 `print` statement -> print() call.
print(utils.assym_dist(s, s_ref))
In [30]:
# Compare typical ontology-name length with typical article-title length.
# Fixed: Python 2 print statements -> print() calls (comma-separated args
# produce identical space-joined output).
print('Mean term name length:', np.mean([len(term.name) for term in ontology.get_terms()]))
print('Mean article title length:', np.mean(titles_len))
In [31]:
# Build the vocabulary of distinct words (length >= 4) appearing in ontology
# term names, splitting on spaces and hyphens.
tokens = []
for term in ontology.get_terms():
    tokens.extend(t for t in re.split(' |-', term.name) if len(t) > 0)
words = [w for w in np.unique(tokens) if len(w) >= 4]
# Show a sample (cell output).
words[:10]
Out[31]:
In [ ]:
In [ ]:
In [4]:
from threading import Thread
from time import sleep
from ontology import get_ontology
# Result slot written by the fn_get_q worker thread.
query_results = None

def fn_get_q(query):
    """Thread target: fuzzy-match `query` directly against the ontology
    term names; the best match is stored in module-level `query_results`.

    Fixed: the function body lost its indentation in the notebook export.
    """
    global query_results
    query_results = fuzzy_process.extractOne(query, names, scorer=fuzz.ratio)
    return True
# Result slot written by the fn_get_wiki worker thread.
wiki_results = None

def fn_get_wiki(query):
    """Thread target: take the top wiki header for `query` and fuzzy-match
    it against the ontology names; stores the match in `wiki_results`.

    Fixed: the function body lost its indentation in the notebook export.
    """
    global wiki_results
    header = wiki.get_top_headers(query, 1)[0]
    wiki_results = fuzzy_process.extractOne(header, names, scorer=fuzz.ratio)
    #sleep(0.1)
    return True
# Result slot written by the fn_get_pubmed worker thread.
pubmed_results = None

def fn_get_pubmed(query):
    """Thread target: fetch the top PubMed title for `query` and fuzzy-match
    it against the ontology names; stores the match in `pubmed_results`.

    Returns True on success, False when PubMed returns nothing.
    Fixed: body indentation lost in the notebook export, and the Python 2
    `print string` statement -> print() call.
    """
    global pubmed_results
    string = pubmed.get(query, topK=1)
    if string is not None:
        string = string[0]
        print(string)
        pubmed_results = fuzzy_process.extractOne(string, names, scorer=fuzz.partial_ratio)
        return True
    else:
        return False
'''main'''
## from bot
query = 'valve disease'

def find_answer(query):
    """Map a free-text disease query to a DOID (and, eventually, a subgraph).

    Tries an exact ontology-name match first; otherwise fans out three fuzzy
    lookups (direct name match, wiki-header match, PubMed-title match) on
    worker threads and prints their results.  Combining the fuzzy results is
    still TODO, so the fuzzy path currently yields doid=None.

    Returns (doid, graph); graph drawing is not implemented yet (None).
    Fixed: function-body indentation lost in the notebook export, and the
    Python 2 print statements -> print() calls.
    """
    query = query.lower()
    # load ontology
    ontology = get_ontology('../data/doid.obo')
    name2doid = {term.name: term.id for term in ontology.get_terms()}
    doid2name = {term.id: term.name for term in ontology.get_terms()}
    ## exact match
    if query in name2doid.keys():
        doid = name2doid[query]
    else:
        # exact match -- no
        th_get_q = Thread(target = fn_get_q, args = (query,))
        th_get_wiki = Thread(target = fn_get_wiki, args = (query,))
        th_get_pubmed = Thread(target = fn_get_pubmed, args = (query,))
        th_get_q.start()
        th_get_wiki.start()
        th_get_pubmed.start()
        ## search engine query --> vertices, p=100(NLP??); synonyms
        ## new thread for synonyms???
        ## synonyms NLP
        ## new thread for NLP
        ## tree search on vertices (returned + synonyms)
        ## sleep ?
        th_get_q.join()
        print(query_results)
        th_get_wiki.join()
        print(wiki_results)
        th_get_pubmed.join()
        print(pubmed_results)
        ## final answer
        # NOTE(review): placeholder — the three fuzzy results are not combined
        # yet.  Kept inside the else-branch so an exact match found above is
        # not clobbered (the exported flat indentation made this ambiguous —
        # confirm against the original notebook).
        doid = None
    ## draw graph
    graph = None
    return doid, graph
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: