In [1]:
import json
import os

import processors
from processors import *
from processors.visualization import JupyterVisualizer as viz
# Show which py-processors version this notebook is running against.
print(processors.__version__)
In [2]:
# Build the ProcessorsAPI handle used by the annotation and extraction cells
# below; port 8886 is passed to it.
# NOTE(review): keep_alive=True presumably leaves the backing server running
# after this object is discarded -- confirm against the py-processors docs.
API = ProcessorsAPI(port=8886, keep_alive=True)
In [3]:
# The notebook's working directory is expected to sit one level below the
# demo root, so the root is the parent of the current working directory.
demo_dir = os.path.dirname(os.getcwd())
docs_dir = os.path.join(demo_dir, "docs")
grammar_dir = os.path.join(demo_dir, "grammar")

# Report the resolved locations, one per line.
print("\n".join([
    "DEMO DIRECTORY:\t{}".format(demo_dir),
    "DOCS DIRECTORY:\t{}".format(docs_dir),
    "GRAMMAR DIRECTORY:\t{}".format(grammar_dir),
]))
In [4]:
# Collect the path of every .txt file in docs_dir.
# os.listdir returns names in arbitrary order, so sort the paths: later cells
# index into the resulting documents (documents[0]) and should pick the same
# file on every machine and every run.
raw_text_files = sorted(
    os.path.join(docs_dir, f) for f in os.listdir(docs_dir) if f.endswith(".txt")
)
In [5]:
# Annotate each raw text file with the bionlp annotator.
# Each file is read inside a `with` block so its handle is closed before the
# annotation call, instead of being left open as a bare open(f).read() does.
documents = []
for text_path in raw_text_files:
    with open(text_path) as infile:
        raw_text = infile.read()
    documents.append(API.bionlp.annotate(raw_text))
In [6]:
# TODO: Add to py-processors
def display_mentions(mentions):
    """Print a document identifier for each mention, then render the mention.

    The identifier is the document's ``id`` when that value is truthy;
    otherwise the Python object identity of the document is printed instead.

    Parameters
    ----------
    mentions : iterable
        Mentions to show; each must expose a ``document`` attribute.
    """
    for mention in mentions:
        source_doc = mention.document
        # Fall back to the object's identity when the document has no usable id.
        label = source_doc.id or id(source_doc)
        print(label)
        viz.display_mention(mention)
def deserialize_document(doc, serialized_docs_dir=None):
    """Load one serialized Document from a ``.json`` file.

    Parameters
    ----------
    doc : str
        Name (or path) of the JSON file to load. The parameter keeps its
        original name for backward compatibility even though it holds a file
        name rather than a Document.
    serialized_docs_dir : str, optional
        Directory that contains ``doc``. When omitted, ``doc`` is opened
        exactly as given.

    Returns
    -------
    Document or None
        The loaded document with ``id`` set to the file's base name minus its
        extension, or ``None`` when ``doc`` does not end in ``.json``.
    """
    json_file = doc
    if not json_file.endswith(".json"):
        return None
    if serialized_docs_dir:
        fpath = os.path.join(serialized_docs_dir, json_file)
    else:
        fpath = json_file
    with open(fpath) as infile:
        doc_dict = json.load(infile)
    document = Document.load_from_JSON(doc_dict)
    # FIXED, but not released (ADD TEST)
    # py-processors v3.0.3 wasn't retrieving the doc id, so derive it from
    # the file name instead.
    document.id = os.path.splitext(os.path.basename(json_file))[0]
    return document


def deserialize_documents(serialized_docs_dir):
    """Yield a Document for every ``.json`` file in ``serialized_docs_dir``.

    Parameters
    ----------
    serialized_docs_dir : str
        Directory to scan; files without a ``.json`` suffix are skipped.
    """
    # Sorted so the yield order does not depend on os.listdir's arbitrary order.
    for json_file in sorted(os.listdir(serialized_docs_dir)):
        if json_file.endswith(".json"):
            yield deserialize_document(json_file, serialized_docs_dir)
# TODO: add to py-processors
def extract_from_documents(documents, rules, API):
    """Yield the mentions extracted from each document, one document at a time.

    For every document, ``API.odin.extract_from_document(doc, rules)`` is
    called and its mentions are yielded in ascending order of
    ``(document.id, sentence, start)``. Documents are processed in the order
    given; ordering is applied within each document's mentions only.

    Parameters
    ----------
    documents : iterable
        Documents to run the rules over.
    rules : str
        Rules passed through unchanged to ``extract_from_document``.
    API : object
        Object exposing ``odin.extract_from_document(doc, rules)``.
    """
    def reading_order(mention):
        return (mention.document.id, mention.sentence, mention.start)

    for document in documents:
        found = API.odin.extract_from_document(document, rules)
        ordered = sorted(found, key=reading_order)
        for mention in ordered:
            yield mention
In [7]:
# Display the graph for the first sentence of the first annotated document,
# passing the visualizer's own parse_css as the css argument.
viz.display_graph(documents[0].sentences[0], css=viz.parse_css)
(`O` means no entity label)
In [8]:
# Path prefix that this cell searches for in grammar/master.yml.
# NOTE(review): presumably the tutorial author's checkout location, embedded
# in master.yml -- confirm against the file.
original_prefix = "/Users/gus/repos/odin-tutorial"
# Replacement for that prefix. As written it equals original_prefix, so the
# substitution is a no-op; point it at your own checkout to rewrite the paths.
my_prefix = "/Users/gus/repos/odin-tutorial"
# Read the master grammar inside a `with` block so the file handle is closed.
with open(os.path.join(grammar_dir, "master.yml"), "r") as infile:
    master_file = infile.read().replace(original_prefix, my_prefix)
print(master_file)
In [9]:
# Run the master grammar text over every annotated document. The generator is
# materialized into a list because the cells below filter it several times.
mentions = list(extract_from_documents(documents, master_file, API))
In [10]:
# Display the graph for the first sentence of the first document again, now
# that extraction has been run.
viz.display_graph(documents[0].sentences[0], css=viz.parse_css)
In [11]:
# Print the entity grammar; `with` closes the file handle once it is read.
with open(os.path.join(grammar_dir, "entities.yml")) as infile:
    print(infile.read())
In [12]:
# Show only the mentions for which m.matches("Entity") is true.
display_mentions(m for m in mentions if m.matches("Entity"))
In [13]:
# Print the PTM event grammar; `with` closes the file handle once it is read.
with open(os.path.join(grammar_dir, "ptm_events.yml")) as infile:
    print(infile.read())
In [14]:
# Show only the mentions for which m.matches("PTMEvent") is true.
display_mentions(m for m in mentions if m.matches("PTMEvent"))
In [15]:
# Print the complex event grammar; `with` closes the file handle once it is read.
with open(os.path.join(grammar_dir, "complex_events.yml")) as infile:
    print(infile.read())
In [16]:
# Show only the mentions for which m.matches("ComplexEvent") is true.
display_mentions(m for m in mentions if m.matches("ComplexEvent"))