In [1]:
import pyConTextNLP.pyConTextGraph as pyConText
import pyConTextNLP.itemData as itemData
from textblob import TextBlob
import networkx as nx
import pyConTextNLP.display.html as html
from IPython.display import display, HTML
In [2]:
# Sample radiology report impressions used throughout this notebook.
# Each list element is one complete report; line breaks inside a report are
# part of the original text and are preserved as-is.
reports = [
"""IMPRESSION: Evaluation limited by lack of IV contrast; however, no evidence of
bowel obstruction or mass identified within the abdomen or pelvis. Non-specific interstitial opacities and bronchiectasis seen at the right
base, suggestive of post-inflammatory changes.""",
"""IMPRESSION: Evidence of early pulmonary vascular congestion and interstitial edema. Probable scarring at the medial aspect of the right lung base, with no
definite consolidation.""",
"""IMPRESSION:
1. 2.0 cm cyst of the right renal lower pole. Otherwise, normal appearance
of the right kidney with patent vasculature and no sonographic evidence of
renal artery stenosis.
2. Surgically absent left kidney.""",
"""IMPRESSION: No pneumothorax.""",
# The comma after the next report is required: without it Python silently
# concatenates the two adjacent string literals into a single report.
"""IMPRESSION: No definite pneumothorax""",
"""IMPRESSION: New opacity at the left lower lobe consistent with pneumonia.""",
]
In [3]:
# Build the two item collections passed to markup_sentence below:
#   modifiers - instantiated from lexical_kb_05042016.tsv
#   targets   - instantiated from utah_crit.tsv
# Both sources are URLs into the pyConTextNLP GitHub repository, so this cell
# presumably needs network access to run - TODO confirm offline behaviour.
modifiers = itemData.instantiateFromCSVtoitemData(
"https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/lexical_kb_05042016.tsv")
targets = itemData.instantiateFromCSVtoitemData(
"https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/utah_crit.tsv")
markup_sentence
We are putting the functionality we went through in the previous two notebooks (BasicSentenceMarkup and BasicSentenceMarkupPart2) into a function, `markup_sentence`. We add one step to the function: `dropInactiveModifiers`, which will delete any modifier node that does not get attached to a target node.
In [4]:
def markup_sentence(s, modifiers, targets, prune_inactive=True):
    """
    Create and populate a ConTextMarkup for a single sentence.

    The sentence is set as the raw text and cleaned, the modifier items and
    then the target items are marked, marks are pruned, 'Exclusion' marks are
    dropped, modifiers are applied to targets, and self-modifying
    relationships are pruned.

    Parameters
    ----------
    s : the sentence text to mark up.
    modifiers : items marked with mode="modifier".
    targets : items marked with mode="target".
    prune_inactive : when true (the default), dropInactiveModifiers() is also
        called, deleting any modifier node that did not get attached to a
        target node.

    Returns
    -------
    The populated pyConText.ConTextMarkup object.
    """
    sentence_markup = pyConText.ConTextMarkup()
    sentence_markup.setRawText(s)
    sentence_markup.cleanText()

    # Modifiers are marked first, then targets.
    for items, item_mode in ((modifiers, "modifier"), (targets, "target")):
        sentence_markup.markItems(items, mode=item_mode)

    sentence_markup.pruneMarks()
    sentence_markup.dropMarks('Exclusion')

    # Attach modifiers to any targets that fall within the modifier's scope.
    sentence_markup.applyModifiers()
    sentence_markup.pruneSelfModifyingRelationships()

    if not prune_inactive:
        return sentence_markup

    sentence_markup.dropInactiveModifiers()
    return sentence_markup
In [5]:
# Work with the first report in the list and show its text.
report = reports[0]
print(report)
In [6]:
# Document-level container; the per-sentence markups are added to it below.
context = pyConText.ConTextDocument()
pyConTextNLP comes with a simple sentence splitter in `helper.py`. I have not been maintaining it and have recently been using TextBlob to split sentences instead. A known problem with either sentence-splitting solution is enumerated lists that don't use periods.
In [7]:
# Split the lower-cased report into sentences with TextBlob and build one
# markup per sentence.
blob = TextBlob(report.lower())
rslts = [
    markup_sentence(sentence.raw, modifiers=modifiers, targets=targets)
    for sentence in blob.sentences
]

# Add every sentence markup to the document-level `context`.
# NOTE(review): `context` is created in an earlier cell, so re-running this
# cell on its own presumably adds the same sentences a second time - re-run
# the cell that creates `context` first.
for r in rslts:
    context.addMarkup(r)
In [8]:
# Colour used for each category when the marked-up document is rendered as
# HTML (passed as `colors=` to html.mark_document_with_html).
# Values are presumably emitted as CSS colours, so each one should be a valid
# CSS colour name - "golden" is not one, hence "gold" for the "acute" entry.
clrs = {
    "bowel_obstruction": "blue",
    "inflammation": "blue",
    "definite_negated_existence": "red",
    "probable_negated_existence": "indianred",
    "ambivalent_existence": "orange",
    "probable_existence": "forestgreen",
    "definite_existence": "green",
    "historical": "goldenrod",
    "indication": "pink",
    "acute": "gold",
}
In [9]:
# Render the marked-up document as HTML inline, using the colours in `clrs`
# and "black" as the default colour.
display(HTML(html.mark_document_with_html(context,colors = clrs, default_color="black")))
In [10]:
# Print the XML produced by the document's getXML() method.
print(context.getXML())