In [1]:
import re
import os

In [4]:
# Anatomical-location vocabulary, one term per line.
# NOTE(review): "omental" is listed twice below; the second entry may have been
# meant as a different form -- confirm before de-duplicating.
modifiers = """abdomen
abdomen wall
abdominal
abdominal wall subcutaneous
bowel
colon
colonic
fascia
gallbladder
gastroesophageal
groin
hepaticobiliary limb
incision
interloop
intra-abdominal
intraabdominal
intrahepatic
intrapelvic
IVC
jejunojejunal anastomosis
Left lower quadrant
lesser sac
liver
LUQ
mesenteric
mesentery
Morison's pouch
Morison's sac
omental
omental
pancreas
pancreatic
paracolic
paracolic gutter
pararenal
pelvic
pericolic
perihepatic
peripancreatic
peritoneal
resection bed
retroperitoneal
right lower quadrant
ruq
segment VI
splenic
stomach
subdiaphragmatic
subhepatic
subpleural"""

# Finding vocabulary, one term per line.
targets = """fluid collection
abscess
loculated collection
hematoma
drainage 
complex fluid
heterogeneous collection"""

# Extra surface forms used to exercise the regexes in the cells below.
# Each one is appended on its own line.  The previous `+=` calls glued the
# first addition onto the last existing term ("heterogeneous
# collectionabscesses", "subpleuralintra abdominal") and ran two others
# together ("fluid collectionscollections of fluid").
# The former bare `targets.replace('\n', ' ')` statement was dropped:
# str.replace returns a new string, so discarding its result had no effect.
extra_targets = [
    'abscesses',
    'fluid-collection',
    'collection of fluid',
    'fluid collections',
    'collections of fluid',
]
extra_modifiers = ['intra abdominal']

targets = '\n'.join([targets] + extra_targets)
modifiers = '\n'.join([modifiers] + extra_modifiers)

Modifiers


In [5]:
# str.replace returns a new string and leaves `modifiers` unchanged, so the
# space-separated copy is bound to its own name instead of being discarded.
modifiers_flat = modifiers.replace('\n', ' ')
# The cell still ends on the empty string, which is what it displays.
''


Out[5]:
''

In [6]:
# Modifier patterns, in order:
#   0. the literal word "liver";
#   1. "abdom..." words, optionally prefixed by "intra" joined directly, by a
#      hyphen, or by a single space;
#   2. an "abdom..." word followed by " wall".
# "intra" is an optional group here.  The earlier `[intra]*` was a character
# class that accepted any run of the letters i/n/t/r/a (e.g. "train-abdomen"),
# and the free-standing ` *` pulled leading whitespace into the match.
regexes_modifiers = [
    r"liver",
    r"(?:intra[- ]?)?abdom[a-z]*",
    r"abdom[a-z]* wall",
]

In [7]:
# Literal lookup of the term "liver" in the modifier vocabulary.
liver_pattern = "liver"
r_liver = re.compile(liver_pattern)
[hit.group(0) for hit in r_liver.finditer(modifiers)]


Out[7]:
['liver']

In [39]:
# Matches "abdom..." words (abdomen, abdominal, ...), optionally prefixed by
# "intra" (joined directly, by a hyphen, or by a single space) and optionally
# followed by " wall".
# Optional groups replace the character classes `[intra]*` and `[wall]*`,
# which accepted any run of those letters, and the loose ` *` separators,
# which let a match absorb stray spaces/letters (e.g. "abdominal l" from
# "abdominal lesion").
r_abdomen = re.compile(r"(?:intra[- ]?)?abdom[a-z]*(?: wall)?")
r_abdomen.findall(modifiers)


Out[39]:
['abdomen',
 'abdomen wall',
 'abdominal',
 'abdominal wall',
 'intra-abdominal',
 'intraabdominal',
 'intra abdominal']

In [15]:
# An "abdom..." word immediately followed by " wall"
# (the vocabulary yields "abdomen wall" and "abdominal wall").
wall_pattern = "abdom[a-z]* wall"
r_wall = re.compile(wall_pattern)
[hit.group(0) for hit in r_wall.finditer(modifiers)]


Out[15]:
['abdomen wall', 'abdominal wall']

In [ ]:

Targets


In [20]:
# Target (finding) patterns.  Order and length are unchanged; three entries
# had quantifiers attached to the wrong thing:
#   - "hematoma*" repeated only the final "a" (matched "hematom", never the
#     plural)                                   -> "hematomas?"
#   - "abscess[es]*" used a character class (matched "abscesssee")
#                                               -> "abscess(?:es)?"
#   - "fluid*" repeated only the "d" (matched "flui", truncated "fluids")
#                                               -> "fluids?"
regexes_targets = [
    "abscess[a-z]*",
    "complex fluids*",
    "loculated collections*",
    "hematomas?",
    "heterogeneous collections*",
    "abscess(?:es)?",
    "collections? of fluids?|fluid collections?",
]

In [54]:
# Plain "fluid collection" phrase; each word may carry zero or more trailing "s".
collection1_pattern = "fluid[s]* collection[s]*"
r_collection1 = re.compile(collection1_pattern)

[hit.group(0) for hit in r_collection1.finditer(targets)]


Out[54]:
['fluid collection', 'fluid collections']

In [16]:
# Inverted phrasing: "collection(s) of fluid(s)".
collection2_pattern = "collections* of fluid[s]*"
r_collection2 = re.compile(collection2_pattern)
[hit.group(0) for hit in r_collection2.finditer(targets)]


Out[16]:
['collection of fluid', 'collections of fluid']

In [31]:
# Combined pattern: "collection(s) of fluid(s)" or "fluid collection(s)".
# `s?` makes each plural optional.  The earlier `fluid*` repeated only the
# "d", so it accepted "flui" and cut "fluids" short to "fluid".
r_collection3 = re.compile(r"(collections? of fluids?|fluid collections?)")

# Adjacent string literals keep the space between the two sentences; the
# earlier backslash continuation inside the literal fused them
# ("abdomen.There").
sentence = (
    'There is a fluid collection and heterogeneous collections in the abdomen. '
    'There is a fluid collection and a collection of fluid.'
)
r_collection3.findall(sentence)


Out[31]:
['fluid collection', 'fluid collection', 'collection of fluid']

In [58]:
# "loculated collection", with zero or more trailing "s".
loculated_pattern = "loculated collections*"
r_loc = re.compile(loculated_pattern)
[hit.group(0) for hit in r_loc.finditer(targets)]


Out[58]:
['loculated collection']

In [61]:
# "hematoma" with an optional plural "s".
# The earlier `hematoma*` repeated only the final "a": it accepted "hematom"
# and "hematomaaa" but never consumed the "s" of "hematomas".
r_hemat = re.compile(r"hematomas?")
r_hemat.findall(targets)


Out[61]:
['hematoma']

In [62]:
# "complex fluid", with zero or more trailing "s".
complex_pattern = "complex fluids*"
r_complex = re.compile(complex_pattern)
[hit.group(0) for hit in r_complex.finditer(targets)]


Out[62]:
['complex fluid']

In [64]:
# "heterogeneous collection", with zero or more trailing "s".
hetero_pattern = "heterogeneous collections*"
r_hetero = re.compile(hetero_pattern)
[hit.group(0) for hit in r_hetero.finditer(targets)]


Out[64]:
['heterogeneous collection']

In [21]:
# "abscess" with an optional plural "es".
# The earlier `[es]*` was a character class, so any run of the letters e/s
# after "abscess" was accepted (e.g. "abscesssee").
r_abscess = re.compile(r"abscess(?:es)?")
r_abscess.findall(targets)


Out[21]:
['abscess', 'abscesses']

In [ ]: