In [1]:
from __future__ import print_function
import re
Let's start with the original regular expression and string to search from Travis' regex problem.
In [2]:
# Travis' original pattern and the sample string it is searched against.
#
# Two defects are left in on purpose, because this cell shows the regex as it
# was originally written:
#   1. Under re.VERBOSE, unescaped whitespace in the pattern is ignored, so the
#      blanks between "object", the name and "eq"/"range" match nothing.
#   2. The object_alone character class starts with "[[", which puts a literal
#      "[" into the class.
#
# Every explanatory line inside the pattern carries its own "#": a comment
# continuation without one would be read as literal text to match.
pattern = re.compile(r"""
    (?P<any>any4?)                                              # "any"
                                                                #   association
    |                                                           # or
    (?P<object_eq>object ([\w-]+) eq (\d+))                     # object eq
                                                                #   association
    |                                                           # or
    (?P<object_range>object ([a-z0-9A-Z-]+) range (\d+) (\d+))  # object range
                                                                #   association
    |                                                           # or
    (?P<object_group>object-group ([a-z0-9A-Z-]+))              # object group
                                                                #   association
    |                                                           # or
    (?P<object_alone>object ([[a-z0-9A-Z-]+))                   # object alone
                                                                #   association
""", re.VERBOSE)
s = ''' object-group jfi-ip-ranges object DA-TD-WEB01 eq 8850
'''
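The regex had two bugs. First, it is compiled with `re.VERBOSE`, which ignores unescaped whitespace inside the pattern, so the spaces between the words were never matched; each literal space has to be written as `\ `. Second, the character class for "object alone" started with a stray extra `[`.
Both bugs are fixed in the pattern below.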
In [3]:
# Corrected pattern: literal spaces are escaped as "\ " (re.VERBOSE ignores
# unescaped whitespace) and the object_alone character class no longer starts
# with a stray "[".
#
# Each alternative is a named outer group wrapping unnamed parameter groups.
# Every explanatory line inside the pattern carries its own "#": a comment
# continuation without one would be read as literal text to match.
pattern = re.compile(r"""
    (?P<any>any4?)                                                    # "any"
                                                                      #   association
    |                                                                 # or
    (?P<object_eq>object\ ([\w-]+)\ eq\ (\d+))                        # object eq
                                                                      #   association
    |                                                                 # or
    (?P<object_range>object\ ([a-z0-9A-Z-]+)\ range\ (\d+)\ (\d+))    # object range
                                                                      #   association
    |                                                                 # or
    (?P<object_group>object-group\ ([a-z0-9A-Z-]+))                   # object group
                                                                      #   association
    |                                                                 # or
    (?P<object_alone>object\ ([a-z0-9A-Z-]+))                         # object alone
                                                                      #   association
""", re.VERBOSE)
In [4]:
# One tuple per match, holding the text of every capture group in the pattern.
pattern.findall(s)
Out[4]:
In [5]:
# Walk the matches in order and show the match object together with the
# positional (groups) and named (groupdict) views of its capture groups.
for match in pattern.finditer(s):
    print(repr(match))
    print('groups', match.groups())
    print('groupdict', match.groupdict())
The above works, but keeping track of the indexes of the unnamed groups drives me crazy. So I add names for all groups.
In [6]:
# Same alternatives as before, but every capture group is named, so results
# can be read from groupdict() instead of counting positional indexes.
#
# Literal spaces are escaped as "\ " because re.VERBOSE ignores unescaped
# whitespace. Every explanatory line inside the pattern carries its own "#":
# a comment continuation without one would be read as literal text to match.
pattern = re.compile(r"""
    (?P<any>any4?)                                                    # "any"
                                                                      #   association
    |                                                                 # or
    (?P<object_eq>object\ (?P<oe_name>[\w-]+)\ eq\ (?P<oe_i>\d+))     # object eq
                                                                      #   association
    |                                                                 # or
    (?P<object_range>object\ (?P<or_name>[a-z0-9A-Z-]+)
        \ range\ (?P<oe_r_start>\d+)\ (?P<oe_r_end>\d+))              # object range
                                                                      #   association
    |                                                                 # or
    (?P<object_group>object-group\ (?P<og_name>[a-z0-9A-Z-]+))        # object group
                                                                      #   association
    |                                                                 # or
    (?P<object_alone>object\ (?P<oa_name>[a-z0-9A-Z-]+))              # object alone
                                                                      #   association
""", re.VERBOSE)
In [7]:
# Print each match object, then its capture groups by position and by name.
for match in pattern.finditer(s):
    print(repr(match))
    print('groups', match.groups())
    print('groupdict', match.groupdict())
The following shows me just the groups that matched.
In [8]:
# For each match, list only the named groups that took part in it (groups
# belonging to the alternatives that did not match are None), then print a
# blank line to separate one match from the next.
for match in pattern.finditer(s):
    hits = [(name, text) for name, text in match.groupdict().items()
            if text is not None]
    for name, text in hits:
        print(name, repr(text))
    print()
Looking at the above, I see that I probably don't care about the big groups, just the parameters, so I remove the big groups (except for "any") from the regular expression.
In [9]:
# Only the parameters (and "any") keep their names; the outer group around
# each alternative is left unnamed, so groupdict() reports just the values of
# interest.
#
# Literal spaces are escaped as "\ " because re.VERBOSE ignores unescaped
# whitespace. Every explanatory line inside the pattern carries its own "#":
# a comment continuation without one would be read as literal text to match.
pattern = re.compile(r"""
    (?P<any>any4?)                                          # "any"
                                                            #   association
    |                                                       # or
    (object\ (?P<oe_name>[\w-]+)\ eq\ (?P<oe_i>\d+))        # object eq
                                                            #   association
    |                                                       # or
    (object\ (?P<or_name>[a-z0-9A-Z-]+)
        \ range\ (?P<oe_r_start>\d+)\ (?P<oe_r_end>\d+))    # object range
                                                            #   association
    |                                                       # or
    (object-group\ (?P<og_name>[a-z0-9A-Z-]+))              # object group
                                                            #   association
    |                                                       # or
    (object\ (?P<oa_name>[a-z0-9A-Z-]+))                    # object alone
                                                            #   association
""", re.VERBOSE)
Now it tells me just the meat of what I want to know.
In [10]:
# Report, match by match, every named group that actually captured text;
# a blank line closes each match's listing.
for match in pattern.finditer(s):
    captured = [(name, text) for name, text in match.groupdict().items()
                if text is not None]
    for name, text in captured:
        print(name, repr(text))
    print()