In [1]:
from __future__ import print_function
import re

Let's start with the original regular expression and string to search from Travis' regex problem.


In [2]:
pattern = re.compile(r"""
        (?P<any>any4?)                                       # "any"
                                                             #   association
        |                                                    # or
        (?P<object_eq>object ([\w-]+) eq (\d+))             # object
        alone
                                                             #   association
        |                                                    # or
        (?P<object_range>object ([a-z0-9A-Z-]+) range (\d+) (\d+)) # object range
                                                             #   association
        |                                                    # or
        (?P<object_group>object-group ([a-z0-9A-Z-]+))             # object group
                                                             #   association
        |                                                    # or
        (?P<object_alone>object ([[a-z0-9A-Z-]+))                   # object alone
                                                             #   association
""", re.VERBOSE)

s = '''    object-group jfi-ip-ranges object DA-TD-WEB01 eq 8850
'''

The regex had two bugs.

  • Two [[ near the end of the pattern string.
  • The significant spaces in the pattern (such as after object-group) were being ignored because of re.VERBOSE.

So those bugs are fixed in the pattern below.


In [3]:
pattern = re.compile(r"""
        (?P<any>any4?)                                       # "any"
                                                             #   association
        |                                                    # or
        (?P<object_eq>object\ ([\w-]+)\ eq\ (\d+))             # object
        alone
                                                             #   association
        |                                                    # or
        (?P<object_range>object\ ([a-z0-9A-Z-]+)\ range\ (\d+)\ (\d+)) # object range
                                                             #   association
        |                                                    # or
        (?P<object_group>object-group\ ([a-z0-9A-Z-]+))             # object group
                                                             #   association
        |                                                    # or
        (?P<object_alone>object\ ([a-z0-9A-Z-]+))                   # object alone
                                                             #   association
""", re.VERBOSE)

In [4]:
re.findall(pattern, s)


Out[4]:
[('',
  '',
  '',
  '',
  '',
  '',
  '',
  '',
  'object-group jfi-ip-ranges',
  'jfi-ip-ranges',
  '',
  ''),
 ('', '', '', '', '', '', '', '', '', '', 'object DA-TD-WEB01', 'DA-TD-WEB01')]

In [5]:
for m in re.finditer(pattern, s):
    print(repr(m))
    print('groups', m.groups())
    print('groupdict', m.groupdict())


<_sre.SRE_Match object; span=(4, 30), match='object-group jfi-ip-ranges'>
groups (None, None, None, None, None, None, None, None, 'object-group jfi-ip-ranges', 'jfi-ip-ranges', None, None)
groupdict {'object_group': 'object-group jfi-ip-ranges', 'object_eq': None, 'any': None, 'object_alone': None, 'object_range': None}
<_sre.SRE_Match object; span=(31, 49), match='object DA-TD-WEB01'>
groups (None, None, None, None, None, None, None, None, None, None, 'object DA-TD-WEB01', 'DA-TD-WEB01')
groupdict {'object_group': None, 'object_eq': None, 'any': None, 'object_alone': 'object DA-TD-WEB01', 'object_range': None}

The above works, but keeping track of the indexes of the unnamed groups drives me crazy. So I add names for all groups.


In [6]:
pattern = re.compile(r"""
        (?P<any>any4?)                                       # "any"
                                                             #   association
        |                                                    # or
        (?P<object_eq>object\ (?P<oe_name>[\w-]+)\ eq\ (?P<oe_i>\d+))             # object
        alone
                                                             #   association
        |                                                    # or
        (?P<object_range>object\ (?P<or_name>[a-z0-9A-Z-]+)
        \ range\ (?P<oe_r_start>\d+)\ (?P<oe_r_end>\d+)) # object range
                                                             #   association
        |                                                    # or
        (?P<object_group>object-group\ (?P<og_name>[a-z0-9A-Z-]+))             # object group
                                                             #   association
        |                                                    # or
        (?P<object_alone>object\ (?P<oa_name>[a-z0-9A-Z-]+))                   # object alone
                                                             #   association
""", re.VERBOSE)

In [7]:
for m in re.finditer(pattern, s):
    print(repr(m))
    print('groups', m.groups())
    print('groupdict', m.groupdict())


<_sre.SRE_Match object; span=(4, 30), match='object-group jfi-ip-ranges'>
groups (None, None, None, None, None, None, None, None, 'object-group jfi-ip-ranges', 'jfi-ip-ranges', None, None)
groupdict {'og_name': 'jfi-ip-ranges', 'object_range': None, 'object_group': 'object-group jfi-ip-ranges', 'oe_r_start': None, 'oe_i': None, 'oa_name': None, 'or_name': None, 'oe_name': None, 'oe_r_end': None, 'any': None, 'object_alone': None, 'object_eq': None}
<_sre.SRE_Match object; span=(31, 49), match='object DA-TD-WEB01'>
groups (None, None, None, None, None, None, None, None, None, None, 'object DA-TD-WEB01', 'DA-TD-WEB01')
groupdict {'og_name': None, 'object_range': None, 'object_group': None, 'oe_r_start': None, 'oe_i': None, 'oa_name': 'DA-TD-WEB01', 'or_name': None, 'oe_name': None, 'oe_r_end': None, 'any': None, 'object_alone': 'object DA-TD-WEB01', 'object_eq': None}

The following shows me just the groups that matched.


In [8]:
for m in re.finditer(pattern, s):
    for key, value in m.groupdict().items():
        if value is not None:
            print(key, repr(value))
    print()


og_name 'jfi-ip-ranges'
object_group 'object-group jfi-ip-ranges'

oa_name 'DA-TD-WEB01'
object_alone 'object DA-TD-WEB01'

Looking at the above, I see that I probably don't care about the big groups, just the parameters, so I remove the big groups (except for "any") from the regular expression.


In [9]:
pattern = re.compile(r"""
        (?P<any>any4?)                                       # "any"
                                                             #   association
        |                                                    # or
        (object\ (?P<oe_name>[\w-]+)\ eq\ (?P<oe_i>\d+))             # object
        alone
                                                             #   association
        |                                                    # or
        (object\ (?P<or_name>[a-z0-9A-Z-]+)
        \ range\ (?P<oe_r_start>\d+)\ (?P<oe_r_end>\d+)) # object range
                                                             #   association
        |                                                    # or
        (object-group\ (?P<og_name>[a-z0-9A-Z-]+))             # object group
                                                             #   association
        |                                                    # or
        (object\ (?P<oa_name>[a-z0-9A-Z-]+))                   # object alone
                                                             #   association
""", re.VERBOSE)

Now it tells me just the meat of what I want to know.


In [10]:
for m in re.finditer(pattern, s):
    for key, value in m.groupdict().items():
        if value is not None:
            print(key, repr(value))
    print()


og_name 'jfi-ip-ranges'

oa_name 'DA-TD-WEB01'