This is a work-in-progress for some future cohpy meeting.
Lastly, using names to refer to the parts of a string that match a regular expression is fantastic! It makes the code much more readable than using meaningless numerical indexes.
This was new to a Python expert who now loves it, although he still prefers to eschew regular expressions because they are hard to read.
Imagine the following contrived example, where I want to get the hash of a git object from its file path.
In [1]:
import re

# Example path of a git object; the directory name and the file name
# together spell out the object's hash.
s = '.git/objects/8e/28241360c472576e8caa944253d4af368d9081'

# re.VERBOSE lets the pattern span several lines and carry its own comments.
# Groups are numbered by their opening parenthesis:
#   1 -> directory/filename, 2 -> directory part, 3 -> filename part.
git_pattern = re.compile(
    r'''
.*/ # anything and a slash
(
([0-9a-fA-F]{2}) # 2 hexadecimal digits
/ # separated by a slash
([0-9a-fA-F]{38}) # 38 hexadecimal digits
)$''',
    flags=re.VERBOSE,
)
In [2]:
# match() only succeeds when the pattern matches from the start of s;
# otherwise it returns None.
m = git_pattern.match(s)
# A bare expression on the last line makes the notebook display its value.
m
Out[2]:
In [3]:
# Group 0 is the entire text matched by the pattern.
m.group(0)
Out[3]:
In [4]:
# Group 1 is the outermost parenthesised group: directory, slash and filename.
m.group(1)
Out[4]:
In [5]:
# Group 2 is the second opening parenthesis: the 2 hexadecimal digits.
m.group(2)
Out[5]:
In [6]:
# Group 3 is the third opening parenthesis: the 38 hexadecimal digits.
m.group(3)
Out[6]:
In [7]:
# Show the built-in help text for re.compile.
help(re.compile)
In [8]:
import re

# Example path of a git object; the directory name and the file name
# together spell out the object's hash.
s = '.git/objects/8e/28241360c472576e8caa944253d4af368d9081'

# Same structure as the numbered pattern, but each group carries a name via
# (?P<name>...). Under re.VERBOSE the blank after each "<name>" is ignored.
git_pattern = re.compile(
    r'''
.*/ # anything and a slash
(?P<hash_with_slash>
(?P<hash_directory> [0-9a-fA-F]{2}) # 2 hexadecimal digits
/ # separated by a slash
(?P<hash_filename> [0-9a-fA-F]{38}) # 38 hexadecimal digits
)$''',
    flags=re.VERBOSE,
)
In [9]:
# Run the match again so that m comes from the current git_pattern.
m = git_pattern.match(s)
# A bare expression on the last line makes the notebook display its value.
m
Out[9]:
In [10]:
# Named groups are still numbered, so access by index keeps working.
m.group(2)
Out[10]:
In [11]:
# The same group looked up by name instead of by position.
m.group('hash_directory')
Out[11]:
In [12]:
# Positional access to the 38-digit part.
m.group(3)
Out[12]:
In [13]:
# The same 38-digit part looked up by name.
m.group('hash_filename')
Out[13]:
In [14]:
# Rebuild the full hash by concatenating the two named pieces.
# Bound to git_hash rather than hash so the builtin hash() is not shadowed
# for the rest of the session.
git_hash = m.group('hash_directory') + m.group('hash_filename')
git_hash
Out[14]:
In [15]:
# m.group is a bound method, so map() can call it once per group name.
# Bound to git_hash rather than hash so the builtin hash() is not shadowed
# for the rest of the session.
git_hash = ''.join(map(m.group, ('hash_directory', 'hash_filename')))
git_hash
Out[15]:
In [16]:
# Build each group name from its suffix before looking it up.
# The lambda parameter is called part so it does not shadow the path string s,
# and the result is bound to git_hash so the builtin hash() is not shadowed.
git_hash = ''.join(map(lambda part: m.group('hash_%s' % part), ('directory', 'filename')))
git_hash
Out[16]:
In [17]:
# Positional access to the outermost group (directory, slash and filename).
m.group(1)
Out[17]:
In [18]:
# The same outermost group looked up by name.
m.group('hash_with_slash')
Out[18]:
In [19]:
# Take the combined group and drop the separating slash character by character.
# Bound to git_hash rather than hash so the builtin hash() is not shadowed
# for the rest of the session.
git_hash = ''.join(c for c in m.group('hash_with_slash') if c != '/')
git_hash
Out[19]:
In [20]:
# Display the path string that has been matched so far.
s
Out[20]:
In [21]:
# Same hash, but without any leading directories.
s = '8e/28241360c472576e8caa944253d4af368d9081'
# The pattern's leading ".*/" needs a slash before the two-digit directory,
# and this string has none, so match() returns None.
m = git_pattern.match(s)
# The notebook shows no output for a None result.
m
In [22]:
# repr() turns the value into a string, so a None result becomes visible.
repr(m)
Out[22]: