In [1]:
"""
Exercise 1, p. 151 - Using \d to find digits.

:return:
"""
import re
PhoneNumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
mo = PhoneNumRegex.search('My phone number is 415-555-1212')
print('Phone number found: ' + mo.group())


Phone number found: 415-555-1212

In [2]:
"""
Exercise 2 - p. 152 - Grouping with parentheses.

:return:
"""
import re
PhoneNumRegex = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)')
mo = PhoneNumRegex.search('My phone number is 415-555-1212')
mo.group(1)
print(mo.group(2))
print(mo.group(0))
print(mo.groups())
areaCode, mainNumber = mo.groups()
print (areaCode)
print (mainNumber)


555-1212
415-555-1212
('415', '555-1212')
415
555-1212

In [3]:
"""
Exercise 3 - p 153 - Escaping parentheses.

:return:
"""
import re
PhoneNumRegex = re.compile(r'(\(\d\d\d\)) (\d\d\d-\d\d\d\d)')
mo = PhoneNumRegex.search('My phone number is (415) 555-1212')
print(mo.group(1))
print(mo.group(2))


(415)
555-1212

In [4]:
"""
Exercise 4 - p. 153 - Matching multiple groups with the pipe.

:return:
"""
import re
heroRegex = re.compile(r'Batman|Tina Fey')
mo1 = heroRegex.search('Batman and Tina Fey.')
print(mo1.group())
mo2 = heroRegex.search('Tina Fey and Batman.')
print(mo2.group())


Batman
Tina Fey

In [5]:
"""
Exercise 5 - p. 154 - The pipe and grouping.

:return:
"""
import re
batRegex = re.compile(r'Bat(man|mobile|copter|bat)')
mo = batRegex.search('Batmobile lost a wheel')
print(mo.group())
print(mo.group(1))


Batmobile
mobile

In [6]:
"""
Exercise 6 - p. 154 - Optional matching with the question mark.

:return:
"""
import re
batRegex = re.compile(r'Bat(wo)?man')
mo1 = batRegex.search('The Adventures of Batman')
print(mo1.group())
mo2 = batRegex.search('The Adventures of Batwoman')
print(mo2.group())
PhoneNumRegex = re.compile(r'(\d\d\d-)?\d\d\d-\d\d\d\d')
mo3 = PhoneNumRegex.search('My phone number is 415-555-1212')
print(mo3.group())
mo4 = PhoneNumRegex.search('My phone number is 555-1212')
print(mo4.group())


Batman
Batwoman
415-555-1212
555-1212

In [7]:
"""
Exercise 7 - p. 155 - Matching zero or more with the star.

:return:
"""
import re
batRegex = re.compile(r'Bat(wo)*man')
mo1 = batRegex.search('The Adventures of Batman')
print(mo1.group())
mo2 = batRegex.search('The Adventures of Batwoman')
print(mo2.group())
mo3 = batRegex.search('The Adventures of Batwowowowowoman')
print(mo3.group())


Batman
Batwoman
Batwowowowowoman

In [8]:
"""
Exercise 8 - p. 155 - Matching one or more with the plus.

:return:
"""
import re
batRegex = re.compile(r'Bat(wo)+man')
mo1 = batRegex.search('The Adventures of Batwoman')
print(mo1.group())
mo2 = batRegex.search('The Adventures of Batwowowowowoman')
print(mo2.group())
mo3 = batRegex.search('The Adventures of Batman')
print(mo3 == None)


Batwoman
Batwowowowowoman
True

In [9]:
"""
Exercise 9 - p. 156 - Matching specific repetitions with curly
brackets.

:return:
"""
import re
haRegex = re.compile(r'(Ha){3}')
mo1 = haRegex.search('HaHaHa')
print(mo1.group())
mo2 = haRegex.search('Ha')
print(mo2 == None)


HaHaHa
True

In [10]:
"""
Exercise 10 - p. 156-157 - Greedy and nongreedy matching
brackets.

:return:
"""
import re
greedyhaRegex = re.compile(r'(Ha){3,5}')
mo1 = greedyhaRegex.search('HaHaHaHaHa')
print(mo1.group())

nonGreedyhaRegex = re.compile(r'(Ha){3,5}?')
mo2 = nonGreedyhaRegex.search('HaHaHaHaHa')
print(mo2.group())


HaHaHaHaHa
HaHaHa

In [11]:
"""
Exercise 11 - p. 157 - Findall method.

:return:
"""
import re
# search results
PhoneNumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
mo1 = PhoneNumRegex.search('Cell: 415-555-9999 Work: 415-555-0000')
print(mo1.group())

# findall results (no groups defined)
PhoneNumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d') # has no groups
mo2 = PhoneNumRegex.findall('Cell: 415-555-9999 Work: 415-555-0000')
print(mo2)      # returns list of strings with no groups

# findall results (groups defined)
PhoneNumRegex = re.compile(r'(\d\d\d)-(\d\d\d)-(\d\d\d\d)')  # groups
mo3 = PhoneNumRegex.findall('Cell: 415-555-9999 Work: 415-555-0000')
print(mo3)      # returns list of tuples when groups are defined


415-555-9999
['415-555-9999', '415-555-0000']
[('415', '555', '9999'), ('415', '555', '0000')]

In [12]:
"""
Exercise 12 - p. 158 - Character classes - predefined shorthand.

=========================  ===========================================
Shorthand character class  Represents
=========================  ===========================================
\d                         Any numeric digit from 0 to 9.
\0                         Any character that is not a numeric digit
                           from 0 to 9
\w                         Any letter, numeric digit, or the underscore
                           character.  (Think of this as matching
                           "word" characters.)
\W                         Any character that is not a letter,
                           numeric digit, or the underscore character.
\s                         Any space, tab, or newline character.  (
                           Think of this as matching white-space
                           characters.)
\S                         Any character that is not a space, tab,
                           or newline.
=========================  ===========================================
:return:
"""
xmasRegex = re.compile(r'\d+\s\w+')
import re
mo = xmasRegex.findall(('12 drummers, 11 pipers, 10 lords, 9 ladies, '
                        '8 maids, 7 swans, 6 geese, 5 rings, '
                        '4 hummingbirds, 3 hens, 2 turtledoves, '
                        '1 partridge'))
print(mo)


['12 drummers', '11 pipers', '10 lords', '9 ladies', '8 maids', '7 swans', '6 geese', '5 rings', '4 hummingbirds', '3 hens', '2 turtledoves', '1 partridge']

In [13]:
"""
Exercise 13 - p. 159 - Making your own character classes.
:return:
"""
import re
# all vowels
vowelRegex = re.compile(r'[aeiouAEIOU]')
mo1 = vowelRegex.findall('Robocop eats baby food.  BABY FOOD.')
print(mo1)

# all non-vowels (exclude charactes)
consonantRegex = re.compile(r'[^aeiouAEIOU]')
mo2 = consonantRegex.findall('Robocop eats baby food.  BABY FOOD.')
print(mo2)


['o', 'o', 'o', 'e', 'a', 'a', 'o', 'o', 'A', 'O', 'O']
['R', 'b', 'c', 'p', ' ', 't', 's', ' ', 'b', 'b', 'y', ' ', 'f', 'd', '.', ' ', ' ', 'B', 'B', 'Y', ' ', 'F', 'D', '.']

In [14]:
"""
Exercise 14 - p. 159-160 Carat - beginning of line character.

:return:
"""
import re
# match when Hello at beginning of string
beginsWithHello = re.compile(r'^Hello')
mo1 = beginsWithHello.search('Hello world!')
print(mo1.group())

# no match when Hello is not at the beginning
mo2 = beginsWithHello.search('He said "Hello"')
print(mo2 == None)


Hello
True

In [15]:
"""
Exercise 15 - p. 160 - Dollar Sign - End of line character.

:return:
"""
import re
endsWithNumber = re.compile(r']d$')
mo1 = endsWithNumber.search('Your number is 42')
print(mo1)
mo2 = endsWithNumber.search('Your number is forty two')
print(mo2 == None)


None
True

In [16]:
"""
Exercise 16 - p. 160 - Match whole string.

:return:
"""
import re
wholeStringIsNum = re.compile(r'^\d+$')
mo1 = wholeStringIsNum.search('1234567890')
print(mo1.group())
mo2 = wholeStringIsNum.search('12345xyz67890')
print(mo2 == None)
mo3 = wholeStringIsNum.search('12345  67890')
print(mo3 == None)


1234567890
True
True

In [17]:
"""
Exercise 17 - p. 160-161 - Period - Wildcard character.

:return:
"""
import re
atRegex = re.compile(r'.at')
mo = atRegex.findall('The cat in the hat sat flat on the mat.')
print(mo)


['cat', 'hat', 'sat', 'lat', 'mat']

In [18]:
"""
Exercise 18 - p. 161 - Match everything with dot star.

:return:
"""
import re
nameRegex = re.compile(r'First Name: (.*) Last Name: (.*)')
mo = nameRegex.search('First Name: Travis Last Name: Risner')
print(mo.group(1))
print(mo.group(2))


Travis
Risner

In [19]:
"""
Exercise 19 - p. 161 - Greedy vs. nongreedy matching.
:return:
"""
import re
phrase = '<To serve man> for dinner.>'

# Greedy match
greedyRegex = re.compile(r'<.*>')
mo1 = greedyRegex.search(phrase)
print(mo1.group())

# Nongreedy match
nongreedyRegex = re.compile(r'<.*?>')
mo2 = nongreedyRegex.search(phrase)
print(mo2.group())


<To serve man> for dinner.>
<To serve man>

In [20]:
"""
Exercise 20 - p. 164 - Verbose mode for complex regexes.
:return:
"""
import re
phrase = '123-4567 or 123.456.7890 or (999) 456-7890 or ' \
         '(987)654-3210 or 888 456 7890 ext 12345'
nonVerboseRegex = re.compile(r'((\d{3}|\(\d{3}\))?(\s|-|\.)?\d{3}'
                             r'(\s|-|\.)\d{4}(\s*(ext|x|ext.)'
                             r'\s*\d{2,5})?)')
mo1 = nonVerboseRegex.findall(phrase)
print(mo1)
verboseRegex = re.compile(r'''
    (             # start group to capture the phone number
    (             #  start of optional area code choices
    \d{3}         #   bare three digits
    |             #   or
    \(\d{3}\)     #   three digits enclosed in parentheses
    )?            #  end of optional area code choices
    (             #  start of optional separator
    \s|-|\.       #   separator can be whitespace, dash or period
    )?            #  end of optional separator
    \d{3}         #  exchange number (required)
    (\s|-|\.)     #  same separator but required this time
    \d{4}         #  final digits (required)
    (             #  start of optional extension
    \s*           #   zero or more characters of white space
    (             #   start of extention indicator
    ext|x|ext.    #    extention can be indicated by "ext", "x", or
                  #      "ext" followed by any character
    )             #   end of extension indicator
    \s*           #   zero or more characters of white space
    \d{2,5}       #   two to five digits of extension number
    )?            #  end of optional estension
    )             # end phone number capture group
    ''', re.VERBOSE)
mo2 = verboseRegex.findall(phrase)
print(mo2)


[('123-4567', '', '', '-', '', ''), ('123.456.7890', '123', '.', '.', '', ''), ('(999) 456-7890', '(999)', ' ', '-', '', ''), ('(987)654-3210', '(987)', '', '-', '', ''), ('888 456 7890 ext 12345', '888', ' ', ' ', ' ext 12345', 'ext')]
[('123-4567', '', '', '-', '', ''), ('123.456.7890', '123', '.', '.', '', ''), ('(999) 456-7890', '(999)', ' ', '-', '', ''), ('(987)654-3210', '(987)', '', '-', '', ''), ('888 456 7890 ext 12345', '888', ' ', ' ', ' ext 12345', 'ext')]

In [ ]: