In [ ]:
"""
Exercise 1, p. 151 - Using \d to find digits.
"""
import re
PhoneNumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
mo = PhoneNumRegex.search('My phone number is 415-555-1212')
print('Phone number found: ' + mo.group())
In [ ]:
In [ ]:
"""
Exercise 2 - p. 152 - Grouping with parentheses.
"""
import re
PhoneNumRegex = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)')
mo = PhoneNumRegex.search('My phone number is 415-555-1212')
print(mo.group(1))
print(mo.group(2))
print(mo.group(0))
print(mo.groups())
areaCode, mainNumber = mo.groups()
print (areaCode)
print (mainNumber)
In [ ]:
In [ ]:
"""
Exercise 3 - p 153 - Escaping parentheses.
"""
import re
PhoneNumRegex = re.compile(r'(\(\d\d\d\)) (\d\d\d-\d\d\d\d)')
mo = PhoneNumRegex.search('My phone number is (415) 555-1212')
print(mo.group(1))
print(mo.group(2))
In [ ]:
In [ ]:
"""
Exercise 4 - p. 153 - Matching multiple groups with the pipe.
"""
import re
heroRegex = re.compile(r'Batman|Tina Fey')
mo1 = heroRegex.search('Batman and Tina Fey.')
print(mo1.group())
mo2 = heroRegex.search('Tina Fey and Batman.')
print(mo2.group())
In [ ]:
In [ ]:
"""
Exercise 5 - p. 154 - The pipe and grouping.
"""
import re
batRegex = re.compile(r'Bat(man|mobile|copter|bat)')
mo = batRegex.search('Batmobile lost a wheel')
print(mo.group())
print(mo.group(1))
In [ ]:
In [ ]:
"""
Exercise 6 - p. 154 - Optional matching with the question mark.
"""
import re
batRegex = re.compile(r'Bat(wo)?man')
mo1 = batRegex.search('The Adventures of Batman')
print(mo1.group())
mo2 = batRegex.search('The Adventures of Batwoman')
print(mo2.group())
PhoneNumRegex = re.compile(r'(\d\d\d-)?\d\d\d-\d\d\d\d')
mo3 = PhoneNumRegex.search('My phone number is 415-555-1212')
print(mo3.group())
mo4 = PhoneNumRegex.search('My phone number is 555-1212')
print(mo4.group())
In [ ]:
In [ ]:
"""
Exercise 7 - p. 155 - Matching zero or more with the star.
"""
import re
batRegex = re.compile(r'Bat(wo)*man')
mo1 = batRegex.search('The Adventures of Batman')
print(mo1.group())
mo2 = batRegex.search('The Adventures of Batwoman')
print(mo2.group())
mo3 = batRegex.search('The Adventures of Batwowowowowoman')
print(mo3.group())
In [ ]:
In [ ]:
"""
Exercise 8 - p. 155 - Matching one or more with the plus.
"""
import re
batRegex = re.compile(r'Bat(wo)+man')
mo1 = batRegex.search('The Adventures of Batwoman')
print(mo1.group())
mo2 = batRegex.search('The Adventures of Batwowowowowoman')
print(mo2.group())
mo3 = batRegex.search('The Adventures of Batman')
print(mo3 == None)
In [ ]:
In [ ]:
"""
Exercise 9 - p. 156 - Matching specific repetitions with curly
brackets.
"""
import re
haRegex = re.compile(r'(Ha){3}')
mo1 = haRegex.search('HaHaHa')
print(mo1.group())
mo2 = haRegex.search('Ha')
print(mo2 == None)
In [ ]:
In [ ]:
"""
Exercise 10 - p. 156-157 - Greedy and nongreedy matching
brackets.
"""
import re
greedyhaRegex = re.compile(r'(Ha){3,5}')
mo1 = greedyhaRegex.search('HaHaHaHaHa')
print(mo1.group())
nonGreedyhaRegex = re.compile(r'(Ha){3,5}?')
mo2 = nonGreedyhaRegex.search('HaHaHaHaHa')
print(mo2.group())
In [ ]:
In [ ]:
"""
Exercise 11 - p. 157 - Findall method.
"""
import re
# search results
PhoneNumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
mo1 = PhoneNumRegex.search('Cell: 415-555-9999 Work: 415-555-0000')
print(mo1.group())
# findall results (no groups defined)
PhoneNumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d') # has no groups
mo2 = PhoneNumRegex.findall('Cell: 415-555-9999 Work: 415-555-0000')
print(mo2) # returns list of strings with no groups
# findall results (groups defined)
PhoneNumRegex = re.compile(r'(\d\d\d)-(\d\d\d)-(\d\d\d\d)') # groups
mo3 = PhoneNumRegex.findall('Cell: 415-555-9999 Work: 415-555-0000')
print(mo3) # returns list of tuples when groups are defined
In [ ]:
In [ ]:
"""
Exercise 12 - p. 158 - Character classes - predefined shorthand.
========================= ===========================================
Shorthand character class Represents
========================= ===========================================
\d Any numeric digit from 0 to 9.
\0 Any character that is not a numeric digit
from 0 to 9
\w Any letter, numeric digit, or the underscore
character. (Think of this as matching
"word" characters.)
\W Any character that is not a letter,
numeric digit, or the underscore character.
\s Any space, tab, or newline character. (
Think of this as matching white-space
characters.)
\S Any character that is not a space, tab,
or newline.
========================= ===========================================
:return:
"""
xmasRegex = re.compile(r'\d+\s\w+')
import re
mo = xmasRegex.findall(('12 drummers, 11 pipers, 10 lords, 9 ladies, '
'8 maids, 7 swans, 6 geese, 5 rings, '
'4 hummingbirds, 3 hens, 2 turtledoves, '
'1 partridge'))
print(mo)
In [ ]:
In [ ]:
"""
Exercise 13 - p. 159 - Making your own character classes.
:return:
"""
import re
# all vowels
vowelRegex = re.compile(r'[aeiouAEIOU]')
mo1 = vowelRegex.findall('Robocop eats baby food. BABY FOOD.')
print(mo1)
# all non-vowels (exclude charactes)
consonantRegex = re.compile(r'[^aeiouAEIOU]')
mo2 = consonantRegex.findall('Robocop eats baby food. BABY FOOD.')
print(mo2)
In [ ]:
In [ ]:
"""
Exercise 14 - p. 159-160 Carat - beginning of line character.
"""
import re
# match when Hello at beginning of string
beginsWithHello = re.compile(r'^Hello')
mo1 = beginsWithHello.search('Hello world!')
print(mo1.group())
# no match when Hello is not at the beginning
mo2 = beginsWithHello.search('He said "Hello"')
print(mo2 == None)
In [ ]:
In [ ]:
"""
Exercise 15 - p. 160 - Dollar Sign - End of line character.
"""
import re
endsWithNumber = re.compile(r'\d$')
mo1 = endsWithNumber.search('Your number is 42')
print(mo1.group())
mo2 = endsWithNumber.search('Your number is forty two')
print(mo2 == None)
In [ ]:
In [ ]:
"""
Exercise 16 - p. 160 - Match whole string.
"""
import re
wholeStringIsNum = re.compile(r'^\d+$')
mo1 = wholeStringIsNum.search('1234567890')
print(mo1.group())
mo2 = wholeStringIsNum.search('12345xyz67890')
print(mo2 == None)
mo3 = wholeStringIsNum.search('12345 67890')
print(mo3 == None)
In [ ]:
In [ ]:
"""
Exercise 17 - p. 160-161 - Period - Wildcard character.
"""
import re
atRegex = re.compile(r'.at')
mo = atRegex.findall('The cat in the hat sat flat on the mat.')
print(mo)
In [ ]:
In [ ]:
"""
Exercise 18 - p. 161 - Match everything with dot star.
"""
import re
nameRegex = re.compile(r'First Name: (.*) Last Name: (.*)')
mo = nameRegex.search('First Name: Travis Last Name: Risner')
print(mo.group(1))
print(mo.group(2))
In [ ]:
In [ ]:
"""
Exercise 19 - p. 161 - Greedy vs. nongreedy matching.
:return:
"""
import re
phrase = '<To serve man> for dinner.>'
# Greedy match
greedyRegex = re.compile(r'<.*>')
mo1 = greedyRegex.search(phrase)
print(mo1.group())
# Nongreedy match
nongreedyRegex = re.compile(r'<.*?>')
mo2 = nongreedyRegex.search(phrase)
print(mo2.group())
In [ ]:
In [ ]:
"""
Exercise 20 - p. 164 - Verbose mode for complex regexes.
:return:
"""
import re
phrase = '123-4567 or 123.456.7890 or (999) 456-7890 or ' \
'(987)654-3210 or 888 456 7890 ext 12345'
nonVerboseRegex = re.compile(r'((\d{3}|\(\d{3}\))?(\s|-|\.)?\d{3}'
r'(\s|-|\.)\d{4}(\s*(ext|x|ext.)'
r'\s*\d{2,5})?)')
mo1 = nonVerboseRegex.findall(phrase)
print(mo1)
verboseRegex = re.compile(r'''
( # start group to capture the phone number
( # start of optional area code choices
\d{3} # bare three digits
| # or
\( # literal left parenthesis
\d{3} # three digit area code surrounded by parenthesis
\) # literal right parenthesis
)? # end of optional area code choices
( # start of optional separator
\s|-|\. # separator can be whitespace, dash or period
)? # end of optional separator
\d{3} # exchange number (required)
(\s|-|\.) # same separator but required this time
\d{4} # final digits (required)
( # start of optional extension
\s* # zero or more characters of white space
( # start of extention indicator
ext|x|ext. # extention can be indicated by "ext", "x", or
# "ext" followed by any character
) # end of extension indicator
\s* # zero or more characters of white space
\d{2,5} # two to five digits of extension number
)? # end of optional estension
) # end phone number capture group
''', re.VERBOSE)
mo2 = verboseRegex.findall(phrase)
print(mo2)
In [ ]: