notebook.community

Edit and run



In [ ]:

    
"""
Exercise 1, p. 151 - Using \d to find digits.
"""
import re
PhoneNumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
mo = PhoneNumRegex.search('My phone number is 415-555-1212')
print('Phone number found: ' + mo.group())



In [ ]:



In [ ]:

    
"""
Exercise 2 - p. 152 - Grouping with parentheses.
"""
import re
PhoneNumRegex = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)')
mo = PhoneNumRegex.search('My phone number is 415-555-1212')
print(mo.group(1))
print(mo.group(2))
print(mo.group(0))
print(mo.groups())
areaCode, mainNumber = mo.groups()
print (areaCode)
print (mainNumber)



In [ ]:



In [ ]:

    
"""
Exercise 3 - p 153 - Escaping parentheses.
"""
import re
PhoneNumRegex = re.compile(r'(\(\d\d\d\)) (\d\d\d-\d\d\d\d)')
mo = PhoneNumRegex.search('My phone number is (415) 555-1212')
print(mo.group(1))
print(mo.group(2))



In [ ]:



In [ ]:

    
"""
Exercise 4 - p. 153 - Matching multiple groups with the pipe.
"""
import re
heroRegex = re.compile(r'Batman|Tina Fey')
mo1 = heroRegex.search('Batman and Tina Fey.')
print(mo1.group())
mo2 = heroRegex.search('Tina Fey and Batman.')
print(mo2.group())



In [ ]:



In [ ]:

    
"""
Exercise 5 - p. 154 - The pipe and grouping.
"""
import re
batRegex = re.compile(r'Bat(man|mobile|copter|bat)')
mo = batRegex.search('Batmobile lost a wheel')
print(mo.group())
print(mo.group(1))



In [ ]:



In [ ]:

    
"""
Exercise 6 - p. 154 - Optional matching with the question mark.
"""
import re
batRegex = re.compile(r'Bat(wo)?man')
mo1 = batRegex.search('The Adventures of Batman')
print(mo1.group())
mo2 = batRegex.search('The Adventures of Batwoman')
print(mo2.group())
PhoneNumRegex = re.compile(r'(\d\d\d-)?\d\d\d-\d\d\d\d')
mo3 = PhoneNumRegex.search('My phone number is 415-555-1212')
print(mo3.group())
mo4 = PhoneNumRegex.search('My phone number is 555-1212')
print(mo4.group())



In [ ]:



In [ ]:

    
"""
Exercise 7 - p. 155 - Matching zero or more with the star.
"""
import re
batRegex = re.compile(r'Bat(wo)*man')
mo1 = batRegex.search('The Adventures of Batman')
print(mo1.group())
mo2 = batRegex.search('The Adventures of Batwoman')
print(mo2.group())
mo3 = batRegex.search('The Adventures of Batwowowowowoman')
print(mo3.group())



In [ ]:



In [ ]:

    
"""
Exercise 8 - p. 155 - Matching one or more with the plus.
"""
import re
batRegex = re.compile(r'Bat(wo)+man')
mo1 = batRegex.search('The Adventures of Batwoman')
print(mo1.group())
mo2 = batRegex.search('The Adventures of Batwowowowowoman')
print(mo2.group())
mo3 = batRegex.search('The Adventures of Batman')
print(mo3 == None)



In [ ]:



In [ ]:

    
"""
Exercise 9 - p. 156 - Matching specific repetitions with curly
brackets.
"""
import re
haRegex = re.compile(r'(Ha){3}')
mo1 = haRegex.search('HaHaHa')
print(mo1.group())
mo2 = haRegex.search('Ha')
print(mo2 == None)



In [ ]:



In [ ]:

    
"""
Exercise 10 - p. 156-157 - Greedy and nongreedy matching
brackets.
"""
import re
greedyhaRegex = re.compile(r'(Ha){3,5}')
mo1 = greedyhaRegex.search('HaHaHaHaHa')
print(mo1.group())

nonGreedyhaRegex = re.compile(r'(Ha){3,5}?')
mo2 = nonGreedyhaRegex.search('HaHaHaHaHa')
print(mo2.group())



In [ ]:



In [ ]:

    
"""
Exercise 11 - p. 157 - Findall method.
"""
import re
# search results
PhoneNumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
mo1 = PhoneNumRegex.search('Cell: 415-555-9999 Work: 415-555-0000')
print(mo1.group())

# findall results (no groups defined)
PhoneNumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d') # has no groups
mo2 = PhoneNumRegex.findall('Cell: 415-555-9999 Work: 415-555-0000')
print(mo2)      # returns list of strings with no groups

# findall results (groups defined)
PhoneNumRegex = re.compile(r'(\d\d\d)-(\d\d\d)-(\d\d\d\d)')  # groups
mo3 = PhoneNumRegex.findall('Cell: 415-555-9999 Work: 415-555-0000')
print(mo3)      # returns list of tuples when groups are defined



In [ ]:



In [ ]:

    
"""
Exercise 12 - p. 158 - Character classes - predefined shorthand.

=========================  ===========================================
Shorthand character class  Represents
=========================  ===========================================
\d                         Any numeric digit from 0 to 9.
\0                         Any character that is not a numeric digit
                           from 0 to 9
\w                         Any letter, numeric digit, or the underscore
                           character.  (Think of this as matching
                           "word" characters.)
\W                         Any character that is not a letter,
                           numeric digit, or the underscore character.
\s                         Any space, tab, or newline character.  (
                           Think of this as matching white-space
                           characters.)
\S                         Any character that is not a space, tab,
                           or newline.
=========================  ===========================================
:return:
"""
xmasRegex = re.compile(r'\d+\s\w+')
import re
mo = xmasRegex.findall(('12 drummers, 11 pipers, 10 lords, 9 ladies, '
                        '8 maids, 7 swans, 6 geese, 5 rings, '
                        '4 hummingbirds, 3 hens, 2 turtledoves, '
                        '1 partridge'))
print(mo)



In [ ]:



In [ ]:

    
"""
Exercise 13 - p. 159 - Making your own character classes.
:return:
"""
import re
# all vowels
vowelRegex = re.compile(r'[aeiouAEIOU]')
mo1 = vowelRegex.findall('Robocop eats baby food.  BABY FOOD.')
print(mo1)

# all non-vowels (exclude charactes)
consonantRegex = re.compile(r'[^aeiouAEIOU]')
mo2 = consonantRegex.findall('Robocop eats baby food.  BABY FOOD.')
print(mo2)



In [ ]:



In [ ]:

    
"""
Exercise 14 - p. 159-160 Carat - beginning of line character.
"""
import re
# match when Hello at beginning of string
beginsWithHello = re.compile(r'^Hello')
mo1 = beginsWithHello.search('Hello world!')
print(mo1.group())

# no match when Hello is not at the beginning
mo2 = beginsWithHello.search('He said "Hello"')
print(mo2 == None)



In [ ]:



In [ ]:

    
"""
Exercise 15 - p. 160 - Dollar Sign - End of line character.
"""
import re
endsWithNumber = re.compile(r'\d$')
mo1 = endsWithNumber.search('Your number is 42')
print(mo1.group())
mo2 = endsWithNumber.search('Your number is forty two')
print(mo2 == None)



In [ ]:



In [ ]:

    
"""
Exercise 16 - p. 160 - Match whole string.
"""
import re
wholeStringIsNum = re.compile(r'^\d+$')
mo1 = wholeStringIsNum.search('1234567890')
print(mo1.group())
mo2 = wholeStringIsNum.search('12345xyz67890')
print(mo2 == None)
mo3 = wholeStringIsNum.search('12345  67890')
print(mo3 == None)



In [ ]:



In [ ]:

    
"""
Exercise 17 - p. 160-161 - Period - Wildcard character.
"""
import re
atRegex = re.compile(r'.at')
mo = atRegex.findall('The cat in the hat sat flat on the mat.')
print(mo)



In [ ]:



In [ ]:

    
"""
Exercise 18 - p. 161 - Match everything with dot star.
"""
import re
nameRegex = re.compile(r'First Name: (.*) Last Name: (.*)')
mo = nameRegex.search('First Name: Travis Last Name: Risner')
print(mo.group(1))
print(mo.group(2))



In [ ]:



In [ ]:

    
"""
Exercise 19 - p. 161 - Greedy vs. nongreedy matching.
:return:
"""
import re
phrase = '<To serve man> for dinner.>'

# Greedy match
greedyRegex = re.compile(r'<.*>')
mo1 = greedyRegex.search(phrase)
print(mo1.group())

# Nongreedy match
nongreedyRegex = re.compile(r'<.*?>')
mo2 = nongreedyRegex.search(phrase)
print(mo2.group())



In [ ]:



In [ ]:

    
"""
Exercise 20 - p. 164 - Verbose mode for complex regexes.
:return:
"""
import re
phrase = '123-4567 or 123.456.7890 or (999) 456-7890 or ' \
         '(987)654-3210 or 888 456 7890 ext 12345'
nonVerboseRegex = re.compile(r'((\d{3}|\(\d{3}\))?(\s|-|\.)?\d{3}'
                             r'(\s|-|\.)\d{4}(\s*(ext|x|ext.)'
                             r'\s*\d{2,5})?)')
mo1 = nonVerboseRegex.findall(phrase)
print(mo1)
verboseRegex = re.compile(r'''
    (              # start group to capture the phone number
     (             #  start of optional area code choices
      \d{3}        #   bare three digits
      |            #   or
      \(           #   literal left parenthesis
      \d{3}        #   three digit area code surrounded by parenthesis
      \)           #   literal right parenthesis
     )?            #  end of optional area code choices
     (             #  start of optional separator
      \s|-|\.      #   separator can be whitespace, dash or period
     )?            #  end of optional separator
     \d{3}         #  exchange number (required)
     (\s|-|\.)     #  same separator but required this time
     \d{4}         #  final digits (required)
     (             #  start of optional extension
      \s*          #   zero or more characters of white space
      (            #   start of extention indicator
       ext|x|ext.  #    extention can be indicated by "ext", "x", or
                   #      "ext" followed by any character
      )            #   end of extension indicator
      \s*          #   zero or more characters of white space
      \d{2,5}      #   two to five digits of extension number
     )?            #  end of optional estension
    )              # end phone number capture group
    ''', re.VERBOSE)
mo2 = verboseRegex.findall(phrase)
print(mo2)



In [ ]: