If you've heard about regular expressions before, you probably know how powerful they are; if you haven't, prepare to be amazed.
You should note, however, that mastering regular expressions may be a bit tricky at first. Okay, very tricky actually. (Beginning Python: From Novice to Professional)
In [ ]:
import re
# The same five names twice: capitalised on the first line, mostly
# lower-case on the second ("Karen" keeps its capital on both lines).
nameString = """Wendy, Brian, Karen, Charlene, Jeff.
wendy, brian, Karen, charlene, jeff"""

# A pattern made only of ordinary characters matches exactly that text,
# letter case included.
r1 = re.compile(r"Brian")
exact_hits = r1.findall(nameString)
print(exact_hits)
In [ ]:
# Default compilation is case-sensitive.
r1 = re.compile(r"Brian")
# Passing re.IGNORECASE makes the match ignore letter case.
r2 = re.compile(r"Brian", re.IGNORECASE)
r3 = re.compile(r"Brian", re.I)  # alias for IGNORECASE

# Run the three patterns over the same text, in order, to compare results.
for name_pattern in (r1, r2, r3):
    print(name_pattern.findall(nameString))
Square brackets are used to specify a "character class, which is a set of characters that you wish to match."
[abc] [a-c] [a-z] [a-zA-Z] [a-zA-Z0-9]
In [ ]:
# Lower-case letters, upper-case letters and digits, split across two lines.
test = """abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN
OPQRSTUVWXYZ0123456789"""

# [abc] matches any single one of the characters listed in the brackets.
r4 = re.compile(r"[abc]")
listed_chars = r4.findall(test)
print(listed_chars)
In [ ]:
# A dash inside the brackets gives a range: every character from a to c.
r5 = re.compile(r"[a-c]")
range_hits = r5.findall(test)
print(range_hits)
In [ ]:
# [a-z] matches any single lower-case ASCII letter.
r6 = re.compile(r"[a-z]")
lowercase_hits = r6.findall(test)
print(lowercase_hits)
In [ ]:
# Several ranges can share one class: lower-case, upper-case, or digit.
r7 = re.compile(r"[a-zA-Z0-9]")
alnum_hits = r7.findall(test)
print(alnum_hits)
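The number of times a character (or character class) may repeat is specified with quantifier metacharacters: * (zero or more), + (one or more), ? (zero or one), and {m,n} (at least m and at most n).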
QUESTION: How would you express *, +, and ? using the {m,n} syntax?
In [ ]:
# a[bcd]*b : an "a", then zero or more of b/c/d, then a "b" (case ignored).
r8 = re.compile(r"a[bcd]*b", re.I)
# a{1,3}b : between one and three "a" characters followed by a "b".
r9 = re.compile(r"a{1,3}b")

# Both patterns against the alphabet/digit text first ...
for quantified in (r8, r9):
    print(quantified.findall(test))
# ... then the {1,3} pattern against a string with runs of 2, 3 and 1 "a".
print(r9.findall("aabaaabab"))
In [ ]:
# A leading ^ inside the brackets negates the class: any character that is
# NOT an upper-case ASCII letter.
r10 = re.compile(r"[^A-Z]")
non_uppercase = r10.findall(test)
print(non_uppercase)
In [ ]:
# The backslash removes the special meaning of "*", so this matches a
# literal asterisk.
r11 = re.compile(r"\*")
function_note = """The range of the function f1* is [0,12]
and the domain of f1* is [0,144]"""
print(r11.findall(function_note))
In [ ]:
# \d matches a single decimal digit.
r12 = re.compile(r"\d")
digit_source = "My work address is 729 Arapeen Drive, Salt Lake City, UT, 84108."
print(r12.findall(digit_source))
In [ ]:
# \D is the complement of \d: any single character that is not a digit.
r13 = re.compile(r"\D")
non_digit_source = "My work address is 729 Arapeen Drive, Salt Lake City, UT, 84108."
print(r13.findall(non_digit_source))
In [ ]:
# Compare the \s shorthand with an explicitly spelled-out whitespace class
# on the same sentence.
whitespace_source = "My work address is 729 Arapeen Drive, Salt Lake City, UT, 84108."

r14 = re.compile(r"\s")
r15 = re.compile(r"[ \t\n\r\f\v]")

result14 = r14.findall(whitespace_source)
result15 = r15.findall(whitespace_source)

print(result14)
print(result15)
# True when both patterns found the same characters in the same order.
print(result14 == result15)
In [ ]:
# \S (non-whitespace) next to the negated explicit whitespace class.
non_space_source = "My work address is 729 Arapeen Drive, Salt Lake City, UT, 84108."

r16 = re.compile(r"\S")
r17 = re.compile(r"[^ \t\n\r\f\v]")

result16 = r16.findall(non_space_source)
result17 = r17.findall(non_space_source)

for found in (result16, result17):
    print(found)
# True when both patterns found the same characters in the same order.
print(result16 == result17)
In [ ]:
# \w ("word" character) next to the explicit class of letters, digits and
# underscore.
word_source = "My work address is 729 Arapeen Drive, Salt Lake City, UT, 84108."

r18 = re.compile(r"\w")
r19 = re.compile(r"[a-zA-Z0-9_]")

result18 = r18.findall(word_source)
result19 = r19.findall(word_source)

for found in (result18, result19):
    print(found)
# True when both patterns found the same characters in the same order.
print(result18 == result19)
In [ ]:
# \W (non-word character) next to the negated explicit class.
# NOTE: this cell reuses the names r18/r19/result18/result19.
non_word_source = "My work address is 729 Arapeen Drive, Salt Lake City, UT, 84108."

r18 = re.compile(r"\W")
r19 = re.compile(r"[^a-zA-Z0-9_]")

result18 = r18.findall(non_word_source)
result19 = r19.findall(non_word_source)

for found in (result18, result19):
    print(found)
print(result18 == result19)
# Punctuation is not a word character, so the commas show up in the result.
comma_found = ',' in result18
print(comma_found)
In [ ]:
address = "My work address is 729 Arapeen Drive, Salt Lake City, UT, 84108."

# {5,5} means "at least five and at most five", i.e. exactly five digits.
# The two patterns differ only in how a digit is written: \d vs [0-9].
findZipcode = re.compile(r"\d{5,5}")
findZipcode2 = re.compile(r"[0-9]{5,5}")

for zip_pattern in (findZipcode, findZipcode2):
    print(zip_pattern.findall(address))
In [ ]:
# Read the whole contact page into memory. The `with` block closes the file
# handle as soon as the text has been read, instead of leaving it open.
with open("../Resources/contact.html", "r") as contact_file:
    txt = contact_file.read()

# Two alternatives joined by "|": ddd-dddd, or ddd-ddd-dddd.
findPhone = re.compile(r"""[0-9]{3,3}-\d{4,4}|[0-9]{3,3}-[0-9]{3,3}-\d{4,4}""")
print(findPhone.findall(txt))
In [ ]:
# "Firstname Lastname": two words, each one capital letter followed by one
# or more lower-case letters, separated by whitespace.
findName1 = re.compile(r"[A-Z][a-z]+\s+[A-Z][a-z]+")
# Quick check: findName1.findall("Brian Chapman, Wendy Chapman, Jeremiah Chapman")
names1 = findName1.findall(txt)
# Only show the first 80 matches.
first_eighty = names1[:80]
print(first_eighty)
In [ ]:
# "Lastname, Firstname": a capitalised word, a comma, whitespace, then
# another capitalised word. "*" lets the lower-case part be empty.
findName2 = re.compile(r"[A-Z][a-z]*,\s+[A-Z][a-z]*")
names2 = findName2.findall(txt)
# Only show the first 20 matches.
first_twenty = names2[:20]
print(first_twenty)
In [ ]:
# Shared sample text for the pattern demonstrations in the cells that follow:
# names in several forms, numbers, and python/perl in assorted spellings.
testString = """Brian has a nephew named Ben. Br. Chapman died yesterday. Brian Chapman Brian E. Chapman Brian Earl Chapman Wendy Webber Chapman Clare 1234 4321.1234
python python.org http://python.org www.python.org jython zython Brad Bob cpython brian http://www.python.org perl Perl PERL"""
In [ ]:
# Zero or more ASCII letters followed by the literal text "ython".
rEx1 = re.compile(r"""[a-zA-Z]*ython""")
# print() call instead of the Python 2 print statement, which is a
# SyntaxError under Python 3.
print(rEx1.findall(testString))
In [ ]:
# Case-insensitive search for "Brian".
rEx2 = re.compile(r"""Brian""", re.I)
# print() call instead of the Python 2 print statement, which is a
# SyntaxError under Python 3.
print(rEx2.findall(testString))
In [ ]:
# "|" separates alternatives: a match is any one of the four words,
# compared case-insensitively.
rEx3 = re.compile(r"""jython|python|ziggy|zoom""", re.I)  # this illustrates an OR
# print() call instead of the Python 2 print statement, which is a
# SyntaxError under Python 3.
print(rEx3.findall(testString))
In [ ]:
# The same alternation compiled twice: case-sensitive, then case-insensitive.
rEx4 = re.compile(r"""python|perl""")  # this illustrates an OR
rEx4b = re.compile(r"""python|perl""", re.IGNORECASE)  # this illustrates an OR
# print() calls instead of the Python 2 print statement, which is a
# SyntaxError under Python 3.
print(rEx4.findall(testString))
print(rEx4b.findall(testString))
In [ ]:
# Pattern elements written one after another must all match in sequence:
# a capital "B" and then zero or more lower-case letters.
rEx5 = re.compile(r"""B[a-z]*""")  # this illustrates an AND
# print() call instead of the Python 2 print statement, which is a
# SyntaxError under Python 3.
print(rEx5.findall(testString))
In [ ]: