Notes for the regex talk @ PyCon 2016 by Trey Hunner

The code below was tested with Python 2.7.


In [2]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re

# Sample text reused by the following cells.
greeting = "Text whispering..."

# Plain substring test, no regex needed. The single-argument print(...) form
# produces the same output on Python 2.7 and Python 3.
print("x" in greeting)


True

In [4]:
# Run the search once and reuse the resulting match object for both prints.
rematch = re.search(r"per", greeting)

# The match object itself (printed as its repr) ...
print(rematch)

# ... and the text it matched.
print(rematch.group())


<_sre.SRE_Match object at 0x02F52170>
per

In [5]:
# Plain-Python check: does the greeting contain at least one vowel?
print(not set('aeiou').isdisjoint(greeting))


True

In [9]:
print(greeting)

# Search the greeting text itself (the variable, not the literal word
# "greeting") for the first character in the vowel class.
char_class = re.search(r'[aeiou]', greeting)
print(char_class.group())


Text whispering...
e

In [16]:
continent = "asia"
print("Rising continent : %s" % continent)

# Match a string that starts with "a", continues with any number (zero or
# more) of characters from the class a-z, and ends with "a".
m = re.search(r"^a[a-z]*a$", continent)
print(m.group())


Rising continent : asia
asia

In [23]:
samp = "a"

# ^ and $ anchor the pattern to both ends, so the string has to be the single
# character "a" (apart from a trailing newline, which $ tolerates).
m = re.compile(r"^a$").search(samp)
print(m.group())


a

In [32]:
## . (dot/period) stands for any single character except a newline

rhyme, stage = "baba", "baby"

# "a", exactly two more characters, then end of string
m_rhyme = re.compile(r'a.{2}$').search(rhyme)

# "a", any run of characters, then a closing "a" at end of string
mstar = re.compile(r'a.*a$').search(rhyme)

# "a", one character, then one or more word characters up to the end
m_stage = re.compile(r'a.\w+$').search(stage)

# "a" followed by everything that remains
hum = re.compile(r'a.*$').search(stage)

# One matched substring per line, in the order the searches were made.
print("\n".join(found.group() for found in (m_rhyme, mstar, m_stage, hum)))


aba
aba
aby
aby
(i) m_rhyme and mstar achieve the same end result: both match aba

=====================

(ii) m_stage and hum repeat the same comparison on "baby": both match aby

In [38]:
# Quantifiers say how often the preceding item may repeat:
#   *  zero or more times
#   +  one or more times
#   ?  zero or one time
# The item being repeated can be a literal or . (any single character).

doctor, hrmgr = "srinivasan", "srivathsan"

# "v", an optional "a", "s", one or more "a", then "n"
drx = re.compile(r'va?sa+n').search(doctor)
print(drx.group())

# "va", any run of characters, "s", an optional "a", then "n"
hrx = re.compile(r'va.*sa?n').search(hrmgr)
print(hrx.group())


vasan
vathsan

In [53]:
# Word characters: for ASCII text, \w matches the same characters as the
# explicit class [a-zA-Z0-9_]

sweetie = "Letters_to_my_daughter"

# Explicit class for the first character only; .* then takes the rest of the line.
s = re.compile(r'^[a-zA-Z0-9_].*$').search(sweetie)
print(s.group())

# Shorthand class, repeated zero or more times.
sidiom = re.compile(r'\w*').search(sweetie)
print(sidiom.group())


Letters_to_my_daughter
Letters_to_my_daughter

In [62]:
# Slice the matched substring back out of the source text using the match span

line = "You never walk alone"

# A whole word that begins with "w" and ends with "k".
mline = re.compile(r'\bw[a-z]+k\b').search(line)
print(line[slice(*mline.span())])

newline = "keep walking"

# A word beginning with "w" that runs to the end of the string.
myline = re.compile(r'\bw[a-z]*$').search(newline)
print(newline[slice(*myline.span())])


walk
walking

In [26]:
## Resumed on Wed/22/Jun

# Without the r prefix, Python processes backslash escapes in a string literal
# before the regex engine ever sees it. '\b' is then a single backspace
# character (0x08), not the two-character regex word-boundary token that
# r'\b' would hand to the re module.
#
# The second print below therefore emits a backspace character, which shows
# up as blank output.

print("hello-bebe")
print('\b')


hello-bebe


In [35]:
# Bounded repetition: {n}, {m,n}, {m,} and {,n}

import re

fict, comic = "frankenstein the devil", "shaun"
event, phone = "172936258888", "hello"

# {5,7}: between five and seven word characters (greedy, so it takes seven)
mfict = re.compile(r'\w{5,7}').search(fict)
print(mfict.group())

# {4}: exactly four word characters
mcomic = re.compile(r'\w{4}').search(comic)
print(comic[slice(*mcomic.span())])

# {6,}: six or more digits
mevent = re.compile(r'\d{6,}').search(event)
print(mevent.group())

# {,3}: at most three word characters
mphone = re.compile(r'\w{,3}').search(phone)
print(mphone.group())


franken
shau
172936258888
hel

In [ ]: