In [3]:
import re
In [1]:
# Sample message: free text containing three phone numbers in NNN-NNN-NNNN form.
sample = (
    "This is roger. my contact no is "
    "415-555-4242,& 415-555-4243, 416-555-4244"
)
In [12]:
# Pattern for a phone number written as three digits, three digits, four digits,
# separated by hyphens (e.g. 415-555-4242).
phoneNumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
# search() scans the string and returns a Match object for the FIRST match only
# (or None when nothing matches).
mob = phoneNumRegex.search(sample)
# group() with no argument returns the whole matched text.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(mob.group())
In [15]:
# findall() returns ALL non-overlapping matches as a list of strings
# (the pattern has no groups, so each item is the full matched text).
mob = phoneNumRegex.findall(sample)
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(mob)
Let's say we want to categorise our result: each phone number above consists of an area code and the actual number. Using groups, we can extract each part and use it easily.
In [24]:
# Same phone pattern, now split into two capturing groups:
#   group 1 -> area code (first three digits)
#   group 2 -> the rest of the number (NNN-NNNN)
phoneNumRegex = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)')
mob = phoneNumRegex.findall(sample)
# When the pattern contains groups, findall() returns a list of tuples;
# each tuple holds the two captured groups of one match.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(mob)
In [26]:
# search() returns only the first match; its groups are read by index.
mo = phoneNumRegex.search(sample)
# Single-argument print(...) with explicit formatting is used instead of the
# Python 2 "print a, b" statement: it is valid on Python 2 and 3 and produces
# the same text (label, one separating space, value) on both.
print('%s %s' % ('area code: ', mo.group(1)))
print('%s %s' % ('phone no :', mo.group(2)))
The regular expression `r'Batman|Tina Fey'` will match either 'Batman' or 'Tina Fey'. When both Batman and Tina Fey occur in the searched string, the first occurrence of matching text will be returned as the Match object.
In [27]:
# The pipe (|) means alternation: match either 'Batman' or 'Tina Fey'.
heroRegex = re.compile(r'Batman|Tina Fey')
# 'Batman' appears first in this string, so search() returns it.
mo1 = heroRegex.search('Batman and Tina Fey.')
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(mo1.group())
In [29]:
# Here 'Tina Fey' occurs first in the string, so it is the match search() returns.
mo2 = heroRegex.search('Tina Fey and Batman.')
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(mo2.group())
In [31]:
# findall() returns every occurrence of either alternative, in order of appearance.
mo3 = heroRegex.findall('Tina Fey and Batman.')
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(mo3)
In [34]:
# Alternation inside a group: the common prefix 'Bat' is written once,
# followed by exactly one of the listed suffixes.
batRegex = re.compile(r'Bat(man|mobile|copter|bat)')
mo = batRegex.search('Batmobile lost a wheel')
# group() returns the full matched text (prefix plus the chosen suffix).
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(mo.group())
In [35]:
# Same pattern, different input: the 'bat' alternative is the one that matches.
mo = batRegex.search('Batbat lost a wheel')
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(mo.group())
In [36]:
# The ? makes the preceding group optional: "wo" may appear zero or one time.
batRegex = re.compile(r'Bat(wo)?man')
mo1 = batRegex.search('The Adventures of Batman')
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(mo1.group())
In [37]:
# With the optional group present in the text, the match includes "wo".
mo1 = batRegex.search('The Adventures of Batwoman')
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(mo1.group())
The `*` (called the star or asterisk) means “match zero or more”—the group that precedes the star can occur any number of times in the text.
In [38]:
# The * lets the preceding group repeat zero or more times.
batRegex = re.compile(r'Bat(wo)*man')
# Zero repetitions of "wo": plain 'Batman' still matches.
mo1 = batRegex.search('The Adventures of Batman')
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(mo1.group())
In [39]:
# One repetition of the "wo" group.
mo1 = batRegex.search('The Adventures of Batwoman')
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(mo1.group())
In [40]:
# Several repetitions of the "wo" group in a row.
mo1 = batRegex.search('The Adventures of Batwowowoman')
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(mo1.group())
The regex `(Ha){3}` will match the string 'HaHaHa'. <br> The regex `(Ha){3,5}` will match 'HaHaHa', 'HaHaHaHa', and 'HaHaHaHaHa'.
In [41]:
# {3} requires the preceding group to repeat exactly three times.
haRegex = re.compile(r'(Ha){3}')
mo1 = haRegex.search('HaHaHa')
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(mo1.group())
In [42]:
# {3,5} allows between three and five repetitions of the preceding group.
# Matching is greedy, so as many repetitions as are available (up to five) are taken.
haRegex = re.compile(r'(Ha){3,5}')
mo1 = haRegex.search('HaHaHaHa')
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(mo1.group())
In [ ]: