In [ ]:
import re
import nose
# %timeit
In [ ]:
from __future__ import print_function
# Before writing the parser, collect samples of
# the interesting lines. For now just two of them:
# a line logging a sent message and a line logging its removal.
mail_sent = (
    'May 31 08:00:00 test-fe1 postfix/smtp[16669]: 7CD8E730020: '
    'to=<jon@doe.it>, relay=examplemx2.doe.it[222.33.44.555]:25, '
    'delay=0.8, delays=0.17/0.01/0.43/0.19, dsn=2.0.0, '
    'status=sent(250 ok: Message 2108406157 accepted)'
)
mail_delivered = 'May 31 08:00:00 test-fe1 postfix/smtp[16669]: 7CD8E730020: removed'
# Show the sample that the following cells take apart.
print("I'm going to parse the following line", mail_sent, sep="\n\n")
In [ ]:
def test_sent():
    """Check the hour and the recipient parsed from the ``mail_sent`` sample.

    ``parse_line`` must return exactly three values; the second one (the
    host) is unpacked but not checked here.
    """
    hour, _host, recipient = parse_line(mail_sent)
    assert hour == '08:00:00'
    assert recipient == 'jon@doe.it'
In [ ]:
# Play with mail_sent: split() without arguments breaks the line
# on whitespace and returns the list of fields.
mail_sent.split()
In [ ]:
# enumerate() pairs every field with its position.
# Remember that ipython stores the last returned value in `_`;
# in our case: _ = mail_sent.split()
# which is handy in interactive mode!
fields = _
counting = enumerate(fields)
print(*counting, sep="\n")
# The same output, written as an explicit loop:
# counting = enumerate(mail_sent.split())
# for it in counting:
#     print(it)
In [ ]:
# Now the fields can be picked one by one, by position...
hour = fields[2]
host = fields[3]
dest = fields[6]
print("Hour: {}, host: {}, dest: {}".format(hour, host, dest))
In [ ]:
# Sample log lines used by the tests below: a sent message and its removal.
test_str_1 = (
    'Nov 31 08:00:00 test-fe1 postfix/smtp[16669]: 7CD8E730020: '
    'to=<jon@doe.it>, relay=examplemx2.doe.it[222.33.44.555]:25, '
    'delay=0.8, delays=0.17/0.01/0.43/0.19, dsn=2.0.0, '
    'status=sent(250 ok: Message 2108406157 accepted)'
)
test_str_2 = (
    'Nov 31 08:00:00 test-fe1 postfix/smtp[16669]: '
    '7CD8E730020: removed'
)
def test_sent():
    """Check what ``parse_line`` returns for the "sent" sample.

    In this exercise the destination is expected as the raw log field,
    including the ``to=<...>`` wrapper and the trailing comma.
    """
    parsed = parse_line(test_str_1)
    hour, host, destination = parsed
    assert hour == '08:00:00'
    assert host == 'test-fe1'
    assert destination == 'to=<jon@doe.it>,'
def test_delivered():
    """Check what ``parse_line`` returns for the "removed" sample.

    That line carries no recipient, so the destination must be ``None``.
    """
    parsed = parse_line(test_str_2)
    hour, host, destination = parsed
    # Show the parsed destination before checking it.
    print(destination)
    assert hour == '08:00:00'
    assert host == 'test-fe1'
    assert destination is None
def parse_line(line):
    """Split a postfix log line into ``(hour, host, destination)``.

    The line is split on whitespace: the third field is the time and the
    fourth one is the host name. When the seventh field is a ``to=...``
    token it is returned untouched as the destination, e.g.
    ``'to=<jon@doe.it>,'``.

    :param line: one log line, such as the ``test_str_1`` sample
    :returns: a ``(hour, host, destination)`` tuple; ``destination`` is
        ``None`` when the line has no ``to=`` field (e.g. ``... removed``)
    :raises ValueError: if the line has fewer than four fields
    """
    # Hint from the exercise: "you can".split()
    fields = line.split()
    if len(fields) < 4:
        raise ValueError("not a postfix log line: {!r}".format(line))
    hour, host = fields[2], fields[3]
    destination = None
    # Only lines reporting a delivery carry the recipient in the 7th field;
    # other events (like "removed") put a plain word there.
    if len(fields) > 6 and fields[6].startswith('to='):
        destination = fields[6]
    return hour, host, destination
# Run the checks right away: any exception raised here means that
# parse_line does not yet satisfy the tests above.
test_sent()
test_delivered()
In [ ]:
# Python supports regular expressions via
import re
# We start showing a grep-reloaded function
def grep(expr, fpath):
    """Return the lines of the file ``fpath`` that match ``expr``.

    :param expr: regular expression, compiled with ``re.compile``
    :param fpath: path of the text file to scan
    :returns: the list of matching lines, in file order, exactly as read
        from the file
    """
    pattern = re.compile(expr)
    # A compiled pattern has two lookup methods:
    #  - match, which only matches at the beginning of the string
    #  - search, which looks for a match anywhere in the string
    assert (pattern.match and pattern.search)
    matching = []
    with open(fpath) as handle:
        for line in handle:
            if pattern.search(line):
                matching.append(line)
    return matching
In [ ]:
# The function seems to work as expected ;)
# The "^" anchor only accepts lines *starting* with "localhost".
assert not grep(r'^localhost', '/etc/hosts')
# And some more tests. The dots are escaped so that each one matches
# a literal "." instead of any character.
ret = grep(r'127\.0\.0\.1', '/etc/hosts')
assert ret, "ret should not be empty"
print(*ret)
In [ ]:
# Splitting with re.findall
from re import findall  # findall can be (mis)used for splitting too,
# e.g. to add the ":" separators to a mac address written without them
mac = "0024e8b43320"
# ...using a pattern that matches one pair of hex digits
re_hex = "[0-9a-fA-F]{2}"
mac_address = ':'.join(
    findall(re_hex, mac)
)
print("The mac address is ", mac_address)
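# Note: this is only a weak form of validation. findall keeps the pairs of
# chars in the 0-F range and silently skips everything else, so an invalid
# input yields fewer pairs instead of raising an error.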
In [ ]:
# Run the following cell many times.
# Do you always get the same results?
import timeit
test_all_regexps = ("..", "[a-fA-F0-9]{2}")
for re_s in test_all_regexps:
    # The pattern and the mac string are injected into the setup code
    # of the timed snippet.
    setup_code = "from re import findall;re_s='{}';mac='{}'".format(re_s, mac)
    elapsed = timeit.timeit(stmt="':'.join(findall(re_s, mac))",
                            setup=setup_code)
    print(elapsed)
In [ ]:
# We can even compare compiled vs inline regexp
import re
from time import sleep
for re_s in test_all_regexps:
    # The pattern is compiled in the setup code, so the timed statement
    # only calls findall on the compiled object and joins the result.
    setup_code = "from re import findall, compile;re_c=compile('{}');mac='{}'".format(re_s, mac)
    elapsed = timeit.timeit(stmt="':'.join(re_c.findall(mac))",
                            setup=setup_code)
    print(elapsed)
In [ ]:
# ...or simple
# Time a version without regular expressions: slice the string
# two characters at a time.
slicing_stmt = "':'.join([mac[i:i+2] for i in range(0,12,2)])"
slicing_setup = "from re import findall;mac='{}'".format(mac)
print(timeit.timeit(stmt=slicing_stmt, setup=slicing_setup))
In [ ]:
#
# Use this cell for Exercise II
#
# Sample log lines used by the tests below: a sent message and its removal.
test_str_1 = (
    'Nov 31 08:00:00 test-fe1 postfix/smtp[16669]: 7CD8E730020: '
    'to=<jon@doe.it>, relay=examplemx2.doe.it[222.33.44.555]:25, '
    'delay=0.8, delays=0.17/0.01/0.43/0.19, dsn=2.0.0, '
    'status=sent(250 ok: Message 2108406157 accepted)'
)
test_str_2 = (
    'Nov 31 08:00:00 test-fe1 postfix/smtp[16669]: '
    '7CD8E730020: removed'
)
def test_sent():
    """Check what ``parse_line`` returns for the "sent" sample.

    In this exercise the destination is expected as the bare address,
    without the ``to=<...>`` wrapper.
    """
    parsed = parse_line(test_str_1)
    hour, host, destination = parsed
    assert hour == '08:00:00'
    assert host == 'test-fe1'
    assert destination == 'jon@doe.it'
def test_delivered():
    """Check what ``parse_line`` returns for the "removed" sample.

    That line carries no recipient, so the destination must be ``None``.
    """
    parsed = parse_line(test_str_2)
    hour, host, destination = parsed
    assert hour == '08:00:00'
    assert host == 'test-fe1'
    assert destination is None
def parse_line(line):
    """Split a postfix log line into ``(hour, host, destination)``.

    The line is split on whitespace: the third field is the time and the
    fourth one is the host name. When the seventh field has the form
    ``to=<address>,`` the bare address is returned as the destination.

    :param line: one log line, such as the ``test_str_1`` sample
    :returns: a ``(hour, host, destination)`` tuple; ``destination`` is
        ``None`` when the line has no ``to=<...>`` field (e.g. ``... removed``)
    :raises ValueError: if the line has fewer than four fields
    """
    # Hint from the exercise: "you can".split()
    fields = line.split()
    if len(fields) < 4:
        raise ValueError("not a postfix log line: {!r}".format(line))
    hour, host = fields[2], fields[3]
    destination = None
    # Only lines reporting a delivery carry the recipient in the 7th field;
    # other events (like "removed") put a plain word there.
    if len(fields) > 6 and fields[6].startswith('to=<'):
        # 'to=<jon@doe.it>,' -> '<jon@doe.it>' once the prefix and the
        # trailing comma are gone...
        bracketed = fields[6][len('to='):].rstrip(',')
        # ...and, as hinted, "<you can slice>"[1:-1] drops the brackets.
        destination = bracketed[1:-1]
    return hour, host, destination
# Run the checks right away: any exception raised here means that
# parse_line does not yet satisfy the tests above.
test_sent()
test_delivered()