In [ ]:
import re
import nose
# %timeit
In [ ]:
from __future__ import print_function
# Before writing the parser, collect samples of
# the interesting lines. For now just
from course import mail_sent, mail_delivered
# Show the sample line we are about to parse (message typo fixed: "going").
print("I'm going to parse the following line", mail_sent, sep="\n\n")
In [ ]:
# and %edit a simple
def test_sent():
    """Check that parse_line extracts hour and recipient from mail_sent.

    parse_line must return three values (hour, host, recipient); the host
    is unpacked but not checked here.
    """
    hour, _host, to = parse_line(mail_sent)
    assert (hour, to) == ('08:00:00', 'jon@doe.it')
In [ ]:
# Play with mail_sent and start using basic strings in ipython
mail_sent.split()
In [ ]:
# You can number fields with enumerate.
# Remember that ipython puts the last returned value in `_`
# which is useful in interactive mode!
# Presumably `_` still holds the list shown by the previous cell
# (mail_sent.split()): give it a name and pair each field with its index.
fields, counting = _, enumerate(_)
# enumerate yields (index, field) pairs; print one pair per line
print(*counting, sep="\n")
In [ ]:
# Now we can pick fields singularly...
# Items 2, 3 and 6 of the split line are used as hour, host and recipient
hour, host, dest = fields[2], fields[3], fields[6]
In [ ]:
# ... or with
from operator import itemgetter
# itemgetter(2, 3, 6) builds a callable that, given a sequence `s`,
# returns the tuple (s[2], s[3], s[6])
which_returns_a_function = itemgetter(2, 3, 6)
# It must agree with the manual indexing used for hour, host and dest
assert (hour, host, dest) == which_returns_a_function(fields)
In [ ]:
# %load ../scripts/03_parsing_test.py
""" Python for System Administrators
Roberto Polli <rpolli@babel.it>
This file shows how to parse a postfix maillog file.
maillog traces every incoming and outgoing email using
different line formats.
"""
#
# Before writing the parser we collect the
# interesting lines to use as a sample
# For now we're just interested in the following cases
# 1- a mail is sent
# 2- a mail is delivered
# Case 1: a "sent" line, carrying the recipient in the to=<...> field
test_str_1 = 'Nov 31 08:00:00 test-fe1 postfix/smtp[16669]: 7CD8E730020: to=<jon@doe.it>, relay=examplemx2.doe.it[222.33.44.555]:25, delay=0.8, delays=0.17/0.01/0.43/0.19, dsn=2.0.0, status=sent(250 ok: Message 2108406157 accepted)'
# Case 2: a "removed" line for the same queue id, with no recipient field
test_str_2 = 'Nov 31 08:00:00 test-fe1 postfix/smtp[16669]: 7CD8E730020: removed'
def test_sent():
    """A "sent" log line must yield its hour, host and recipient address."""
    hour, host, destination = parse_line(test_str_1)
    expected = ('08:00:00', 'test-fe1', 'jon@doe.it')
    assert (hour, host, destination) == expected
def test_delivered():
    """A "removed" log line must yield hour and host, but no recipient."""
    hour, host, destination = parse_line(test_str_2)
    assert (hour, host) == ('08:00:00', 'test-fe1')
    # Lines without a recipient field are reported with None
    assert destination is None
def parse_line(line):
    """Exercise: parse one maillog line.

    Expected contract (see test_sent and test_delivered): return the
    three values (hour, host, destination), where destination is None
    when the line carries no recipient.

    Try it yourself before decoding the solution:
    ICAgIGltcG9ydCByZQogICAgXywgXywgaG91ciwgaG9zdCwgXywgXywgZGVzdCA9IGxpbmUuc3BsaXQoKVs6N10KICAgIHRyeToKICAgICAgICBkZXN0ID0gcmUuc3BsaXQocidbPD5dJywgZGVzdClbMV0KICAgIGV4Y2VwdDoKICAgICAgICBkZXN0ID0gTm9uZQogICAgcmV0dXJuIChob3VyLCBob3N0LCBkZXN0KQoK
    """
    # Hint: "you can".split() breaks a string on whitespace
    # Hint: "<you can slice>"[1:-1] drops the delimiters, or use re.split
    # Deliberately unimplemented: this stub is the exercise.
    raise NotImplementedError("Write me!")
In [ ]:
#
# Run test
#
test_sent()
In [ ]:
# Don't look at the solution ;)
%load course/parse_line.py
In [ ]:
# Python supports regular expressions via
import re
# We start showing a grep-reloaded function
def grep(expr, fpath):
    """Return the lines of the file `fpath` matching the regexp `expr`.

    Lines are returned in file order and keep their trailing newline.
    The pattern may match anywhere in a line, like grep(1) does.
    """
    pattern = re.compile(expr)
    # A compiled pattern has two lookup methods:
    #   match  -> only matches at the beginning of the string
    #   search -> matches anywhere in the string
    assert pattern.match and pattern.search
    matching = []
    with open(fpath) as lines:
        for line in lines:
            if pattern.search(line):
                matching.append(line)
    return matching
In [ ]:
# The function seems to work as expected ;)
# (this check assumes no line of /etc/hosts starts with "localhost")
assert not grep(r'^localhost', '/etc/hosts')
# And some more tests.
# Dots are escaped: a bare "." matches any character, so the
# pattern '127.0.0.1' would also match a line such as "127x0y0z1".
ret = grep(r'127\.0\.0\.1', '/etc/hosts')
assert ret, "ret should not be empty"
# Every matched line still ends with its newline: don't add a separator
print(*ret, sep="")
In [ ]:
from re import split # is a very nice function
import sys
from course import sh
# Let's gather some ping stats: the command line depends on the platform
cmd = (
    "ping -n3 www.google.it"
    if sys.platform.startswith('win')
    else "ping -c3 -w3 www.google.it"
)
# Break every output line on both spaces and "=" signs
ping_output = [split("[ =]", reply) for reply in sh(cmd)]
print(*ping_output, sep="\n")
In [ ]:
# Splitting with re.findall
from re import findall # can be misused too;
# eg for adding the ":" to a
# Adjacent string literals are concatenated: mac is "0024e8b43320"
mac = "00""24""e8""b4""33""20"
# ...using this pattern: exactly two hexadecimal digits
re_hex = "[0-9a-fA-F]{2}"
# findall returns every non-overlapping pair of hex digits, in order
mac_address = ':'.join(findall(re_hex, mac))
print("The mac address is ", mac_address)
# Note: this filters rather than validates. Characters that do not form
# a pair of hex digits are silently skipped, not rejected.
In [ ]:
# Run the following cell many times.
# Do you always get the same results?
test_all_regexps = ("..", "[a-fA-F0-9]{2}")
for re_s in test_all_regexps:
%timeit ':'.join(findall(re_s, mac))
In [ ]:
# We can even compare compiled vs inline regexp
import re
from time import sleep
for re_s in test_all_regexps:
re_c = re.compile(re_s)
%timeit ':'.join(re_c.findall(mac))
In [ ]:
# Or find other methods:
# complex...
from re import sub as sed
%timeit sed(r'(..)', r'\1:', mac)
In [ ]:
# ...or simple
%timeit ':'.join([mac[i:i+2] for i in range(0,12,2)])
# Outside IPython, check the timeit module
# Exercise: which is the fastest method? Why?
In [ ]:
# Don't need to type this VSAN configuration script
# which uses linux FC information from /sys filesystem
from glob import glob
# One port_name file per FC host
fc_id_path = "/sys/class/fc_host/host*/port_name"
for x in glob(fc_id_path):
    # Each file holds a value like 0x500143802427e66c.
    # ...we boldly skip an explicit close(), drop the leading "0x"
    # ...and even use the slower but readable regexp to split it in pairs
    pwwn = re.findall(r'..', open(x).read()[2:])
    print("member pwwn ", ':'.join(pwwn))
In [ ]:
In [ ]:
#
# Use this cell for Exercise II
#
test_delivered()