What makes Python so awesome?


In [1]:
# import css and load slide show utilities
%run talktools

# inline matplotlib plot
%matplotlib inline


Data Type

In [2]:
# Boolean
True
False

# Number
1, 2, 3        # int
1.1, 1.2, 1.3  # float
1 + 1j         # complex number

# String
'hello world', "hello world"
r'\d+'

# Bytes
b'hello world'

# None
None
Data Structure

In [3]:
[1, 2, 3]         # list

('apple', 10)     # tuple

{1, 2, 3}         # set

{'apple': 10, 'boy': 5, 'car': 3}  # dict


Out[3]:
{'apple': 10, 'car': 3, 'boy': 5}
List

In [4]:
numbers = [0, 1, 2, 3, 4]

In [5]:
numbers[1]


Out[5]:
1

In [6]:
numbers[1:3]


Out[6]:
[1, 2]

In [7]:
numbers[:2]


Out[7]:
[0, 1]

In [8]:
numbers[2:]


Out[8]:
[2, 3, 4]

In [9]:
numbers[::2]


Out[9]:
[0, 2, 4]

In [10]:
numbers[-1]


Out[10]:
4

In [11]:
numbers[::-1]


Out[11]:
[4, 3, 2, 1, 0]

In [12]:
numbers.append(5)
numbers


Out[12]:
[0, 1, 2, 3, 4, 5]
Dict

In [13]:
strlen = {'apple': 5, 'boy': 3, 'car': 3}

In [14]:
strlen['apple']


Out[14]:
5

In [15]:
strlen.get('dog', 'default')


Out[15]:
'default'

In [16]:
strlen['dog'] = 3
strlen


Out[16]:
{'apple': 5, 'car': 3, 'boy': 3, 'dog': 3}
Operators

In [17]:
1 + 1 - 1 * 1 / 1


Out[17]:
1.0

In [18]:
10 // 3


Out[18]:
3

In [19]:
10 % 3


Out[19]:
1

In [20]:
2 ** 100


Out[20]:
1267650600228229401496703205376

In [21]:
(1 + 5j) * (1 - 5j)


Out[21]:
(26+0j)

In [22]:
'hello ' + 'world'


Out[22]:
'hello world'

In [23]:
'Hello' * 10


Out[23]:
'HelloHelloHelloHelloHelloHelloHelloHelloHelloHello'

In [24]:
1 == 1


Out[24]:
True

In [25]:
1 != 2


Out[25]:
True

In [26]:
1 > 2


Out[26]:
False

In [27]:
1 >= 2


Out[27]:
False

In [28]:
1 < 2


Out[28]:
True

In [29]:
1 <= 2


Out[29]:
True

In [30]:
not True


Out[30]:
False

In [31]:
True and False


Out[31]:
False

In [32]:
True or False


Out[32]:
True
Control Flow

In [33]:
x = 1

if x == 1:
    print('Hello')


Hello

In [34]:
x = 2

if x == 1:
    print('1')
elif x == 2:
    print('2')
else:
    print('3')


2
Iterator Protocol

Example

Print ['apple', 'boy', 'cat', 'dog', 'egg'] as

1. apple
2. boy
3. cat
4. dog
5. egg

C / C++

/* Assumes words is an array of C strings with words_len entries. */
for (int i = 0; i < words_len; i++) {
    const char *word = words[i];       /* C spells this `char *`, not `char[]` */
    printf("%d. %s\n", i + 1, word);   /* i is 0-based, the printed list is 1-based */
}

Java

// Number the words starting from 1.
int i = 1;
for (String word : words) {
    System.out.println(i + ". " + word);  // i is already 1-based; do not add 1 again
    i++;
}

Python


In [35]:
for i in range(5):
    print(i)


0
1
2
3
4

In [36]:
words = ['apple', 'boy', 'cat', 'dog', 'egg']

In [37]:
for word in words:
    print(word)


apple
boy
cat
dog
egg

In [38]:
for i, word in enumerate(words, 1):
    print("{}. {}".format(i, word))


1. apple
2. boy
3. cat
4. dog
5. egg

Iterables

  • list, set, dict, collections
  • string
  • file, csv reader

In [39]:
!cat data/words.txt


apple
boy
cat
dog
egg

In [40]:
# Iterating over a file object yields one line at a time, with the trailing
# newline still attached (hence the strip()).
# The with-statement closes the file even if the loop body raises.
with open('data/words.txt') as words:
    for i, word in enumerate(words, 1):
        print('{}. {}'.format(i, word.strip()))


1. apple
2. boy
3. cat
4. dog
5. egg

In [41]:
'this has a line break\n'.strip()


Out[41]:
'this has a line break'

Iterator Consumers

  • for-loop
  • sum, min, max
  • sorted, reversed
  • map, filter, reduce

In [42]:
max([1, 2, 3, 4, 5])


Out[42]:
5

In [43]:
sum([2, 4, 6, 8, 10])


Out[43]:
30

In [44]:
sorted([3, 2, 7, 4, 6])


Out[44]:
[2, 3, 4, 6, 7]

In [45]:
words = ['apple\n', 'boy\n', 'cat\n', 'dog\n', 'egg\n']

In [46]:
stripped_words = map(str.strip, words)
list(stripped_words)


Out[46]:
['apple', 'boy', 'cat', 'dog', 'egg']

In [47]:
def has_a(word):
    """Tell whether ``word`` contains the letter 'a'."""
    contains_a = 'a' in word
    return contains_a

stripped_a_words = map(str.strip, filter(has_a, words))
list(stripped_a_words)


Out[47]:
['apple', 'cat']

In [48]:
stripped_a_words = map(str.strip, filter(lambda w: 'a' in w, words))
list(stripped_a_words)


Out[48]:
['apple', 'cat']
For Comprehension

map(str.strip, filter(lambda w: 'a' in w, words))


In [49]:
words = ['apple\n', 'boy\n', 'cat\n', 'dog\n', 'egg\n']

In [50]:
[w.strip() for w in words]


Out[50]:
['apple', 'boy', 'cat', 'dog', 'egg']

In [51]:
[w.strip() for w in words if 'a' in w]


Out[51]:
['apple', 'cat']

In [52]:
words_has_a = (w for w in words if 'a' in w)
[w.strip() for w in words_has_a]


Out[52]:
['apple', 'cat']

In [53]:
{w.strip() for w in words}


Out[53]:
{'apple', 'boy', 'cat', 'dog', 'egg'}

In [54]:
{w[0]: w.strip() for w in words}


Out[54]:
{'d': 'dog', 'e': 'egg', 'b': 'boy', 'c': 'cat', 'a': 'apple'}
Task

Round off the numbers in data/numbers.txt, then count the number of distinct integers.


In [55]:
!head data/numbers.txt


91.95406798520374
87.48009624714858
99.50775591380737
98.67355609510342
105.70202974438588
87.75808656704325
91.69285475944871
102.9219158173364
118.26951038511679
108.47910987867324

Things that may be useful

  • float(s): str -> float
  • round(f): round off float
  • len(l): length of a collection (list, set, dict, ...)

In [56]:
# Stream the file lazily: each line -> float -> rounded int, collected in a set
# so duplicates collapse. The with-statement closes the file even when a line
# fails to parse.
with open('data/numbers.txt') as numbers:
    floats = (float(line) for line in numbers)
    ints = {round(value) for value in floats}

size = len(ints)

size


Out[56]:
70
Property

Java

/** A cat with a name, exposed through the usual getter/setter pair. */
public class Cat {
    private String name;

    public Cat(String name) {
        this.name = name;
    }

    /** Returns the cat's current name. */
    public String getName() {
        return name;
    }

    /** Replaces the cat's name. */
    public void setName(String name) {
        this.name = name;
    }
}


In [57]:
class Cat:
    """A cat whose ``name`` is a plain, publicly readable and writable attribute."""

    def __init__(self, name):
        # No getter/setter boilerplate: callers use ``cat.name`` directly.
        self.name = name

In [58]:
my_cat = Cat('Rain')
my_cat.name


Out[58]:
'Rain'

In [59]:
my_cat.name = 'Rainbow'
my_cat.name


Out[59]:
'Rainbow'

In [60]:
class Cat:
    """Schrödinger's Cat Store, Ltd

    A cat whose ``name`` is a validated property: names containing a digit
    are rejected, both at construction time and on later assignment.
    """

    def __init__(self, name):
        """Create a cat called ``name``.

        Raises:
            ValueError: if ``name`` contains a digit.
        """
        # Assign through the property so the setter's check also applies here;
        # writing ``self._name`` directly would let ``Cat('9upper')`` slip through.
        self.name = name

    @property
    def name(self):
        """The cat's name (stored privately in ``_name``)."""
        return self._name

    @name.setter
    def name(self, name):
        # Reject any name that has at least one digit character.
        if any(char.isdigit() for char in name):
            raise ValueError(name + ' contains number')
        self._name = name

In [61]:
my_cat = Cat('Rain')
my_cat.name


Out[61]:
'Rain'

In [62]:
my_cat.name = 'Rainbow'
my_cat.name


Out[62]:
'Rainbow'

In [63]:
my_cat.name = '9upper'


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-63-29674cd4f56c> in <module>()
----> 1 my_cat.name = '9upper'

<ipython-input-60-d558a5c0e2eb> in name(self, name)
     12     def name(self, name):
     13         if any(char.isdigit() for char in name):
---> 14             raise ValueError(name + ' contains number')
     15         self._name = name

ValueError: 9upper contains number
Multiple Inheritance

In [ ]:
class Pet:
    """Base class for pets: owns the validated ``name`` property.

    Names containing a digit are rejected, both at construction time and on
    later assignment.
    """

    def __init__(self, name):
        """Create a pet called ``name``.

        Raises:
            ValueError: if ``name`` contains a digit.
        """
        # Go through the property so the setter's validation runs here as well.
        self.name = name

    @property
    def name(self):
        """The pet's name (stored privately in ``_name``)."""
        return self._name

    @name.setter
    def name(self, name):
        # Reject any name that has at least one digit character.
        if any(char.isdigit() for char in name):
            raise ValueError(name + ' contains number')
        self._name = name


class Cat(Pet):
    """A pet that can meow."""

    def meows(self):
        """Print ``<name>: mew-mew``."""
        print('{}: mew-mew'.format(self.name))


class Dog(Pet):
    """A pet that can bark."""

    def barks(self):
        """Print ``<name>: bow-wow``."""
        print('{}: bow-wow'.format(self.name))

You are selling:

  • cat
  • dog
  • flying cat
  • flying dog

How would you refactor the library to add a flying cat and a flying dog? What if you had to do it in Java...


In [ ]:
class FlyMixin:
    """Mixin adding ``fly()`` to any class that exposes a string ``name``."""

    def fly(self):
        """Print ``<name> is flying``."""
        announcement = self.name + ' is flying'
        print(announcement)

class FlyingCat(FlyMixin, Cat):
    """A cat that can also fly; everything is inherited from FlyMixin and Cat."""


class FlyingDog(FlyMixin, Dog):
    """A dog that can also fly; everything is inherited from FlyMixin and Dog."""


FlyingCat('SuperCat').fly()
Decorator

Still remember @property?

It is not magic


In [64]:
def hello_world():
    """Print the greeting ``hello world``; returns None."""
    print('hello world')

hello_world()


hello world

In [65]:
for _ in range(3):
    hello_world()


hello world
hello world
hello world

In [66]:
def repeat_3_times(func):
    """Decorator: make every call to ``func`` run it three times in a row.

    Args:
        func: the callable to wrap.

    Returns:
        A wrapper that forwards its arguments to ``func`` three times and
        returns None (the individual results are discarded).
    """
    from functools import wraps

    @wraps(func)  # keep func.__name__ / __doc__ on the wrapper
    def repeated(*args, **kwargs):
        for _ in range(3):
            func(*args, **kwargs)

    return repeated


@repeat_3_times
def hello_world():
    """Print the greeting ``hello world``."""
    print('hello world')

hello_world()


hello world
hello world
hello world

In [67]:
def repeat(n):
    """Build a decorator that runs the wrapped function ``n`` times per call.

    Args:
        n: how many times each call should execute the wrapped function.

    Returns:
        A decorator. The function it produces forwards its arguments to the
        original ``n`` times and returns None (results are discarded).
    """
    from functools import wraps

    def repeat_decorator(func):

        @wraps(func)  # keep func.__name__ / __doc__ on the wrapper
        def repeated(*args, **kwargs):
            for _ in range(n):
                func(*args, **kwargs)

        return repeated

    return repeat_decorator



@repeat(5)
def hello_world():
    """Print the greeting ``hello world``."""
    print('hello world')

hello_world()


hello world
hello world
hello world
hello world
hello world
Task

Create a decorator that logs how long the function takes.

>>> @time_it(10000)
... def heavy_computation():
...     return 1+1

>>> heavy_computation()

heavy_computation ran 10000 times, total: 1.556ms, avg: 0.0001556ms



Things that may be useful

  • time.time()
  • func.__name__

In [68]:
import time

def time_it(n):
    """Build a decorator that runs a function ``n`` times and prints the timing.

    Args:
        n: number of times each call executes the wrapped function.

    Returns:
        A decorator. The function it produces forwards its arguments to the
        original ``n`` times, prints one summary line (name, run count, total
        and average time in milliseconds) and returns None.
    """
    from functools import wraps

    def time_it_n(func):
        fname = func.__name__

        @wraps(func)  # keep func.__name__ / __doc__ on the wrapper
        def timed(*args, **kwargs):
            # perf_counter is monotonic and high-resolution; time.time() follows
            # the wall clock and can jump while we are measuring.
            start = time.perf_counter()
            for _ in range(n):
                func(*args, **kwargs)
            end = time.perf_counter()

            total = (end - start) * 1000  # seconds -> milliseconds
            avg = total / n if n else 0.0  # n == 0: nothing ran, avoid dividing by zero

            print('{} ran {} times, total: {:.4}ms, avg: {:.4}ms'.format(fname, n, total, avg))

        return timed

    return time_it_n


@time_it(10000)
def heavy_computation():
    """Stand-in workload for the timing demo: just adds 1 and 1."""
    result = 1 + 1
    return result

heavy_computation()


heavy_computation ran 10000 times, total: 2.108ms, avg: 0.0002108ms
Context Manager

In [69]:
with open('data/words.txt') as f:
    for line in f:
        print(line.strip())


apple
boy
cat
dog
egg

Java 7

interface Closable {
    public void close() throws IOException;
}

Python

class ContextManager:
    """Skeleton of the two methods the ``with`` statement calls."""

    def __enter__(self):
        # Runs on entry to the ``with`` block; the return value is what
        # ``with ... as name`` binds to ``name``.
        ...
        return ...

    def __exit__(self, type, value, traceback):
        # Runs when the block is left. The three arguments describe the
        # exception raised inside the block, or are all None if there was none.
        ...

In [70]:
import time

class TimeLogger:
    """Context manager that prints how long its ``with`` block took, in ms."""

    def __enter__(self):
        # Remember when the block started; hand back the logger itself.
        self.start = time.time()
        return self

    def __exit__(self, type, value, traceback):
        # Returns None, so an exception raised inside the block still propagates.
        finished = time.time()
        elapsed_ms = (finished - self.start) * 1000
        print('Complete in {}ms'.format(elapsed_ms))

with TimeLogger():
    1 + 1


Complete in 0.002384185791015625ms
Generator

In [71]:
def natural_number():
    """Endless generator of 0, 1, 2, ... (never stops by itself)."""
    current = 0
    while True:
        yield current
        current = current + 1

        
nums = natural_number()

for _ in range(5):
    print(next(nums))


0
1
2
3
4

In [72]:
def group(items, group_size=5, sep='|'):
    """Yield the elements of ``items`` with ``sep`` emitted before each group.

    A separator is produced ahead of positions 0, group_size, 2 * group_size,
    and so on, so the output starts with ``sep`` whenever ``items`` is non-empty.
    """
    position = 0
    for element in items:
        starts_new_group = position % group_size == 0
        if starts_new_group:
            yield sep
        yield element
        position += 1

''.join(group('abcdefghijklmnopqrstuvwxyz'))


Out[72]:
'|abcde|fghij|klmno|pqrst|uvwxy|z'

In [73]:
def chain(*iterables):
    """Yield every element of each iterable in turn, as one continuous stream."""
    for source in iterables:
        for element in source:
            yield element

list(chain([1, 2, 3, 4], [5, 6, 7, 8]))


Out[73]:
[1, 2, 3, 4, 5, 6, 7, 8]
Task

Count the number of requests from each IP across all the log files under the data/www folder.

List the 10 most frequent IPs and their counts.


In [74]:
ls -R data/www


data/www:
bar/  foo/

data/www/bar:
access-log*  access-log-0108.bz2*  access-log-0208.bz2*

data/www/foo:
access-log*  access-log-0108.gz*  access-log-0208.gz*

In [75]:
!head data/www/bar/access-log


140.180.132.213 - - [24/Feb/2008:00:08:59 -0600] "GET /ply/ply.html HTTP/1.1" 200 97238
140.180.132.213 - - [24/Feb/2008:00:08:59 -0600] "GET /favicon.ico HTTP/1.1" 404 133
75.54.118.139 - - [24/Feb/2008:00:15:40 -0600] "GET / HTTP/1.1" 200 4447
75.54.118.139 - - [24/Feb/2008:00:15:41 -0600] "GET /images/Davetubes.jpg HTTP/1.1" 200 60025
75.54.118.139 - - [24/Feb/2008:00:15:42 -0600] "GET /favicon.ico HTTP/1.1" 404 133
75.54.118.139 - - [24/Feb/2008:00:15:49 -0600] "GET /software.html HTTP/1.1" 200 3163
75.54.118.139 - - [24/Feb/2008:00:16:10 -0600] "GET /ply/index.html HTTP/1.1" 200 8018
75.54.118.139 - - [24/Feb/2008:00:16:11 -0600] "GET /ply/bookplug.gif HTTP/1.1" 200 23903
213.145.165.82 - - [24/Feb/2008:00:16:19 -0600] "GET /ply/ HTTP/1.1" 200 8018
128.143.38.83 - - [24/Feb/2008:00:31:39 -0600] "GET /favicon.ico HTTP/1.1" 404 133

Things that may be useful

  • os.path
  • gzip
  • bz2
  • collections.Counter

In [76]:
import os
import gzip
import bz2
from collections import Counter


def gen_find(root):
    """Yield the path of every file found anywhere below ``root``."""
    for folder, _subdirs, filenames in os.walk(root):
        yield from (os.path.join(folder, filename) for filename in filenames)
            
def gen_string(bytes):
    """Lazily decode each item of ``bytes`` as ASCII and yield the resulting str.

    Note: the parameter name shadows the builtin ``bytes`` inside this function.
    """
    yield from (raw.decode('ascii') for raw in bytes)

def gen_open(filenames):
    """Open each file in ``filenames`` and yield an iterable of its text lines.

    ``.gz`` and ``.bz2`` files are decompressed and their lines decoded as
    ASCII; every other file is opened as plain text.

    Each yielded source is only valid until the generator is advanced again:
    the underlying file is closed at that point, so consume one source fully
    before asking for the next (which is what ``yield from source`` does).
    """
    for name in filenames:
        if name.endswith(".gz"):
            opener, compressed = gzip.open, True
        elif name.endswith(".bz2"):
            opener, compressed = bz2.open, True
        else:
            opener, compressed = open, False

        # The with-block keeps the handle open while the caller reads the
        # yielded source and closes it once control returns here.
        with opener(name) as handle:
            if compressed:
                # Compressed files are opened in binary mode: decode line by line.
                yield (line.decode('ascii') for line in handle)
            else:
                yield handle
                        
def gen_cat(sources):
    """Concatenate several iterables: yield all items of each source, in order."""
    for stream in sources:
        for item in stream:
            yield item
            
def gen_ip(logfiles):
    """Yield the first space-separated field of every log line.

    Args:
        logfiles: an iterable of log lines (despite the name, each item is one
            line of text, not a file), e.g. the output of ``gen_cat``.

    Yields:
        The text before the first space of each line; in the access logs shown
        in this notebook that field is the client IP address.

    Raises:
        ValueError: if a line contains no space at all.
    """
    # Iterate the argument itself, not a module-level variable that merely
    # happens to hold the same stream when the cell is run top to bottom.
    for line in logfiles:
        ip, _ = line.split(' ', 1)
        yield ip


logpaths = gen_find('data/www')
logfiles = gen_open(logpaths)
log = gen_cat(logfiles)
ips = gen_ip(log)
ipcount = Counter(ips)
first_ten = ipcount.most_common(10)
first_ten


Out[76]:
[('38.98.120.84', 3354),
 ('129.192.97.6', 774),
 ('66.249.65.37', 390),
 ('201.236.226.90', 366),
 ('67.228.115.170', 294),
 ('24.10.16.193', 282),
 ('128.135.11.245', 264),
 ('71.57.91.136', 258),
 ('67.186.98.20', 246),
 ('189.70.147.197', 240)]

In [77]:
import os
import gzip
import bz2
from collections import Counter
from functools import wraps


def foreach(func):
    """Decorator: lift a per-element generator function to a whole iterable.

    ``func`` takes one element and yields zero or more results. The decorated
    function takes an iterable and yields, in order, everything ``func``
    yields for each element.
    """
    def wrapped(iterable):
        for elem in iterable:
            for result in func(elem):
                yield result

    # Same effect as decorating ``wrapped`` with ``@wraps(func)``.
    return wraps(func)(wrapped)


def gen_find(root):
    """Walk the directory tree under ``root`` and yield each file's path."""
    for folder, _subdirs, filenames in os.walk(root):
        for filename in filenames:
            full_path = os.path.join(folder, filename)
            yield full_path

@foreach
def gen_string(line):
    """Decode one raw line as ASCII text; ``@foreach`` applies this to every line."""
    text = line.decode('ascii')
    yield text


@foreach
def gen_open(filename):
    """Open ``filename`` and yield one iterable of its text lines.

    ``.gz`` and ``.bz2`` files are decompressed and decoded through
    ``gen_string``; every other file is opened as plain text.

    The yielded source is only valid until this generator is resumed: the
    file is closed at that point, so the consumer must read the source fully
    before moving on.
    """
    if filename.endswith(".gz"):
        opener, compressed = gzip.open, True
    elif filename.endswith(".bz2"):
        opener, compressed = bz2.open, True
    else:
        opener, compressed = open, False

    # Keep the handle open while the caller reads the yielded source; close it
    # once control comes back here.
    with opener(filename) as handle:
        yield gen_string(handle) if compressed else handle


@foreach
def gen_cat(source):
    """Yield every item of one source; ``@foreach`` chains all sources together."""
    for item in source:
        yield item


@foreach
def gen_ip(line):
    """Yield the text before the first space of one log line."""
    address, _rest = line.split(' ', 1)
    yield address


logpaths = gen_find('data/www')
logfiles = gen_open(logpaths)
log = gen_cat(logfiles)
ips = gen_ip(log)
ipcount = Counter(ips)
first_ten = ipcount.most_common(10)
first_ten


Out[77]:
[('38.98.120.84', 3354),
 ('129.192.97.6', 774),
 ('66.249.65.37', 390),
 ('201.236.226.90', 366),
 ('67.228.115.170', 294),
 ('24.10.16.193', 282),
 ('128.135.11.245', 264),
 ('71.57.91.136', 258),
 ('67.186.98.20', 246),
 ('189.70.147.197', 240)]

In [78]:
import numpy as np
import matplotlib.pyplot as plt

# first_ten is a list of (ip, count) pairs, most frequent first.
ip, freq = zip(*first_ten)

plt.xkcd()
fig, ax = plt.subplots()

# One horizontal bar per entry with the most frequent IP at the top. The bar
# count comes from the data, so the plot still works with fewer than ten IPs.
pos = np.arange(len(ip) - 1, -1, -1) + .5
ax.barh(pos, freq, align='center', height=0.5)
ax.set_yticks(pos)
ax.set_yticklabels(ip)

ax.set_xlabel('Frequency')
ax.set_ylabel('IP')
ax.set_title('Top 10 IP')
ax.grid(True)
plt.show()


Co-routine / Reverse Generator

In [79]:
def print_if_divided_by(n):
    """Coroutine: print each value sent in that is divisible by ``n``.

    Must be primed with ``next()`` before the first ``send()``.
    """
    while True:
        received = yield
        remainder = received % n
        if remainder == 0:
            print(received)

even_printer = print_if_divided_by(2)
next(even_printer)

for num in range(10):
    even_printer.send(num)


0
2
4
6
8

In [80]:
def consumer(gen_func):
    """Decorator for receiving coroutines: create the generator and prime it.

    Args:
        gen_func: a generator function whose body receives values through
            ``value = yield``.

    Returns:
        A function with the same arguments that returns the generator already
        advanced to its first ``yield``, so callers can ``send()`` right away.
    """
    from functools import wraps

    @wraps(gen_func)  # keep gen_func.__name__ / __doc__ on the wrapper
    def consumer_func(*args, **kwargs):
        generator = gen_func(*args, **kwargs)
        next(generator)  # run up to the first ``yield``
        return generator
    return consumer_func

@consumer
def print_sum():
    """Coroutine: print the running total after every value sent in."""
    running_total = 0  # local renamed so it no longer shadows the builtin ``sum``
    while True:
        received = yield
        running_total += received
        print('Current sum:', running_total)

@consumer
def print_product():
    """Coroutine: print the running product after every value sent in."""
    running_product = 1
    while True:
        received = yield
        running_product *= received
        print('Current product:', running_product)

@consumer
def boardcast(*consumers):
    """Coroutine: forward every value sent in to each of ``consumers``, in order."""
    while True:
        received = yield
        # ``sink`` rather than ``consumer`` to avoid shadowing the decorator name.
        for sink in consumers:
            sink.send(received)

receiver = boardcast(print_sum(), print_product())
for num in range(1, 6):
    receiver.send(num)


Current sum: 1
Current product: 1
Current sum: 3
Current product: 2
Current sum: 6
Current product: 6
Current sum: 10
Current product: 24
Current sum: 15
Current product: 120

In [81]:
from contextlib import contextmanager

@contextmanager
def tag(name):
    """Context manager that prints ``<name>`` on entry and ``</name>`` on exit.

    The closing tag is printed even when the body of the ``with`` block
    raises; the exception then continues to propagate.
    """
    print('<{}>'.format(name))
    try:
        yield
    finally:
        # Without try/finally an exception in the body would skip this line
        # and leave the tag unclosed.
        print('</{}>'.format(name))

with tag('p'):
    print('text')


<p>
text
</p>
Co-routine / Two-way Generator

JavaScript callback hell

exports.processJob = function(options, next) {
  db.getUser(options.userId, function(err, user) {
    if (err) return next(err);        
    db.updateAccount(user.accountId, options.total, function(err) {
      if (err) return next(err);          
      http.post(options.url, function(err) {
        if (err) return next(err);            
        next();
      });
    }); 
  });
};


In [82]:
def process_job(options, next):
    """Generator-based version of the JavaScript ``processJob`` shown above.

    Runs the three steps one after another with ``yield from`` and reports the
    outcome through the ``next`` callback: ``next()`` on success, ``next(e)``
    with the exception on failure.

    ``db`` and ``http`` are illustrative names that are not defined in this
    notebook. The ``next`` parameter shadows the builtin inside this function.
    """
    try:
        user = yield from db.get_user(options.user_id)
        yield from db.update_account(user.account_id, options.total)
        yield from http.post(options.url)
    except Exception as e:
        next(e)
    else:
        # Signal success outside the try block so an error raised by the
        # callback itself is not reported back to the callback.
        next()

In [83]:
import asyncio
from datetime import datetime


def tick(loop):
    """Print a timestamped 'tick' and schedule the next tick on ``loop`` in 0.5 s."""
    print('{:%H:%M:%S.%f}    tick'.format(datetime.now()))
    # Re-schedules itself, so ticks keep appearing while the loop is running.
    loop.call_later(0.5, tick, loop)


# NOTE(review): ``@asyncio.coroutine`` marks generator-based coroutines, the
# style that predates ``async def``. The decorator was deprecated in Python 3.8
# and removed in 3.11, so this cell needs ``async def`` / ``await`` there.
@asyncio.coroutine
def sleep(t, log):
    """Print that ``log`` goes to sleep, wait ``t`` seconds, print that it woke up."""
    print('{:%H:%M:%S.%f}    {} go to sleep'.format(datetime.now(), log))

    # Suspends only this coroutine; the event loop keeps running other callbacks.
    yield from asyncio.sleep(t)

    print('{:%H:%M:%S.%f}    {} woke up'.format(datetime.now(), log))

    return


@asyncio.coroutine
def get_user(user_id):
    """Simulate a 1-second lookup; always returns 'kenny2tam' (``user_id`` is unused)."""
    yield from sleep(1, 'get_user')
    return 'kenny2tam'


@asyncio.coroutine
def update_account(account_id, total):
    """Simulate a 2-second update, then print the account and its new total."""
    yield from sleep(2, 'update_account')
    print('{:%H:%M:%S.%f}    Account of {} is updated to {}'.format(datetime.now(), account_id, total))


@asyncio.coroutine
def post(url):
    """Simulate a 3-second HTTP post, then print the target ``url``."""
    yield from sleep(3, 'post')
    print('{:%H:%M:%S.%f}    Posted to {}'.format(datetime.now(), url))


@asyncio.coroutine
def process_job(options):
    """Run the three steps strictly one after another.

    ``options`` is a dict read with the keys 'user_id', 'total' and 'url'.
    """
    user = yield from get_user(options['user_id'])
    # The value returned by get_user is passed on as the account identifier.
    yield from update_account(user, options['total'])
    yield from post(options['url'])


loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
loop.call_soon(tick, loop)
loop.run_until_complete(process_job({'user_id': 1, 'total': 10, 'url': 'fb.com/king.of.logic'}))
loop.close()


12:21:04.110419    tick
12:21:04.110560    get_user go to sleep
12:21:04.611320    tick
12:21:05.112149    get_user woke up
12:21:05.112713    update_account go to sleep
12:21:05.112861    tick
12:21:05.613382    tick
12:21:06.115081    tick
12:21:06.616739    tick
12:21:07.114133    update_account woke up
12:21:07.115315    Account of kenny2tam is updated to 10
12:21:07.115443    post go to sleep
12:21:07.117816    tick
12:21:07.618746    tick
12:21:08.120326    tick
12:21:08.621718    tick
12:21:09.123500    tick
12:21:09.624705    tick
12:21:10.117108    post woke up
12:21:10.118007    Posted to fb.com/king.of.logic

In [84]:
import this


The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!
Summary
  • Iterator Protocol
  • For Comprehension
  • Property
  • Multiple Inheritance
  • Decorator
  • Context Manager
  • Generator
  • Reverse Generator
  • Two-way Generator