1. Manually Consuming an Iterator


In [8]:
items = [1, 2, 3]
# Get the iterator
it = iter(items) # Invokes items.__iter__()
# Run the iterator
next(it) # Invokes it.__next__()


Out[8]:
1

In [9]:
next(it)


Out[9]:
2

In [40]:
next(it)


Out[40]:
3

In [42]:
# if you uncomment this line it would throw a StopOperation exception
# next(it)

2. Delegating Iterator


In [43]:
# if you write a container class, and want to expose an iterator over an internal collection use the __iter()__ method
class Node:
    def __init__(self):
        self._children = [1,2,3]
    def __iter__(self):
        return iter(self._children)

root = Node()
for x in root:
    print(x)


1
2
3

3. Map

map applies a function to every element of a sequence and returns an iterator of elements


In [10]:
simpsons = ['homer', 'marge', 'bart']
map(len, simpsons) # returns [0, 2, 4]

#equivalent list comprehension
[len(word) for word in simpsons]


Out[10]:
[5, 5, 4]

In [11]:
map(lambda word: word[-1], simpsons) # returns ['r','e', 't']

#equivalent list comprehension
[word[-1] for word in simpsons]


Out[11]:
['r', 'e', 't']

4. Filter

filter returns an iterator containing the elements from a sequence for which a condition is True:


In [44]:
nums = range(5)
filter(lambda x: x % 2 == 0, nums) # returns [0, 2, 4]

# equivalent list comprehension
[num for num in nums if num % 2 == 0]


Out[44]:
[0, 2, 4]

5. Named Slices


In [45]:
######    0123456789012345678901234567890123456789012345678901234567890'
record = '....................100          .......513.25   ..........'

SHARES = slice(20,32)
PRICE = slice(40,48)

cost = int(record[SHARES]) * float(record[PRICE])
cost


Out[45]:
51325.0

6. zip


In [5]:
# zip() allows you to create an iterable view over a tuple created out of two separate iterable views
prices = { 'ACME' : 45.23, 'AAPL': 612.78, 'IBM': 205.55, 'HPQ' : 37.20, 'FB' : 10.75 }

min_price = min(zip(prices.values(), prices.keys()))  #(10.75, 'FB')

max((zip(prices.values(), prices.keys())))


Out[5]:
(612.78, 'AAPL')

zip can only be iterated over once!


In [6]:
prices_and_names = zip(prices.values(), prices.keys())
print(min(prices_and_names))

# running the following code would fail
#print(min(prices_and_names))


(10.75, 'FB')

In [10]:
# zip usually stops when any individual iterator ends (it iterates only until the end of the shortest sequence)

a = [1, 2, 3]
b = ['w', 'x', 'y', 'z']
for i in zip(a,b):
    print(i)


(1, 'w')
(2, 'x')
(3, 'y')

In [12]:
# use zip_longest to keep iterating through longer sequences
from itertools import zip_longest
for i in zip_longest(a,b):
    print(i)


(1, 'w')
(2, 'x')
(3, 'y')
(None, 'z')

In [13]:
# zip can run over more then 2 sequences

c = ['aaa', 'bbb', 'ccc']
for i in zip(a,b,c):
    print(i)


(1, 'w', 'aaa')
(2, 'x', 'bbb')
(3, 'y', 'ccc')

7. itemgetter


In [51]:
from operator import itemgetter

In [52]:
rows = [
{'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
{'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
{'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}
]

rows_by_fname = sorted(rows, key=itemgetter('fname'))
rows_by_fname


Out[52]:
[{'fname': 'Big', 'lname': 'Jones', 'uid': 1004},
 {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
 {'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
 {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]

In [53]:
rows_by_uid = sorted(rows, key=itemgetter('uid'))
rows_by_uid


Out[53]:
[{'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
 {'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
 {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
 {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}]

In [22]:
# itemgetter() function can also accept multiple keys
rows_by_lfname = sorted(rows, key=itemgetter('lname','fname'))
rows_by_lfname


Out[22]:
[{'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
 {'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
 {'fname': 'Big', 'lname': 'Jones', 'uid': 1004},
 {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}]

8. attrgetter


In [23]:
from operator import attrgetter

In [24]:
#used to sort objects that dont natively support comparison
class User:
    def __init__(self, user_id):
        self.user_id = user_id
    def __repr__(self):
        return 'User({})'.format(self.user_id)
        
users = [User(23), User(3), User(99)]
users


Out[24]:
[User(23), User(3), User(99)]

In [25]:
sorted(users, key=attrgetter('user_id'))


Out[25]:
[User(3), User(23), User(99)]

In [26]:
min(users, key=attrgetter('user_id'))


Out[26]:
User(3)

9. groupby

The groupby() function works by scanning a sequence and finding sequential “runs” of identical values (or values returned by the given key function). On each iteration, it returns the value along with an iterator that produces all of the items in a group with the same value.


In [27]:
from operator import itemgetter
from itertools import groupby

In [28]:
rows = [
{'address': '5412 N CLARK', 'date': '07/01/2012'},
{'address': '5148 N CLARK', 'date': '07/04/2012'},
{'address': '5800 E 58TH', 'date': '07/02/2012'},
{'address': '2122 N CLARK', 'date': '07/03/2012'},
{'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'},
{'address': '1060 W ADDISON', 'date': '07/02/2012'},
{'address': '4801 N BROADWAY', 'date': '07/01/2012'},
{'address': '1039 W GRANVILLE', 'date': '07/04/2012'},
]

# important!  must sort data on key field first!
rows.sort(key=itemgetter('date'))

#iterate in groups
for date, items in groupby(rows, key=itemgetter('date')):
    print(date)
    for i in items:
        print('   %s' % i)


07/01/2012
   {'date': '07/01/2012', 'address': '5412 N CLARK'}
   {'date': '07/01/2012', 'address': '4801 N BROADWAY'}
07/02/2012
   {'date': '07/02/2012', 'address': '5800 E 58TH'}
   {'date': '07/02/2012', 'address': '5645 N RAVENSWOOD'}
   {'date': '07/02/2012', 'address': '1060 W ADDISON'}
07/03/2012
   {'date': '07/03/2012', 'address': '2122 N CLARK'}
07/04/2012
   {'date': '07/04/2012', 'address': '5148 N CLARK'}
   {'date': '07/04/2012', 'address': '1039 W GRANVILLE'}

10. Generator Expressions


In [29]:
mylist = [1, 4, -5, 10, -7, 2, 3, -1]
positives = (n for n in mylist if n > 0)

positives


Out[29]:
<generator object <genexpr> at 0x00000000062B03F0>

In [30]:
for x in positives:
    print(x)


1
4
10
2
3

In [31]:
nums = [1, 2, 3, 4, 5]
sum(x * x for x in nums)


Out[31]:
55

In [32]:
# Output a tuple as CSV
s = ('ACME', 50, 123.45)
','.join(str(x) for x in s)


Out[32]:
'ACME,50,123.45'

In [33]:
# Determine if any .py files exist in a directory
import os
files = os.listdir('.')
if any(name.endswith('.py') for name in files):
    print('There be python!')
else:
    print('Sorry, no python.')


Sorry, no python.

In [34]:
# Data reduction across fields of a data structure
portfolio = [
{'name':'GOOG', 'shares': 50},
{'name':'YHOO', 'shares': 75},
{'name':'AOL', 'shares': 20},
{'name':'SCOX', 'shares': 65}
]
min(s['shares'] for s in portfolio)


Out[34]:
20

In [35]:
s = sum((x * x for x in nums)) # Pass generator-expr as argument
s = sum(x * x for x in nums) # More elegant syntax
s


Out[35]:
55

11. compress

itertools.compress() takes an iterable and an accompanying Boolean selector sequence as input. As output, it gives you all of the items in the iterable where the corresponding element in the selector is True.


In [36]:
from itertools import compress

In [37]:
addresses = [
'5412 N CLARK',
'5148 N CLARK',
'5800 E 58TH',
'2122 N CLARK'
'5645 N RAVENSWOOD',
'1060 W ADDISON',
'4801 N BROADWAY',
'1039 W GRANVILLE',
]
counts = [ 0, 3, 10, 4, 1, 7, 6, 1]

In [38]:
more5 = [n > 5 for n in counts]
more5


Out[38]:
[False, False, True, False, False, True, True, False]

In [54]:
list(compress(addresses, more5))


Out[54]:
['5800 E 58TH', '4801 N BROADWAY', '1039 W GRANVILLE']

12. reversed


In [55]:
#iterates in reverse
a = [1, 2, 3, 4]
for x in reversed(a):
    print(x)


4
3
2
1

In [58]:
#you can customize the behavior of reversed for your class by implementing __reversed()__ method
class Counter:
    def __init__(self, start):
        self.start = start
    # Forward iterator
    def __iter__(self):
        n = 1
        while n <= self.start:
            yield n
            n += 1
    # Reverse iterator
    def __reversed__(self):
        n = self.start
        while n > 0:
            yield n
            n -= 1

foo = Counter(5)
for x in reversed(foo):
    print(x)


5
4
3
2
1

13. Generators with State


In [64]:
# To expose state available at each step of iteration, use a classs that implements __iter__()
class countingiterator:
    def __init__(self, items):
        self.items=items
    def __iter__(self):
        self.clear_count()
        for item in self.items:
            self.count+=1
            yield item
    def clear_count(self):
        self.count=0

foo = countingiterator(["aaa","bbb","ccc"])

for i in foo:
    print("{}:{}".format(foo.count, i))


1:aaa
2:bbb
3:ccc

14. islice and dropwhile


In [67]:
# itertools.islice allows slicing of iterators
def count(n):
    while True:
        yield n
        n += 1

c=count(0)
#the next line would fail
# c[10:20]
import itertools
for x in itertools.islice(c,10,15):
    print(x)


10
11
12
13
14

In [68]:
c=count(0)
for x in itertools.islice(c, 10, 15, 2):
    print(x)


10
12
14

In [82]:
# if you don't know how many to skip, but can define a skip condition, use dropwhile()
from itertools import dropwhile
foo = ['#','#','#','#','aaa','bbb','#','ccc']
def getstrings(f):
    for i in f:
        yield i

for ch in dropwhile(lambda ch: ch.startswith('#'), getstrings(foo)):
    print(ch)


aaa
bbb
#
ccc

15. Permutations and Combinations of Elements


In [83]:
from itertools import permutations

In [85]:
items = ['a', 'b', 'c']

for p in permutations(items):
    print(p)


('a', 'b', 'c')
('a', 'c', 'b')
('b', 'a', 'c')
('b', 'c', 'a')
('c', 'a', 'b')
('c', 'b', 'a')

In [86]:
# for smaller subset permutations
for p in permutations(items,2):
    print(p)


('a', 'b')
('a', 'c')
('b', 'a')
('b', 'c')
('c', 'a')
('c', 'b')

In [87]:
# itertools.combinations ignores element order in creating unique sets
from itertools import combinations
for c in combinations(items, 3):
    print(c)


('a', 'b', 'c')

In [88]:
for c in combinations(items, 2):
    print(c)


('a', 'b')
('a', 'c')
('b', 'c')

In [90]:
# itertools.combinations_with_replacement() will not remove an item from the list of possible candidates after it is chosen 
# in other words, the same value can occur more then once
from itertools import combinations_with_replacement

for c in combinations_with_replacement(items, 3):
    print(c)


('a', 'a', 'a')
('a', 'a', 'b')
('a', 'a', 'c')
('a', 'b', 'b')
('a', 'b', 'c')
('a', 'c', 'c')
('b', 'b', 'b')
('b', 'b', 'c')
('b', 'c', 'c')
('c', 'c', 'c')

16. Iterating with Indexes


In [1]:
# enumerate returns the iterated item and an index
my_list = ['a', 'b', 'c']
for idx, val in enumerate(my_list):
    print(idx, val)


0 a
1 b
2 c

In [3]:
# pass a starting index to enumerate
for idx, val in enumerate(my_list, 7):
    print(idx, val)


7 a
8 b
9 c

17. chain


In [15]:
# chain iterates over several sequences, one after the other
# making them look like one long sequence

from itertools import chain
a = [1, 2]
b = ['x', 'y', 'z']
for x in chain(a, b):
    print(x)


1
2
x
y
z

18. Flatten a Nested Sequence


In [17]:
# you want to traverse a sequence with nested sub sequences as one big sequence
from collections import Iterable

def flatten(items, ignore_types=(str, bytes)):
    for x in items:
        if isinstance(x, Iterable) and not isinstance(x, ignore_types): # ignore types treats iterable string/bytes as simple values
            yield from flatten(x)
        else:
            yield x

items = [1, 2, [3, 4, [5, 6], 7], 8]

for x in flatten(items):
    print(x)


1
2
3
4
5
6
7
8

19. Merging Presorted Iterables


In [18]:
import heapq
a = [1, 4, 7]
b = [2, 5, 6]
for c in heapq.merge(a, b):
    print(c)


1
2
4
5
6
7