1 Merging and Splitting Iterators

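chain() takes several iterables as arguments and returns a single iterator that produces the contents of all of them, one after another, as though they came from a single iterable.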


In [2]:
from itertools import chain
# chain() exhausts the first list and then continues with the second, so the
# two inputs print as one uninterrupted, space-separated run.
print(*chain([1, 2, 3], ['a', 'b', 'c']), end=' ')


1 2 3 a b c 

In [3]:
from itertools import *
def make_iterables_to_chain():
    """Generate the input lists one at a time.

    Yields:
        list: first ``[1, 2, 3]``, then ``['a', 'b', 'c']``.
    """
    yield from ([1, 2, 3], ['a', 'b', 'c'])

# chain.from_iterable() accepts ONE iterable that produces the inputs (here a
# generator), rather than taking each input as a separate argument.
flattened = chain.from_iterable(make_iterables_to_chain())
print(*flattened, end=' ')
print()


1 2 3 a b c 

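zip() stops as soon as the shortest input is exhausted. Use zip_longest() to keep going until the longest input is exhausted; missing values are filled with None, or with the fillvalue argument if one is given.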


In [4]:
from itertools import *
# Inputs of different lengths: zip() produces only as many tuples as the
# shorter range has items, so the final value of r1 is silently dropped.
r1, r2 = range(3), range(2)
print(*zip(r1, r2), sep='\n')
print()


(0, 0)
(1, 1)


In [5]:
# Same inputs as the zip() cell, but zip_longest() runs to the end of the
# longer range and pads the shorter one with None.
print([*zip_longest(r1, r2)])


[(0, 0), (1, 1), (2, None)]

In [6]:
# fillvalue replaces the default None used to pad the shorter input.
print([*zip_longest(r1, r2, fillvalue='a')])


[(0, 0), (1, 1), (2, 'a')]

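islice() returns selected items from the input iterator by index. It takes the same arguments as the slice operator for lists: start, stop, and step (start and step are optional).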


In [9]:
from itertools import * 
# Each entry pairs a heading with the positional arguments handed to islice()
# after the input: (stop,), (start, stop) or (start, stop, step).
slice_demos = (
    ('Stop at 5', (5,)),
    ('start at 5, and stop at 10', (5, 10)),
    ('by ten to 100', (0, 100, 10)),
)

for heading, bounds in slice_demos:
    print(heading)
    print(*islice(range(100), *bounds), end=' ')
    # '\n' plus print's own newline leaves a blank line between the demos.
    print('\n')


Stop at 5
0 1 2 3 4 

start at 5, and stop at 10
5 6 7 8 9 

by ten to 100
0 10 20 30 40 50 60 70 80 90 

tee() returns several independent iterators (2 by default) based on a single original input.


In [13]:
from itertools import *
# Split a five-item iterator into two branches; each branch replays the same
# values even though the underlying iterator is only consumed once.
r = islice(count(), 5)
r1, r2 = tee(r, 2)
for label, branch in (('r1', r1), ('r2', r2)):
    print(label, list(branch))


r1 [0, 1, 2, 3, 4]
r2 [0, 1, 2, 3, 4]

2 Converting Inputs


In [16]:
from itertools import *

values = [(0, 5), (1, 6), (2, 7), (3, 8), (4, 9)]


def multiply_pair(x, y):
    """Return both operands followed by their product."""
    return x, y, x * y


# starmap() unpacks every tuple in `values` into positional arguments, so the
# callback receives x and y separately instead of one tuple.
for triple in starmap(multiply_pair, values):
    print('{} * {} = {}'.format(*triple))


0 * 5 = 0
1 * 6 = 6
2 * 7 = 14
3 * 8 = 24
4 * 9 = 36

3 Producing New Values


In [18]:
from itertools import *

# count(1) is endless; zip() ends the pairing when the three-letter list runs
# out, giving each letter a 1-based number.
print(*zip(count(1), ['a', 'b', 'c']), sep='\n')


(1, 'a')
(2, 'b')
(3, 'c')

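count() takes optional start and step arguments. They do not have to be integers: any numeric values that can be added together work, such as the Fraction objects below.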


In [19]:
import fractions
from itertools import *

# Start at one third and advance by one third each time; Fraction keeps the
# running total exact (1/3, 2/3, 1).
start = step = fractions.Fraction(1, 3)

for value, letter in zip(count(start, step), ['a', 'b', 'c']):
    print('{}: {}'.format(value, letter))


1/3: a
2/3: b
1: c

In [20]:
from itertools import *

# cycle() repeats the letters forever; the finite range(7) is what makes
# zip() stop after seven pairs.
for index, letter in zip(range(7), cycle(['a', 'b', 'c'])):
    print((index, letter))


(0, 'a')
(1, 'b')
(2, 'c')
(3, 'a')
(4, 'b')
(5, 'c')
(6, 'a')

In [21]:
from itertools import *

# repeat() produces the same value each time; the second argument caps the
# number of repetitions at five.
print('\n'.join(repeat('over-and-over', 5)))


over-and-over
over-and-over
over-and-over
over-and-over
over-and-over

4 Filtering


In [22]:
from itertools import *


def should_drop(x):
    """Predicate for dropwhile(): announce the value, then test it.

    Prints ``Testing: <x>`` so the notebook output shows exactly which items
    the predicate was called on.

    Returns:
        The result of ``x < 1``.
    """
    print('Testing:', x)
    below_one = x < 1
    return below_one


# The trailing -2 would satisfy the predicate, but it comes after the first
# failing item (1), so dropwhile() passes it through untested.
remaining = dropwhile(should_drop, [-1, 0, 1, 2, -2])
for kept in remaining:
    print('Yielding:', kept)


Testing: -1
Testing: 0
Testing: 1
Yielding: 1
Yielding: 2
Yielding: -2

dropwhile() does not filter every item of the input; after the condition is false the first time, all of the remaining items in the input are returned.


In [23]:
from itertools import *


def should_take(x):
    """Predicate for takewhile(): announce the value, then test it.

    Prints ``Testing: <x>`` so the notebook output shows exactly which items
    the predicate was called on.

    Returns:
        The result of ``x < 2``.
    """
    print('Testing:', x)
    below_two = x < 2
    return below_two


# Items are tested lazily, one per loop step, so "Testing" and "Yielding"
# lines interleave until the predicate first fails.
taken = takewhile(should_take, [-1, 0, 1, 2, -2])
for value in taken:
    print('Yielding:', value)


Testing: -1
Yielding: -1
Testing: 0
Yielding: 0
Testing: 1
Yielding: 1
Testing: 2

As soon as should_take() returns False, takewhile() stops processing the input.

compress() offers another way to filter the contents of an iterable. Instead of calling a function, it uses the values in another iterable to indicate when to accept a value and when to ignore it.


In [24]:
from itertools import *

# The selector repeats False, False, True forever, so compress() lets through
# only every third value of `data`.
every_third = cycle((False, False, True))
data = range(1, 10)

print(*compress(data, every_third), end=' ')
print()


3 6 9 

5 Group Data


In [26]:
import functools
from itertools import *
import operator
import pprint


@functools.total_ordering
class Point:
    """A 2-D point that compares and sorts lexicographically by ``(x, y)``.

    Only ``__eq__`` and ``__gt__`` are written by hand;
    ``functools.total_ordering`` derives the remaining ordering methods
    from them.

    Comparing against an object that lacks ``x``/``y`` attributes returns
    ``NotImplemented`` rather than raising ``AttributeError``. As a result
    ``Point(0, 0) == None`` is simply ``False``, and an ordering comparison
    such as ``Point(0, 0) > 5`` raises the conventional ``TypeError``.
    """

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __repr__(self):
        return '({}, {})'.format(self.x, self.y)

    @staticmethod
    def _is_point_like(other):
        """Return True when *other* exposes both coordinates."""
        return hasattr(other, 'x') and hasattr(other, 'y')

    def __eq__(self, other):
        if not self._is_point_like(other):
            # Let Python fall back to the other operand / identity comparison.
            return NotImplemented
        return (self.x, self.y) == (other.x, other.y)

    def __gt__(self, other):
        if not self._is_point_like(other):
            # total_ordering propagates NotImplemented to the derived methods.
            return NotImplemented
        return (self.x, self.y) > (other.x, other.y)


# Build seven Point instances: y counts 0..6 while x keeps cycling through
# 0, 1, 2. zip() ends with the finite y sequence, so the endless cycle() is
# safe to use here.
xs = cycle(islice(count(), 3))
ys = islice(count(), 7)
data = [Point(x, y) for x, y in zip(xs, ys)]
print('Data:')
pprint.pprint(data, width=35)
print()


Data:
[(0, 0),
 (1, 1),
 (2, 2),
 (0, 3),
 (1, 4),
 (2, 5),
 (0, 6)]


In [28]:
# groupby() only merges *adjacent* items that share a key, so grouping the
# unsorted data produces a separate one-element group for every point.
print('Grouped, unsorted:')
by_x = operator.attrgetter('x')
for x_value, members in groupby(data, key=by_x):
    print(x_value, [*members])
print()


Grouped, unsorted:
0 [(0, 0)]
1 [(1, 1)]
2 [(2, 2)]
0 [(0, 3)]
1 [(1, 4)]
2 [(2, 5)]
0 [(0, 6)]


In [30]:
# Order the points by (x, y) so that equal x values sit next to each other.
# NOTE: this sorts `data` in place; re-running the "Grouped, unsorted" cell
# after this one will show the sorted grouping instead.
data.sort()
print('Sorted:')
print(pprint.pformat(data, width=35))
print()


Sorted:
[(0, 0),
 (0, 3),
 (0, 6),
 (1, 1),
 (1, 4),
 (2, 2),
 (2, 5)]


In [31]:
# With the data sorted, points sharing an x value are adjacent, so groupby()
# now collects them into a single group per x.
print('Grouped, sorted:')
for x_value, members in groupby(data, key=lambda point: point.x):
    print(x_value, list(members))
print()


Grouped, sorted:
0 [(0, 0), (0, 3), (0, 6)]
1 [(1, 1), (1, 4)]
2 [(2, 2), (2, 5)]

The input sequence needs to be sorted on the key value in order for the groupings to work out as expected.

Nested for loops that iterate over multiple sequences can often be replaced with product(), which produces a single iterable whose values are the Cartesian product of the set of input values.


In [32]:
from itertools import *
import pprint

FACE_CARDS = ('J', 'Q', 'K', 'A')
SUITS = ('H', 'D', 'C', 'S')

# Ranks run 2..10 followed by the face cards. product() varies its last
# argument fastest, so all four suits of one rank come out together.
DECK = [*product(chain(range(2, 11), FACE_CARDS), SUITS)]

last_suit = SUITS[-1]
for rank, suit in DECK:
    # Every card is followed by a space; the last suit of a rank also ends
    # the row with a newline.
    print('{:>2}{}'.format(rank, suit), end=' \n' if suit == last_suit else ' ')


 2H  2D  2C  2S 
 3H  3D  3C  3S 
 4H  4D  4C  4S 
 5H  5D  5C  5S 
 6H  6D  6C  6S 
 7H  7D  7C  7S 
 8H  8D  8C  8S 
 9H  9D  9C  9S 
10H 10D 10C 10S 
 JH  JD  JC  JS 
 QH  QD  QC  QS 
 KH  KD  KC  KS 
 AH  AD  AC  AS