In [8]:
items = [1, 2, 3]
# Get the iterator
it = iter(items) # Invokes items.__iter__()
# Run the iterator; the following cells keep advancing this same `it` object
next(it) # Invokes it.__next__(); this first call returns 1
Out[8]:
In [9]:
next(it) # advances the list iterator again; returns 2 when run right after the first next(it)
Out[9]:
In [40]:
next(it) # assuming the cells ran in order, this third call returns 3 and leaves the iterator exhausted
Out[40]:
In [42]:
# if you uncomment this line it would raise a StopIteration exception, because the iterator is already exhausted
# next(it)
In [43]:
# A container class can expose iteration over an internal collection by
# implementing the __iter__() method and delegating to that collection.
class Node:
    """Minimal container wrapping a fixed list of children."""

    def __init__(self):
        self._children = [1, 2, 3]

    def __iter__(self):
        """Hand iteration off to the underlying list's own iterator."""
        children_iterator = iter(self._children)
        return children_iterator


root = Node()
for x in root:
    print(x)
map applies a function to every element of an iterable and returns a lazy iterator over the results (wrap it in list() to see the values)
In [10]:
simpsons = ['homer', 'marge', 'bart']
# map() is lazy in Python 3, so materialize it with list() to get the values
list(map(len, simpsons))  # [5, 5, 4]
# equivalent list comprehension (this is the value the cell displays)
[len(word) for word in simpsons]
Out[10]:
In [11]:
# map() is lazy in Python 3, so materialize it with list() to get the values
list(map(lambda word: word[-1], simpsons))  # ['r', 'e', 't']
# equivalent list comprehension (this is the value the cell displays)
[word[-1] for word in simpsons]
Out[11]:
filter returns an iterator containing the elements from a sequence for which a condition is True:
In [44]:
nums = range(5)
# filter() is lazy in Python 3, so materialize it with list() to get the values
list(filter(lambda x: x % 2 == 0, nums))  # [0, 2, 4]
# equivalent list comprehension (this is the value the cell displays)
[num for num in nums if num % 2 == 0]
Out[44]:
In [45]:
# Named slices document which columns of a fixed-width record hold which field.
# Record layout (61 columns):
#   0-19  filler dots
#   20-32 share count, left-aligned and space-padded
#   33-39 filler dots
#   40-50 price, left-aligned and space-padded
#   51-60 filler dots
# The record is built from explicit widths so the column positions cannot be
# lost if runs of spaces get collapsed when the notebook is exported.
record = '.' * 20 + '100'.ljust(13) + '.' * 7 + '513.25'.ljust(11) + '.' * 10
SHARES = slice(20, 32)
PRICE = slice(40, 48)
# int() and float() both tolerate the trailing padding spaces inside each slice
cost = int(record[SHARES]) * float(record[PRICE])
cost
Out[45]:
In [5]:
# zip() pairs up items from several iterables; putting the price first makes
# min()/max() rank the resulting (price, name) tuples by price
prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75,
}
# iterating a dict yields its keys, so `prices` stands in for prices.keys()
min_price = min(zip(prices.values(), prices))  # (10.75, 'FB')
max(zip(prices.values(), prices))
Out[5]:
A zip object is an iterator, so it can only be iterated over once!
In [6]:
prices_and_names = zip(prices.values(), prices.keys())
print(min(prices_and_names)) # min() consumes the zip iterator completely
# running the following code would fail: the iterator is now exhausted,
# so min() would receive an empty sequence and raise ValueError
#print(min(prices_and_names))
In [10]:
# zip() stops as soon as its shortest input runs out,
# so the extra 'z' at the end of b is never paired with anything
a = [1, 2, 3]
b = ['w', 'x', 'y', 'z']
for pair in zip(a, b):
    print(pair)
In [12]:
# zip_longest() keeps going until the longest input is exhausted;
# positions missing from the shorter inputs are filled with None by default
from itertools import zip_longest

for padded_pair in zip_longest(a, b):
    print(padded_pair)
In [13]:
# zip() can run over more than 2 sequences at once
c = ['aaa', 'bbb', 'ccc']
for triple in zip(a, b, c):
    print(triple)
In [51]:
from operator import itemgetter
In [52]:
rows = [
    {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
    {'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
    {'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
    {'fname': 'Big', 'lname': 'Jones', 'uid': 1004},
]
# itemgetter('fname') builds a callable that looks up row['fname'],
# which sorted() then uses as the sort key for each record
first_name_of = itemgetter('fname')
rows_by_fname = sorted(rows, key=first_name_of)
rows_by_fname
Out[52]:
In [53]:
# order the records by their 'uid' field; sorted() returns a new list and leaves `rows` as is
rows_by_uid = sorted(rows, key=itemgetter('uid'))
rows_by_uid
Out[53]:
In [22]:
# itemgetter() function can also accept multiple keys; it then returns a tuple,
# so rows are ordered by 'lname' first and by 'fname' among equal last names
rows_by_lfname = sorted(rows, key=itemgetter('lname','fname'))
rows_by_lfname
Out[22]:
In [23]:
from operator import attrgetter
In [24]:
# attrgetter-based keys make it possible to sort objects that don't natively support comparison
class User:
    """Simple record holding a user id; it defines no comparison methods."""

    def __init__(self, user_id):
        self.user_id = user_id

    def __repr__(self):
        template = 'User({})'
        return template.format(self.user_id)


users = [User(23), User(3), User(99)]
users
Out[24]:
In [25]:
sorted(users, key=attrgetter('user_id')) # new list of the users ordered by their user_id attribute
Out[25]:
In [26]:
min(users, key=attrgetter('user_id')) # the user whose user_id attribute is smallest
Out[26]:
The groupby() function works by scanning a sequence and finding sequential “runs” of identical values (or values returned by the given key function). On each iteration, it returns the value along with an iterator that produces all of the items in a group with the same value.
In [27]:
from operator import itemgetter
from itertools import groupby
In [28]:
rows = [
    {'address': '5412 N CLARK', 'date': '07/01/2012'},
    {'address': '5148 N CLARK', 'date': '07/04/2012'},
    {'address': '5800 E 58TH', 'date': '07/02/2012'},
    {'address': '2122 N CLARK', 'date': '07/03/2012'},
    {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'},
    {'address': '1060 W ADDISON', 'date': '07/02/2012'},
    {'address': '4801 N BROADWAY', 'date': '07/01/2012'},
    {'address': '1039 W GRANVILLE', 'date': '07/04/2012'},
]
# groupby() only merges *adjacent* items with equal keys, so the rows must be
# ordered by the grouping field first (this sorts `rows` in place)
by_date = itemgetter('date')
rows.sort(key=by_date)
# walk the rows one date-group at a time
for date, items in groupby(rows, key=by_date):
    print(date)
    for i in items:
        print(' %s' % i)
In [29]:
mylist = [1, 4, -5, 10, -7, 2, 3, -1]
# generator expression: nothing is filtered until `positives` is iterated
positives = (n for n in mylist if n > 0)
positives # displays the generator object itself, not the filtered values
Out[29]:
In [30]:
# iterating the generator runs the filter lazily, one value per step;
# a generator can only be consumed once
for x in positives:
    print(x)
In [31]:
nums = [1, 2, 3, 4, 5]
# a generator expression can be passed directly as the sole argument of a function
sum(x * x for x in nums) # 1 + 4 + 9 + 16 + 25 = 55
Out[31]:
In [32]:
# Output a tuple as CSV
s = ('ACME', 50, 123.45)
# str.join() needs strings, so each field is converted on the fly
','.join(str(x) for x in s) # 'ACME,50,123.45'
Out[32]:
In [33]:
# Report whether the current directory contains any .py files
import os

files = os.listdir('.')
# any() stops at the first matching name thanks to the lazy generator expression
print(
    'There be python!'
    if any(name.endswith('.py') for name in files)
    else 'Sorry, no python.'
)
In [34]:
# Reduce over a single field of a list of records
portfolio = [
    {'name': 'GOOG', 'shares': 50},
    {'name': 'YHOO', 'shares': 75},
    {'name': 'AOL', 'shares': 20},
    {'name': 'SCOX', 'shares': 65},
]
# smallest share count across all holdings
min(holding['shares'] for holding in portfolio)
Out[34]:
In [35]:
# both statements compute the same sum of squares; the second drops the redundant parentheses
s = sum((x * x for x in nums)) # Pass generator-expr as argument
s = sum(x * x for x in nums) # More elegant syntax
s
Out[35]:
itertools.compress() takes an iterable and an accompanying Boolean selector sequence as input. As output, it gives you all of the items in the iterable where the corresponding element in the selector is True.
In [36]:
from itertools import compress
In [37]:
# Parallel sequences: counts[i] belongs to addresses[i], so both must have the same length
addresses = [
    '5412 N CLARK',
    '5148 N CLARK',
    '5800 E 58TH',
    '2122 N CLARK',  # this comma is required: without it the string fuses with the next one
    '5645 N RAVENSWOOD',
    '1060 W ADDISON',
    '4801 N BROADWAY',
    '1039 W GRANVILLE',
]
counts = [0, 3, 10, 4, 1, 7, 6, 1]
In [38]:
# Boolean selector: True at each position whose count is greater than 5
more5 = [n > 5 for n in counts]
more5
Out[38]:
In [54]:
list(compress(addresses, more5)) # keeps each address whose matching selector entry in more5 is True
Out[54]:
In [55]:
# reversed() walks a sequence from its last element to its first
a = [1, 2, 3, 4]
for x in reversed(a):
    print(x)
In [58]:
# A class can customize the behavior of reversed() by implementing the __reversed__() method
class Counter:
    """Counts from 1 up to ``start``; reversed() counts from ``start`` down to 1."""

    def __init__(self, start):
        self.start = start

    def __iter__(self):
        """Forward iterator: yields 1, 2, ..., start."""
        n = 1
        while True:
            if not n <= self.start:
                return
            yield n
            n += 1

    def __reversed__(self):
        """Reverse iterator: yields start, start - 1, ..., 1."""
        n = self.start
        while True:
            if not n > 0:
                return
            yield n
            n -= 1


foo = Counter(5)
for x in reversed(foo):
    print(x)
In [64]:
# To expose state available at each step of iteration, use a class that implements __iter__()
class countingiterator:
    """Iterable wrapper that tracks how many items the current pass has produced.

    ``count`` is 0 before any iteration and equals the 1-based position of the
    most recently yielded item while a loop is running.
    """

    def __init__(self, items):
        self.items = items
        # Define the counter up front so reading `count` before the first
        # iteration step returns 0 instead of raising AttributeError
        self.clear_count()

    def __iter__(self):
        # This is a generator, so the reset below runs when the first item is
        # requested, not at the moment iter() is called
        self.clear_count()
        for item in self.items:
            self.count += 1
            yield item

    def clear_count(self):
        """Reset the running item count to zero."""
        self.count = 0


foo = countingiterator(["aaa", "bbb", "ccc"])
for i in foo:
    print("{}:{}".format(foo.count, i))
In [67]:
# itertools.islice() slices any iterator, including ones that cannot be indexed
import itertools


def count(n):
    """Endless generator producing n, n + 1, n + 2, ..."""
    current = n
    while True:
        yield current
        current += 1


c = count(0)
# generators do not support subscripting, so the next line would fail
# c[10:20]
for x in itertools.islice(c, 10, 15):
    print(x)
In [68]:
# islice() also accepts a step: start at position 10, stop before 15, take every 2nd item
c = count(0)
for x in itertools.islice(c, 10, 15, 2):
    print(x)
In [82]:
# When the number of leading items to skip is unknown but a skip condition
# can be stated, use dropwhile(): it discards items only until the condition
# first fails, then passes everything else through (including later '#')
from itertools import dropwhile

foo = ['#', '#', '#', '#', 'aaa', 'bbb', '#', 'ccc']


def getstrings(f):
    """Yield each element of f in order."""
    yield from f


for ch in dropwhile(lambda line: line.startswith('#'), getstrings(foo)):
    print(ch)
In [83]:
from itertools import permutations
In [85]:
items = ['a', 'b', 'c']
# every possible ordering of all the items
for ordering in permutations(items):
    print(ordering)
In [86]:
# pass a length to get orderings of smaller subsets (here: 2 items at a time)
for ordering in permutations(items, 2):
    print(ordering)
In [87]:
# itertools.combinations() ignores element order, so ('a', 'b') and ('b', 'a')
# count as the same selection and only one of them is produced
from itertools import combinations

for combo in combinations(items, 3):
    print(combo)
In [88]:
# all unordered selections of 2 items
for combo in combinations(items, 2):
    print(combo)
In [90]:
# itertools.combinations_with_replacement() does not remove an item from the
# pool of candidates after it is chosen; in other words, the same value can
# occur more than once within a single combination
from itertools import combinations_with_replacement

for combo in combinations_with_replacement(items, 3):
    print(combo)
In [1]:
# enumerate() yields (index, item) pairs, counting from 0 by default
my_list = ['a', 'b', 'c']
for position, element in enumerate(my_list):
    print(position, element)
In [3]:
# the optional second argument sets the number the counting starts from
for position, element in enumerate(my_list, 7):
    print(position, element)
In [15]:
# chain() walks several iterables back to back, as if they were one long
# sequence, without building a combined copy
from itertools import chain

a = [1, 2]
b = ['x', 'y', 'z']
for element in chain(a, b):
    print(element)
In [17]:
# Traverse a sequence containing nested sub-sequences as one big flat sequence
# (Iterable lives in collections.abc; the alias in `collections` was removed in Python 3.10)
from collections.abc import Iterable


def flatten(items, ignore_types=(str, bytes)):
    """Yield the leaf values of an arbitrarily nested iterable, depth-first.

    items: the (possibly nested) iterable to walk.
    ignore_types: iterable types to treat as single values instead of
        descending into them; by default strings and bytes are kept whole.
    """
    for x in items:
        if isinstance(x, Iterable) and not isinstance(x, ignore_types):
            # forward ignore_types so a caller's choice also applies to nested levels
            yield from flatten(x, ignore_types)
        else:
            yield x


items = [1, 2, [3, 4, [5, 6], 7], 8]
for x in flatten(items):
    print(x)
In [18]:
# heapq.merge() lazily interleaves inputs that are each already sorted
# into a single sorted stream
import heapq

a = [1, 4, 7]
b = [2, 5, 6]
for merged_value in heapq.merge(a, b):
    print(merged_value)