Unpacking tuple/sequence into a collection of variables
Any sequence/iterable can be unpacked into variables using an assignment operation. The number of variables and structure must match the number of sequence items:
In [ ]:
p = (4, 5, 6, 7)
x, y, z, w = p # x -> 4
data = ['ACME', 50, 91.1, (2012, 12, 21)]
name, _, price, date = data # name -> 'ACME', data -> (2012, 12, 21)
s = 'Hello'
a, b, c, d, e = s # a -> H
p = (4, 5)
x, y, z = p # "ValueError"
In [ ]:
def drop_first_last(grades):
""" Drop first and last exams, then average the rest. """
first, *middle, last = grades
return avg(middle)
def arbitrary_numbers():
""" Name and email followed by phone number(s). """
record = ('Dave', 'dave@example.com', '555-555-5555', '555-555-5544')
name, email, *phone_numbers = record # phone_number always a list
return phone_numbers
def recent_to_first_n():
""" Most recent quarter compares to the average of the first n. """
sales_records = ('23.444', '234.23', '0', 23.12, '15.56')
*trailing_qtrs, current_qtr = sales_record
trailing_avg = sum(trailing_qtrs) / len(trailing_qtrs)
return avg_comparison(trailing_avg, current_qtr)
This is often implemented with iterables of unknown(arbitrary) length, and known pattern: "everything after element 1 is a number".
In [ ]:
####### 1 ##############
records = [ ('foo', 1, 2), ('bar', 'hello'), ('foo', 3, 4) ]
def do_foo(x, y):
print('foo', x, y)
def do_bar(s):
print('bar', s)
for tag, *args in records:
if tag == 'foo':
do_foo(*args)
elif tag == 'bar':
do_bar(*args)
#########################
######## 2 ##############
line = 'nobody:*:-2:-2:Unprivileged User:/var/empty:/usr/bin/false'
uname, *fields, homedir, sh = line.split(':') # uname -> nobody
#########################
######### 3 #############
record = ('ACME', 50, 123, 45, (12, 18, 2012))
name, *_, (*_, year) = record # name and year
#########################
######### 4 #############
def sum(items):
""" Recursions are not recommended w/ Python. """
head, *tail = items
return head + sum(*tail) if tail else head
#########################
Keep a limited history of the last few items seen during iteration or processing.
Use collections.deque: perform a simple text search on a sequence of lines and yield matching lines with previous N lines of conext when found:
In [ ]:
from collections import deque
def search(lines, pattern, history=5):
""" Returns a line that matches the pattern and 5 previous lines"""
previous_lines = deque(maxlen=history) # a generator of a list with max length
for line in lines:
if pattern in line:
yield line, previous_lines
previous_lines.append(line)
# Example use on a file
if __name__ == '__main__':
with open('somefile.txt') as f:
for line, prevlines in search(f, 'python', 5):
for pline in prevlines:
print(pline, end='')
print(line, end='')
print('-' * 20)
Generator functions (with yield) are common when searching for items. This decouples the process of searching from the code that uses results:
In [ ]:
######## 1, 2, 3 ########
q = deque(maxlen=3)
q.append(1)
q.appendleft(4)
q.pop() # 1
q.popleft() # 4
#########################
In [ ]:
import heapq
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
print(heapq.nlargest(3, nums)) # [42, 37 ,23]
print(heapq.nsmallest(3, nums)) # [-4, 1, 2]
heap.heappop(nums) # -4
# use key parameter to use with complicated data structures
portfolio = [
{'name': 'IBM', 'shares': 100, 'price': 91.1},
{'name': 'AAPL', 'shares': 50, 'price': 543.22},
{'name': 'FB', 'shares': 200, 'price': 21.09},
{'name': 'HPQ', 'shares': 35, 'price': 31.75},
{'name': 'YHOO', 'shares': 45, 'price': 16.35},
{'name': 'ACME', 'shares': 75, 'price': 115.65}
]
cheap = heapq.nsmallest(3, portfolio, key=lambda s: s['price'])
expensive = heapq.nlargest(3, portfolio, key=lambda s: s['price'])
# if N is close to the size of the items:
sorted(nums)[:N] # a better approach
In [ ]:
import heapq
class PriorityQueue:
def __init__(self):
self._queue = []
self._index = 0
def __repr__(self):
return 'PriorityQueue({}) with index({})'.format(self._queue, self._index)
def push(self, item, priority):
heapq.heappush(self._queue, (-priority, self._index, item)) # heappush(list, ())
self._index += 1
def pop(self):
return heapq.heappop(self._queue)[-1] # self_queue includes [(priority, index, item)]
class Item:
def __init__(self, name):
self.name = name
def __repr__(self):
return 'Item({!r})'.format(self.name)
q = PriorityQueue()
print(q)
q.push(Item('foo'), 1)
print(q)
q.push(Item('bar'), 5)
print(q)
q.push(Item('spam'), 4)
print(q)
q.push(Item('grok'), 1)
print(q)
q.pop() # -> Item('bar')
print(q)
q.pop() # -> Item('spam')
print(q)
q.pop() # -> Item('foo')
print(q)
q.pop() # -> Item('grok')
print(q)
# foo and grok were popped in the same order in which they were inserted
This recipe focuses on the use of heapq module. Functions heapq.heappush() and heapq.heappop() insert and remove items from a list _queue so that the first item in the list has the highest priority.
index value is used to properly order items with the same priority; index also works for comparison operations:
By introducing the extra index and making (priority, index, item) tuples, you avoid this problem entirely since no two tuples will ever have the same value for index (and Python never bothers to compare the remaining tuple values once the result of com‐ parison can be determined)::
we can use this queue for communication between threads, but we will need to add appropriate locking and signaling (look ahead)
Make a dictionary that maps keys to more than one value (multidict)
A dictionary is a mapping where each key is mapped to a single value. When mapping keys to multiple values, we need to store multiple values in a different container: list or set.
In [15]:
from collections import defaultdict
d = defaultdict(list) # multiple values will be added to a list
d['a'].append(1)
d['a'].append(2)
d['b'].append(4)
d = defaultdict(set) # multiple values will be added to a set
d['a'].add(1)
d['b'].add(2)
d['a'].add(5)
# Messier setdefault
d = {}
d.setdefault('a', []).append(1)
d.setdefault('a', []).append(2) # will add to the existing list
# Even messier
d = {}
for key, value in paiers:
if key not in d:
d[key] = []
d[key].append(value)
# Best!
d = defaultdict(list)
for key, value in pairs:
d[key].append(value)
Control the order of items in a dictionary when iterating or serializing
Use OrderedDict from the collections to control dictionary order. It is particularly useful when building a mapping that later will be serialized or encoded into a different format. For example, when controlling the order of fields appearing in a JSON encoding, first build the data in OrderedDict and then json dump.
In [ ]:
from collections import OrderedDict
d = OrderedDict()
d['foo'] = 1
d['bar'] = 2
d['spam'] = 3
d['grok'] = 4
for key in d:
print(key, d[key]) # -> 'foo 1', 'bar 2', 'spam 3', 'grok 4'
# Use when serializing JSON
import json
json.dumps(d) # -> '{"foo": 1, "bar": 2, "spam": 3, "grok": 4}'
An OrderedDict is an expensive procedure - beware for items exceeding 100000 lines:
Performing various calculations (min, max, sort) on a dictionary
Reverse keys and values, then perform a calculation function on the zip result.
Important:
1. max/min/sort is performed on the keys
2. if the keys are the same, max/min/sort is then based on the values
3. zip creates an iterator, which can only be consumed once
In [ ]:
prices = {
'ACME': 45.23,
'AAPL': 612.78,
'IBM': 205.55,
'HPQ': 37.20,
'FB': 10.75
}
# to get calculated values first reverse and zip
min_price = min(zip(prices.values(), prices.keys())) # (10.75, 'FB')
max_price = max(zip(prices.values(), prices.keys())) # (612.78, 'AAPL')
# to rank the data use zip with sorted
prices_sorted = sorted(zip(prices.values(), prices.keys())) # [(10.75, 'FB'), (37.2, 'HPQ')...]
# the iterator can be consumed only once
prices_and_names = zip(prices.values(), prices.keys())
print(min(prices_and_names)) # result OK
print(max(prices_and_names)) # ValueError: max() arg is an empty sequence
In [ ]:
#### 1 #############
min(prices) # 'AAPL'
max(prices) # 'IBM'
#### 2 ############
min(prices.values()) # 10.75
max(prices.values()) # 612.78
#### 3 ############
min(prices, key=lambda k: prices[k]) # 'FB'
max(prices, key=lambda k: prices[k]) # 'AAPL' -> perfrom calculation on values and return key
# to get the value as well as the key, additionally:
min_key = min(prices, key=lambda k: prices[k])
min_value = prices[min(prices, key=lambda k: prices[k])]
#### 4, 5 #########
prices = { 'AAA' : 45.23, 'ZZZ': 45.23 }
min(zip(prices.values(), prices.keys())) # (45.23, 'AAA')
max(zip(prices.values(), prices.keys())) # (45.23, 'ZZZ')
In [4]:
a={
'x' : 1,
'y' : 2,
'z' : 3
}
b={
'w' : 10,
'x' : 11,
'y' : 2
}
# find keys in common
a.keys() & b.keys() # {'x', 'y'}
# find keys in a that are not in b
a.keys() - b.keys() # {'z'}
# find (key, value) pairs in common
a.items() & b.items() # {('y', 2)}
# alter/filter dictionary contents - make a new dict with selected keys removed
c = { key: a[key] for key in a.keys() - {'z', 'w'}} # {'x': 1, 'y': 2}
The keys() method of a dictionary returns a keys-view object that exposes the keys. Key views support set operations: unions, intersections, and differences.
The items() method of a dictionary returns an items-view object consisting of (key, value) pairs. This object supports similar set operations and can be used to perform operations such as finding out which key-value pairs two dictionaries have in common.
Although similar, the values() method of a dictionary does not support the set oper‐ ations described in this recipe. In part, this is due to the fact that unlike keys, the items contained in a values view aren’t guaranteed to be unique. However, if you must perform such calculations, they can be accomplished by simply converting the values to a set first.
Eliminate the duplicate values in a sequence, but preserve the order
In [ ]:
###### 1 #########
def dedupe(items):
''' Add a unique item to the seen, and then check agains seen.'''
seen = set()
for item in items:
if item not in seen:
yield item
seen.add(item)
a = [1, 5, 2, 1, 9, 1, 5, 10]
list(dedupe(a)) # [1, 5, 2, 9, 10]
##### 2 ##########
def dedupe(items, key=None): # key is similar to min/max/sorted
''' Purpose of the key argument is to specify a function(lambda)
that converts sequence items into a hashable type for the
purposes of duplicate detection.
'''
seen = set()
for item in items:
val = item if key is None else key(item) # key could be lambda of values, keys, etc.
if val not in seen:
yield item
seen.add(val)
a = [ {'x':1, 'y':2}, {'x':1, 'y':3}, {'x':1, 'y':2}, {'x':2, 'y':4}]
# remove duplicates based on x/y values
list(dedupe(a, key=lambda d: (d['x'], d['y']))) # [{'x': 1, 'y': 2}, {'x': 1, 'y': 3}, {'x': 2, 'y': 4}]
##### 3 #########
# remove duplicates based on x values - for each item in "a" sequence execute the lambda function
list(dedupe(a, key=lambda d: d['x'])) # [{'x': 1, 'y': 2}, {'x': 2, 'y': 4}]
In [ ]:
# let's eliminate duplicate lines from a file using the dedupe(items, key=None) generator
with open('somefile.txt', 'r') as f:
# the generator will spit out a single value (line) at a time,
# while keeping track (a pointer) to where it is located during each yield
for line in dedupe(f):
# process unique lines
pass
In [ ]: