Generator expressions


In [6]:
symbols = '$#%^&'
[ord(s) for s in symbols]


Out[6]:
[36, 35, 37, 94, 38]

In [12]:
tuple(ord(s) for s in symbols)


Out[12]:
(36, 35, 37, 94, 38)

In [13]:
(ord(s) for s in symbols)


Out[13]:
<generator object <genexpr> at 0x1048d7f78>

In [14]:
for x in (ord(s) for s in symbols):
    print(x)


36
35
37
94
38

In [17]:
import array
array.array('I', (ord(s) for s in symbols))


Out[17]:
array('I', [36, 35, 37, 94, 38])

In [19]:
colors = ['black', 'white']
sizes = ['S', 'M', 'L']
for tshirt in ((c, s) for c in colors for s in sizes):
    print(tshirt)


('black', 'S')
('black', 'M')
('black', 'L')
('white', 'S')
('white', 'M')
('white', 'L')

In [28]:
for tshirt in ('%s %s' % (c, s) for c in colors for s in sizes):
    print(tshirt)


black S
black M
black L
white S
white M
white L

Tuples as Records


In [22]:
lax_coordinates = (33.9425, -118.408056)
city, year, pop, chg, area = ('Tokyo', 2003, 32450, 0.66, 8014)
traveler_ids = [('USA', '31195855'), ('BRA', 'CE342567'), ('ESP', 'XDA205856')]

In [25]:
for passport in sorted(traveler_ids):
    print('%s/%s' % passport)


BRA/CE342567
ESP/XDA205856
USA/31195855

In [27]:
for country, _ in traveler_ids:
    print(country)


USA
BRA
ESP

Tuple Unpacking


In [32]:
import os
_, filename = os.path.split('/home/kyle/afile.txt')
print(filename)


afile.txt

In [34]:
a, b, *rest = range(5)

In [36]:
a, b, rest


Out[36]:
(0, 1, [2, 3, 4])

In [37]:
a, b, *rest = range(3)
a, b, rest


Out[37]:
(0, 1, [2])

In [39]:
a, b, *rest = range(2)
a, b, rest


Out[39]:
(0, 1, [])

In [42]:
a, *body, c, d = range(5)
a, body, c, d


Out[42]:
(0, [1, 2], 3, 4)

In [43]:
*head, b, c, d = range(5)
head, b, c, d


Out[43]:
([0, 1], 2, 3, 4)

In [44]:
# Each record is (name, country code, population, (latitude, longitude)).
# The coordinates are kept as a nested tuple so a later loop can unpack them in one step.
metro_areas = [('Tokyo','JP',36.933,(35.689722,139.691667)),
               ('Delhi NCR', 'IN', 21.935, (28.613889, 77.208889)),
               ('Mexico City', 'MX', 20.142, (19.433333, -99.133333)),
               ('New York-Newark', 'US', 20.104, (40.808611, -74.020386)),
               ('Sao Paulo', 'BR', 19.649, (-23.547778, -46.635833)),
              ]

In [45]:
print('{:15} | {:^9} | {:^9}'.format('', 'lat.', 'long.'))


                |   lat.    |   long.  

In [46]:
fmt = '{:15} | {:9.4f} | {:9.4f}'

In [47]:
fmt


Out[47]:
'{:15} | {:9.4f} | {:9.4f}'

In [48]:
# Nested tuple unpacking: the inner (latitude, longitude) pair is unpacked
# directly in the loop target, alongside the outer fields.
for name, cc, pop, (latitude, longitude) in metro_areas:
    if longitude <= 0:  # keep only records whose longitude is zero or negative
        print(fmt.format(name, latitude, longitude))


Mexico City     |   19.4333 |  -99.1333
New York-Newark |   40.8086 |  -74.0204
Sao Paulo       |  -23.5478 |  -46.6358

Named tuples


In [49]:
from collections import  namedtuple

In [50]:
City = namedtuple('City', 'name country population coordinates')

In [52]:
tokyo = City('Tokyo', 'JP', 36.933, (35.689722, 139.691667))

In [53]:
tokyo


Out[53]:
City(name='Tokyo', country='JP', population=36.933, coordinates=(35.689722, 139.691667))

In [54]:
tokyo.population


Out[54]:
36.933

In [56]:
tokyo.name


Out[56]:
'Tokyo'

In [57]:
tokyo.coordinates


Out[57]:
(35.689722, 139.691667)

In [58]:
tokyo[1]


Out[58]:
'JP'

In [59]:
# a few useful methods on namedtuple
City._fields


Out[59]:
('name', 'country', 'population', 'coordinates')

In [64]:
LatLong = namedtuple('LatLong', 'lat long')
delhi_data = ('Delhi NCR', 'IN', 21.935, LatLong(28.613889, 77.208889))
delhi = City._make(delhi_data)  # instantiate a named tuple from an iterable

In [62]:
delhi._asdict()


Out[62]:
OrderedDict([('name', 'Delhi NCR'),
             ('country', 'IN'),
             ('population', 21.935),
             ('coordinates', LatLong(lat=28.613889, long=77.208889))])

In [63]:
for key, value in delhi._asdict().items():
    print(key + ':', value)


name: Delhi NCR
country: IN
population: 21.935
coordinates: LatLong(lat=28.613889, long=77.208889)

Slicing


In [65]:
# why slices and range exclude the last item

l = [10,20,30,40,50,60]
l[:2]


Out[65]:
[10, 20]

In [67]:
l[2:]


Out[67]:
[30, 40, 50, 60]

In [68]:
# slice objects
s = 'bicycle'
s[::3]


Out[68]:
'bye'

In [69]:
s[::-1]


Out[69]:
'elcycib'

In [70]:
s[::-2]


Out[70]:
'eccb'

In [81]:
invoice = """
0.....6.................................40........52...55........
1909  Pimoroni PiBrella                     $17.50    3    $52.50
1489  6mm Tactile Switch x20                $4.95    2      $9.90
1510  Panavise Jr. - PV-201                 $28.00    1    $28.00
1601  PiTFT Mini Kit 320x240                $34.95    1    $34.95
"""

In [84]:
# Named slice objects for the fixed-width columns of `invoice`; the bounds
# match the positions marked on the ruler line at the top of that string.
SKU = slice(0,6)
DESCRIPTION = slice(6, 40)
UNIT_PRICE = slice(40, 52)
QUANTITY = slice(52, 55)
ITEM_TOTAL = slice(55, None)  # open-ended: runs to the end of the line

In [85]:
# `invoice` starts with a newline, so split('\n') yields an empty string and
# then the ruler line; [2:] skips both. The string also ends with a newline,
# so the last element is '' and the loop prints one (almost) blank row at the end.
line_items = invoice.split('\n')[2:]
for item in line_items:
    print(item[UNIT_PRICE], item[DESCRIPTION])


    $17.50   Pimoroni PiBrella                 
    $4.95    6mm Tactile Switch x20            
    $28.00   Panavise Jr. - PV-201             
    $34.95   PiTFT Mini Kit 320x240            
 

Assigning to Slices


In [101]:
l = list(range(10))
l


Out[101]:
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [102]:
l[2:5] = [20, 30]
l


Out[102]:
[0, 1, 20, 30, 5, 6, 7, 8, 9]

In [103]:
del l[5:7]
l


Out[103]:
[0, 1, 20, 30, 5, 8, 9]

In [104]:
l[3::2] = [11, 22]
l


Out[104]:
[0, 1, 20, 11, 5, 22, 9]

In [105]:
l[2:5] = 100
l


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-105-d29be80f6a36> in <module>()
----> 1 l[2:5] = 100
      2 l

TypeError: can only assign an iterable

In [106]:
l[2:5] = [100]
l


Out[106]:
[0, 1, 100, 22, 9]

Using + and * with Sequences


In [108]:
l = [1, 2, 3]
l * 5


Out[108]:
[1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3]

In [109]:
5 * 'abcd'


Out[109]:
'abcdabcdabcdabcdabcd'

Building Lists of Lists


In [113]:
board = [['_'] *3  for i in range(3)]
board


Out[113]:
[['_', '_', '_'], ['_', '_', '_'], ['_', '_', '_']]

In [114]:
board[1][2] = 'X'
board


Out[114]:
[['_', '_', '_'], ['_', '_', 'X'], ['_', '_', '_']]

Augmented Assignment with Sequences


In [116]:
l = [1, 2, 3]
id(l)


Out[116]:
4378448072

In [117]:
l *= 2
id(l)  # same list


Out[117]:
4378448072

In [118]:
t=(1,2,3)
id(t)


Out[118]:
4371237552

In [119]:
t *= 2
id(t)  # new tuple was created


Out[119]:
4370770824

A += Assignment Puzzler


In [122]:
import dis
dis.dis('s[a] += b')


  1           0 LOAD_NAME                0 (s)
              3 LOAD_NAME                1 (a)
              6 DUP_TOP_TWO
              7 BINARY_SUBSCR
              8 LOAD_NAME                2 (b)
             11 INPLACE_ADD
             12 ROT_THREE
             13 STORE_SUBSCR
             14 LOAD_CONST               0 (None)
             17 RETURN_VALUE

• Putting mutable items in tuples is not a good idea.

• Augmented assignment is not an atomic operation: with t = (1, 2, [30, 40]), the statement t[2] += [50, 60] raises TypeError yet still extends the list inside t — an exception thrown after doing part of its job.

• Inspecting Python bytecode is not too difficult, and is often helpful to see what is going on under the hood.

list.sort and the sorted Built-In Function

sorted() makes a new list, doesn't touch the original.

sort() changes list in place.


In [126]:
fruits = ['grape', 'raspberry', 'apple', 'banana']
sorted(fruits)


Out[126]:
['apple', 'banana', 'grape', 'raspberry']

In [127]:
fruits


Out[127]:
['grape', 'raspberry', 'apple', 'banana']

In [128]:
sorted(fruits, reverse=True)


Out[128]:
['raspberry', 'grape', 'banana', 'apple']

In [129]:
sorted(fruits, key=len)


Out[129]:
['grape', 'apple', 'banana', 'raspberry']

In [130]:
sorted(fruits, key=len, reverse=True)


Out[130]:
['raspberry', 'banana', 'grape', 'apple']

In [131]:
fruits


Out[131]:
['grape', 'raspberry', 'apple', 'banana']

In [134]:
fruits.sort()  # note that sort() returns None

In [135]:
fruits


Out[135]:
['apple', 'banana', 'grape', 'raspberry']

Next: use bisect module to better search sorted lists.

Managing Ordered Sequences with bisect


In [163]:
# bisect is first used in this cell, so it is imported here; otherwise a
# fresh-kernel, top-to-bottom run fails with NameError on the call below.
import bisect

# Score thresholds (ascending) and the letter for each band between them.
breakpoints=[60, 70, 80, 90]
grades='FDCBA'
# bisect returns the insertion point that keeps `breakpoints` sorted; for a
# value above every threshold that is len(breakpoints).
bisect.bisect(breakpoints, 99)


Out[163]:
4

In [165]:
bisect.bisect(breakpoints, 59)


Out[165]:
0

In [166]:
bisect.bisect(breakpoints, 75)


Out[166]:
2

In [155]:
def grade(score, breakpoints=[60, 70, 80, 90], grades='FDCBA'):
    """Return the letter from `grades` that corresponds to `score`.

    `breakpoints` must be sorted in ascending order. bisect.bisect gives the
    number of breakpoints that are <= score, and that count is used directly
    as the index into `grades` (so `grades` needs len(breakpoints) + 1 items).
    """
    return grades[bisect.bisect(breakpoints, score)]

In [157]:
[grade(score) for score in [33, 99, 77, 70, 89, 90, 100]]


Out[157]:
['F', 'A', 'C', 'C', 'B', 'A', 'A']

In [158]:
grade(4)


Out[158]:
'F'

In [159]:
grade(93)


Out[159]:
'A'

Inserting with bisect.insort


In [169]:
import bisect
import random

SIZE = 7  # number of items to insert

random.seed(1729)  # fixed seed so the sequence of random items is repeatable

my_list = []
for i in range(SIZE):
    new_item = random.randrange(SIZE*2)  # random integer in range(SIZE*2)
    bisect.insort(my_list, new_item)  # insert while keeping my_list in ascending order
    print('%2d ->' % new_item, my_list)


10 -> [10]
 0 -> [0, 10]
 6 -> [0, 6, 10]
 8 -> [0, 6, 8, 10]
 7 -> [0, 6, 7, 8, 10]
 2 -> [0, 2, 6, 7, 8, 10]
10 -> [0, 2, 6, 7, 8, 10, 10]

Arrays


In [173]:
# NOTE: these from-imports rebind the names `array` and `random` to the class
# and the function, shadowing the modules that earlier cells imported with
# `import array` and `import random`.
from array import array
from random import random

# Build an array of 10**7 doubles (typecode 'd') straight from a generator
# expression, so no intermediate list is materialised.
floats = array('d', (random() for i in range(10**7)))
floats[-1]


Out[173]:
0.051056611520245765

In [174]:
# Dump the array's raw binary values to disk. The with-block guarantees the
# file is closed even if tofile() raises, which the manual open()/close()
# pair did not.
with open('floats.bin', 'wb') as fp:
    floats.tofile(fp)

In [176]:
# Read the 10**7 doubles back into a fresh array and confirm the round trip.
floats2 = array('d')
with open('floats.bin', 'rb') as fp:  # closed automatically, even on error
    floats2.fromfile(fp, 10**7)
# Only the last expression of a cell is displayed, so the comparison is the
# single result shown for this cell.
floats2 == floats


Out[176]:
True

To sort an array, use a = array.array(a.typecode, sorted(a)). To keep it sorted while adding to it, use bisect.insort.

Memory Views

The built-in memoryview class is a shared-memory sequence type that lets you handle slices of arrays without copying bytes.


In [184]:
# Changing the value of an array item by poking one of its bytes
import array

numbers = array.array('h', [-2, -1, 0, 1, 2])
memv = memoryview(numbers)
len(memv)


Out[184]:
5

In [185]:
memv[0]


Out[185]:
-2

In [187]:
memv_oct = memv.cast('B')  # reinterpret the same buffer as unsigned bytes (typecode 'B')
memv_oct.tolist()


Out[187]:
[254, 255, 255, 255, 0, 0, 1, 0, 2, 0]

In [188]:
memv_oct[5] = 4

In [191]:
numbers


Out[191]:
array('h', [-2, -1, 1024, 1, 2])

NumPy and SciPy


In [194]:
import numpy

a = numpy.arange(12)
a


Out[194]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [195]:
type(a)


Out[195]:
numpy.ndarray

In [196]:
a.shape


Out[196]:
(12,)

In [197]:
a.shape = 3, 4  # reshape in place: the 12 items become 3 rows of 4 columns
a


Out[197]:
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [198]:
a[2]


Out[198]:
array([ 8,  9, 10, 11])

In [199]:
a[2, 1]


Out[199]:
9

In [200]:
a[:, 1]


Out[200]:
array([1, 5, 9])

In [201]:
a.transpose()


Out[201]:
array([[ 0,  4,  8],
       [ 1,  5,  9],
       [ 2,  6, 10],
       [ 3,  7, 11]])

Loading, saving, and operating:

Use numpy.loadtxt()

Deques and Other Queues

Inserting and removing from the left of a list (the 0-index end) is costly. collections.deque is a thread-safe double-ended queue designed for fast inserting and removing from both ends.


In [204]:
from collections import deque

dq = deque(range(10), maxlen=10)
dq


Out[204]:
deque([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [205]:
dq.rotate(3)
dq


Out[205]:
deque([7, 8, 9, 0, 1, 2, 3, 4, 5, 6])

In [206]:
dq.rotate(-4)
dq


Out[206]:
deque([1, 2, 3, 4, 5, 6, 7, 8, 9, 0])

In [207]:
dq.appendleft(-1)
dq


Out[207]:
deque([-1, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [208]:
dq.extend([11, 22, 33])
dq


Out[208]:
deque([3, 4, 5, 6, 7, 8, 9, 11, 22, 33])

In [210]:
dq.extendleft([10, 20, 30, 40])
dq


Out[210]:
deque([40, 30, 20, 10, 3, 4, 5, 6, 7, 8])

A hidden cost: removing items from the middle of a deque is not as fast as removing them from either end.

On using single type in list: "we put items in a list to process them later, which implies that all items should support at least some operation in common".


In [211]:
# but a workaround with `key`
l = [28, 14, '28', 5, '9', '1', 0, 6, '23', 19]
sorted(l, key=int)


Out[211]:
[0, '1', 5, 6, '9', 14, 19, '23', 28, '28']

In [212]:
sorted(l, key=str)


Out[212]:
[0, '1', 14, 19, '23', 28, '28', 5, 6, '9']

In [ ]: