In [ ]:
from IPython.display import YouTubeVideo
YouTubeVideo('EnSu9hHGq5o') # How cool is the IPython Notebook?
In [ ]:
my_list = ['a', 'b', 'c', 4, 'e']
i = 0 # start a counter at 0
while i < len(my_list): # loop as long as the counter is still within the size of the list
v = my_list[i] # get the element
print(v) # do something with the element
i += 1 # increment counter
In [ ]:
for i in range(len(my_list)): # automatically choose the right values of i
v = my_list[i] # get the element
print(v) # do something with the element
In [ ]:
for v in my_list: # get each element
print(v) # do something with the element
No integers! No distraction over how lists happen to be implemented in the language!
In [ ]:
for v in ['if', 'and', 'or', 'but']:
print(v)
In [ ]:
for v in 'monty':
print(v)
In [ ]:
for v in {'a': 1, 'b': 2, 'c': 3, 'd': 4}:
print(v)
(in surprising order)
... for completion's sake:
In [ ]:
d = {'a': 1, 'b': 2, 'c': 3}
print('dict:', d)
print()
print('.keys() method')
print('------------')
for k in d.keys(): # Same as cell above, but *explicit*
print(k)
print()
print('.values() method')
print('------------')
for v in d.values():
print(v)
print()
print('.items() method')
print('------------')
for i in d.items():
print(i)
# or
print('.items() method')
print('------------')
for k, v in d.items():
print(k, v)
In [ ]:
path = '/home/hen/repos/open-source-science-workshop/bash/.bash_aliases'
with open(path) as f:
for line in f:
print(line, end='') # optional argument end, in this case supresses new line
Very handy because otherwise we'd be screwed if the file was too big to fit in memory.
In [ ]:
import re # re is short for regex which is short for regular expression
text = 'He was carefully disguised but captured quickly by police.'
for match in re.finditer(r'\w+ly', text):
print(match.group(0))
In [ ]:
import os
for root, dirs, files in os.walk('/home/hen/repos/open-source-science-workshop/sessions/2014-05-27'):
print(root)
print('----')
print('dirs:', dirs)
print('files:', files)
print()
In [ ]:
import itertools
for n in itertools.count(): # count forever... infinite iterable!
if n > 10:
break
print(n)
In [ ]:
iterable = itertools.chain( # Chain these together:
itertools.repeat(17, 3), # repeat 17 3 times
range(-2, 3), # range from -2 to 3
'sam', # some letters
itertools.cycle(range(4)), # cycle 0123 forever
)
for n in iterable:
print(n)
if n == 3:
break # cycle is also infinite, so we better stop at some point
In [ ]:
# Let's take out the infinite part from that iterable
iterable = itertools.chain(
itertools.repeat(17, 3),
range(-2, 3),
'sam',
)
In [ ]:
# Make a list
list(iterable)
In [ ]:
# Make a set
set(iterable)
Wait, what? The iterable was already depleted! If you know you want to use an iterable more than once, that means you won't get any benefit from it being a stream of one-at-a-time values, so you should make it into a list. Usually iterables are generated on-the-fly when we need them. For now we'll just create a new iterable for each example.
In [ ]:
iterable = itertools.chain(itertools.repeat(17, 3), range(-2, 3))
# Make a set
set(iterable)
In [ ]:
iterable = itertools.chain(itertools.repeat(17, 3), range(-2, 3))
# Use a comprehension to do some arithmetic
[2*x**2 for x in iterable]
In [ ]:
# Call a function that takes an iterable as argument
iterable = itertools.chain(itertools.repeat(17, 3), range(-2, 3))
sum(iterable)
In [ ]:
# Do both?
iterable = itertools.chain(itertools.repeat(17, 3), range(-2, 3))
sum(x**2 for x in iterable)
In [ ]:
# Sort it
iterable = itertools.chain(itertools.repeat(17, 3), range(-2, 3))
for x in sorted(iterable):
print(x)
sorted
takes an iterable, and produces an iterable!
In [ ]:
# More functions that like iterables
iterable = itertools.chain(itertools.repeat(17, 3), range(-2, 3))
print(min(iterable))
iterable = itertools.chain(itertools.repeat(17, 3), range(-2, 3))
print(max(iterable))
iterable = itertools.chain(itertools.repeat(17, 3), range(-2, 3))
print(any(x < 0 for x in iterable))
iterable = itertools.chain(itertools.repeat(17, 3), range(-2, 3))
print(all(iterable))
print('--'.join('ABC'))
In [ ]:
# Wrong
for i in range(len(my_list)):
v = my_list[i] # This only works for lists. Lots of iterables can't do this.
print(i, v)
In [ ]:
# Right
for i, v in enumerate(my_list): # Works on any iterable.
print(i, v)
In [ ]:
# Optional start argument
list(enumerate(my_list, start=1))
Hint: anytime you see range(len(something))
, you don't need it
In [ ]:
names = ['Eiffel Tower', 'Empire State', 'Sears Tower', 'Burj Khalifa', 'Taipei 101']
heights = [324, 381, 442, 828, 509]
for i in range(len(names)):
name = names[i]
height = heights[i]
print('{:12}: {} meters'.format(name, height))
Stay focused! You still don't have to think about integers!
In [ ]:
for name, height in zip(names, heights):
print('{:12}: {} meters'.format(name, height))
zip
: pair of streams -->
stream of pairs
In [ ]:
# Combine the above two examples
for idx, (name, height) in enumerate(zip(names, heights), start=1):
print('{}. {:12}: {} meters'.format(idx, name, height))
dict
can take streams of pairs...
In [ ]:
buildings = dict(zip(names, heights))
print(buildings)
In [ ]:
print(max(buildings.values()))
In [ ]:
print(max(buildings.items(), key=lambda x: x[1]))
The lambda
keyword makes a quick one-line function.
We could do the same thing like:
def key(x):
return x[1]
Functions like min
, max
, and sorted
, take an optional key
argument, where you can give it a function that takes a value and returns some value to actually sort the elements with.
The key we used above just takes the second item from each element, which for the items()
method on dictionaries, is the value.
In [ ]:
sorted([-2, 4, 6, -10, 24, -4, 0], key= lambda x: (x-3)**2)
In [ ]:
max(buildings, key=buildings.get)
# For each item, checks buildings.get(item)
In [ ]:
nums = [88, 73, 92, 72, 40, 38, 25, 20, 90, 72]
In [ ]:
diffs = []
for i in range(len(nums)-1):
diffs.append(nums[i+1] - nums[i])
print(diffs)
zip
can help here as well.
In [ ]:
diffs = []
for prev, n in zip(nums[:-1], nums[1:]):
diffs.append(n - prev)
print(diffs)
The next step would be to consider a comprehension.
In [ ]:
diffs = [n - prev for prev, n in zip(nums[:-1], nums[1:])]
print(diffs)
When the body of your loop is only one line, and your initializing an empty data structure above it, it's probably a good idea. In a more complicated example, though, leaving it as the loop will be more readable.
This is a common enough case that we might consider making a diff
function.
Actually, the package numpy
, an array computing library, has that.
We'll use numpy
later on.
In [ ]:
for n in nums:
if n % 2 == 0:
print(n) # do something for each even value
In this case it's not so bad, that condition is short. But what if we had to do a few lines of work to check the condition?
Really this loop is doing two things:
Let's refactor the selecting and the doing. Abstract the operations apart.
In [ ]:
def evens(stream):
selected = []
for value in stream:
if value % 2 == 0:
selected.append(value)
return selected
for n in evens(nums):
print(n)
Well, we've succesfully abstracted. The second part is beautiful, but the first part is ugly.
In [ ]:
def hello_world():
yield 'Hello'
yield 'world'
print(hello_world())
In [ ]:
for x in hello_world():
print(x)
Long explanation:
A generator is a function that can keep producing values as an iterable.
It runs until it hits a yield
statement, sends that value out, and pauses.
When it gets asked for another value, it continues where it left off.
In [ ]:
def hello_world_with_logging():
print('LOG: function started')
yield 'Hello'
print('LOG: function resumed')
yield 'world'
print('LOG: function resumed again')
for x in hello_world_with_logging():
print(x)
In [ ]:
def evens(stream):
"""
Yield only the even numbers from a stream.
""""
for n in stream:
if n % 2 == 0:
yield n
In [ ]:
for n in evens(nums):
print(n)
In [ ]:
for n in evens(itertools.count()):
print(n)
if n > 20:
break
What would happen with the non-generator evens
?
It would run out of memory trying to make an infinite list.
Generators produce values as you need them.
This is called laziness or lazy evalution.
In [ ]:
%%bash
echo 'config-type = ini
# My config file
user = hen
home = /home/hen
# Nothing to see here
uname = Linux 3.13.0.27-generic x86_64' > config.ini
In [ ]:
# test it
print(open('config.ini').read())
In [ ]:
# Here we go
config = {}
f = open('config.ini')
for line in f:
line = line.strip() # Remove surrounding whitespace
if line.startswith('#'):
continue # Skip comments
if not line:
continue # Skip blank lines
# An interesting line
# Split by =
name, value = line.split('=')
# Again, strip whitespace
config[name.strip()] = value.strip()
f.close()
print(config)
Again, this loop is doing two things:
So, we make a generator.
In [ ]:
def interesting_lines(lines):
"""
Filter the interesting lines from a file
(or any stream of lines).
"""
for line in lines:
line = line.strip()
if line.startswith('#'):
continue
if not line:
continue
yield line
And why not put the processing in its own function as well.
In [ ]:
def process(line):
"""
Process a config file line into a name, value pair
"""
words = line.split('=')
# Something could go wrong, so we should put a helpful error message
if len(words) != 2:
raise ValueError("Cannot process line '{}'".format(line))
name, value = words
return name.strip(), value.strip()
In [ ]:
config = {}
with open('config.ini') as f:
for line in interesting_lines(f):
name, value = process(line)
config[name] = value
print(config)
We can leverage the abstraction even more:
In [ ]:
config = dict(process(line) for line in interesting_lines(open('config.ini')))
print(config)
There's also the function filter
, which takes a boolean function as input, and uses it to decide what elements of an iterable to keep:
In [ ]:
def is_interesting(line):
"""
Return True if the line is interesting.
"""
line = line.strip()
if line.startswith('#'):
return False
return bool(line)
In [ ]:
config = dict(process(line) for line in filter(is_interesting, open('config.ini')))
print(config)
Are we breaking a Zen of Python rule by having two ways to do it? Maybe. I think the generator way is more Pythonic--the meaning is more obvious even without knowing exactly what the functions do. That's a subjective judgment, I'll admit.
In [ ]:
def range_2d(width, height):
"""
Produce a stream of 2D coordinates.
"""
for y in range(height):
for x in range(width):
yield x, y
In [ ]:
print(range_2d(3,3))
In [ ]:
print(list(range_2d(3, 3)))
(btw... this is a Cartesian product, and itertools
already has it)
In [ ]:
list(itertools.product('abc', [1, 2]))
In [ ]:
list(itertools.product(range(3), repeat=2))
In [ ]:
# Here's a lamda again
range_2d = lambda x, y: itertools.product(range(x), range(y))
But wait... rows and columns are still integers. What is the Pythonic way? Iterate over cells.
Ideally, this spreadsheet package has implemented this (it should, if it's Pythonic).
for cell in sheet.cells():
value = cell.get_value()
do_something(value)
if this_is_my_value(value):
break
An iterable is not the same thing as an iterator:
An iterable is a book full of pages, an iterator is a bookmark.
The iter
function makes an iterator.
The next
function gets the next value.
iterator = iter(iterable)
value = next(iterator)
value = next(iterator)
next
is the only operation you can do on an iterator.
You can't look back or ahead, you can't reset it.
In [ ]:
iterable = itertools.cycle(itertools.product([True, False], repeat=2))
iterator = iter(iterable)
print(next(iterator))
print(next(iterator))
print(next(iterator))
print(next(iterator))
print(next(iterator))
print(next(iterator))
Some functions actually make an iterator directly.
These are usually the ones that aren't very helpful when we print them.
They also return themselves when you call iter
on them, so it's not a big deal if you do (compare cells above and below).
In [ ]:
iterator = itertools.product([True, False], repeat=2)
print(iterator)
print(next(iterator))
print(next(iterator))
print(next(iterator))
print(next(iterator))
print(next(iterator))
print(next(iterator))
If there are no more values left, you'll get a StopIteration
error.
For a list, we need iter
to make an iterator (a list is an iterable).
In [ ]:
print(my_list)
In [ ]:
iterator = iter(my_list)
print(next(my_list))
print(next(my_list))
print(next(my_list))
print(next(my_list))
print(next(my_list))
Now you might guess how the for
loop works:
It calls iter
, then calls next
until it gets a StopIteration
error.
Whats a real example when we might want to use next
?
Maybe we want to ignore the first line in config.ini
:
In [ ]:
config = {}
with open('config.ini') as f:
# Read the first line.
header = next(f)
print('header:', header)
for line in interesting_lines(f):
name, value = process(line)
config[name] = value
print(config)
Make a generator function that solves Ex. 2 from last week. But instead of taking a list and returning a list, take a stream, and return a stream (i.e. make a generator function).
Ex. 2 Given a list of numbers, return a list where all adjacent == elements have been reduced to a single element, so [1, 2, 2, 3] returns [1, 2, 3]. You may create a new list or modify the passed in list.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
In [ ]:
def remove_adjacent(stream):
last = None
for item in stream:
if item != last:
yield item
last = item
print(list(remove_adjacent([None, 1, 1, 2, 2, 3, 3, 4, 2, 1, 1, 0])))
# Or, slightly fancier.
def remove_adjacent(stream):
iterator = iter(stream) # in case we have an iterable, not an iterator
last = next(iterator)
yield last # The first one is always good.
for item in iterator:
if item != last:
yield item
last = item
print(list(remove_adjacent([None, 1, 2, 2, 3, 3, 4, 2, 1, 1, 0])))
Make a generator function that takes two inputs:
q
x0
, make it optional with a default of 0
.The generator should yield values, each the previous value plus q * (random.random() - 0.5)
.
(We subtract 1 before multiplying because random.random()
returns a random value from 0 to 1 and we want an equal chance of negative and positive jumps).
The next cell will test it.
In [ ]:
import random
# def random_walk...
In [ ]:
import matplotlib.pyplot as plt
% matplotlib inline
T = 50
walk = random_walk(1)
plt.plot(range(T), [next(walk) for _ in range(T)])
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
In [ ]:
def random_walk(q, x0=0):
x = x0
while True:
yield x
x += q * (random.random() - 0.5)
In [ ]:
def generator_3np1(n):
while n!= 1:
yield n
if n % 2 == 0:
n //= 2
else:
n = 3*n + 1
list(generator_3np1(23))
Next, abstract the logic. Write one function that takes a value and returns the next. Write another function that decides when to stop. Then modify your generator function to use those functions.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
In [ ]:
def next_3np1(n):
if n % 2 == 0:
return n //2
else:
return 3*n + 1
def stop_condition(n):
return n == 1
def generator_3np1(n):
while not stop_condition(n):
yield n
n = next_3np1(n)
list(generator_3np1(23))