Appendix: Python Language Essentials


In [ ]:
from __future__ import division
from numpy.random import randn
import numpy as np
import os
import matplotlib.pyplot as plt
np.random.seed(12345)
plt.rc('figure', figsize=(10, 6))
from pandas import *
import pandas
np.set_printoptions(precision=4)

The Python interpreter

$ python
Python 2.7.2 (default, Oct  4 2011, 20:06:09)
[GCC 4.6.1] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> a = 5
>>> print a
5

In [ ]:
%%writefile hello_world.py
print 'Hello world'
$ ipython
Python 2.7.2 |EPD 7.1-2 (64-bit)| (default, Jul  3 2011, 15:17:51)
Type "copyright", "credits" or "license" for more information.

IPython 0.12 -- An enhanced Interactive Python.
?         -> Introduction and overview of IPython's features.
%quickref -> Quick reference.
help      -> Python's own help system.
object?   -> Details about 'object', use 'object??' for extra details.

In [1]: %run hello_world.py
Hello world

In [2]:

The Basics

Language Semantics

Indentation, not braces

for x in array:
    if x < pivot:
        less.append(x)
    else:
        greater.append(x)
for x in array { if x < pivot { less.append(x) } else { greater.append(x) } }
for x in array { if x < pivot { less.append(x) } else { greater.append(x) } }
a = 5; b = 6; c = 7

Everything is an object

Comments


In [ ]:
results = []
for line in file_handle:
    # keep the empty lines for now
    # if len(line) == 0:
    #   continue
    results.append(line.replace('foo', 'bar'))

Function and object method calls


In [ ]:
result = f(x, y, z)
g()

In [ ]:
obj.some_method(x, y, z)

In [ ]:
result = f(a, b, c, d=5, e='foo')

Variables and pass-by-reference


In [ ]:
a = [1, 2, 3]

In [ ]:
b = a

In [ ]:
a.append(4)
b

In [ ]:
def append_element(some_list, element):
    """Append ``element`` to ``some_list`` in place.

    Nothing is returned; the caller's own list object is the one modified.
    """
    some_list.append(element)

In [ ]:
data = [1, 2, 3]

append_element(data, 4)

In [4]: data
Out[4]: [1, 2, 3, 4]

Dynamic references, strong types


In [ ]:
a = 5
type(a)
a = 'foo'
type(a)

In [ ]:
'5' + 5

In [ ]:
a = 4.5
b = 2
# String formatting, to be visited later
print 'a is %s, b is %s' % (type(a), type(b))
a / b

In [ ]:
a = 5
isinstance(a, int)

In [ ]:
a = 5; b = 4.5
isinstance(a, (int, float))
isinstance(b, (int, float))

Attributes and methods

In [1]: a = 'foo'

In [2]: a.<Tab>
a.capitalize  a.format      a.isupper     a.rindex      a.strip
a.center      a.index       a.join        a.rjust       a.swapcase
a.count       a.isalnum     a.ljust       a.rpartition  a.title
a.decode      a.isalpha     a.lower       a.rsplit      a.translate
a.encode      a.isdigit     a.lstrip      a.rstrip      a.upper
a.endswith    a.islower     a.partition   a.split       a.zfill
a.expandtabs  a.isspace     a.replace     a.splitlines
a.find        a.istitle     a.rfind       a.startswith
>>> getattr(a, 'split')

"Duck" typing


In [ ]:
def isiterable(obj):
    """Return True when ``iter(obj)`` succeeds, False when it raises TypeError."""
    try:
        iter(obj)
    except TypeError:  # not iterable
        return False
    else:
        return True

In [ ]:
isiterable('a string')
isiterable([1, 2, 3])
isiterable(5)
if not isinstance(x, list) and isiterable(x): x = list(x)

Imports


In [ ]:
# some_module.py
PI = 3.14159


def f(x):
    """Return ``x`` increased by 2."""
    shifted = x + 2
    return shifted


def g(a, b):
    """Return ``a + b``."""
    combined = a + b
    return combined

In [ ]:
import some_module
result = some_module.f(5)
pi = some_module.PI

In [ ]:
from some_module import f, g, PI
result = g(5, PI)

In [ ]:
import some_module as sm
from some_module import PI as pi, g as gf

r1 = sm.f(pi)
r2 = gf(6, pi)

Binary operators and comparisons


In [ ]:
5 - 7
12 + 21.5
5 <= 2

In [ ]:
a = [1, 2, 3]
b = a
# Note, the list function always creates a new list
c = list(a)
a is b
a is not c

In [ ]:
a == c

In [ ]:
a = None
a is None

Strictness versus laziness


In [ ]:
a = b = c = 5
d = a + b * c

Mutable and immutable objects


In [ ]:
a_list = ['foo', 2, [4, 5]]
a_list[2] = (3, 4)
a_list

In [ ]:
a_tuple = (3, 5, (4, 5))
a_tuple[1] = 'four'

Scalar Types

Numeric types


In [ ]:
ival = 17239871
ival ** 6

In [ ]:
fval = 7.243
fval2 = 6.78e-5

In [ ]:
3 / 2

In [ ]:
from __future__ import division

In [ ]:
3 / float(2)

In [ ]:
3 // 2

In [ ]:
cval = 1 + 2j
cval * (1 - 2j)

Strings


In [ ]:
a = 'one way of writing a string'
b = "another way"

In [ ]:
c = """
This is a longer string that
spans multiple lines
"""

In [ ]:
a = 'this is a string'
a[10] = 'f'
b = a.replace('string', 'longer string')
b

In [ ]:
a = 5.6
s = str(a)
s

In [ ]:
s = 'python'
list(s)
s[:3]

In [ ]:
s = '12\\34'
print s

In [ ]:
s = r'this\has\no\special\characters'
s

In [ ]:
a = 'this is the first half '
b = 'and this is the second half'
a + b

In [ ]:
template = '%.2f %s are worth $%d'

In [ ]:
template % (4.5560, 'Argentine Pesos', 1)

Booleans


In [ ]:
True and True
False or True

In [ ]:
a = [1, 2, 3]
if a:
    print 'I found something!'

b = []
if not b:
    print 'Empty!'

In [ ]:
bool([]), bool([1, 2, 3])
bool('Hello world!'), bool('')
bool(0), bool(1)

Type casting


In [ ]:
s = '3.14159'
fval = float(s)
type(fval)
int(fval)
bool(fval)
bool(0)

None


In [ ]:
a = None
a is None
b = 5
b is not None

In [ ]:
def add_and_maybe_multiply(a, b, c=None):
    """Return ``a + b``, multiplied by ``c`` when ``c`` is supplied.

    ``c`` is compared against None by identity, so a falsy multiplier such
    as 0 is still applied.
    """
    total = a + b
    if c is None:
        return total
    return total * c

Dates and times


In [ ]:
from datetime import datetime, date, time
dt = datetime(2011, 10, 29, 20, 30, 21)
dt.day
dt.minute

In [ ]:
dt.date()
dt.time()

In [ ]:
dt.strftime('%m/%d/%Y %H:%M')

In [ ]:
datetime.strptime('20091031', '%Y%m%d')

In [ ]:
dt.replace(minute=0, second=0)

In [ ]:
dt2 = datetime(2011, 11, 15, 22, 30)
delta = dt2 - dt
delta
type(delta)

In [ ]:
dt
dt + delta

Control Flow

If, elif, and else


In [ ]:
if x < 0:
    # double quotes: the message itself contains an apostrophe
    print("It's negative")

In [ ]:
if x < 0:
    # double quotes: the message itself contains an apostrophe
    print("It's negative")
elif x == 0:
    print('Equal to zero')
elif 0 < x < 5:
    print('Positive but smaller than 5')
else:
    # this branch is reached for every x >= 5, including x == 5 itself
    print('Positive and larger than or equal to 5')

In [ ]:
a = 5; b = 7
c = 8; d = 4
if a < b or c > d:
    print 'Made it'

For loops


In [ ]:
for value in collection:
    # do something with value
    pass  # placeholder: a body made only of a comment is a syntax error

In [ ]:
sequence = [1, 2, None, 4, None, 5]
total = 0
for value in sequence:
    if value is None:
        continue
    total += value

In [ ]:
sequence = [1, 2, 0, 4, 6, 5, 2, 1]
total_until_5 = 0
for value in sequence:
    if value == 5:
        break
    total_until_5 += value

In [ ]:
for a, b, c in iterator:
    # do something
    pass  # placeholder: a body made only of a comment is a syntax error

While loops


In [ ]:
x = 256
total = 0
while x > 0:
    if total > 500:
        break
    total += x
    x = x // 2

pass


In [ ]:
if x < 0:
    print 'negative!'
elif x == 0:
    # TODO: put something smart here
    pass
else:
    print 'positive!'

In [ ]:
def f(x, y, z):
    """Placeholder that accepts three arguments and does nothing yet."""
    # TODO: implement this function!
    pass

Exception handling


In [ ]:
float('1.2345')
float('something')

In [ ]:
def attempt_float(x):
    """Return ``float(x)``, or ``x`` itself if the conversion raises anything.

    The bare ``except`` catches every exception; the later definitions of
    this function in the file narrow it to specific exception types.
    """
    try:
        converted = float(x)
    except:
        converted = x
    return converted

In [ ]:
attempt_float('1.2345')
attempt_float('something')

In [ ]:
float((1, 2))

In [ ]:
def attempt_float(x):
    """Return ``float(x)``, or ``x`` itself if float() raises ValueError.

    Any other exception (for example TypeError) propagates to the caller.
    """
    try:
        converted = float(x)
    except ValueError:
        converted = x
    return converted

In [ ]:
attempt_float((1, 2))

In [ ]:
def attempt_float(x):
    """Return ``float(x)``, or ``x`` itself on TypeError or ValueError."""
    try:
        converted = float(x)
    except (TypeError, ValueError):
        converted = x
    return converted

In [ ]:
f = open(path, 'w')

try:
    write_to_file(f)
finally:
    f.close()

In [ ]:
f = open(path, 'w')

try:
    write_to_file(f)
except:
    print 'Failed'
else:
    print 'Succeeded'
finally:
    f.close()

range and xrange


In [ ]:
range(10)

In [ ]:
range(0, 20, 2)

In [ ]:
seq = [1, 2, 3, 4]
for i in range(len(seq)):
    val = seq[i]

In [ ]:
# Add up every multiple of 3 or 5 below 10000.
# The accumulator is named `total` rather than `sum` so that the built-in
# sum() is not shadowed for cells that run afterwards.
total = 0
for i in xrange(10000):
    # % is the modulo operator; the test is on the loop variable i
    if i % 3 == 0 or i % 5 == 0:
        total += i

Ternary Expressions


In [ ]:
x = 5
value = 'Non-negative' if x >= 0 else 'Negative'

Data structures and sequences

Tuple


In [ ]:
tup = 4, 5, 6
tup

In [ ]:
nested_tup = (4, 5, 6), (7, 8)
nested_tup

In [ ]:
tuple([4, 0, 2])
tup = tuple('string')
tup

In [ ]:
tup[0]

In [ ]:
tup = tuple(['foo', [1, 2], True])
tup[2] = False

# however
tup[1].append(3)
tup

In [ ]:
(4, None, 'foo') + (6, 0) + ('bar',)

In [ ]:
('foo', 'bar') * 4

Unpacking tuples


In [ ]:
tup = (4, 5, 6)
a, b, c = tup
b

In [ ]:
tup = 4, 5, (6, 7)
a, b, (c, d) = tup
d

# Swapping two names the long way, through a temporary variable...
tmp = a
a = b
b = tmp

# ...and the same kind of swap written as a single tuple assignment
b, a = a, b

# Unpacking every element while iterating over a sequence of tuples
seq = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
for a, b, c in seq:
    pass

Tuple methods


In [ ]:
a = (1, 2, 2, 2, 3, 4, 2)
a.count(2)

List


In [ ]:
a_list = [2, 3, 7, None]

tup = ('foo', 'bar', 'baz')
b_list = list(tup)
b_list
b_list[1] = 'peekaboo'
b_list

Adding and removing elements


In [ ]:
b_list.append('dwarf')
b_list

In [ ]:
b_list.insert(1, 'red')
b_list

In [ ]:
b_list.pop(2)
b_list

In [ ]:
b_list.append('foo')
b_list.remove('foo')
b_list

In [ ]:
'dwarf' in b_list

Concatenating and combining lists


In [ ]:
[4, None, 'foo'] + [7, 8, (2, 3)]

In [ ]:
x = [4, None, 'foo']
x.extend([7, 8, (2, 3)])
x

In [ ]:
everything = []
for chunk in list_of_lists:
    everything.extend(chunk)

In [ ]:
everything = []
for chunk in list_of_lists:
    everything = everything + chunk

Sorting


In [ ]:
a = [7, 2, 5, 1, 3]
a.sort()
a

In [ ]:
b = ['saw', 'small', 'He', 'foxes', 'six']
b.sort(key=len)
b

Binary search and maintaining a sorted list


In [ ]:
import bisect
c = [1, 2, 2, 2, 3, 4, 7]
bisect.bisect(c, 2)
bisect.bisect(c, 5)
bisect.insort(c, 6)
c

Slicing


In [ ]:
seq = [7, 2, 3, 7, 5, 6, 0, 1]
seq[1:5]

In [ ]:
seq[3:4] = [6, 3]
seq

In [ ]:
seq[:5]
seq[3:]

In [ ]:
seq[-4:]
seq[-6:-2]

In [ ]:
seq[::2]

In [ ]:
seq[::-1]

Built-in Sequence Functions

enumerate


In [ ]:
i = 0
for value in collection:
   # do something with value
   i += 1

In [ ]:
for i, value in enumerate(collection):
    # do something with value
    pass  # placeholder: a body made only of a comment is a syntax error

In [ ]:
some_list = ['foo', 'bar', 'baz']
mapping = dict((v, i) for i, v in enumerate(some_list))
mapping

sorted


In [ ]:
sorted([7, 1, 2, 6, 0, 3, 2])
sorted('horse race')

In [ ]:
sorted(set('this is just some string'))

zip


In [ ]:
seq1 = ['foo', 'bar', 'baz']
seq2 = ['one', 'two', 'three']
zip(seq1, seq2)

In [ ]:
seq3 = [False, True]
zip(seq1, seq2, seq3)

In [ ]:
for i, (a, b) in enumerate(zip(seq1, seq2)):
    print('%d: %s, %s' % (i, a, b))

In [ ]:
pitchers = [('Nolan', 'Ryan'), ('Roger', 'Clemens'),
            ('Schilling', 'Curt')]
first_names, last_names = zip(*pitchers)
first_names
last_names

In [ ]:
zip(seq[0], seq[1], ..., seq[len(seq) - 1])

reversed


In [ ]:
list(reversed(range(10)))

Dict


In [ ]:
empty_dict = {}
d1 = {'a' : 'some value', 'b' : [1, 2, 3, 4]}
d1

In [ ]:
d1[7] = 'an integer'
d1
d1['b']

In [ ]:
'b' in d1

In [ ]:
d1[5] = 'some value'
d1['dummy'] = 'another value'
del d1[5]
ret = d1.pop('dummy')
ret

In [ ]:
d1.keys()
d1.values()

In [ ]:
d1.update({'b' : 'foo', 'c' : 12})
d1

Creating dicts from sequences


In [ ]:
mapping = {}
for key, value in zip(key_list, value_list):
    mapping[key] = value

In [ ]:
mapping = dict(zip(range(5), reversed(range(5))))
mapping

Default values


In [ ]:
if key in some_dict:
    value = some_dict[key]
else:
    value = default_value

In [ ]:
value = some_dict.get(key, default_value)

In [ ]:
words = ['apple', 'bat', 'bar', 'atom', 'book']

# Group the words by first letter with an explicit membership test.
by_letter = {}

for word in words:
    letter = word[0]
    if letter not in by_letter:
        by_letter[letter] = [word]
    else:
        by_letter[letter].append(word)

# The same grouping with dict.setdefault replacing the if/else. The dict is
# rebuilt from scratch so that no word is appended a second time.
by_letter = {}

for word in words:
    letter = word[0]
    by_letter.setdefault(letter, []).append(word)

by_letter

In [ ]:
from collections import defaultdict
by_letter = defaultdict(list)
for word in words:
    by_letter[word[0]].append(word)

In [ ]:
counts = defaultdict(lambda: 4)

Valid dict key types


In [ ]:
hash('string')
hash((1, 2, (2, 3)))
hash((1, 2, [2, 3])) # fails because lists are mutable

In [ ]:
d = {}
d[tuple([1, 2, 3])] = 5
d

Set


In [ ]:
set([2, 2, 2, 1, 3, 3])
{2, 2, 2, 1, 3, 3}

In [ ]:
a = {1, 2, 3, 4, 5}
b = {3, 4, 5, 6, 7, 8}
a | b  # union (or)
a & b  # intersection (and)
a - b  # difference
a ^ b  # symmetric difference (xor)

In [ ]:
a_set = {1, 2, 3, 4, 5}
{1, 2, 3}.issubset(a_set)
a_set.issuperset({1, 2, 3})

In [ ]:
{1, 2, 3} == {3, 2, 1}

List, set, and dict comprehensions


In [ ]:
strings = ['a', 'as', 'bat', 'car', 'dove', 'python']
[x.upper() for x in strings if len(x) > 2]

In [ ]:
unique_lengths = {len(x) for x in strings}
unique_lengths

In [ ]:
loc_mapping = {val : index for index, val in enumerate(strings)}
loc_mapping
# The same mapping built by passing (key, value) pairs to dict()
loc_mapping = dict((val, idx) for idx, val in enumerate(strings))

Nested list comprehensions


In [ ]:
all_data = [['Tom', 'Billy', 'Jefferson', 'Andrew', 'Wesley', 'Steven', 'Joe'],
            ['Susie', 'Casey', 'Jill', 'Ana', 'Eva', 'Jennifer', 'Stephanie']]

In [ ]:
names_of_interest = []
for names in all_data:
    # ">= 2" keeps names containing two or more e's, the same condition
    # the single nested comprehension version of this loop uses
    enough_es = [name for name in names if name.count('e') >= 2]
    names_of_interest.extend(enough_es)

In [ ]:
result = [name for names in all_data for name in names
          if name.count('e') >= 2]
result

In [ ]:
some_tuples = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
flattened = [x for tup in some_tuples for x in tup]
flattened

In [ ]:
flattened = []

for tup in some_tuples:
    for x in tup:
        flattened.append(x)

In [ ]:
# A comprehension inside a comprehension: yields a list of lists
[[x for x in tup] for tup in some_tuples]

Functions


In [ ]:
def my_function(x, y, z=1.5):
    """Return ``z * (x + y)`` when ``z > 1``, otherwise ``z / (x + y)``."""
    scale_up = z > 1
    total = x + y
    return z * total if scale_up else z / total

In [ ]:
my_function(5, 6, z=0.7)
my_function(3.14, 7, 3.5)

Namespaces, scope, and local functions


In [ ]:
def func():
    """Fill a local list ``a`` with 0 through 4; nothing is returned."""
    a = []
    a.extend(range(5))

In [ ]:
a = []
def func():
    """Append 0 through 4 to the module-level list ``a``."""
    a.extend(range(5))

In [ ]:
a = None
def bind_a_variable():
    """Rebind the module-level name ``a`` to a new empty list."""
    global a  # makes the assignment below target the module-level name
    a = []
bind_a_variable()
# print(...) with a single argument behaves the same on Python 2 and 3
print(a)

In [ ]:
def outer_function(x, y, z):
    """Placeholder showing a function defined inside another function."""
    def inner_function(a, b, c):
        """Placeholder local function; it is never called here."""
        pass
    pass

Returning multiple values


In [ ]:
def f():
    """Return the values 5, 6 and 7 packed into one tuple."""
    a, b, c = 5, 6, 7
    return (a, b, c)

# the returned tuple is unpacked into three names
a, b, c = f()

In [ ]:
return_value = f()

In [ ]:
def f():
    """Return the values 5, 6 and 7 in a dict keyed by 'a', 'b' and 'c'."""
    a, b, c = 5, 6, 7
    labelled = {'a' : a, 'b' : b, 'c' : c}
    return labelled

Functions are objects


In [ ]:
states = ['   Alabama ', 'Georgia!', 'Georgia', 'georgia', 'FlOrIda',
          'south   carolina##', 'West virginia?']

In [ ]:
import re  # Regular expression module

def clean_strings(strings):
    """Return a new list with each string stripped, de-punctuated and title-cased.

    Per value, in order: strip surrounding whitespace, delete every '!', '#'
    and '?' character, then apply ``str.title``.
    """
    return [re.sub('[!#?]', '', value.strip()).title()  # remove punctuation
            for value in strings]

In [ ]:
In [15]: clean_strings(states)
Out[15]:
['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South   Carolina',
 'West Virginia']

In [ ]:
def remove_punctuation(value):
    """Return ``value`` with every '!', '#' and '?' character deleted."""
    stripped = re.sub('[!#?]', '', value)
    return stripped

# the operations to apply to each string, in this order
clean_ops = [str.strip, remove_punctuation, str.title]

def clean_strings(strings, ops):
    """Return a new list in which every callable in ``ops`` has been applied,
    in order, to each element of ``strings``."""
    def apply_all(value):
        # feed the value through each operation in turn
        for function in ops:
            value = function(value)
        return value

    return [apply_all(value) for value in strings]

In [ ]:
In [22]: clean_strings(states, clean_ops)
Out[22]:
['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South   Carolina',
 'West Virginia']

In [ ]:
In [23]: map(remove_punctuation, states)
Out[23]:
['   Alabama ',
 'Georgia',
 'Georgia',
 'georgia',
 'FlOrIda',
 'south   carolina',
 'West virginia']

Anonymous (lambda) functions


In [ ]:
def short_function(x):
    """Return ``x * 2``."""
    doubled = x * 2
    return doubled

# the same operation written as an anonymous function
equiv_anon = lambda x: x * 2

In [ ]:
def apply_to_list(some_list, f):
    """Return a new list holding ``f(x)`` for every ``x`` in ``some_list``."""
    results = []
    for item in some_list:
        results.append(f(item))
    return results

ints = [4, 0, 1, 5, 6]
apply_to_list(ints, lambda x: x * 2)

In [ ]:
strings = ['foo', 'card', 'bar', 'aaaa', 'abab']

In [ ]:
strings.sort(key=lambda x: len(set(list(x))))
strings

Closures: functions that return functions


In [ ]:
def make_closure(a):
    """Return a zero-argument function that prints the captured value ``a``."""
    def closure():
        message = 'I know the secret: %d' % a
        print(message)
    return closure

closure = make_closure(5)

In [ ]:
def make_watcher():
    """Return a function that reports whether it was called with a value before.

    Each returned function keeps its own private record of the values seen.
    """
    have_seen = {}

    def has_been_seen(x):
        already = x in have_seen
        if not already:
            # remember the value for subsequent calls
            have_seen[x] = True
        return already

    return has_been_seen

In [ ]:
watcher = make_watcher()
vals = [5, 6, 1, 5, 1, 6, 3, 5]
[watcher(x) for x in vals]
def make_counter():
    """Return a function that returns 1, 2, 3, ... on successive calls."""
    # The count lives in a one-element list so the inner function can update
    # it by mutating the list rather than rebinding an enclosing name.
    count = [0]
    def counter():
        # increment and return the current count
        count[0] += 1
        return count[0]
    return counter

counter = make_counter()

In [ ]:
def format_and_pad(template, space):
    """Return a formatter that applies ``template % x`` and right-justifies
    the resulting text to a width of ``space`` characters."""
    def formatter(x):
        text = template % x
        return text.rjust(space)

    return formatter

In [ ]:
fmt = format_and_pad('%.4f', 15)
fmt(1.756)

Extended call syntax with *args, **kwargs


In [ ]:
a, b, c = args
d = kwargs.get('d', d_default_value)
e = kwargs.get('e', e_default_value)

In [ ]:
def say_hello_then_call_f(f, *args, **kwargs):
    """Print the received arguments, then return ``f(*args, **kwargs)``."""
    # Each line is formatted into one string first, so print(...) receives a
    # single argument and produces the same text on Python 2 and Python 3.
    print('args is %s' % (args,))
    print('kwargs is %s' % (kwargs,))
    print("Hello! Now I'm going to call %s" % f)
    return f(*args, **kwargs)

def g(x, y, z=1):
    """Return ``(x + y) / z``."""
    return (x + y) / z

In [ ]:
In [8]:  say_hello_then_call_f(g, 1, 2, z=5.)
args is (1, 2)
kwargs is {'z': 5.0}
Hello! Now I'm going to call <function g at 0x2dd5cf8>
Out[8]: 0.6

Currying: partial argument application


In [ ]:
def add_numbers(x, y):
    """Return ``x + y``."""
    total = x + y
    return total

In [ ]:
add_five = lambda y: add_numbers(5, y)

In [ ]:
from functools import partial
add_five = partial(add_numbers, 5)

In [ ]:
# compute 60-day moving average of time series x
# NOTE(review): pandas.rolling_mean looks like the old module-level rolling
# API, which newer pandas releases no longer ship -- confirm against the
# installed version (the method form would be x.rolling(60).mean()).
ma60 = lambda x: pandas.rolling_mean(x, 60)

# Take the 60-day moving average of all time series in data
data.apply(ma60)

Generators


In [ ]:
some_dict = {'a': 1, 'b': 2, 'c': 3}
for key in some_dict:
    print key,

In [ ]:
dict_iterator = iter(some_dict)
dict_iterator

In [ ]:
list(dict_iterator)

In [ ]:
def squares(n=10):
    """Generate the squares of the integers 1 through ``n``.

    The announcement is printed once, when iteration begins, rather than
    once for every generated value.
    """
    print('Generating squares from 1 to %d' % (n ** 2))
    for i in xrange(1, n + 1):
        yield i ** 2

In [ ]:
In [2]: gen = squares()

In [3]: gen
Out[3]: <generator object squares at 0x34c8280>

In [ ]:
In [4]: for x in gen:
   ...:     print x,
   ...:
Generating squares from 1 to 100
1 4 9 16 25 36 49 64 81 100

In [ ]:
def make_change(amount, coins=[1, 5, 10, 25], hand=None):
    """Generate every combination of ``coins`` that adds up to ``amount``.

    Each result is a list of coin values in which no coin is larger than
    the one before it. ``hand`` holds the coins chosen so far and is extended
    by the recursive calls.
    """
    if hand is None:
        hand = []
    if amount == 0:
        yield hand
    for coin in coins:
        # A coin is usable only if it does not overshoot the amount and is
        # not larger than the last coin taken (keeps combinations unique).
        if coin <= amount and (len(hand) == 0 or hand[-1] >= coin):
            extended = hand + [coin]
            for result in make_change(amount - coin, coins=coins,
                                      hand=extended):
                yield result

In [ ]:
for way in make_change(100, coins=[10, 25, 50]):
    print way
len(list(make_change(100)))

Generator expressions


In [ ]:
gen = (x ** 2 for x in xrange(100))
gen

In [ ]:
def _make_gen():
    """Yield the squares of 0 through 99, one value at a time."""
    for value in xrange(100):
        yield value ** 2
gen = _make_gen()

In [ ]:
sum(x ** 2 for x in xrange(100))
dict((i, i **2) for i in xrange(5))

itertools module


In [ ]:
import itertools
first_letter = lambda x: x[0]

names = ['Alan', 'Adam', 'Wes', 'Will', 'Albert', 'Steven']

# The loop variable is called `group` so that the `names` list being grouped
# is not overwritten by the loop.
for letter, group in itertools.groupby(names, first_letter):
    print('%s %s' % (letter, list(group)))  # group is a generator

Files and the operating system


In [ ]:
path = 'ch13/segismundo.txt'
f = open(path)

In [ ]:
for line in f:
    pass

In [ ]:
# Read inside a with-block so the file handle is closed once the lines are read
with open(path) as source:
    lines = [x.rstrip() for x in source]
lines

In [ ]:
# Copy every line longer than one character from `path` into tmp.txt.
# Both files are opened by the with-statement so both are closed afterwards.
with open(path) as source, open('tmp.txt', 'w') as handle:
    handle.writelines(x for x in source if len(x) > 1)

# Read the result back, again closing the handle when done
with open('tmp.txt') as handle:
    tmp_lines = handle.readlines()
tmp_lines

In [ ]:
os.remove('tmp.txt')