Table of content:
In [1]:
%matplotlib inline
Ex0: More than just code
In [2]:
L = []
print dir(L)
for elt in dir(L):
print elt, '\t', getattr(L, elt).__doc__
Ex1: nested dicts
In [3]:
from pprint import pprint
cl = 'module level'
def test_function():
def inner_test():
st2 = 'secondary_function'
cl = 'inner test'
print 'inner function locals: \n', pprint(locals())
st = 'primary function'
it = inner_test()
cl = 'test function'
print 'locals: \n', pprint(locals())
# print 'globals:', pprint(globals())
return 'test function return'
test_function()
Out[3]:
In [4]:
gl = globals()
print '__name__' in gl.keys()
print gl['__name__']
import numpy as np
print np.__name__
Ex2: First catch-22:
Tell what's wrong with the code below and correct it:
In [5]:
foo = 'bar'
def ret_foo():
return foo
def nice_foo():
foo = 'nice'
def foo_mod_function():
foo += '-bar'
if __name__ == '__main__':
print foo
print type(foo)
print foo.__class__
print ret_foo()
nice_foo()
print foo
foo_mod_function()
print foo
Ex3: Duck Typing Make all the modification necesseray for the code to run correctly
In [34]:
if __name__ == '__main__':
print type('a')
print type("a")
print type('acknowledge')
print type(u'acknowledge')
print type(3)
print type(3.1)
print type(foo2)
print '---------------'
print isinstance('a', str)
print isinstance(u'a', str)
print isinstance('a', bool)
print isinstance([], (tuple, list, set))
print issubclass(str, bool)
print '---------------'
print bool('a')
print bool('')
print bool([])
print '---------------'
print 'a' + 'b'
print 1 + 2
print 1 + 2.1
print 'a' + 1
print '---------------'
print 'Py' * 2
print 'Py'-'thon'
print [1,2,3]*2
print 1 / 2
print 1 % 2
print 1 / 2.
print 1 % 2.
assert(1 / 2 == 1 / 2.)
print 3.621 % 2.5
print 8.621 % 7.5
assert(3.621 % 2.5 == 8.621 % 7.5)
Ex4: Catching/Trhowing the exceptions
Make the function below work in accordance with specification
In [7]:
def simple_adder(var1, var2): #tester function: no need to modify
return var1 + var2
def adder(var1, var2):
"""
Adds var1 and var2.
In case their types are incompatible, both variables are converted to strings and their sum returned.
The variables are always printed before being returned
If unrelated error is raised from within variables, it will be passed along and no matter
If a TypeError was raised, it will be printed before printing the variables
"""
return var1 + var2
if __name__ == '__main__':
print adder(1, 1)
print adder(1, 3.1)
print adder('1', 'a')
print adder('a', 1)
print adder(2, 'b')
However, complexity comes at the expense of performance
In [8]:
%timeit 1 + 1
%timeit simple_adder(1, 1)
%timeit adder(1, 1)
Ex5: Mutability and dict keys Correct all the errors
In [10]:
import hashlib
from hashlib import md5
dico = {'Name': 'Zara', 'Age': 27}
sup_str = 'super'
class crazy_string(object):
def __init__(self, value):
self.value = value
def __hash__(self):
m = hashlib.md5()
m.update(self.value)
return int('0x'+m.hexdigest(),0)
def __eq__(self, other):
if type(other) == type(self):
return self.value == other.value
else:
return False
if __name__ == "__main__":
print "Value : %s" % dict.get('Age')
print "Value : %s" % dict.get('Sex', "Never")
print sup_str, '\t', hex(id(sup_str))
sup_str += '!'
print sup_str, '\t', hex(id(sup_str))
print '---------------------'
cstr = crazy_string('super')
dico[cstr] = 'sure?'
print dico
cstr.value = 'super!'
print dico
dico[cstr]
Ex6: Mutability: pointers v.s. copies
Explain what happens below and correct the erroneous lines
In [11]:
global foo
foo = ['bar']
def ret_foo():
return foo
def nice_foo():
foo = ['nice']
return foo
def real_nice_foo():
foo[0] = 'nice'
def foo_mod_function():
global foo
foo += ['bar']
def simple_function(value = 0):
print value
value += 1
return value
def first_closure(value = [0]):
print value[0]
value[0] += 1
return value[0]
if __name__ == '__main__':
print foo
print ret_foo(), hex(id(foo))
print '---------------'
same_foo = nice_foo()
print same_foo, hex(id(same_foo))
print foo, hex(id(foo))
print '---------------'
real_nice_foo()
print foo, hex(id(foo))
print '---------------'
foo_mod_function()
print '---------------'
simple_function()
simple_function()
simple_function(1)
assert(simple_function() == simple_function())
print '---------------'
first_closure()
first_closure()
first_closure([1])
first_closure([1])
first_closure()
print '---------------'
print dir(first_closure)
print first_closure.func_closure
print first_closure.func_defaults
print first_closure.__closure__
print '---------------'
assert(first_closure() == first_closure())
Ex7: Closures in Closures enclose things well
Get rid of errors and ensure that everything works as the docs say
In [18]:
def closure(variable, memory = []):
print memory
memory += [variable]
def closure_generator(initial_memory):
def inner_closure(variable, memory=[initial_memory]):
"""
Adds variavble to the memory. If types mismatch, cast everything to string
"""
print memory
memory[0] += variable
return inner_closure
if __name__ == "__main__":
print dir(closure)
print closure.func_defaults
closure(3)
closure(2)
closure('pretty')
closure('')
_closure = closure_generator(3)
_closure(1)
_closure(3)
_closure('pretty')
Ex8: Wrappers wrap well
Modify the wrapper below so that it caches calls to a function
you might need the following knowledge:
- ```*args``` give you a list of unnamed arguments passed to the function
- `**kwargs` give you a dict of named arguments passed to the function
- `kwargs.update(dict(zip(func.func_code.co_varnames, args)))` allows you to transform unnamed arguments into named ones
- tuples are only equal if their contents and order in which contents appear are identical
- you can enumerate the contents of a dict with a `name_of_dict.iteritems()` command
- you can sort in a simple maner with a `sorted(whatever you need to sort)`
- dicts only take as keys hashable items
In [50]:
# courtesy http://www.brianholdefehr.com/decorators-and-functional-python
def logging_decorator(func):
def wrapper():
wrapper.count += 1
print "The function I modify has been called {0} times(s).".format(
wrapper.count)
func()
wrapper.count = 0
return wrapper
def a_function():
print "I'm a normal function."
def print_args(*args):
print args
def print_kwargs(**kwargs):
print kwargs
if __name__ == "__main__":
modified_function = logging_decorator(a_function)
modified_function()
modified_function()
@logging_decorator
def a_function():
print "I'm a normal function."
a_function()
lst = 1, 2, 3, 4
print_args(lst)
print_args(*lst)
dct = {'a':1, 'b':2, 'c':3}
# print_kwargs(dct)
print_kwargs(**dct)
In [113]:
# Your solution here please
def caching_decorator(func):
def wrapper(*args, **kwargs):
pass
return wrapper
def a_function(arg1, arg2):
print arg1, arg2
return arg1 + arg2
if __name__ == "__main__":
modified_function = caching_decorator(a_function)
print modified_function(1, 1)
print modified_function('a', 'b')
print modified_function('a', 'b')
Ex9: runtime class modification
Explain where the erorrs come from and correct them
In [19]:
class little_class_in_the_prairie(object):
def __init__(self, payload):
self.payload = payload
@staticmethod
def present_myself(asker='John'):
return "I am a little house in the prairie and I know '%s'"% asker
@classmethod
def present_myself_1(cls, asker='John'):
return "I am a %s and I know '%s'"% (cls, asker)
def present_myself_2(self, asker='John'):
return "I am a little house in the prairie, I know '%s' and my content is '%s'"% (asker, self.payload)
def outside_function(self):
return self.payload
if __name__ == "__main__":
lcitp = little_class_in_the_prairie('inner payload')
print lcitp.payload
print '----------------'
# print little_class_in_the_prairie.payload
print little_class_in_the_prairie.present_myself()
print lcitp.present_myself()
print little_class_in_the_prairie.present_myself_1()
print lcitp.present_myself_1()
# print little_class_in_the_prairie.present_myself_2()
print lcitp.present_myself_2()
print '----------------'
print getattr(lcitp, 'payload')
print getattr(lcitp, '__init__')
print getattr(lcitp, 'present_myself_2')
print getattr(lcitp, 'present_myself_2')() # < Yep, this is totally a Pythonic currying notation
print '----------------'
print hasattr(lcitp, 'present_myself_2')
print hasattr(lcitp, 'not_my_function')
setattr(lcitp, 'not_my_function', outside_function)
print hasattr(lcitp, 'not_my_function')
print lcitp.not_my_function()
Ex10: Advanced wrappers
Modify the wrapper below so that it times the execution of the function it wraps. Make sure the wrapper preserves the name of the function and it's documentation.
In [20]:
def args_as_ints(f):
def g(*args, **kwargs):
args = [int(x) for x in args]
kwargs = dict((k, int(v)) for k, v in kwargs.items())
return f(*args, **kwargs)
return g
@args_as_ints
def funny_function(x, y, z=3):
"""Computes x*y + 2*z"""
return x*y + 2*z
Ex11: Pythonic map-reduce
In [22]:
def increment_list(lst):
newlist = []
for elt in lst:
newlist.append(elt+1)
return newlist
def plus_one(integer):
return integer+1
def sum_integers(integer1, integer2):
return integer1+integer2
if __name__ == '__main__':
test_lst = [1,1,1,1]
print increment_list(test_lst)
print map(plus_one, test_lst)
print '-----------------'
%timeit increment_list(test_lst)
%timeit map(plus_one, test_lst)
print '-----------------'
print sum(test_lst)
print reduce(sum_integers, test_lst)
Ex12: Iterators, itertools and list comprehensions
For the first two blocks of code, explain what each line does, correct the eventual errors. For the third block of code, read the documentation for all the imported module and illustrate it with an example
In [23]:
from time import sleep
def count_to_3():
print "begin"
for i in range(3):
print "before yield", i
yield i
print "after yield", i
print "end"
if __name__ == "__main__":
counter = count_to_3()
print '--------------------'
counter.next()
sleep(1)
counter.next()
sleep(1)
counter.next()
sleep(1)
counter.next()
print '--------------------'
counter = count_to_3()
print '--------------------'
for i in counter:
print i
In [26]:
def increment_list(lst):
newlist = []
for elt in lst:
newlist.append(elt+1)
return newlist
def plus_one(integer):
return integer+1
def sum_integers(integer1, integer2):
return integer1+integer2
if __name__ == '__main__':
test_lst = [1,1,1,1]
print increment_list(test_lst)
print [elt+1 for elt in test_lst]
print map(plus_one, test_lst)
print '-----------------'
%timeit increment_list(test_lst)
%timeit [elt+1 for elt in test_lst]
%timeit map(plus_one, test_lst)
print '-----------------'
print sum(test_lst)
print reduce(sum_integers, test_lst)
print '-----------------'
test_generator = (elt for elt in test_lst)
print test_generator
for i in test_generator:
print i
print '-----------------'
dico = dict( (_i, elt) for _i, elt in enumerate(test_lst))
print dico
dico = dict( (_i, elt) for _i, elt in enumerate(test_generator))
print dico
In [27]:
def layer_1(iterator):
""" add one """
for elt in iterator:
yield elt + 1
def layer_2(iterator):
""" convert ot string """
for elt in iterator:
yield str(elt)
def layer_3(iterator):
""" add super-prefix """
for elt in iterator:
yield 'super' + elt
tst_lst = [1, 2, 3, 4, 5]
if __name__ == "__main__":
for elt in layer_3(layer_2(layer_1(tst_lst))):
print elt
print '----------------------------------'
for elt in reduce((lambda x, y: y(x)), [layer_1, layer_2, layer_3], tst_lst): # in functional languages, we are doing a fold
print elt
In [28]:
from itertools import cycle, chain, compress, ifilter, imap, izip, izip_longest, product, permutations, combination
Ex13: Lambdas
Sort the following dicts using a lambda function. Here are some informations you might need:
- 'sorted()' function will perform the sorting of a list for you
- 'key' function allows you to select what element will be selected for the comparison if value is a lists of elements
- 'reverse' allows to invert the sorting order
- '{}.iteritems()' will create an iterator of (key, value) pairs from a dictionary
In [24]:
dct = {'a':(1, 1), 'b':(1, 2), 'c':(2, 3)}
Ex14: Functional Python
use a lambda function to count the number of elements in the following lists and then to count the number of elements with length over 4 in the strings below:
In [29]:
# map-imap-ifilter-reduce
from itertools import ifilter, imap, count
from collections import defaultdict, Counter
a = [1, 2, 3]
b = [4, 5, 6, 7]
c = [8, 9, 1, 2, 3]
s1 = 'fs a prul'
s2 = 'prul a fs tke i dama'
s3 = 'dama ka a i prul'
# Solution:
Ex15: collections Implement each task in the simplest manner you could think of, and then using the collections. Compare the speed of implementations.
In [30]:
from collections import namedtuple, deque, Counter, OrderedDict, defaultdict
dct = {'a':1,'b':2,'c':3}
master_dct = {}
st = [1,2,3,4,5,6,1,3,7,4,1,6,7]
master_lst = []
def naive_sort_dict():
pass
def naive_dict_stabilization():
pass
def naive_counter():
pass
def naive_dict_ordering():
pass
if __name__ == "__main__":
pass
Ex16: Basic threading
In [ ]:
from multiprocessing import Pool
# WARNING: threads and multiprocessing don't work with REPL but require parsed files.
# ipython is ok
# Map, imap,
# partition
# reduce as chain of map responses to which a reduce function is applied
def f(x):
return x*x
def f2(x1, x2):
return "%s; %s", (x1, x2)
if __name__ == '__main__':
p = Pool(5)
print(p.map(f, [1, 2, 3]))
for elt in (p.imap(f, ['a', 'b', 'c'], ['b','c','d'])):
print elt
p.terminate()
Ex 17: using RAM profiler
1) Download and install python memory profiler:
pip install -U memory_profiler
2) Execute a memory profiler on the following function (NB: you'll need to fill it in locall):
3) Wrap your function with a logging wrapper from a previous exercice are re-run the profiler.
What do you observe?
In [31]:
# you will need to copy this code you your local machine and run it there
from memory_profiler import profile
@profile
def my_func():
a = [1] * (10 ** 6)
b = [2] * (2 * 10 ** 7)
del b
return a
if __name__ == '__main__':
my_func()
Ex 18: Basic numpy arrays and their variants
1) Python matrix and python array are not the same. For the sake of simplicity, everyone is usung np.array and not np.matrix/
2) array creation routines : random data, linspace, mesh, diag, zeros
3) .dtype and astype(np.float)
4) show np.save, np.load
5) .shape, not dim(); [:, 1]; newaxis; boolean indexing; stepping [::2], start/stop: [2:], [:-1]
6) Linear algebra basics: `*`, .T/.H, dot(a,b), .I (achtung: raises exception, generally better to use explicit solvers)
7) reshape, pad, newaxis, repeat, concatenate
8) copy v.s. deepcopy
9) function vectorization
10) boolean element logical tests
11) scipy.sparse.lilmatrix
12) show how embedded list manipulation become easier in numpy
In [2]:
import numpy as np
def f1(lst_arg):
collection_list = []
for sublist in lst_arg:
collection_list.append([])
for element in sublist:
collection_list[-1].append(element**2)
return collection_list
def f2(lst_args):
return np.array(lst_args)**2
if __name__ == "__main__":
lst = [[i]*10 for i in range (0, 10)]
# print lst
# print f1(lst)
# print f2(lst)
%timeit f1(lst)
%timeit f2(lst)
In [37]:
from itertools import combinations_with_replacement
arr = np.zeros((4, 4, 2))
for i, j in combinations_with_replacement(range(0,4), 2):
arr[i, j] = (i, j)
arr[j, i] = (j, i)
print arr.shape
lines =arr[:, :, 0]
columns = arr[:, :, 1]
print lines
print columns
print lines[1, :]
print columns[:, 1]
lines[::2,:]
In [ ]:
print lines>1
print columns>1
print np.logical_or(lines>1, columns>1)
print np.logical_and(lines>1, columns>1)
print np.logical_xor(lines>1, columns>1)
print lines[lines>1]
new_arr = np.zeros((4,4))
new_arr[lines>1] = lines[lines>1]
print new_arr
In [40]:
A = np.zeros((2, 2))
B = A
B[0, 0] = 1
print A
print A.dtype
A = np.zeros((2, 2))
A = A.astype(np.str)
B = A.copy()
B[0, 0] = '1.0'
print A
print A.dtype
In [52]:
print np.pad(lines,((1,2),(3,4)), 'edge')
print np.reshape(arr, (2,4,4))
print lines[np.newaxis,:, :]
print np.repeat(lines, 2, axis=1)
print np.concatenate((lines, columns), axis=1)
print n_arr = np.rollaxis(arr, 2)
print n_arr.shape
print arr.shape
Out[52]:
In [66]:
print lines*columns
print lines
lines *= 10
print lines
lines /= 10
print lines
hmat = np.dot(lines, columns)
print hmat
print np.linalg.eigh(hmat)
In [18]:
print np.random.rand(2,2)
print np.zeros((2,2))
print np.ones((2,2))
print np.linspace(0, 3, 4).reshape((2,2))
print np.diag(np.array([1,2]))
Ex 19: Using numpy arrays indexing
1) In the following exercice, create a function without "for" or "while" loops that takes in a numpy matrix and index list and returns matrix sorted by the indexes provided, so that the matrix is first sorted on the column a), then column b), then column c), etc..., a-.. being indexes provided.
2) Write a unit-test that
3) Advanced, thus optional:
In [13]:
import numpy as np
# select filtering indexes
filter_indexes = [1, 3]
# generate the test data
raw_data = np.random.randint(0, 4, size=(50,5))
# create a column that we would use for indexing
index_columns = raw_data[:, filter_indexes]
# sort the index columns by lexigraphic order over all the indexing columns
argsorts = np.lexsort(index_columns.T)
# sort both the index and the data column
sorted_index = index_columns[argsorts, :]
sorted_data = raw_data[argsorts, :]
# in each indexing column, find if number in row and row-1 are identical
# then group to check if all numbers in corresponding positions in row and row-1 are identical
autocorrelation = np.all(sorted_index[1:, :] == sorted_index[:-1, :], axis=1)
# find out the breakpoints: these are the positions where row and row-1 are not identical
breakpoints = np.nonzero(np.logical_not(autocorrelation))[0]+1
# finally find the desired subsets
subsets = np.split(sorted_data, breakpoints)
Ex 20: fitting a function
In [19]:
from scipy import stats
X = stats.poisson(3.5)
Y = stats.norm()
t_statistic, p_value = stats.ttest_ind(X.rvs(size=1000), X.rvs(size=1000))
print "t-statistic =", t_statistic
print "p-value =", p_value
In [31]:
from scipy.optimize import curve_fit
from matplotlib import pyplot as plt
def func(x, a, b, c):
return a * np.exp(-b * x) + c
xdata = np.linspace(0, 4, 50)
y = func(xdata, 2.5, 1.3, 0.5)
ydata = y + 0.2 * np.random.normal(size=len(xdata))
popt, pcov = curve_fit(func, xdata, ydata)
print popt
print pcov
plt.plot(xdata, ydata)
plt.plot(xdata, func(xdata, *popt))
plt.show()
In [22]:
from scipy import odr
def lin(p, x):
a, b, c, d = p
return c*x
def pol2(p, x):
a, b, c, d = p
return b*x**2 + c*x + d
def pol3(p, x):
a, b, c, d = p
return a*x**3 + b**x**2 + c*x + d
def regress(x, y, x_sd, y_sd, function_to_fit=pol3, name_to_plot='', figure_no=1):
def plot_result():
x_fit = np.linspace(np.min(x)*0.95, np.max(x)*1.05, 1000)
y_fit = function_to_fit(out.beta, x_fit)
lin_fit = lin(lin_out.beta, x_fit)
plt.subplot(2, 2, figure_no)
plt.title(name_to_plot+': \n %.2fx^3 + %.2fx^2 + %.2fx + %.2f v.s. %.2fx. \n Res var gain: x %.2f' % tuple(out.beta.tolist()+[lin_out.beta.tolist()[2]]+[lin_out.res_var/out.res_var]))
plt.errorbar(x, y, xerr=x_sd, yerr=y_sd, linestyle='None', marker='x')
plt.plot(x_fit, y_fit, 'g')
plt.plot(x_fit, lin_fit, 'r')
plt.autoscale(tight=True)
model = odr.Model(function_to_fit)
data = odr.RealData(x, y, sx=x_sd, sy=y_sd)
_odr = odr.ODR(data, model, beta0=[1., 1., 10., 0.01])
out = _odr.run()
lin_model = odr.Model(lin)
lin_odr = odr.ODR(data, lin_model, beta0=[0., 0., 10., 0.01])
lin_out = lin_odr.run()
lin_out.pprint()
plot_result()
return out.beta
Ex 21: Matplotlib and pyplot:
In [53]:
from matplotlib import pyplot as plt
years = np.linspace(1800, 2010, 210)
temp_data = np.random.rand(210)
fig, ax = plt.subplots(figsize=(14,4))
ax.plot(years, temp_data)
ax.axis('tight')
ax.set_title('tempeatures in Stockholm')
ax.set_xlabel('year')
ax.set_ylabel('temperature (C)');
plt.show()
# axes sharing:
f, (ax1, ax2, ax3) = plt.subplots(3, sharex=True, sharey=True)
ax1.plot(years, temp_data)
ax1.set_title('Sharing both axes')
ax2.plot(years, temp_data, 'g.')
ax3.scatter(years, 2 * temp_data ** 2 - 1, color='r')
f.subplots_adjust(hspace=0)
plt.setp([a.get_xticklabels() for a in f.axes[:-1]], visible=False)
plt.show()
r_array = np.random.rand(100, 100)
plt.imshow(r_array, interpolation='nearest', cmap='gray')
plt.colorbar()
plt.show()
Additional:
Show that each module gives an access to a set of elements that are available as a dict mapping to different elements
In [5]:
import math
print(dir(math))
Show that there are actually closures inside the functions we are working on
In [21]:
def f1(arg1=None):
return None
def f_outer():
def f_inner(params):
return a+params
a = 1
return f_inner
if __name__ == "__main__":
print dir(f1)
print f1.__class__
print f1.__defaults__
print dir(f_outer)
print f_outer()
print dir(f_outer())
print f_outer().__closure__
/
operator means integer division on integers and float division on floats in python 2, but in python 3 the division is always float.
*
operator will multiply any number-likes, but will repeat a string or a list
In [2]:
print 1/2
print 1./2
In [3]:
from __future__ import division
print 1/2
In [5]:
print 'abc'*3
print [1, 2, 3]*3
"%s, %f" formatting allows to easily format strings you would like to pring
In [8]:
print "%s is ok, but: \t %s is better off as %.2f" %(3., 1/3, 1/3)
Using docstrings properly is absolutely essential for writing good python code. Not only it will help the tomorrow-you to figure out what you've just did, combined with Sphinx it makes documentation creation for your code really easy.
In [11]:
def func1(s):
"""
Print a string 's' and tell how many characters it has
"""
print(s + " has " + str(len(s)) + " characters")
if __name__ == "__main__":
print func1.__doc__
print help(func1)