Timing and profiling: timeit and line_profiler
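The %timeit magic used below is a convenience wrapper around the standard-library timeit module. As a minimal sketch of driving timeit directly (useful outside IPython; the statements here are illustrative, not taken from the course files):

import timeit

# total seconds for 1000 runs of the statement; setup cost is excluded
t = timeit.timeit("sum(x*x for x in xrange(1000))", number=1000)
print "per call: %s s" % (t / 1000)

# repeat() runs several independent trials; the minimum is the most stable
print min(timeit.repeat("[x*x for x in xrange(1000)]", repeat=3, number=1000))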
In [1]:
%%file timing.py
"""
some simple things to time
"""
import time

def _list_comprehension(N):
    return [x*x for x in xrange(N)]

def _for_append(N):
    L = []
    for x in xrange(N):
        L.append(x*x)
    return L

def _for_setitem(N):
    L = [None]*N
    i = 0
    for x in xrange(N):
        L[i] = x*x
        i += 1
    return L

def timed(f):
    "decorator that records the last run time of f on the wrapper itself"
    def dec(*args, **kwds):
        start = time.time()
        res = f(*args, **kwds)
        dec.__time__[f.__name__] = time.time() - start
        return res
    def get_time():
        return dec.__time__.values()[0]
    dec.__time__ = {}
    dec.timed = get_time
    return dec

def compare(f1, f2, N, M=1000):
    "print the ratio of total run times of f1(N) to f2(N) over M runs"
    t1 = 0; t2 = 0
    for i in xrange(M):
        f1(N)
        t1 += f1.timed()
    for i in xrange(M):
        f2(N)
        t2 += f2.timed()
    print "ratio: %s" % (t1/t2)

if __name__ == '__main__':
    N = 10000
    print("size = %s" % N)
    start = time.time()
    _list_comprehension(N)
    end = time.time() - start
    print("%s: list comp" % end)
    start = time.time()
    _for_append(N)
    end = time.time() - start
    print("%s: for append" % end)
    start = time.time()
    _for_setitem(N)
    end = time.time() - start
    print("%s: for setitem" % end)
# EOF
In [2]:
!python2.7 timing.py
In [3]:
import timing

@timing.timed
def sum_squared(x):
    return sum(i*i for i in x)

print "result: %s" % sum_squared(xrange(50))
print "time: %s" % sum_squared.timed()
In [4]:
def sum_squared(x):
    return sum(i*i for i in x)

%timeit sum_squared(xrange(50))
Profiling your code
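The next cell drives cProfile programmatically. For a quick look at an existing script without editing it, the standard library also supports running the profiler from the shell, e.g. against the timing.py written above:

python -m cProfile -s cumulative timing.py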
In [5]:
%%file profiling.py
"""some simple things to profile
"""
GLOBAL = 1

def _repeat(counter):
    """Using the GLOBAL value directly.
    """
    for count in xrange(counter):
        GLOBAL

def _repeat_local(counter):
    """Making GLOBAL a local variable.
    """
    local = GLOBAL
    for count in xrange(counter):
        local

def _repeat_2(counter):
    """Using the built-in `True` in a loop.
    """
    for count in xrange(counter):
        True

def _repeat_local_2(counter):
    """Making `True` a local variable.
    """
    true = True
    for count in xrange(counter):
        true

def _test(counter):
    """Call all functions.
    """
    _repeat(counter)
    _repeat_local(counter)
    _repeat_2(counter)
    _repeat_local_2(counter)

def profile(code_string):
    """Check the run times.
    """
    import cProfile
    profiler = cProfile.Profile()
    profiler.run(code_string)
    profiler.print_stats()

if __name__ == '__main__':
    profile('_test(int(1e8))')
# EOF
In [19]:
!python profiling.py
In [6]:
%%file http_search.py
"""http pattern search
"""
import re

PATTERN = r"https?:\/\/[\w\-_]+(\.[\w\-_]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?"

@profile  # injected into builtins by kernprof/line_profiler
def scan_for_http(f):
    addresses = []
    for line in f:
        result = re.search(PATTERN, line)
        if result:
            addresses.append(result.group(0))
    return addresses

if __name__ == "__main__":
    import sys
    f = open(sys.argv[1], 'r')
    addresses = scan_for_http(f)
    for address in addresses:
        pass
        #print(address)
# EOF
In [37]:
%%bash
kernprof -lv http_search.py sample.html
In [7]:
%%file http_search.py
"""http pattern search
"""
import re

PATTERN = r"https?:\/\/[\w\-_]+(\.[\w\-_]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?"

@profile
def scan_for_http(f):
    addresses = []
    pat = re.compile(PATTERN)  # <-- NOTE: compile the pattern once, not on every line
    for line in f:
        result = pat.search(line)
        if result:
            addresses.append(result.group(0))
    return addresses

if __name__ == "__main__":
    import sys
    f = open(sys.argv[1], 'r')
    addresses = scan_for_http(f)
    for address in addresses:
        pass
        #print(address)
# EOF
In [39]:
%%bash
kernprof -lv http_search.py sample.html
Not covered: memory profiling. See guppy and pympler.
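For a taste of what they offer, here is a minimal sketch with pympler (assuming pympler is installed; asizeof and SummaryTracker are its documented entry points):

from pympler import asizeof, tracker

x = [i*i for i in xrange(100000)]
print asizeof.asizeof(x)  # deep size of the list plus its items, in bytes

tr = tracker.SummaryTracker()  # snapshot the currently live objects
y = [str(i) for i in xrange(100000)]
tr.print_diff()  # report what was allocated since the snapshot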
Efficiency in language patterns
In [15]:
import timing

@timing.timed
def use_ON(iterable):
    result = []
    for item in iterable:
        result.insert(0, item)  # insert at the head is O(N) each time
    return result

@timing.timed
def use_O1(iterable):
    result = []
    for item in iterable:
        result.append(item)  # append is amortized O(1)
    result.reverse()
    return result

@timing.timed
def use_list(iterable):
    result = list(iterable)
    result.reverse()
    return result

def compare_ON_O1(N):
    r1 = use_ON(range(N))
    r2 = use_O1(range(N))
    print use_ON.timed() / use_O1.timed()

def compare_O1_list(N):
    r1 = use_list(range(N))
    r2 = use_O1(range(N))
    print use_list.timed() / use_O1.timed()

for i in [100,1000,10000]:
    print "for %s, ON:O1 =" % i,
    compare_ON_O1(i)
    print "for %s, O1:list =" % i,
    compare_O1_list(i)
In [16]:
import timing

@timing.timed
def double_extend(N):
    x = range(N)
    x.extend(x)
    return x

@timing.timed
def double_concatenate(N):
    x = range(N)
    return x+x

for i in [100,1000,10000]:
    print "N=%s" % i,
    timing.compare(double_extend, double_concatenate, N=i)
In [17]:
import timing

@timing.timed
def search_list(N):
    x = list(xrange(N))
    return N in x  # worst case: N is never in the container

@timing.timed
def search_set(N):
    x = set(xrange(N))
    return N in x

for j in [10,100,1000]:
    for i in [1000,10000,100000]:
        print "M=%s, N=%s" % (j, i),
        timing.compare(search_list, search_set, N=i, M=j)
In [17]:
N = 10000
x = set(xrange(N))
%timeit N in x
x = list(xrange(N))
%timeit N in x
EXERCISE: Write test code that calculates the sum of the squares of the numbers from zero to one million. Use a for loop that adds directly with +=, a list comprehension, and also a generator expression. Try it with range and xrange. Use different numbers, e.g. smaller and larger than one million. How do they compare?
In [24]:
%%file looping.py
"""test some looping constructs
"""
def generator(N):
    return sum(i*i for i in xrange(N))

def list_comp(N):
    return sum([i*i for i in xrange(N)])

def for_loop(N):
    total = 0  # avoid shadowing the builtin `sum`
    for i in xrange(N):
        total += i*i
    return total

for N in [100,1000,10000,1000000]:
    print "N = %s" % N
    %timeit generator(N)
    %timeit list_comp(N)
    %timeit for_loop(N)
In [23]:
# %load looping.py
"""test some looping constructs
"""
def generator(N):
return sum(i*i for i in xrange(N))
def list_comp(N):
return sum([i*i for i in xrange(N)])
def for_loop(N):
sum = 0
for i in xrange(N):
sum += i*i
return sum
for N in [100,1000,10000,1000000]:
print "N = %s" % N
%timeit generator(N)
%timeit list_comp(N)
%timeit for_loop(N)
Vector programming: numpy
numpy basics
In [39]:
import numpy as np
a = np.array([1,2,3,4])
b = np.array([5,6,7,8])
print a+b
print a*b
print a**2 - 2*b + 1
In [44]:
print np.sin(a)
print np.max(a)
In [49]:
print np.hstack((a,b))
print np.add(a,b)
print np.add.accumulate(a)
In [54]:
c = np.arange(0,8,2)
print c
d = np.empty(c.shape, dtype=int)
for i,j in enumerate(c):
    d[i] = j**2
    print d[:i+1]
In [62]:
e = np.vstack((a,b))
print e
In [64]:
print e.shape
e.shape = (-1,8)
print e
c = e.reshape((4,2))
print c
In [66]:
d = c.T
print d
b = d.flatten()
print b
In [67]:
c[0,0] = -1
b[-1] = 10
print d
slice
versus where
In [71]:
d[0,:2] = [11,13]
c[2:,0] = [15,17]
print d
In [72]:
b = d[0,::2]
print b
np.add(b,-10,b)
print b
print d
In [77]:
x = np.arange(1e8)
%timeit x.T
%timeit y = x[1::2]
%timeit np.add(c,d.T)
In [113]:
x = np.linspace(0,16,8, dtype=int)
print x
print np.where(x >= 10)
print x[np.where(x >= 10)]
print x[x >= 10]
In [114]:
x[x % 2 != 0] = x[x % 2 != 0] - 1
print x
In [115]:
def _sinc(x):
    if x == 0.0:
        return 1.0
    return np.sin(x)/x

sinc = np.vectorize(_sinc)  # could use as a decorator
print sinc(d)
In [116]:
%timeit map(_sinc, x)
%timeit sinc(x)
%timeit np.sinc(x)
EXERCISE: Profile the functions in roll.py, strategy.py, and trials.py. Where are the hot spots? Can you make any significant improvements? Try converting the code in those three files to vectorized versions where it seems appropriate. Can you make any gains? (Don't touch the code in optimize.py for now.) Note that some functions don't vectorize well, so make sure to retain the original versions of the code -- especially since some other techniques for speeding up code may prefer non-vector versions.
See: 'solution'
Programming efficiency: testing and error handling
In [118]:
x = range(11)
y = range(-5,6)

def add(x,y):
    return x+y

def squared(x):
    return x*x

print [squared(i) for i in (add(j,k) for j,k in zip(x,y))]
print map(squared, map(add, x, y))
%timeit [squared(i) for i in (add(j,k) for j,k in zip(x,y))]
%timeit map(squared, map(add, x, y))
In [119]:
from multiprocessing.dummy import Pool  # thread-based Pool with the multiprocessing API
tmap = Pool().map
%timeit map(squared, range(10))
%timeit tmap(squared, range(10))
In [120]:
def sleepy_squared(x):
    import time
    time.sleep(.1)
    return x*x
%timeit map(sleepy_squared, range(10))
%timeit tmap(sleepy_squared, range(10))
nose and saving your validation code
In [121]:
%%file test_squared_map.py
"""sanity check for our parallel maps
"""
from multiprocessing.dummy import Pool
tmap = Pool().map

x = range(11)

def squared(x):
    return x*x

def test_squared():
    assert map(squared, x) == tmap(squared, x)
# EOF
In [122]:
!nosetests test_squared_map.py
In [127]:
def bad(x):
    import sleep  # there is no `sleep` module, so this raises ImportError
    sleep(x)
    return x

print map(bad, range(2))
In [128]:
def less_bad(x):
    try:
        import sleep  # still fails...
    except ImportError:  # ...but now the failure is handled
        return None
    sleep(x)
    return x

map(less_bad, range(2))
Out[128]:
[None, None]
pyflakes and pep8
In [126]:
%%bash
pyflakes-2.7 test_squared_map.py
pep8-2.7 test_squared_map.py
In [135]:
%%file state.py
"""some good state utilities
"""
def check_pickle(x):
    "checks the pickle across a subprocess"
    import pickle
    import subprocess
    fail = True
    try:
        _x = pickle.dumps(x)
        fail = False
    finally:
        if fail:
            print "DUMP FAILED"
    # split(None, 2) keeps the whole `-c` payload as a single argument
    msg = "python -c import pickle; print pickle.loads(%s)" % repr(_x)
    print "SUCCESS" if not subprocess.call(msg.split(None,2)) else "LOAD FAILED"
# EOF
In [136]:
import pickle
print pickle.dumps([1,2,3])
In [137]:
print pickle.dumps(squared)
In [138]:
import state
state.check_pickle([1,2,3])
In [139]:
state.check_pickle(squared)
In [140]:
%%file sleepy.py
'''test file for pickling
'''
def sleepy_squared(x):
    import time
    time.sleep(.1)
    return x*x
In [141]:
import sleepy
state.check_pickle(sleepy.sleepy_squared)
Let's look at going parallel with multiprocessing...
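As a teaser: the thread-based tmap above becomes process-based just by swapping the import. A minimal sketch (worker processes must be able to unpickle the function, which is exactly why sleepy.py was written out to a file):

from multiprocessing import Pool
import sleepy

pool = Pool()
# separate processes sidestep the GIL, so the 0.1 s sleeps overlap
print pool.map(sleepy.sleepy_squared, range(10))
pool.close()
pool.join()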