In [4]:
from sklearn import datasets
boston = datasets.load_boston()
In [5]:
print boston.DESCR
In [13]:
# create dictionary with values that are empty lists
prices = {
'expensive' : [],
'affordable' : []
}
# the price of a house will be appended to the corresponding list (expensive or affordable)
for value in boston.target:
if value > 20:
prices['expensive'].append(value)
else:
prices['affordable'].append(value)
from pprint import pprint as pp
pp(prices)
In [14]:
# how many affordable and expensive houses are there?
print "Expensive:", len(prices['expensive'])
print "Affordable:", len(prices['affordable'])
In [19]:
# What proportion of expensive houses are more than $40K? List comprehension is good for operating on a list
from __future__ import division # what's another way to do this without importing?
len([v for v in prices['expensive'] if v > 40])/len(prices['expensive'])
Out[19]:
In [35]:
# whats the average price of an affordable house?
avg = sum(i for i in prices['affordable'])/len(prices['affordable'])
print avg
print "Average price: ${:,.0f}".format(avg*1000) # make it pretty
Functions behave like any other object, such as an int
or a list
In [231]:
def square(x):
"""Square of x""" # what are these """ """ called? What are they for?
return x**2
def cube(x):
"""Cube of x."""
return x**3
def root(x):
"""Square root of x."""
return x**.5
In [233]:
# create a dictionary of functions
funcs = {
'square': square,
'cube': cube,
'root': root,
}
In [234]:
x = 2
print square(x)
print cube(x)
print root(x)
In [235]:
# print function name and output, sorted by function name
for key, val in sorted(funcs.items()):
print key, val(x)
In [245]:
def derivative(x, f, h = 0.01): # using default value for h
""" Calculate the derivative of any continuous, differentiable function """
return (f(x+h) - f(x))/h
$$ f'(x) = 6x + 5$$
$$ f'(2) = 6\times2 + 5 = 17$$
In [138]:
def some_func(x):
return 3*x**2 + 5*x + 3
In [251]:
derivative(2, some_func, h = 0.01) # passing in function f. What happens if I include a value for h?
Out[251]:
In [257]:
# this is how you might performing timing outside of the Jupyter notebook
# Lots of documentation online about time decorator
import time
def sum_squares(n):
""" Sum of the squares from 0 to n-1 """
return sum([x**2 for x in range(n)])
def timer(f,n):
""" Time how long it takes to evaluate a function """
start = time.clock()
result = f(n)
elapsed = time.clock() - start
return result, elapsed
In [258]:
n = 10000000
timer(sum_squares, n)
Out[258]:
In [259]:
%timeit sum_squares(n)
In [167]:
# The map function applies a function to each member of a collection
# map(some_function, some_sequence)
map(square, range(10))
Out[167]:
In [176]:
# The filter function applies a predicate to each member of a collection,
# retaining only those members where the predicate is True
def is_even(x):
return x % 2 == 0
filter(is_even, range(10))
Out[176]:
In [177]:
# It is common to combine map and filter
map(square, filter(is_even, range(10)))
Out[177]:
In [181]:
# The reduce function reduces a collection using a binary operator to combine sequential items, two at a time
def add_them(x, y):
return x + y
# another implementation of the sum function - like a running total
reduce(add_them, [1,2,3,4,5])
Out[181]:
In [182]:
def custom_sum(xs, func):
"""Returns the sum of xs after applying func to xs."""
return sum(map(func, xs))
xs = range(10)
print custom_sum(xs, square)
print custom_sum(xs, cube)
print custom_sum(xs, root)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2) Using reduce
and map
, write a python program to find the largest element in the list of integers, floats or strings (that are numbers).
For example: [2, '3', 4.0, 2, -1, '10', 9, -4.3, 8, 7, 11, 3]
. Should return 11
.
Hint:
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
If you find it hard to understand what a lambda function is doing, it should probably be rewritten as a regular function.
In [185]:
# Using standard functions
n = 10
def square(x):
return x*x
square(n)
Out[185]:
In [186]:
map(square, range(n))
Out[186]:
In [187]:
# Using lambda function
sqr = lambda x: x*x
sqr(n)
Out[187]:
In [265]:
n=10
map(sqr, range(n))
Out[265]:
In [266]:
# what does this function do?
s1 = reduce(lambda x, y: x+y, map(lambda x: x**2, range(1,10)))
print(s1)
In [267]:
# functional expressions and lambdas are cool
# but can be difficult to read when over-used
# Here is a more comprehensible version
s2 = sum(x**2 for x in range(1, 10))
print(s2)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
In [21]:
ans = map(lambda x: x*x, filter(lambda x: x%2 == 0, range(10)))
print ans
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Recursive functions generally have:
Factorial:
$$ n! = n\times(n-1)\times(n-2)\times...\times2\times1$$For example, $$4! = 4\times3\times2\times1 = 24 $$
In [268]:
def fact(n):
"""Returns the factorial of n"""
# base case
if n==0:
return 1
# recursive case
else:
return n * fact(n-1)
In [269]:
[(n,fact(n)) for n in range(1,10)]
Out[269]:
Fibonacci sequence:
$$F_n = F_{n-1} + F_{n-2},\!\,$$Output is:
$$1, 1, 2, 3, 5, 8, 13, 21, ...$$
In [193]:
def fib1(n):
"""Fibonacci with recursion"""
# base case
if n==0 or n==1:
return 1
# recursive caae
else:
return fib1(n-1) + fib1(n-2)
In [270]:
[(i,fib1(i)) for i in range(20)]
Out[270]:
In [272]:
# In Python, a more efficient version that does not use recursion is
def fib2(n):
"""Fibonacci without recursion"""
a, b = 0, 1
for i in range(1, n+1):
a, b = b, a + b # notice the variable setting is on one line
return b
In [273]:
[(i,fib2(i)) for i in range(20)]
Out[273]:
In [274]:
# Note that the recursive version is much slower than the non-recursive version
%time fib1(30)
%time fib2(30)
Out[274]:
This is because it makes many duplicate function calls. For example:
We can get around that by caching results, but we won't cover that here.
In [275]:
def quick_sort(xs):
""" Classic quick sort """
# base case
if xs == []:
return xs
# recursive case
else:
pivot = xs[0] # choose starting pivot to be on the left
less_than = [x for x in xs[1:] if x <= pivot]
more_than = [x for x in xs[1:] if x > pivot]
return quick_sort(less_than) + [pivot] + quick_sort(more_than)
In [276]:
xs = [11,3,1,4,1,5,9,2,6,5,3,5,9,0,10,4,3,7,4,5,8,-1]
print quick_sort(xs)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Euclid's algorithm for finding the greatest common divisor of two numbers is
gcd(a, 0) = a
gcd(a, b) = gcd(b, a modulo b)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
In [215]:
# Iterators can be created from sequences with the built-in function iter()
xs = [1,2,3]
x_iter = iter(xs)
print x_iter.next() # python "remembers" where the pointer is
print x_iter.next()
print x_iter.next()
print x_iter.next()
In [216]:
# Most commonly, iterators are used (automatically) within a for loop
# which terminates when it encouters a StopIteration exception
x_iter = iter(xs)
for x in x_iter:
print x
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Starting with range(1, 20)
, make a list of the squares of each odd number in the following ways
map
and filter
. lambda
(twice!). But you don't have to!map
and/or filter
, you need to apply a function to each element of a list.for
loopThe answer should be [1, 9, 25, 49, 81, 121, 169, 225, 289, 361]
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Q1. Rewrite the factorial function so that it does not use recursion. Hint: consider using reduce
since you're multiplying consecutive numbers.
Here is the original code:
def fact(n):
"""Returns the factorial of n."""
# base case
if n==0:
return 1
# recursive case
else:
return n * fact(n-1)
for i in range(1,11):
print fact(i),
Q2. Write a function, normalize(x)
that takes in a vector x
and outputs a normalized vector x_norm
in the following way:
where,
$$ \mu = \frac{1}{n}\sum_{i=1}^{n}x_{i} $$$$ \sigma^2 = \frac{1}{n}\sum_{i=1}^{n}(x_{i} - \mu)^2 $$Each $X_i$ is a single data point from the input list. For example, an input list x = [1,2,3,4]
should output x_norm = [-1.3416407865,-0.4472135955,0.4472135955,1.3416407865]
.
Note that the sum of the new list should be 0 and the standard deviation should be 1.0 - this is why it's called normalizing. It's also called standardizing
Q3. Matrix Multiplication and list comprehension
Rewrite the matrix multiplication code to use list comprehension instead of nested for loops.
Q4.
Write a program to merge two dictionaries together. Assume the dictionaries have the same keys. For example:
a = {
"key1" : 1,
"key3" : "snafu",
"key2" : 5,
"key5" : 7,
"key4" : 0,
}
b = {
"key2" : 6,
"key1" : 8,
"key4" : "bar",
"key3" : 9,
"key5" : "foo"
}
becomes
c = {
'key1': (1, 8),
'key2': (5, 6),
'key3': ('snafu', 9),
'key4': (0, 'bar'),
'key5': (7, 'foo')
}
Hint: See the collections
module for a helper function.
Q5. Pascal's triangle
1
1 1
1 2 1
1 3 3 1
1 4 6 4 1
1 5 10 10 5 1
...
Do this recursively.
Q1. The four adjacent digits in the 1000-digit number that have the greatest product are 9 × 9 × 8 × 9 = 5832.
73167176531330624919225119674426574742355349194934
96983520312774506326239578318016984801869478851843
85861560789112949495459501737958331952853208805511
12540698747158523863050715693290963295227443043557
66896648950445244523161731856403098711121722383113
62229893423380308135336276614282806444486645238749
30358907296290491560440772390713810515859307960866
70172427121883998797908792274921901699720888093776
65727333001053367881220235421809751254540594752243
52584907711670556013604839586446706324415722155397
53697817977846174064955149290862569321978468622482
83972241375657056057490261407972968652414535100474
82166370484403199890008895243450658541227588666881
16427171479924442928230863465674813919123162824586
17866458359124566529476545682848912883142607690042
24219022671055626321111109370544217506941658960408
07198403850962455444362981230987879927244284909188
84580156166097919133875499200524063689912560717606
05886116467109405077541002256983155200055935729725
71636269561882670428252483600823257530420752963450
Write a program to find the thirteen adjacent digits in the 1000-digit number that have the greatest product. What is the value of this product? (Euler problem #8)
The answer shoud be 23514624000.
Q2.
A string of consecutive successes is known as a success run. Write a function that returns the counts for runs of length $k$ for each $k$ observed in a dictionary. Hint: check out the itertools
library.
For example: if the trials were [0, 1, 0, 1, 1, 0, 0, 0, 0, 1], the function should return
{1: 2, 2: 1})