Homework 1

The goal of this homework is to ensure you have a decent understanding of Python AND know how to read and interpret documentation. Please make heavy use of Google, StackOverflow, the Python 3 Documentation, and of course the help function.

Each problem has an associated test via the assert statement that will tell you if you implemented it properly.

Feel free to reuse any previously defined functions in later problems (e.g. feel free to use a square function when implementing square_and_add_one).

Run this First

This line imports several useful modules into memory for later use.


In [1]:
import os, sys, csv, json, math, random, collections, time, itertools, functools

This cell creates a CSV (comma-separated values) file called hw1data.csv in the current working directory.


In [2]:
%%file hw1data.csv
id,sex,weight
1,M,190
2,F,120
3,F,110
4,M,150
5,O,120
6,M,120
7,F,140


Overwriting hw1data.csv

Basic


In [16]:
def double(x):
    """
    return x multiplied by 2
    """
    doubled = x * 2
    return doubled

assert double(10) == 20

In [18]:
def apply_to_100(f):
    """
    call the arbitrary one-argument function f with the value 100
    and hand back whatever f returns
    """
    outcome = f(100)
    return outcome

assert(apply_to_100(double) == 200)

In [19]:
# Anonymous function built with lambda: takes some value x and returns x + 1.
# It is bound to the name add_one so it can be passed to other functions.
add_one = lambda x: x + 1

assert apply_to_100(add_one) == 101

In [29]:
def get_up_to_first_three_elements(l):
    """
    return a slice holding at most the first three elements of list l
    (shorter lists come back whole, an empty list stays empty)
    """
    limit = 3
    return l[:limit]

assert get_up_to_first_three_elements([1,2,3,4]) == [1,2,3]
assert get_up_to_first_three_elements([1,2]) == [1,2]
assert get_up_to_first_three_elements([1]) == [1]
assert get_up_to_first_three_elements([]) == []

In [60]:
def caesar_cipher(s, key):
    """
    https://www.hackerrank.com/challenges/caesar-cipher-1
    Given an unencrypted string s and an encryption key (an integer), compute the caesar cipher.

    Basically just shift each letter by the value of key. A becomes C if key = 2. This is case sensitive.
    Only the ASCII letters A-Z and a-z are shifted; every other character is copied unchanged.
    The shift wraps around the alphabet, so keys larger than 25 and negative keys
    (which shift backwards, i.e. decrypt) both work.

    What is a Caesar Cipher? https://en.wikipedia.org/wiki/Caesar_cipher

    Hint: ord function https://docs.python.org/3/library/functions.html#ord
    Hint: chr function https://docs.python.org/3/library/functions.html#chr

    print(ord('A'), ord('Z'), ord('a'), ord('z'))
    print(chr(65), chr(90), chr(97), chr(122))
    """

    def shift(c, base):
        # Position within the alphabet, moved by key, wrapped back into 0..25.
        return chr(base + (ord(c) - base + key) % 26)

    new_s = []

    for c in s:
        if "A" <= c <= "Z":
            new_s.append(shift(c, ord("A")))
        elif "a" <= c <= "z":
            new_s.append(shift(c, ord("a")))
        else:
            new_s.append(c)

    return "".join(new_s)

assert caesar_cipher("middle-Outz", 2) == "okffng-Qwvb"
# A second key makes sure the shift really comes from the key argument.
assert caesar_cipher("middle-Outz", 3) == "plggoh-Rxwc"

Working with Files


In [8]:
def create_list_of_lines_in_hw1data():
    r"""
    Read each line of hw1data.csv into a list and return the list of lines.
    Remove the newline character ("\n") at the end of each line.

    Only the trailing newline is removed; any other leading or trailing
    whitespace in a line is kept as it appears in the file.

    What is a newline character? https://en.wikipedia.org/wiki/Newline

    Hint: Reading a File (https://docs.python.org/3/tutorial/inputoutput.html#methods-of-file-objects)
    """
    with open("hw1data.csv", "r") as f:
        # rstrip("\n") rather than strip(): strip() would also eat spaces and tabs.
        return [line.rstrip("\n") for line in f]

assert create_list_of_lines_in_hw1data() == [
        "id,sex,weight", "1,M,190", "2,F,120", "3,F,110",
        "4,M,150", "5,O,120", "6,M,120", "7,F,140",
    ]

In [9]:
def filter_to_lines_with_just_M():
    """
    Read each line in like last time except filter down to only the rows whose sex column is "M".

    The header line is skipped and only the second comma-separated field is
    compared, so an "M" appearing anywhere else in a line is not a match.

    Hint: Filter using List Comprehensions (http://www.diveintopython.net/power_of_introspection/filtering_lists.html)
    """
    data_lines = create_list_of_lines_in_hw1data()[1:]  # drop the header row
    matches = []
    for line in data_lines:
        fields = line.split(",")
        # Lines without a second field (e.g. blank lines) can never match.
        if len(fields) > 1 and fields[1].strip() == "M":
            matches.append(line)
    return matches

assert filter_to_lines_with_just_M() == ["1,M,190", "4,M,150", "6,M,120"]

In [ ]:
def filter_to_lines_with_just_F():
    """
    Read each line in like last time except filter down to only the rows whose sex column is "F".

    The header line is skipped and only the second comma-separated field is
    compared, so an "F" appearing anywhere else in a line is not a match.
    """
    data_lines = create_list_of_lines_in_hw1data()[1:]  # drop the header row
    matches = []
    for line in data_lines:
        fields = line.split(",")
        # Lines without a second field (e.g. blank lines) can never match.
        if len(fields) > 1 and fields[1].strip() == "F":
            matches.append(line)
    return matches

assert filter_to_lines_with_just_F() == ["2,F,120", "3,F,110", "7,F,140"]

In [10]:
def filter_to_lines_with_any_sex(sex):
    """
    Read each line in like last time except filter down to only the rows whose sex column equals `sex`.

    The header line is skipped and only the second comma-separated field is
    compared. A plain substring test would also match ids, weights or the
    header (for example sex="1" or sex="e"); comparing the column avoids that.
    """
    data_lines = create_list_of_lines_in_hw1data()[1:]  # drop the header row
    matches = []
    for line in data_lines:
        fields = line.split(",")
        # Lines without a second field (e.g. blank lines) can never match.
        if len(fields) > 1 and fields[1].strip() == sex:
            matches.append(line)
    return matches

assert filter_to_lines_with_any_sex("O") == ["5,O,120"]

In [11]:
def get_average_weight():
    """
    This time instead of just reading the file, parse the csv using csv.reader.

    get the average weight of all people rounded to the hundredth place

    The first row is treated as the header and skipped; blank rows are ignored.
    Raises ValueError when the file contains no data rows, because an average
    of nothing is undefined.

    Hint: https://docs.python.org/3/library/csv.html#csv.reader
    """
    # The csv documentation asks for newline="" on files handed to csv.reader.
    with open("hw1data.csv", "r", newline="") as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header; the default keeps an empty file from raising StopIteration
        # csv.reader yields [] for blank lines, which have no weight column.
        weights = [int(row[2]) for row in reader if row]
    if not weights:
        raise ValueError("hw1data.csv contains no data rows")
    return round(sum(weights) / len(weights), 2)

assert get_average_weight() == 135.71

In [15]:
def create_list_of_dicts_in_hw1data():
    """
    parse hw1data.csv with csv.DictReader and return one dict per data line,
    keyed by the column names found in the header line
    """
    with open("hw1data.csv", "r") as handle:
        return list(csv.DictReader(handle))

assert create_list_of_dicts_in_hw1data() == [
        {"id": "1", "sex": "M", "weight": "190"},
        {"id": "2", "sex": "F", "weight": "120"},
        {"id": "3", "sex": "F", "weight": "110"},
        {"id": "4", "sex": "M", "weight": "150"},
        {"id": "5", "sex": "O", "weight": "120"},
        {"id": "6", "sex": "M", "weight": "120"},
        {"id": "7", "sex": "F", "weight": "140"}
    ]

Project Euler


In [4]:
def sum_of_multiples_of_three_and_five_below_1000():
    """
    https://projecteuler.net/problem=1
    If we list all the natural numbers below 10 that are multiples of 3 or 5, we get 3, 5, 6 and 9.
    The sum of these multiples is 23.
    Find the sum of all the multiples of 3 or 5 below 1000.

    Hint: Range Function with a step (https://docs.python.org/3/library/functions.html#func-range)
    Hint: Set union (https://docs.python.org/3/library/stdtypes.html#set)
    """
    limit = 1000
    # The union keeps numbers divisible by both 3 and 5 from being counted twice.
    multiples = set(range(0, limit, 3)) | set(range(0, limit, 5))
    return sum(multiples)

def sum_of_multiples_of_l_below_y(l, y):
    """
    Modification of Project Euler Problem 1
    Find the sum of all multiples of any number in the list l, below the value y.

    Each natural number below y is counted once even if several entries of l
    divide it. Zeros in l are ignored: the only multiple of 0 is 0, which adds
    nothing to the sum, and testing `i % 0` would raise ZeroDivisionError.

    Hint: https://docs.python.org/3/library/functions.html#any
    """
    divisors = [x for x in l if x != 0]
    # Start at 1: 0 contributes nothing to the sum.
    return sum(i for i in range(1, y) if any(i % x == 0 for x in divisors))

def sum_of_even_fibonacci_under_4million():
    """
    https://projecteuler.net/problem=2
    Each new term in the Fibonacci sequence is generated by adding the previous two terms.
    By starting with 1 and 2, the first 10 terms will be:
    1, 2, 3, 5, 8, 13, 21, 34, 55, 89, ...
    By considering the terms in the Fibonacci sequence whose values do not exceed four million,
    find the sum of the even-valued terms.

    Every third Fibonacci term is even (2, 8, 34, 144, ...), and those even
    terms follow their own recurrence E(n) = 4 * E(n-1) + E(n-2), so the
    odd terms never need to be generated.

    Hint: While Loops (http://learnpythonthehardway.org/book/ex33.html)
    """
    limit = 4000000
    total = 0
    older_even, newer_even = 0, 2
    while newer_even < limit:
        total += newer_even
        older_even, newer_even = newer_even, 4 * newer_even + older_even
    return total

def test_all():
    """check each Project Euler solution against its known answer, in order"""
    checks = [
        (lambda: sum_of_multiples_of_three_and_five_below_1000(), 233168),
        (lambda: sum_of_multiples_of_l_below_y([3,5,15], 1000), 233168),
        (lambda: sum_of_even_fibonacci_under_4million(), 4613732),
    ]
    # Each solution is only computed when its turn comes, so a failure stops at the first bad one.
    for compute, expected in checks:
        assert compute() == expected
    
test_all()

Strings


In [5]:
from collections import Counter

def remove_punctuation(s):
    """return s with every period, comma, and semicolon deleted
    """
    # A translation table with only a "delete" set drops those characters in one pass.
    drop_table = str.maketrans("", "", ".,;")
    return s.translate(drop_table)

def tokenize(s):
    """return a list of lowercased tokens (words) in a string without punctuation

    Tokens are separated by any run of whitespace, so repeated spaces, tabs or
    newlines never produce empty-string tokens, and an empty or all-whitespace
    string gives an empty list.
    """
    # split() with no argument collapses whitespace runs; split(" ") would
    # return "" tokens for inputs such as "a  b" or "".
    return remove_punctuation(s.lower()).split()

def word_count(s):
    """tally how often each lowercased word occurs in s

    The result is a collections.Counter (a dict subclass) mapping word -> count.
    """
    tally = Counter()
    tally.update(tokenize(s))
    return tally

def test_all():
    """run remove_punctuation, tokenize and word_count against two sample sentences"""
    # Each sample: (raw text, text without punctuation, tokens, word counts).
    samples = [
        (
            "A quick brown Al, jumps over the lazy dog; sometimes...",
            "A quick brown Al jumps over the lazy dog sometimes",
            ["a", "quick", "brown", "al", "jumps", "over", "the", "lazy", "dog", "sometimes"],
            {
                "a": 1, "quick": 1, "brown": 1, "al": 1, "jumps": 1,
                "over": 1, "the": 1, "lazy": 1, "dog": 1, "sometimes": 1,
            },
        ),
        (
            "This this is a sentence sentence with words multiple multiple times.",
            "This this is a sentence sentence with words multiple multiple times",
            [
                "this", "this", "is", "a", "sentence", "sentence",
                "with", "words", "multiple", "multiple", "times",
            ],
            {"this": 2, "is": 1, "a": 1, "sentence": 2, "with": 1, "words": 1, "multiple": 2, "times": 1},
        ),
    ]

    # One pass per function, so all punctuation checks run before any tokenize check, and so on.
    for text, without_punctuation, _, _ in samples:
        assert remove_punctuation(text) == without_punctuation

    for text, _, tokens, _ in samples:
        assert tokenize(text) == tokens

    for text, _, _, counts in samples:
        assert word_count(text) == counts

test_all()

Linear Algebra

Please find the following empty functions and write the code to complete the logic. These functions are focused around implementing vector algebra operations. The vectors can be of any length. If a function accepts two vectors, assume they are the same length. Khan Academy has a decent introduction: [https://www.khanacademy.org/math/linear-algebra/vectors_and_spaces/vectors/v/vector-introduction-linear-algebra]


In [69]:
def vector_add(v, w):
    """componentwise sum of two vectors, returned as a new list
        v + w = [4, 5, 1] + [9, 8, 1] = [13, 13, 2]
    """
    total = []
    for left, right in zip(v, w):
        total.append(left + right)
    return total

def vector_subtract(v, w):
    """componentwise difference of two vectors, returned as a new list
        v - w = [4, 5, 1] - [9, 8, 1] = [-5, -3, 0]
    """
    difference = []
    for left, right in zip(v, w):
        difference.append(left - right)
    return difference

def vector_sum(vectors):
    """sums a list of vectors of arbitrary length and returns the resulting vector
    [[1,2], [4,5], [8,3]] = [13,10]

    The i-th component of the result is the sum of the i-th components of all
    input vectors, added in input order. The result is always a new list, so
    it never aliases one of the input vectors. An empty list of vectors gives [].
    """
    # zip(*vectors) groups the i-th components of every vector together.
    return [sum(components) for components in zip(*vectors)]

def scalar_multiply(c, v):
    """scale every component of vector v by the scalar c and return the new vector"""
    scaled = []
    for component in v:
        scaled.append(c * component)
    return scaled

def vector_mean(vectors):
    """compute the vector whose i-th element is the mean of the
    i-th elements of the input vectors

    Raises ValueError when `vectors` is empty, since the mean of no vectors
    is undefined."""
    n = len(vectors)
    if n == 0:
        raise ValueError("vector_mean requires at least one vector")
    # Divide each total by n directly: multiplying by 1/n rounds twice and can
    # turn an exactly representable mean into a slightly-off float.
    return [total / n for total in vector_sum(vectors)]

def dot(v, w):
    """dot product v.w: multiply matching components and add the products
    v_1 * w_1 + ... + v_n * w_n"""
    total = 0
    for left, right in zip(v, w):
        total += left * right
    return total

def sum_of_squares(v):
    """v.v, the dot product of v with itself
    v_1 * v_1 + ... + v_n * v_n"""
    squares_total = dot(v, v)
    return squares_total

def magnitude(v):
    """the norm (length) of a vector: square root of the sum of its squared components"""
    squared_length = sum_of_squares(v)
    return math.sqrt(squared_length)

def squared_distance(v, w):
    """the squared distance of v to w: sum of squares of the componentwise difference v - w"""
    difference = vector_subtract(v, w)
    return sum_of_squares(difference)

def distance(v, w):
    """the distance of v to w: square root of the squared distance between them"""
    squared = squared_distance(v, w)
    return math.sqrt(squared)

def cross_product(v, w): # or outer_product(v, w)
    """Bonus:
    The outer product of v and w, produced lazily one row at a time.

    Despite the name, this computes the outer product: row i is w scaled by
    the i-th component of v. Wrap the call in list(...) to get every row."""
    yield from (scalar_multiply(component, w) for component in v)

def test_all():
    """print each vector helper's result for the sample inputs, then assert the expected values"""
    test_v, test_w = [4, 5, 1], [9, 8, 1]
    list_v = [[1,2], [4,5], [8,3]]

    # (label, values echoed after the label, deferred computation).
    # The computation is deferred so each result is produced right before its line is printed.
    demos = [
        ("Vector Add", (test_v, test_w), lambda: vector_add(test_v, test_w)),
        ("Vector Subtract", (test_v, test_w), lambda: vector_subtract(test_v, test_w)),
        ("Vector Sum", (list_v,), lambda: vector_sum(list_v)),
        ("Scalar Multiply", (3, test_w), lambda: scalar_multiply(3, test_w)),
        ("Dot", (test_v, test_w), lambda: dot(test_v, test_w)),
        ("Sum of Squares", (test_v,), lambda: sum_of_squares(test_v)),
        ("Magnitude", (test_v,), lambda: magnitude(test_v)),
        ("Distance", (test_v, test_w), lambda: distance(test_v, test_w)),
        ("Cross Product", (), lambda: list(cross_product(test_v, test_w))),
    ]
    for label, shown, compute in demos:
        print(label, *shown, compute())

    # (deferred computation, expected value), checked in this order.
    checks = [
        (lambda: vector_add(test_v, test_w), [13, 13, 2]),
        (lambda: vector_subtract(test_v, test_w), [-5, -3, 0]),
        (lambda: vector_sum(list_v), [13,10]),
        (lambda: scalar_multiply(3, test_w), [27, 24, 3]),
        (lambda: dot(test_v, test_w), 77),
        (lambda: sum_of_squares(test_v), 42),
        (lambda: magnitude(test_v), 6.48074069840786),
        (lambda: distance(test_v, test_w), 5.830951894845301),
        (lambda: list(cross_product(test_v, test_w)), [[36, 32, 4], [45, 40, 5], [9, 8, 1]]),
    ]
    for compute, expected in checks:
        assert compute() == expected

test_all()


Vector Add [4, 5, 1] [9, 8, 1] [13, 13, 2]
Vector Subtract [4, 5, 1] [9, 8, 1] [-5, -3, 0]
Vector Sum [[1, 2], [4, 5], [8, 3]] [13, 10]
Scalar Multiply 3 [9, 8, 1] [27, 24, 3]
Dot [4, 5, 1] [9, 8, 1] 77
Sum of Squares [4, 5, 1] 42
Magnitude [4, 5, 1] 6.48074069840786
Distance [4, 5, 1] [9, 8, 1] 5.830951894845301
Cross Product [[36, 32, 4], [45, 40, 5], [9, 8, 1]]