In [1]:
def to_binary(x):
    # convert a list of bits (left-most bit = LSB) to an integer
    the_sum = 0
    # enumerate yields (index, value) pairs: each element
    # of `x` along with its position
    for index, value in enumerate(x):
        the_sum += value * 2**index
    return the_sum
In [2]:
my_list = [1, 1]
to_binary(my_list)
Out[2]:
3
In [3]:
my_list = [1, 0, 0, 0, 1, 1, 0, 1]
to_binary(my_list)
Out[3]:
177
One note - there are actually 2 possible solutions to this problem, depending on which value of [1, 0, 0, 0, 1, 1, 0, 1] is treated as the least-significant bit (LSB). The solution above treats the left-most bit as the LSB (i.e. the bit that gets multiplied by $2^0=1$). How would you rewrite the function to treat the right-most bit as the LSB?
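One possible answer is to iterate the list in reverse, so the right-most bit is the one multiplied by $2^0$ (the function name `to_binary_rtl` is ours, not from the exercise):
In [ ]:
def to_binary_rtl(x):
    the_sum = 0
    # reversing the list makes the right-most bit the one
    # that gets multiplied by 2**0
    for index, value in enumerate(reversed(x)):
        the_sum += value * 2**index
    return the_sum

# right-most bit as LSB: 128 + 8 + 4 + 1 = 141
to_binary_rtl([1, 0, 0, 0, 1, 1, 0, 1])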
Read the file data/alice_in_wonderland.txt into memory. How many characters does it contain? How does this compare to its size on disk?
In [3]:
import os

with open('data/alice_in_wonderland.txt', 'r') as file:
    alice = file.read()

# how many characters are in Alice?
print('number of characters is {}'.format(len(alice)))

# how large is the file on disk?
print('number of bytes on disk is {}'.format(os.path.getsize('data/alice_in_wonderland.txt')))
The byte count on disk is larger than the character count, which tells us the file contains non-ASCII characters (characters that need more than one byte in UTF-8).
In [5]:
# non-ASCII characters are characters that need more
# than 1 byte to represent the character
non_ascii = []
for character in alice:
    # encode the character as UTF-8 bytes and check how many bytes there are
    if len(bytes(character, 'UTF-8')) > 1:
        non_ascii.append(character)

# convert the list to a set to get only the unique characters
print('unique non-ASCII characters:', set(non_ascii))
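An equivalent, more compact check is a sketch using ord(): ASCII covers code points 0 through 127, and anything above that needs more than one UTF-8 byte.
In [ ]:
# set comprehension: keep only characters outside the ASCII range
print('unique non-ASCII characters:', {c for c in alice if ord(c) > 127})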
In [8]:
import pickle

# open a file in write mode ('w') to write plain text
with open('data/alice_partial.txt', 'w') as file:
    file.write(alice[:10000])

# open a file in write-binary mode ('wb') to write the pickle protocol
with open('data/alice_partial.pickle', 'wb') as file:
    pickle.dump(alice[:10000], file)

print('size of plain text file: {}'.format(os.path.getsize('data/alice_partial.txt')))
print('size of pickled file: {}'.format(os.path.getsize('data/alice_partial.pickle')))
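We'd expect the pickled file to come out slightly larger, since pickle wraps the string in a few bytes of protocol metadata. As a quick sanity check (not part of the original exercise), we can read the pickle back and confirm it round-trips:
In [ ]:
# read the pickled string back in binary mode and compare
with open('data/alice_partial.pickle', 'rb') as file:
    restored = pickle.load(file)
print(restored == alice[:10000])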
In [12]:
import json

# use the `json` library to read json-structured plain text into Python objects
with open('data/good_movies.json', 'r') as file:
    good_movies = json.loads(file.read())
In [14]:
# iterate over the movies, checking the list of stars for each
for movie in good_movies:
    if 'Ben Affleck' in movie['stars']:
        print(movie['title'])
In [16]:
# iterate over the movies, tallying the Oscar nominations for movies from 2016
nominations_2016 = 0
for movie in good_movies:
    if movie['year'] == 2016:
        nominations_2016 += movie['oscar_nominations']
print(nominations_2016)
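Both queries can also be written as one-liners with comprehensions; a sketch, assuming the same keys as above:
In [ ]:
# comprehension equivalents of the two loops above
print([movie['title'] for movie in good_movies if 'Ben Affleck' in movie['stars']])
print(sum(movie['oscar_nominations'] for movie in good_movies if movie['year'] == 2016))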
Create a NumPy array with 100,000 random integers between 1 and 100. Then, write two functions (in pure Python, not using built-in NumPy functions): one that computes the array's average, and one that computes its standard deviation.
In [18]:
import numpy as np

# `high` is exclusive, so use 101 to include 100
rand_array = np.random.randint(1, high=101, size=100000)
In [19]:
def my_average(x):
    # sum all the elements, then divide by the count
    the_sum = 0
    for el in x:
        the_sum += el
    return the_sum / len(x)

def my_stdev(x):
    # sum the squared deviations from the mean, then take
    # the square root of their average
    the_sum = 0
    the_avg = my_average(x)
    for xi in x:
        the_sum += (xi - the_avg) ** 2
    return np.sqrt(the_sum / len(x))

def my_weighted_average(x, weights):
    # multiply each element by its weight and sum;
    # assumes the weights sum to 1
    the_sum = 0
    for el, weight in zip(x, weights):
        the_sum += el * weight
    return the_sum
In [20]:
print('average:', my_average(rand_array))
print('standard deviation:', my_stdev(rand_array))
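A quick sanity check against NumPy's built-ins (np.std, like my_stdev, computes the population standard deviation by default):
In [ ]:
# compare against the built-in implementations
print('np.mean:', np.mean(rand_array))
print('np.std: ', np.std(rand_array))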
A weight vector needs to sum to 1. So we'll create a vector of random numbers between 0 and 1 and normalize it (divide by its sum) so that it sums to 1.
In [23]:
rand_weights = np.random.random(size=100000)
rand_weights /= np.sum(rand_weights)
In [25]:
print('weighted average:', my_weighted_average(rand_array, rand_weights))
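And the same check for the weighted average, using np.average, which accepts a weights argument:
In [ ]:
# np.average normalizes by the sum of the weights, so it should agree
print('np.average:', np.average(rand_array, weights=rand_weights))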