In [5]:
%matplotlib inline
In [4]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
1. (25 points) Accelerating network bound procedures.
- List the PNG images available at http://people.duke.edu/~ccc14/misc/. (10 points)
- Use `concurrent.futures` and a thread pool to download all images and time how long it takes. (5 points)
- Use `multiprocessing` and a process pool to download all images and time how long it takes. (5 points)
In [41]:
import requests
from bs4 import BeautifulSoup
def listFD(url, ext=''):
    """Return the URLs of files linked from a directory-listing page.

    Fetches ``url``, parses the returned HTML and collects the ``href`` of
    every ``<a>`` tag whose value ends with ``ext``.

    Parameters
    ----------
    url : str
        Address of the listing page. Each matching href is appended to it
        verbatim, so it should end with ``/`` when the hrefs are relative.
    ext : str, optional
        Suffix to filter on (e.g. ``'png'``). The default ``''`` matches
        every link.

    Returns
    -------
    list of str
        ``url + href`` for each matching anchor, in the order ``find_all``
        yields them.
    """
    page = requests.get(url).text
    soup = BeautifulSoup(page, 'html.parser')
    # href=True skips anchors that have no href attribute (e.g. <a name="top">);
    # node.get('href') is None for those and calling .endswith on it raises.
    return [url + node['href']
            for node in soup.find_all('a', href=True)
            if node['href'].endswith(ext)]
# Directory listing to scrape and the file suffix of interest.
site = 'http://people.duke.edu/~ccc14/misc/'
ext = 'png'

# Preview only the first five matching URLs.
for image_url in listFD(site, ext)[:5]:
    print(image_url)
In [38]:
def download_one(url, path):
    """Download the resource at ``url`` and save it to the file ``path``.

    Parameters
    ----------
    url : str
        Address of the resource to fetch.
    path : str
        Destination file; created or overwritten in binary mode.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status, so that an error page
        is never saved under the image's filename.
    """
    # The context manager closes the response (releasing its connection)
    # even if writing fails; with stream=True the connection is otherwise
    # held until the body is fully consumed or the response is closed.
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        with open(path, 'wb') as f:
            # iter_content applies gzip/deflate content decoding, whereas
            # r.raw.read() would write the bytes exactly as sent on the wire.
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
In [27]:
%%time
# Baseline: fetch every image one after another in the main thread,
# saving each under the last path component of its URL.
for image_url in listFD(site, ext):
    download_one(image_url, os.path.basename(image_url))
In [39]:
%%time
from concurrent.futures import ThreadPoolExecutor
# (url, local filename) pairs; the filename is the last path component.
args = [(url, os.path.basename(url)) for url in listFD(site, ext)]
with ThreadPoolExecutor(max_workers=4) as pool:
    # Executor.map only re-raises a worker's exception when its result is
    # retrieved from the returned iterator. Draining it with list() makes a
    # failed download surface here instead of being silently discarded.
    list(pool.map(lambda pair: download_one(*pair), args))
In [40]:
%%time
from multiprocessing import Pool
# (url, local filename) pairs; the filename is the last path component.
args = [(u, os.path.basename(u)) for u in listFD(site, ext)]
# Four worker processes; starmap unpacks each pair into
# download_one(url, path) and blocks until every task has finished.
with Pool(4) as pool:
    pool.starmap(download_one, args)
In [ ]:
2. (25 points) Accelerating CPU bound procedures
- […] $y=0$ or $y=1$. (10 points)
- […] $n=10^6$ pins and time it. (10 points)
- Use `concurrent.futures` and a process pool to parallelize your solution and time it.
In [52]:
# Problem size: n points, each with p coordinates.
n, p = 100, 10
# Uniform samples on [0, 1); one row per point.
xs = np.random.rand(n, p)
In [53]:
def dist(x, y):
    """Return the Euclidean distance between ``x`` and ``y``.

    The inputs are subtracted element-wise, the differences squared and
    summed over all elements, and the square root of that sum is returned.
    """
    delta = x - y
    squared_sum = np.sum(np.square(delta))
    return np.sqrt(squared_sum)
In [54]:
def pdist(xs):
    """Return the matrix of pairwise distances between the items of ``xs``.

    Entry ``[i, j]`` of the result is ``dist(xs[i], xs[j])``. Every ordered
    pair is evaluated explicitly, including the diagonal and both triangles.
    """
    size = len(xs)
    m = np.empty((size, size))
    for i in range(size):
        row = xs[i]
        for j in range(size):
            m[i, j] = dist(row, xs[j])
    return m
In [55]:
%timeit pdist(xs)
In [ ]:
In [ ]:
3. (25 points) Use C++ to
You may wish to use `armadillo` or `eigen` to solve this exercise.
In [ ]:
4. (25 points) Write a C++ function that uses the `eigen` library to solve the least squares linear problem
$$\beta = (X^TX)^{-1}X^Ty$$
for a matrix $X$ and vector $y$ and returns the vector of coefficients $\beta$. Wrap the function for use in Python and call it like so
beta = least_squares(X, y)
where $X$ and $y$ are given below.
Wrap the function so that it can be called from Python and compare with the `np.linalg.lstsq` solution shown.
In [11]:
n = 10
x = np.linspace(0, 10, num=n)
# Noisy samples of the quadratic 3x^2 - 7x + 2 (Gaussian noise, sd 10).
noise = np.random.normal(loc=0, scale=10, size=n)
y = 3*x**2 - 7*x + 2 + noise
# Design matrix with columns 1, x, x^2.
X = np.column_stack([np.ones(n), x, x**2])
In [12]:
# Least-squares coefficients for the model X @ beta ~ y.
# rcond=None selects NumPy's machine-precision-based cutoff for small
# singular values; omitting it makes NumPy 1.14-1.x emit a FutureWarning.
beta = np.linalg.lstsq(X, y, rcond=None)[0]
In [13]:
beta
Out[13]:
In [14]:
# Observations as points, fitted values X @ beta as a red line.
fig, ax = plt.subplots()
ax.scatter(x, y)
ax.plot(x, X @ beta, 'red');
In [ ]: