In [ ]:
__author__ = "Christopher Potts, Will Monroe, and Lucy Li"
__version__ = "CS224u, Stanford, Spring 2020"
Why should we care about NumPy?
We use NumPy throughout this course (see the files prefixed with np_ in your cs224u directory). In Jupyter notebooks, NumPy documentation is two clicks away: Help -> NumPy reference.
In [ ]:
import numpy as np
In [ ]:
np.zeros(5)
In [ ]:
np.ones(5)
In [ ]:
# convert list to numpy array
np.array([1,2,3,4,5])
In [ ]:
# convert numpy array to list
np.ones(5).tolist()
In [ ]:
# one float => all floats
np.array([1.0,2,3,4,5])
In [ ]:
# same as above
np.array([1,2,3,4,5], dtype='float')
In [ ]:
# spaced values in interval
np.array([x for x in range(20) if x % 2 == 0])
In [ ]:
# same as above
np.arange(0,20,2)
In [ ]:
# random floats in [0, 1)
np.random.random(10)
In [ ]:
# random integers
np.random.randint(5, 15, size=10)
In [ ]:
x = np.array([10,20,30,40,50])
In [ ]:
x[0]
In [ ]:
# slice
x[0:2]
In [ ]:
# slicing past the end is fine; it just stops at the last element
x[0:1000]
In [ ]:
# last value
x[-1]
In [ ]:
# last value as array
x[[-1]]
In [ ]:
# last 3 values
x[-3:]
In [ ]:
# pick indices
x[[0,2,4]]
In [ ]:
#x2 = x # try this line instead
x2 = x.copy()
In [ ]:
x2[0] = 10
x2
In [ ]:
x2[[1,2]] = 10
x2
In [ ]:
x2[[3,4]] = [0, 1]
x2
In [ ]:
# check if the original vector changed
x
In [ ]:
x.sum()
In [ ]:
x.mean()
In [ ]:
x.max()
In [ ]:
x.argmax()
In [ ]:
np.log(x)
In [ ]:
np.exp(x)
In [ ]:
x + x # Try also with *, -, /, etc.
In [ ]:
x + 1
In [ ]:
# take the log of each value one at a time, returning a Python list
def listlog(vals):
    return [np.log(y) for y in vals]
In [ ]:
# a large random vector, shifted by 1 so that every entry is positive
samp = np.random.random_sample(int(1e7)) + 1
samp
In [ ]:
%time _ = np.log(samp)
In [ ]:
%time _ = listlog(samp)
In [ ]:
np.array([[1,2,3], [4,5,6]])
In [ ]:
np.array([[1,2,3], [4,5,6]], dtype='float')
In [ ]:
np.zeros((3,5))
In [ ]:
np.ones((3,5))
In [ ]:
np.identity(3)
In [ ]:
np.diag([1,2,3])
In [ ]:
X = np.array([[1,2,3], [4,5,6]])
X
In [ ]:
X[0]
In [ ]:
X[0,0]
In [ ]:
# get row
X[0, :]
In [ ]:
# get column
X[:, 0]
In [ ]:
# get multiple columns
X[:, [0, 2]]
In [ ]:
# X2 = X # try this line instead
X2 = X.copy()
X2
In [ ]:
X2[0,0] = 20
X2
In [ ]:
X2[0] = 3
X2
In [ ]:
X2[: , -1] = [5, 6]
X2
In [ ]:
# check if original matrix changed
X
In [ ]:
z = np.arange(1, 7)
z
In [ ]:
z.shape
In [ ]:
Z = z.reshape(2,3)
Z
In [ ]:
Z.shape
In [ ]:
Z.reshape(6)
In [ ]:
# same as above
Z.flatten()
In [ ]:
# transpose
Z.T
In [ ]:
A = np.array(range(1,7), dtype='float').reshape(2,3)
A
In [ ]:
B = np.array([1, 2, 3])
In [ ]:
# not the same as A.dot(B)
A * B
In [ ]:
A + B
In [ ]:
A / B
In [ ]:
# matrix multiplication
A.dot(B)
In [ ]:
B.dot(A.T)
In [ ]:
A.dot(A.T)
In [ ]:
# outer product
# multiplying each element of first vector by each element of the second
np.outer(B, B)
The following is a practical example of numerical operations on NumPy matrices.
In our class, we have a shallow neural network implemented in np_shallow_neural_network.py. Note how the forward and backward passes use no for loops; instead, they take advantage of NumPy's ability to vectorize manipulations of data.
def forward_propagation(self, x):
    h = self.hidden_activation(x.dot(self.W_xh) + self.b_xh)
    y = softmax(h.dot(self.W_hy) + self.b_hy)
    return h, y

def backward_propagation(self, h, predictions, x, labels):
    y_err = predictions.copy()
    y_err[np.argmax(labels)] -= 1
    d_b_hy = y_err
    h_err = y_err.dot(self.W_hy.T) * self.d_hidden_activation(h)
    d_W_hy = np.outer(h, y_err)
    d_W_xh = np.outer(x, h_err)
    d_b_xh = h_err
    return d_W_hy, d_b_hy, d_W_xh, d_b_xh
The forward pass essentially computes the following:
$$h = f(xW_{xh} + b_{xh})$$
$$y = \text{softmax}(hW_{hy} + b_{hy}),$$
where $f$ is self.hidden_activation.
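As a small sketch (not the course implementation), the two equations translate almost line for line into NumPy. Here tanh stands in for the hidden activation $f$, and the dimensions (4 inputs, 5 hidden units, 3 classes) are made up for illustration:
In [ ]:
# A toy forward pass mirroring the equations above.
import numpy as np

def softmax(z):
    e = np.exp(z - z.max())   # shift for numerical stability
    return e / e.sum()

rng = np.random.RandomState(0)
x = rng.randn(4)                            # toy input vector
W_xh, b_xh = rng.randn(4, 5), rng.randn(5)
W_hy, b_hy = rng.randn(5, 3), rng.randn(3)

h = np.tanh(x.dot(W_xh) + b_xh)             # h = f(x W_xh + b_xh)
y = softmax(h.dot(W_hy) + b_hy)             # y = softmax(h W_hy + b_hy)
y                                           # a probability distribution over the 3 classes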
The backward pass propagates error by computing local gradients and chaining them. Feel free to learn more about backprop here, though it is not necessary for our class. Also look at this neural networks case study to see another example of how NumPy can be used to implement forward and backward passes of a simple neural network.
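To connect the backward pass to the math, here is a follow-up sketch (continuing from the toy forward pass above, and again not the course implementation). It computes the analytic gradient for W_hy exactly as backward_propagation does, then checks one entry against a finite-difference estimate; the one-hot label and the cross-entropy loss are made up for illustration:
In [ ]:
# Gradient check for W_hy, reusing x, W_xh, b_xh, W_hy, b_hy, h, y from the cell above.
labels = np.array([0., 1., 0.])             # toy one-hot label

def loss(W):
    h_ = np.tanh(x.dot(W_xh) + b_xh)
    y_ = softmax(h_.dot(W) + b_hy)
    return -np.log(y_[np.argmax(labels)])   # cross-entropy against the one-hot label

# analytic gradient, as in backward_propagation above
y_err = y.copy()
y_err[np.argmax(labels)] -= 1
d_W_hy = np.outer(h, y_err)

# finite-difference estimate for a single entry of W_hy
eps = 1e-6
W_plus, W_minus = W_hy.copy(), W_hy.copy()
W_plus[0, 0] += eps
W_minus[0, 0] -= eps
numeric = (loss(W_plus) - loss(W_minus)) / (2 * eps)
print(d_W_hy[0, 0], numeric)                # the two numbers should agree closely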
In [ ]:
import pandas as pd
In [ ]:
count_df = pd.DataFrame(
    np.array([
        [1,0,1,0,0,0],
        [0,1,0,1,0,0],
        [1,1,1,1,0,0],
        [0,0,0,0,1,1],
        [0,0,0,0,0,1]], dtype='float64'),
    index=['gnarly', 'wicked', 'awesome', 'lame', 'terrible'])
count_df
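A DataFrame is essentially a labeled wrapper around a NumPy array, so it is easy to drop back down to NumPy when you need it. A small sketch using the count_df just defined:
In [ ]:
# recover the underlying NumPy array and apply ordinary NumPy operations
arr = count_df.to_numpy()
print(type(arr))
print(arr.sum(axis=1))   # row sums, one per word in the index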
In [ ]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn import datasets
In [ ]:
iris = datasets.load_iris()
X = iris.data
y = iris.target
print(type(X))
print("Dimensions of X:", X.shape)
print(type(y))
print("Dimensions of y:", y.shape)
In [ ]:
# split data into train/test
X_iris_train, X_iris_test, y_iris_train, y_iris_test = train_test_split(
    X, y, train_size=0.7, test_size=0.3)
print("X_iris_train:", type(X_iris_train))
print("y_iris_train:", type(y_iris_train))
print()
# start up model
maxent = LogisticRegression(
    fit_intercept=True,
    solver='liblinear',
    multi_class='auto')
# train on train set
maxent.fit(X_iris_train, y_iris_train)
# predict on test set
iris_predictions = maxent.predict(X_iris_test)
fnames_iris = iris['feature_names']
tnames_iris = iris['target_names']
# how well did our model do?
print(classification_report(y_iris_test, iris_predictions, target_names=tnames_iris))
In [ ]:
from scipy.spatial.distance import cosine
from scipy.stats import pearsonr
from scipy import linalg
In [ ]:
# cosine distance
a = np.random.random(10)
b = np.random.random(10)
cosine(a, b)
In [ ]:
# pearson correlation (coeff, p-value)
pearsonr(a, b)
In [ ]:
# inverse of matrix
A = np.array([[1,3,5],[2,5,1],[2,3,8]])
linalg.inv(A)
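As a quick sanity check (a small sketch using the A just defined), multiplying A by its computed inverse should recover the identity matrix, up to floating-point error:
In [ ]:
# A times its inverse should be (numerically) the 3x3 identity matrix
np.allclose(A.dot(linalg.inv(A)), np.identity(3))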
To learn more about how NumPy can be combined with SciPy and Scikit-learn for machine learning, check out this notebook tutorial by Chris Potts and Will Monroe. (You may notice that over half of this current notebook is modified from theirs.) Their tutorial also has some interesting exercises in it!
In [ ]:
import matplotlib.pyplot as plt
In [ ]:
a = np.sort(np.random.random(30))
b = a**2
c = np.log(a)
plt.plot(a, b, label='y = x^2')
plt.plot(a, c, label='y = log(x)')
plt.legend()
plt.title("Some functions")
plt.show()