Outline

PyTorch
What are tensors
Initialising, slicing, reshaping tensors
Numpy and PyTorch interfacing
GPU support for PyTorch + Enabling GPUs on Google Colab
Speed comparisons, Numpy -- PyTorch -- PyTorch on GPU
Autodiff concepts and application
Writing a basic learning loop using autograd
Exercises



In [0]:

    
import torch
import numpy as np
import matplotlib.pyplot as plt

Initialise tensors



In [0]:

    
# Three basic factory functions, each producing a 3x2 tensor. The shape is
# passed here as a single tuple instead of separate integers.
x = torch.ones((3, 2))    # filled with 1
print(x)
x = torch.zeros((3, 2))   # filled with 0
print(x)
x = torch.rand((3, 2))    # uniform random samples from [0, 1)
print(x)



In [0]:

    
# torch.empty allocates a 3x2 tensor without initialising it, so the printed
# values are whatever happened to be in that memory.
x = torch.empty(3, 2)
print(x)
# zeros_like builds a tensor of zeros that copies the shape of x.
y = torch.zeros_like(x)
print(y)



In [0]:

    
# Five evenly spaced values from 0 to 1, both endpoints included.
x = torch.linspace(0, 1, steps=5)
print(x)



In [0]:

    
# Build a 3x2 tensor from a nested Python list: one inner list per row.
x = torch.tensor([[1, 2], 
                 [3, 4], 
                 [5, 6]])
print(x)

Slicing tensors



In [0]:

    
# Shape of x, then NumPy-style slices.
print(x.size())
print(x[:, 1])   # every row, column index 1
print(x[0, :])   # row index 0, every column



In [0]:

    
# Indexing a single element still yields a tensor (zero-dimensional) ...
y = x[1, 1]
print(y)
# ... and .item() unwraps it into a plain Python number.
print(y.item())

Reshaping tensors



In [0]:

    
print(x)
# view lays the same elements out as 2 rows by 3 columns; the element count
# must stay the same.
y = x.view(2, 3)
print(y)



In [0]:

    
# A -1 dimension is inferred from the element count, so with 6 elements this
# gives a 6x1 column.
y = x.view(6,-1) 
print(y)

Simple Tensor Operations



In [0]:

    
# Element-wise sum, difference and product of two 3x2 tensors of ones. Each
# result is printed in turn; z is left holding the last one (the product).
x, y = torch.ones(3, 2), torch.ones(3, 2)
for z in (x + y, x - y, x * y):
  print(z)



In [0]:

    
# add returns a new tensor holding y + x; printing y afterwards shows that y
# itself was not modified.
z = y.add(x)
print(z)
print(y)



In [0]:

    
# The trailing underscore marks the in-place variant: y is overwritten with
# y + x, so both prints show the updated values.
z = y.add_(x)
print(z)
print(y)

Numpy <> PyTorch



In [0]:

    
# Convert the torch tensor to a NumPy array and compare the two types.
x_np = x.numpy()
print(type(x), type(x_np))
print(x_np)



In [0]:

    
# Start from a NumPy array of 5 standard-normal samples ...
a = np.random.randn(5)
print(a)
# ... and wrap it as a torch tensor.
a_pt = torch.from_numpy(a)
print(type(a), type(a_pt))
print(a_pt)



In [0]:

    
# Add 1 to the NumPy array in place (out=a). Printing a_pt afterwards shows
# whether the tensor made by from_numpy sees the change, i.e. whether the two
# share the same underlying memory.
np.add(a, 1, out=a)
print(a)
print(a_pt)



In [0]:

    
%%time
# NumPy baseline: 100 rounds of generating two random 100x100 matrices and
# multiplying them. The random generation is inside the timed region.
for i in range(100):
  a = np.random.randn(100,100)
  b = np.random.randn(100,100)
  c = np.matmul(a, b)



In [0]:

    
%%time
# Same workload with PyTorch on the CPU.
# NOTE(review): np.random.randn yields float64 while torch.randn uses torch's
# default dtype (float32 unless it was changed), so the two timings may not be
# comparing the same precision -- confirm if the numbers matter.
for i in range(100):
  a = torch.randn([100, 100])
  b = torch.randn([100, 100])
  c = torch.matmul(a, b)



In [0]:

    
%%time
# NumPy: 10 rounds of generating two random 10000x10000 arrays and adding them
# element-wise. Generation is part of the measured time.
for i in range(10):
  a = np.random.randn(10000,10000)
  b = np.random.randn(10000,10000)
  c = a + b



In [0]:

    
%%time
# PyTorch (CPU) version of the 10000x10000 element-wise addition benchmark.
for i in range(10):
  a = torch.randn([10000, 10000])
  b = torch.randn([10000, 10000])
  c = a + b

CUDA support



In [0]:

    
# Number of GPUs PyTorch can see; 0 means no GPU is available to this runtime.
print(torch.cuda.device_count())



In [0]:

    
# torch.cuda.device(0) constructs a device-selection object, so this prints
# that object's repr rather than any hardware details.
print(torch.cuda.device(0))
# Human-readable name of GPU 0.
print(torch.cuda.get_device_name(0))



In [0]:

    
# Handle for the first GPU; passed as device=cuda0 when creating tensors.
cuda0 = torch.device('cuda:0')



In [0]:

    
# Create both operands directly on the device named by cuda0 and add them.
a = torch.ones(3, 2, device=cuda0)
b = torch.ones(3, 2, device=cuda0)
c = a + b
print(c)



In [0]:

    
# If a is still the tensor created with device=cuda0, the printout also shows
# which device it lives on.
print(a)



In [0]:

    
%%time
# NumPy baseline for the CPU/GPU comparison: 10 rounds of random generation
# plus an element-wise add. np.add(b, a) has no out= argument, so each call
# produces a new result array, which is discarded.
for i in range(10):
  a = np.random.randn(10000,10000)
  b = np.random.randn(10000,10000)
  np.add(b, a)



In [0]:

    
%%time
# PyTorch on the CPU: same sizes, but add_ accumulates into b_cpu in place
# instead of producing a new tensor.
for i in range(10):
  a_cpu = torch.randn([10000, 10000])
  b_cpu = torch.randn([10000, 10000])
  b_cpu.add_(a_cpu)



In [0]:

    
%%time
for i in range(10):
  a = torch.randn([10000, 10000], device=cuda0)
  b = torch.randn([10000, 10000], device=cuda0)
  b.add_(a)



In [0]:

    
%%time
# NumPy baseline: 10 rounds of generating two random 10000x10000 matrices and
# multiplying them; the product is discarded.
for i in range(10):
  a = np.random.randn(10000,10000)
  b = np.random.randn(10000,10000)
  np.matmul(b, a)



In [0]:

    
%%time
# PyTorch on the CPU: the same 10000x10000 matrix-product workload.
for i in range(10):
  a_cpu = torch.randn([10000, 10000])
  b_cpu = torch.randn([10000, 10000])
  torch.matmul(a_cpu, b_cpu)



In [0]:

    
%%time
for i in range(10):
  a = torch.randn([10000, 10000], device=cuda0)
  b = torch.randn([10000, 10000], device=cuda0)
  torch.matmul(a, b)

Autodiff



In [0]:

    
# requires_grad=True asks autograd to track operations on x so that gradients
# with respect to it can be computed later.
x = torch.ones([3, 2], requires_grad=True)
print(x)









    



tensor([[1., 1.],
        [1., 1.],
        [1., 1.]], requires_grad=True)



In [0]:

    
# y is computed from x, so its printout carries a grad_fn recording the
# addition that produced it.
y = x + 5
print(y)









    



tensor([[6., 6.],
        [6., 6.],
        [6., 6.]], grad_fn=<AddBackward0>)



In [0]:

    
# z_i = y_i^2 + 1, element-wise; with y = 6 everywhere every entry is 37.
z = y*y + 1
print(z)









    



tensor([[37., 37.],
        [37., 37.],
        [37., 37.]], grad_fn=<AddBackward0>)



In [0]:

    
# Collapse z to a single scalar t so that backward() can be called on it
# without supplying a gradient argument.
t = torch.sum(z)
print(t)









    



tensor(222., grad_fn=<SumBackward0>)



In [0]:

    
# Backpropagate from the scalar t; this fills in x.grad with dt/dx.
t.backward()



In [0]:

    
# dt/dx_i = 2 * y_i = 2 * (x_i + 5), which is 12 at x_i = 1.
print(x.grad)









    



tensor([[12., 12.],
        [12., 12.],
        [12., 12.]])

$t = \sum_i z_i, z_i = y_i^2 + 1, y_i = x_i + 5$

$\frac{\partial t}{\partial x_i} = \frac{\partial z_i}{\partial x_i} = \frac{\partial z_i}{\partial y_i} \frac{\partial y_i}{\partial x_i} = 2y_i \times 1$

At $x_i = 1$ we have $y_i = 6$, so $\frac{\partial t}{\partial x_i} = 2 \times 6 = 12$, which matches the printed `x.grad`.



In [0]:

    
# Gradient of a hand-written sigmoid, r = 1 / (1 + exp(-(x + 5))), reduced to
# the scalar s = sum(r) so that backward() needs no argument.
x = torch.ones(3, 2, requires_grad=True)
y = x + 5
denom = 1 + torch.exp(-y)
r = 1/denom
print(r)
s = torch.sum(r)
s.backward()
# ds/dx_i = r_i * (1 - r_i)
print(x.grad)









    



tensor([[0.9975, 0.9975],
        [0.9975, 0.9975],
        [0.9975, 0.9975]], grad_fn=<MulBackward0>)
tensor([[0.0025, 0.0025],
        [0.0025, 0.0025],
        [0.0025, 0.0025]])



In [0]:

    
# Same sigmoid as before, but backward() is called on the non-scalar r. A
# tensor of the same shape as r must then be supplied; it weights each element
# of r when the gradients are accumulated into x.grad.
x = torch.ones(3, 2, requires_grad=True)
y = x + 5
r = 1/(1 + torch.exp(-y))
a = torch.ones(3, 2)
r.backward(gradient=a)
print(x.grad)









    



tensor([[0.0025, 0.0025],
        [0.0025, 0.0025],
        [0.0025, 0.0025]])

$\frac{\partial{s}}{\partial{x}} = \frac{\partial{s}}{\partial{r}} \cdot \frac{\partial{r}}{\partial{x}}$

In the code above, the tensor $a$ passed to `r.backward(a)` plays the role of $\frac{\partial{s}}{\partial{r}}$ (all ones, because $s = \sum_i r_i$), so $x.grad$ directly holds $\frac{\partial{s}}{\partial{x}}$.

Autodiff example that looks like what we have been doing



In [0]:

    
# Synthetic data for the line y = 3x - 2: 20 random inputs and their targets.
# The inputs are data, not parameters, so they are created without
# requires_grad. Otherwise the targets would carry an autograd graph and
# backward() would also compute an unused x.grad. This matches how x is
# created inside the training loop later in the notebook.
x = torch.randn([20, 1])
y = 3*x - 2



In [0]:

    
# Trainable parameters, both starting at 1 (the data were generated with
# slope 3 and intercept -2).
w = torch.tensor([1.], requires_grad=True)
b = torch.tensor([1.], requires_grad=True)

# Model prediction; the one-element w and b broadcast against x.
y_hat = w*x + b

# Sum of squared errors over all points.
loss = torch.sum((y_hat - y)**2)



In [0]:

    
# The loss is a scalar tensor that still carries its grad_fn.
print(loss)









    



tensor(318.2823, grad_fn=<SumBackward0>)



In [0]:

    
# Backpropagate from the scalar loss to fill in w.grad and b.grad.
loss.backward()



In [0]:

    
# Gradients of the loss with respect to w and b.
print(w.grad, b.grad)









    



tensor([-106.4956]) tensor([141.1912])

Do it in a loop



In [0]:

    
# Gradient descent on the line y = 3x - 2, starting from w = b = 1 and drawing
# a fresh random batch of 20 points at every step.
learning_rate = 0.01

w = torch.tensor([1.], requires_grad=True)
b = torch.tensor([1.], requires_grad=True)

print(w.item(), b.item())

for i in range(10):

  # New batch and its exact targets.
  x = torch.randn([20, 1])
  y = 3*x - 2

  # Forward pass: sum of squared residuals.
  y_hat = w*x + b
  residual = y_hat - y
  loss = torch.sum(residual**2)

  loss.backward()

  # Step each parameter against its gradient, then clear that gradient so the
  # next backward() does not accumulate onto it. no_grad keeps the updates
  # themselves from being tracked by autograd.
  with torch.no_grad():
    for param in (w, b):
      param -= learning_rate * param.grad
      param.grad.zero_()

  print(w.item(), b.item())









    



1.0 1.0
1.694516658782959 -0.32816600799560547
2.5244972705841064 -0.9011859893798828
2.6990771293640137 -1.3381099700927734
2.7810328006744385 -1.5905817747116089
2.821857213973999 -1.7378290891647339
2.943121910095215 -1.868725061416626
2.9525837898254395 -1.9191371202468872
2.9741718769073486 -1.9551563262939453
2.9911296367645264 -1.972025752067566
2.994936943054199 -1.9838125705718994

Do it for a large problem



In [0]:

    
%%time
# Timing workload on the CPU: the same gradient-descent loop, but with an
# N-dimensional weight vector and one N-dimensional sample per epoch.
# NOTE(review): with N = 1e7 the per-step update looks far too large for the
# iterates to stay finite; the cell appears intended purely as a timing
# workload -- confirm before reading anything into the final w and b.
learning_rate = 0.001
N = 10000000
epochs = 200

w = torch.rand([N], requires_grad=True)
b = torch.ones([1], requires_grad=True)

# print(torch.mean(w).item(), b.item())

for i in range(epochs):
  
  # One random sample; its target uses true weights of 3 everywhere and a
  # bias of -2.
  x = torch.randn([N])
  y = torch.dot(3*torch.ones([N]), x) - 2
  
  # Prediction and squared error for that single sample.
  y_hat = torch.dot(w, x) + b
  loss = torch.sum((y_hat - y)**2)
  
  loss.backward()
  
  # Gradient step followed by resetting the accumulated gradients.
  with torch.no_grad():
    w -= learning_rate * w.grad
    b -= learning_rate * b.grad
    
    w.grad.zero_()
    b.grad.zero_()

#   print(torch.mean(w).item(), b.item())









    



CPU times: user 36.7 s, sys: 443 ms, total: 37.2 s
Wall time: 37.2 s



In [0]:

    
%%time
learning_rate = 0.001
N = 10000000
epochs = 200

w = torch.rand([N], requires_grad=True, device=cuda0)
b = torch.ones([1], requires_grad=True, device=cuda0)

# print(torch.mean(w).item(), b.item())

for i in range(epochs):
  
  x = torch.randn([N], device=cuda0)
  y = torch.dot(3*torch.ones([N], device=cuda0), x) - 2
  
  y_hat = torch.dot(w, x) + b
  loss = torch.sum((y_hat - y)**2)
  
  loss.backward()
  
  with torch.no_grad():
    w -= learning_rate * w.grad
    b -= learning_rate * b.grad
    
    w.grad.zero_()
    b.grad.zero_()

  #print(torch.mean(w).item(), b.item())









    



CPU times: user 467 ms, sys: 305 ms, total: 772 ms
Wall time: 784 ms



In [0]: