All about PyTorch


In [0]:
import torch

Tensors

Tensors are similar to NumPy’s ndarrays, with the addition that tensors can also be used on a GPU to accelerate computation.


In [2]:
x = torch.empty(5, 3) ## uninitialized 5x3 tensor; values are whatever memory happened to contain
x


Out[2]:
tensor([[6.5694e-36, 0.0000e+00, 3.3631e-44],
        [0.0000e+00,        nan, 0.0000e+00],
        [1.1578e+27, 1.1362e+30, 7.1547e+22],
        [4.5828e+30, 1.2121e+04, 7.1846e+22],
        [9.2198e-39, 7.0374e+22, 0.0000e+00]])

In [3]:
x = torch.randn(5, 3) ## 5x3 tensor with values drawn from a standard normal distribution
x


Out[3]:
tensor([[ 0.1648,  0.1203,  1.0604],
        [ 0.4551,  0.9178,  1.3501],
        [ 0.3136, -0.9561,  1.3928],
        [ 2.1873, -1.6890,  0.2575],
        [-0.6818, -0.4929,  0.5411]])

In [4]:
x = torch.zeros(5,3, dtype=torch.long)
x


Out[4]:
tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])

In [5]:
type(x)


Out[5]:
torch.Tensor

In [6]:
x = torch.ones(5,2)
x


Out[6]:
tensor([[1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.]])

In [7]:
myarr = [[10, 20.2], [30, 40]] ## sample data; the single float makes the whole tensor floating point

x = torch.tensor(myarr)
x


Out[7]:
tensor([[10.0000, 20.2000],
        [30.0000, 40.0000]])

In [8]:
## create a tensor from an existing tensor

x = torch.tensor([[1,2],[3,4]], dtype=torch.int16)
print(f"X tensor: {x}")

y = torch.tensor(x, dtype=torch.float16) ## copies x into a new tensor; the warning below recommends x.clone().detach() instead
print(f"Y tensor: {y}")


X tensor: tensor([[1, 2],
        [3, 4]], dtype=torch.int16)
Y tensor: tensor([[1., 2.],
        [3., 4.]], dtype=torch.float16)
/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:6: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
  

In [9]:
## size of tensor

x.size()


Out[9]:
torch.Size([2, 2])
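
x.shape is an alias for x.size(), and torch.Size is a subclass of tuple, so the size can be used like an ordinary tuple:

x.shape                 ## same information as x.size(): torch.Size([2, 2])
rows, cols = x.size()   ## unpacks like a tuple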
  • Operations on Tensors

In [10]:
x = torch.randn(5,2)
x


Out[10]:
tensor([[-0.0173, -0.1190],
        [ 0.6001,  0.5685],
        [-1.3267, -0.1079],
        [-1.7419, -0.6463],
        [-0.7563, -1.1716]])

In [11]:
y = torch.ones(5,2)
y


Out[11]:
tensor([[1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.]])

In [12]:
x + y ## element-wise sum of two tensors


Out[12]:
tensor([[ 0.9827,  0.8810],
        [ 1.6001,  1.5685],
        [-0.3267,  0.8921],
        [-0.7419,  0.3537],
        [ 0.2437, -0.1716]])

In [13]:
torch.add(x, y) ## alternative syntax for the same element-wise sum


Out[13]:
tensor([[ 0.9827,  0.8810],
        [ 1.6001,  1.5685],
        [-0.3267,  0.8921],
        [-0.7419,  0.3537],
        [ 0.2437, -0.1716]])

In [14]:
## In-place addition: any operation that mutates a tensor in-place is post-fixed with an underscore. For example, x.copy_(y) and x.t_() will change x.

y.add_(x)


Out[14]:
tensor([[ 0.9827,  0.8810],
        [ 1.6001,  1.5685],
        [-0.3267,  0.8921],
        [-0.7419,  0.3537],
        [ 0.2437, -0.1716]])
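
Other in-place operations follow the same underscore convention; a small sketch on throwaway tensors (not the x and y above):

a = torch.zeros(2, 3)
b = torch.ones(2, 3)
a.copy_(b)   ## a now holds the values of b
a.t_()       ## a is transposed in-place and is now 3x2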

In [15]:
### Standard NumPy-style indexing and slicing work on tensors

x[:1]


Out[15]:
tensor([[-0.0173, -0.1190]])

In [16]:
type(x[:1])


Out[16]:
torch.Tensor

In [17]:
### Reshape the tensor with view()

x = torch.randn(4,4)
x


Out[17]:
tensor([[-1.6968,  1.9364, -2.1796, -0.0819],
        [ 1.8027,  0.3540,  1.3269,  0.1532],
        [-0.4969,  0.4169,  0.5677,  1.0968],
        [ 0.0742,  1.5354,  0.9387,  0.0343]])

In [18]:
y = x.view(16)
y


Out[18]:
tensor([-1.6968,  1.9364, -2.1796, -0.0819,  1.8027,  0.3540,  1.3269,  0.1532,
        -0.4969,  0.4169,  0.5677,  1.0968,  0.0742,  1.5354,  0.9387,  0.0343])

In [19]:
z = x.view([-1,8])
z


Out[19]:
tensor([[-1.6968,  1.9364, -2.1796, -0.0819,  1.8027,  0.3540,  1.3269,  0.1532],
        [-0.4969,  0.4169,  0.5677,  1.0968,  0.0742,  1.5354,  0.9387,  0.0343]])
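
The -1 passed to view asks PyTorch to infer that dimension from the total number of elements (16 here), for example:

x.view(-1, 2).size()   ## torch.Size([8, 2]); the -1 is inferred as 16 / 2 = 8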

In [20]:
## transpose a tensor (swap dimensions 0 and 1)

torch.transpose(x, 0,1)


Out[20]:
tensor([[-1.6968,  1.8027, -0.4969,  0.0742],
        [ 1.9364,  0.3540,  0.4169,  1.5354],
        [-2.1796,  1.3269,  0.5677,  0.9387],
        [-0.0819,  0.1532,  1.0968,  0.0343]])

In [21]:
## get the Python number out of a one-element tensor with .item()

x = torch.randn(1)
print(x)
print(x.item())


tensor([0.7582])
0.7581729292869568
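
.item() only works on tensors with exactly one element; for larger tensors, .tolist() returns a (nested) Python list:

x.tolist()                   ## a one-element Python list for the tensor above
torch.randn(2, 2).tolist()   ## nested list of Python floats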

NumPy Bridge with Tensors


In [22]:
a = torch.ones(5)
a


Out[22]:
tensor([1., 1., 1., 1., 1.])

In [23]:
type(a)


Out[23]:
torch.Tensor

In [24]:
### convert a tensor to a NumPy array

a.numpy()


Out[24]:
array([1., 1., 1., 1., 1.], dtype=float32)

In [25]:
## convert a NumPy array to a torch tensor

import numpy as np

a = np.ones(5)
t = torch.tensor(a, dtype=torch.int)

print(a, type(a))
print(t, type(t))


[1. 1. 1. 1. 1.] <class 'numpy.ndarray'>
tensor([1, 1, 1, 1, 1], dtype=torch.int32) <class 'torch.Tensor'>
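
Note that torch.tensor(a) copies the NumPy data, while torch.from_numpy(a) shares memory with the array (as does tensor.numpy() on the CPU). A small sketch of the difference:

a = np.ones(3)
shared = torch.from_numpy(a)   ## shares memory with a
copied = torch.tensor(a)       ## copies the data
a += 1
print(shared)   ## reflects the change: tensor([2., 2., 2.], dtype=torch.float64)
print(copied)   ## unchanged: tensor([1., 1., 1.], dtype=torch.float64)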
  • Running on a Device (GPU)

In [26]:
### check whether CUDA is available

torch.cuda.is_available()


Out[26]:
True

In [27]:
if torch.cuda.is_available():
  device = torch.device("cuda") ## define device
  
  x = torch.ones(5) ## created on the CPU by default
  print(x)
  
  y = torch.ones_like(x, device=device) ### create this tensor directly on the GPU
  print(y)

  x = x.to(device)  ## move x to the GPU

  z = x + y

  print(z)
  
  print(z.to("cpu", dtype=torch.int32)) ## .to can move z back to the CPU and change its dtype in one call


tensor([1., 1., 1., 1., 1.])
tensor([1., 1., 1., 1., 1.], device='cuda:0')
tensor([2., 2., 2., 2., 2.], device='cuda:0')
tensor([2, 2, 2, 2, 2], dtype=torch.int32)
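
A common device-agnostic pattern (a sketch, not part of the run above) is to pick the device once and reuse it:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
x = torch.ones(5, device=device)   ## create directly on whichever device is available
y = torch.ones(5).to(device)       ## or move an existing tensor
z = (x + y).to("cpu")              ## bring the result back to the CPU when needed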

AUTOGRAD


In [28]:
x = torch.ones(2,2, requires_grad=True)
x


Out[28]:
tensor([[1., 1.],
        [1., 1.]], requires_grad=True)

In [29]:
y = x + 2
y


Out[29]:
tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)

In [30]:
y.grad_fn ### y was created as a result of an operation, hence it has a grad_fn


Out[30]:
<AddBackward0 at 0x7f99e6c6dba8>

In [31]:
## more operation on y

z = y*y*3
out = z.mean()

print(z, out)


tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>) tensor(27., grad_fn=<MeanBackward0>)

In [32]:
### .requires_grad_( ... ) changes an existing tensor's requires_grad flag in-place. Tensors are created with requires_grad=False by default.

a = torch.randn(2,2)
a = ((a*2)/(a-1))
print(a.requires_grad)

a.requires_grad_(True) ## set the requires_grad flag in-place

print(a.requires_grad)

b = (a * a).sum()
print(b.grad_fn)


False
True
<SumBackward0 object at 0x7f99e6c77908>
  • Gradients

In [33]:
out ## because out contains a single scalar, out.backward() is equivalent to out.backward(torch.tensor(1.))


Out[33]:
tensor(27., grad_fn=<MeanBackward0>)

In [0]:
out.backward()

In [35]:
print(x.grad) ### Print gradients d(out)/dx


tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])
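
To check the 4.5 by hand: out = (1/4) * sum_i of 3*(x_i + 2)^2, so d(out)/dx_i = (3/2)*(x_i + 2), which is 4.5 at x_i = 1.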

In [36]:
## another example

t1 = torch.ones(1, requires_grad= True)
t2 = torch.ones(1, requires_grad=True)

print(t1, t2)

s = t1+t2
print(s)


tensor([1.], requires_grad=True) tensor([1.], requires_grad=True)
tensor([2.], grad_fn=<AddBackward0>)

In [37]:
s.grad_fn


Out[37]:
<AddBackward0 at 0x7f99e6c77080>

In [0]:
s.backward()

In [39]:
t1.grad


Out[39]:
tensor([1.])
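
This is expected: s = t1 + t2, so ds/dt1 = 1, and ds/dt2 would likewise be 1.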

Vector-Jacobian Product


In [40]:
x = torch.randn(3, requires_grad=True)

y = x * 2
while y.data.norm() < 1000:  ## keep doubling y until its norm exceeds 1000
    y = y * 2

print(y)


tensor([-674.6850, -785.7428, -353.3467], grad_fn=<MulBackward0>)

In [41]:
v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float)
y.backward(v)

print(x.grad)


tensor([2.0480e+02, 2.0480e+03, 2.0480e-01])
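
To see where these numbers come from: after the loop, y = 2^k * x, with 2^k = 2048 in this run (y kept doubling until its norm passed 1000), so the Jacobian of y with respect to x is 2048 times the identity and x.grad = J^T v = 2048 * [0.1, 1.0, 0.0001] = [204.8, 2048, 0.2048].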

In [42]:
print(x.requires_grad)
y = x.detach()
print(y.requires_grad)
print(x.eq(y).all())


True
False
tensor(True)

Neural Network

A typical training procedure for a neural network is as follows (a rough code sketch of these steps appears after the list):

  1. Define the neural network that has some learnable parameters (or weights)
  2. Iterate over a dataset of inputs
  3. Process input through the network
  4. Compute the loss (how far is the output from being correct)
  5. Propagate gradients back into the network’s parameters
  6. Update the weights of the network, typically using a simple update rule:
    weight = weight - learning_rate * gradient
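
A rough sketch of these six steps using autograd and an optimizer on made-up data (nn.Linear, MSELoss and SGD here are stand-ins, not the manual-backprop network built below):

import torch
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(2, 1)                        ## 1. a network with learnable weights
optimizer = optim.SGD(model.parameters(), lr=0.01)
criterion = nn.MSELoss()

inputs = torch.randn(8, 2)                     ## made-up data for the sketch
targets = torch.randn(8, 1)

for epoch in range(10):                        ## 2. iterate over the data
    optimizer.zero_grad()
    outputs = model(inputs)                    ## 3. forward pass through the network
    loss = criterion(outputs, targets)         ## 4. compute the loss
    loss.backward()                            ## 5. propagate gradients back
    optimizer.step()                           ## 6. weight = weight - learning_rate * gradient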
  • Simple NN

In [0]:
import torch.nn as nn
import torch.nn.functional as F

In [52]:
X = torch.tensor(([2, 9], [5, 1], [3, 6]), dtype=torch.float) ## 3x2 tensor
y = torch.tensor(([92], [100], [89]), dtype=torch.float)  ## 3x1 tensor
xPredicted = torch.tensor(([4, 8]), dtype=torch.float) ## 1x2 tensor

print(X)
print(y)


tensor([[2., 9.],
        [5., 1.],
        [3., 6.]])
tensor([[ 92.],
        [100.],
        [ 89.]])

In [59]:
X_max, X_max_ind = torch.max(X, 0) ## per-column max values and their row indices; dim=0 reduces over rows, dim=1 would reduce over columns
print(X_max, X_max_ind)

xPredicted_max, _ = torch.max(xPredicted, 0)
print(xPredicted_max)

y_max = torch.max(y)
print(y_max)

## scale inputs and targets to [0, 1] by dividing by their maxima
X = torch.div(X, X_max)
xPredicted = torch.div(xPredicted, xPredicted_max)
y = y/y_max

print(f"X is : {X}")
print(f"xPredicted is : {xPredicted}")
print(f"y is : {y}")


tensor([1., 1.]) tensor([1, 0])
tensor(1.)
tensor(1.)
X is : tensor([[0.4000, 1.0000],
        [1.0000, 0.1111],
        [0.6000, 0.6667]])
xPredicted is : tensor([0.5000, 1.0000])
y is : tensor([[0.9200],
        [1.0000],
        [0.8900]])

In [0]:
class SimpleNN(nn.Module):
  def __init__(self):
    super(SimpleNN, self).__init__()
    ## parameters
    self.input_size = 2
    self.hidden_layer = 3
    self.output_layer = 1

    ## initializing the weights
    self.W1 = torch.randn(self.input_size, self.hidden_layer) # 2x3 tensor
    self.W2 = torch.randn(self.hidden_layer, self.output_layer) # 3x1 tensor


  def forward(self, X):
    '''
      Forward propagation
    '''
    self.z = torch.matmul(X, self.W1)
    self.z2 = torch.sigmoid(self.z)
    
    self.z3 = torch.matmul(self.z2, self.W2)
    o = torch.sigmoid(self.z3) ## final activation function

    return o
  

  def sigmoid(self, s):
    return 1 / (1 + torch.exp(-s))
    
  
  def sigmoidPrime(self, s):
    # derivative of the sigmoid, written in terms of the sigmoid output s
    return s * (1 - s)

  
  def backward(self, X, y, o):
    '''
      Backward propagation
    '''
    self.o_error = y - o ## error between the target and the prediction
    
    self.o_delta = self.o_error * self.sigmoidPrime(o) ## scale the output error by the sigmoid derivative
    self.z2_error = torch.matmul(self.o_delta, torch.t(self.W2))

    self.z2_delta = self.z2_error * self.sigmoidPrime(self.z2)

    self.W1 += torch.matmul(torch.t(X), self.z2_delta)
    self.W2 += torch.matmul(torch.t(self.z2), self.o_delta)

  
  def train(self, X, y):
    # one forward + backward pass; note this shadows nn.Module.train(mode)
    o = self.forward(X)
    self.backward(X, y, o)

  def saveWeights(self, model):
    # torch.save serializes the whole model object to a file named "NN"
    torch.save(model, "NN")
        
  def predict(self):
    print ("Predicted data based on trained weights: ")
    print ("Input (scaled):" + str(xPredicted))
    print ("Output:" + str(self.forward(xPredicted)))

In [73]:
NN = SimpleNN()

print(NN)


SimpleNN()
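
The repr is empty because SimpleNN registers no child modules (such as nn.Linear), and W1/W2 are plain tensors, so they also do not appear in NN.parameters() or the state_dict. A sketch of how the weights could be registered instead (illustrative only, not the notebook's code):

class SimpleNNRegistered(nn.Module):   ## hypothetical variant for illustration
  def __init__(self):
    super().__init__()
    self.W1 = nn.Parameter(torch.randn(2, 3))   ## registered: visible to .parameters() and optimizers
    self.W2 = nn.Parameter(torch.randn(3, 1))

  def forward(self, X):
    return torch.sigmoid(torch.matmul(torch.sigmoid(torch.matmul(X, self.W1)), self.W2))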

In [74]:
for i in range(10):  # trains the NN 10 times
    print ("#" + str(i) + " Loss: " + str(torch.mean((y - NN(X))**2).detach().item()))  # mean sum squared loss
    NN.train(X, y)
NN.saveWeights(NN)
NN.predict()


#0 Loss: 0.2509533762931824
#1 Loss: 0.178019180893898
#2 Loss: 0.12616543471813202
#3 Loss: 0.09139183163642883
#4 Loss: 0.068324513733387
#5 Loss: 0.05276729539036751
#6 Loss: 0.041979026049375534
#7 Loss: 0.03426816314458847
#8 Loss: 0.028596043586730957
#9 Loss: 0.02431400679051876
Predicted data based on trained weights: 
Input (scaled):tensor([0.5000, 1.0000])
Output:tensor([0.8484])
/usr/local/lib/python3.6/dist-packages/torch/serialization.py:402: UserWarning: Couldn't retrieve source code for container of type SimpleNN. It won't be checked for correctness upon loading.
  "type " + obj.__name__ + ". It won't be checked "
