In [1]:
%matplotlib inline
import numpy as np

import matplotlib.pyplot as plt

import torch
from torch.autograd import Variable
import torch.nn as nn

In [2]:
X = np.arange(-5,5,0.01)
y = 4*X**3 + 2**X + 3
print(X.shape, y.shape)


(1000,) (1000,)

In [3]:
plt.plot(X,y)


Out[3]:
[<matplotlib.lines.Line2D at 0x7f252f9b9320>]

Creating a Variable from NumPy arrays

Loosely similar to TensorFlow placeholders, though a Variable wraps concrete data rather than a symbolic slot


In [4]:
batch = Variable(torch.from_numpy(X[:4, np.newaxis])) 
batch


Out[4]:
Variable containing:
-5.0000
-4.9900
-4.9800
-4.9700
[torch.DoubleTensor of size 4x1]

Concatenating along an axis


In [5]:
torch.cat((batch, batch), 1)


Out[5]:
Variable containing:
-5.0000 -5.0000
-4.9900 -4.9900
-4.9800 -4.9800
-4.9700 -4.9700
[torch.DoubleTensor of size 4x2]
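
For comparison, passing 0 as the dimension stacks the same batches vertically instead (a minimal sketch; the expected size is noted in the comment):

torch.cat((batch, batch), 0) ## rows stacked -> torch.DoubleTensor of size 8x1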

Convert tensors to float before feeding them to nn layers, whose parameters default to FloatTensor


In [6]:
batch = Variable(torch.from_numpy(X[:4, np.newaxis])).float() ## Converting to float is important for GPU
batch


Out[6]:
Variable containing:
-5.0000
-4.9900
-4.9800
-4.9700
[torch.FloatTensor of size 4x1]

Matrix multiplication as a layer operation


In [7]:
nn.Linear(1,3)(batch)


Out[7]:
Variable containing:
-2.5177 -1.7919 -2.4977
-2.5135 -1.7896 -2.4947
-2.5094 -1.7872 -2.4916
-2.5052 -1.7849 -2.4886
[torch.FloatTensor of size 4x3]

Do the same for the target


In [8]:
target = Variable(torch.from_numpy(y[:4, np.newaxis])).float()
target


Out[8]:
Variable containing:
-496.9688
-493.9745
-490.9923
-488.0220
[torch.FloatTensor of size 4x1]

Broadcasting surrogate (this PyTorch version does not broadcast shapes automatically)


In [9]:
hidden = Variable(torch.zeros(1,3))
hidden


Out[9]:
Variable containing:
 0  0  0
[torch.FloatTensor of size 1x3]

In [10]:
h = nn.Linear(3,3)(hidden)
h


Out[10]:
Variable containing:
 0.1454  0.2194 -0.2428
[torch.FloatTensor of size 1x3]

In [11]:
x = nn.Linear(1,3)(batch)
x


Out[11]:
Variable containing:
-2.4399  2.8048 -3.9366
-2.4362  2.7988 -3.9279
-2.4325  2.7927 -3.9192
-2.4288  2.7866 -3.9105
[torch.FloatTensor of size 4x3]

In [12]:
try:
    x + h ## Will give error
except RuntimeError as e:
    print(e)


inconsistent tensor size at /data/users/soumith/miniconda2/conda-bld/pytorch-cuda80-0.1.6_1485185046107/work/torch/lib/TH/generic/THTensorMath.c:601

In [13]:
h.expand_as(x) ## expands h to the same size as x so the two can be added


Out[13]:
Variable containing:
 0.1454  0.2194 -0.2428
 0.1454  0.2194 -0.2428
 0.1454  0.2194 -0.2428
 0.1454  0.2194 -0.2428
[torch.FloatTensor of size 4x3]

In [14]:
x + h.expand_as(x) ## Finally


Out[14]:
Variable containing:
-2.2945  3.0242 -4.1794
-2.2908  3.0182 -4.1707
-2.2871  3.0121 -4.1620
-2.2834  3.0060 -4.1533
[torch.FloatTensor of size 4x3]
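
Note: PyTorch 0.2 and later broadcast shapes automatically, NumPy-style, so the workaround above becomes unnecessary there:

x + h ## in PyTorch >= 0.2 this broadcasts h's 1x3 row across x's 4 rows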

Getting the size of the tensor inside a Variable


In [15]:
x.size()


Out[15]:
torch.Size([4, 3])

In [16]:
x.size(0)


Out[16]:
4

In [17]:
x.size()[0], x.size()[1],


Out[17]:
(4, 3)

In [18]:
isinstance(x.size(), tuple)


Out[18]:
True
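
Since torch.Size subclasses tuple, it also unpacks like one (a minimal sketch):

rows, cols = x.size()
print(rows, cols) ## 4 3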

In [19]:
x


Out[19]:
Variable containing:
-2.4399  2.8048 -3.9366
-2.4362  2.7988 -3.9279
-2.4325  2.7927 -3.9192
-2.4288  2.7866 -3.9105
[torch.FloatTensor of size 4x3]

Get the data tensor inside a Variable


In [20]:
x.data


Out[20]:
-2.4399  2.8048 -3.9366
-2.4362  2.7988 -3.9279
-2.4325  2.7927 -3.9192
-2.4288  2.7866 -3.9105
[torch.FloatTensor of size 4x3]

In [21]:
try:
    x.numpy()
except AttributeError as e:
    print("Numpy conversion happens only on tensors and not variables.")
    print(e)


Numpy conversion happens only on tensors and not variables.
numpy

In [22]:
x.data.numpy() ## succeeds


Out[22]:
array([[-2.43992019,  2.80484176, -3.93660831],
       [-2.43620563,  2.79876256, -3.9279108 ],
       [-2.43249106,  2.79268384, -3.91921329],
       [-2.4287765 ,  2.78660488, -3.91051579]], dtype=float32)
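
Keep in mind that torch.from_numpy and .numpy() share memory with the underlying array, so in-place edits are visible on both sides (a minimal sketch):

a = np.zeros(3)
t = torch.from_numpy(a)
t[0] = 1.0
print(a) ## array([1., 0., 0.]) -- the NumPy array sees the change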

Simple linear regression


In [23]:
np.random.seed(1337)
X = np.random.randn(1000,1)*4
W = np.array([0.5,])
bias = -1.68

In [24]:
y_true = np.dot(X, W) + bias
y = y_true + np.random.randn(X.shape[0])

In [25]:
plt.scatter(X, y, s=1, label="data")
plt.scatter(X, y_true, s=1, color='r', label="true")
plt.legend()


Out[25]:
<matplotlib.legend.Legend at 0x7f252c83b518>

In [26]:
def get_variable_from_np(X):
    return Variable(torch.from_numpy(X)).float()

class LinearRegression(nn.Module):
    def __init__(self, input_size, output_size):
        super(LinearRegression, self).__init__()
        self.x2o = nn.Linear(input_size, output_size)
        
    def forward(self, X):
        return self.x2o(X)
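
nn.Module also defines __call__ to dispatch to forward (running any registered hooks first), so the model can be applied directly; both spellings below are equivalent here (a minimal sketch):

model = LinearRegression(1, 1)
model(batch) ## idiomatic: __call__ -> forward
model.forward(batch) ## what this notebook uses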

In [27]:
batch_size = 10
batch = get_variable_from_np(X[:batch_size])
batch


Out[27]:
Variable containing:
-2.8127
-1.9611
-1.2873
-7.0203
 0.8267
-8.0451
-2.2290
 1.3489
 6.1953
-5.4829
[torch.FloatTensor of size 10x1]

In [28]:
model = LinearRegression(1, 1)

In [29]:
y_pred = model.forward(batch)
y_pred


Out[29]:
Variable containing:
-0.3634
-0.2134
-0.0947
-1.1047
 0.2777
-1.2852
-0.2606
 0.3697
 1.2235
-0.8338
[torch.FloatTensor of size 10x1]

In [30]:
batch = get_variable_from_np(X[:])
y_pred = model.forward(batch)
y_pred_np = y_pred.squeeze().data.numpy()
plt.scatter(X, y, s=1, label="data")
plt.scatter(X, y_true, s=1, color='r', label="true")
plt.scatter(X, y_pred_np, s=1, color='k', alpha=0.5, label="fit")
plt.legend()


Out[30]:
<matplotlib.legend.Legend at 0x7f252c749fd0>

Define loss criterion and optimizer


In [31]:
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
losses = []
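
MSELoss averages the squared residuals over all elements; a hand-rolled version gives the same number (a minimal sketch using illustrative random Variables):

pred = Variable(torch.randn(10, 1))
actual = Variable(torch.randn(10, 1))
diff = pred - actual
print(criterion(pred, actual).data[0])
print((diff * diff).mean().data[0]) ## matches the line above up to rounding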

Train the model


In [32]:
batch_size = 10
epochs = 100
print_every = 10

for i in range(epochs):
    loss = 0
    optimizer.zero_grad() # Zero the accumulated gradients before each update
    idx = np.random.randint(X.shape[0], size=batch_size)
    batch = get_variable_from_np(X[idx])
    target = get_variable_from_np(y[idx])
    output = model.forward(batch)
    loss += criterion(output, target)
    loss.backward() # Calculate the gradients
    optimizer.step() # Updates the parameters of the model
    if (i+1) % print_every == 0:
        print("Loss at epoch [%s]: %.3f" % (i, loss.data[0]))
    losses.append(loss.data[0])
    
plt.plot(losses, '-or')
plt.xlabel("Epoch")
plt.xlabel("Loss")


Loss at epoch [9]: 4.496
Loss at epoch [19]: 3.292
Loss at epoch [29]: 1.980
Loss at epoch [39]: 1.496
Loss at epoch [49]: 1.491
Loss at epoch [59]: 1.008
Loss at epoch [69]: 1.075
Loss at epoch [79]: 0.758
Loss at epoch [89]: 1.242
Loss at epoch [99]: 1.871
Out[32]:
<matplotlib.text.Text at 0x7f252c7725f8>
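
Each "epoch" above draws just one random batch; a full pass over the data per epoch would look like this (a sketch, not executed in this notebook):

for epoch in range(epochs):
    perm = np.random.permutation(X.shape[0]) ## reshuffle each epoch
    for start in range(0, X.shape[0], batch_size):
        idx = perm[start:start + batch_size]
        optimizer.zero_grad()
        output = model.forward(get_variable_from_np(X[idx]))
        loss = criterion(output, get_variable_from_np(y[idx]))
        loss.backward()
        optimizer.step()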

In [33]:
batch = get_variable_from_np(X[:])
y_pred = model.forward(batch)
y_pred_np = y_pred.squeeze().data.numpy()
plt.scatter(X, y, s=1, label="data")
plt.scatter(X, y_true, s=1, color='r', label="true")
plt.scatter(X, y_pred_np, s=1, color='k', alpha=0.5, label="fit")
plt.legend()


Out[33]:
<matplotlib.legend.Legend at 0x7f252c100ac8>

In [34]:
list(model.x2o.parameters())


Out[34]:
[Parameter containing:
  0.5226
 [torch.FloatTensor of size 1x1], Parameter containing:
 -1.5220
 [torch.FloatTensor of size 1]]

In [35]:
model.x2o.weight


Out[35]:
Parameter containing:
 0.5226
[torch.FloatTensor of size 1x1]

In [36]:
model.x2o.bias


Out[36]:
Parameter containing:
-1.5220
[torch.FloatTensor of size 1]

In [37]:
print("Model W: %.3f, True W: %.3f" % (model.x2o.weight.data.numpy(), W))
print("Model bias: %.3f, True bias: %.3f" % (model.x2o.bias.data.numpy(), bias))


Model W: 0.523, True W: 0.500
Model bias: -1.522, True bias: -1.680

Running on CUDA


In [38]:
batch = Variable(torch.randn(10,3))
target = Variable(torch.randn(10,1))

no_gpu_model = LinearRegression(input_size=3, output_size=1)

In [39]:
no_gpu_model.forward(batch).size()


Out[39]:
torch.Size([10, 1])

In [40]:
torch.cuda.is_available()


Out[40]:
True

In [41]:
if torch.cuda.is_available():
    gpu_model = no_gpu_model.cuda()
    try:
        print(gpu_model.forward(batch))
    except TypeError as e:
        print(e)


addmm_ received an invalid combination of arguments - got (int, int, torch.FloatTensor, torch.cuda.FloatTensor), but expected one of:
 * (torch.FloatTensor mat1, torch.FloatTensor mat2)
 * (torch.SparseFloatTensor mat1, torch.FloatTensor mat2)
 * (float beta, torch.FloatTensor mat1, torch.FloatTensor mat2)
 * (float alpha, torch.FloatTensor mat1, torch.FloatTensor mat2)
 * (float beta, torch.SparseFloatTensor mat1, torch.FloatTensor mat2)
 * (float alpha, torch.SparseFloatTensor mat1, torch.FloatTensor mat2)
 * (float beta, float alpha, torch.FloatTensor mat1, torch.FloatTensor mat2)
      didn't match because some of the arguments have invalid types: (int, int, torch.FloatTensor, torch.cuda.FloatTensor)
 * (float beta, float alpha, torch.SparseFloatTensor mat1, torch.FloatTensor mat2)
      didn't match because some of the arguments have invalid types: (int, int, torch.FloatTensor, torch.cuda.FloatTensor)

I have opened an issue related to the above error at: https://github.com/pytorch/pytorch/issues/584


In [42]:
if torch.cuda.is_available():
    gpu_model = no_gpu_model.cuda()
    try:
        print(gpu_model.forward(batch.cuda()).size())
    except TypeError as e:
        print(e)


torch.Size([10, 1])

A model that moves CPU inputs to the GPU when its parameters live there


In [43]:
class LinearRegression(nn.Module):
    def __init__(self, input_size, output_size):
        super(LinearRegression, self).__init__()
        self.x2o = nn.Linear(input_size, output_size)
        
    def forward(self, X):
        if next(self.x2o.parameters()).is_cuda:
            if not X.is_cuda:
                X = X.cuda()
        return self.x2o(X)

In [44]:
batch = Variable(torch.randn(10,3))
target = Variable(torch.randn(10,1))

no_gpu_model = LinearRegression(input_size=3, output_size=1)
print("No GPU model: ", no_gpu_model.forward(batch).size())

if torch.cuda.is_available():
    gpu_model = no_gpu_model.cuda()
    try:
        print("GPU model: ", gpu_model.forward(batch.cuda()).size())
    except TypeError as e:
        print(e)


No GPU model:  torch.Size([10, 1])
GPU model:  torch.Size([10, 1])
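
For reference, PyTorch 0.4 and later merge Variable into Tensor and add torch.device, so the usual pattern there is device-agnostic code rather than a custom forward (a sketch for those versions; it will not run on 0.1.x):

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = nn.Linear(3, 1).to(device) ## parameters move to the chosen device
batch = torch.randn(10, 3).to(device) ## inputs move the same way
print(model(batch).size()) ## torch.Size([10, 1])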
