Intro to PyTorch


In [2]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(10)


Out[2]:
<torch._C.Generator at 0x10348edf8>

In [3]:
V = torch.Tensor([1., 2., 3.])
M = torch.Tensor([[1., 2., 3.], [4.,5.,6.]])
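
A couple of things you can do with these tensors, sketched here with the same pre-0.4 API used in the rest of this notebook: index into them, check their sizes, and concatenate them along a dimension.

In [ ]:
# Indexing: V[0] gives the first entry of V, M[0] gives the first row of M.
print(V[0])
print(M[0])

# .size() reports the shape of a tensor.
print(M.size())              # torch.Size([2, 3])

# torch.cat concatenates tensors along a dimension (dim 0 by default).
print(torch.cat([M, M]))     # 4 x 3
print(torch.cat([M, M], 1))  # 2 x 6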

In [13]:
torch.randn((2, 3, 4, 5)).view(12, -1)


Out[13]:
 0.7585 -1.8523  0.2755 -0.2756 -0.5546 -1.3609 -0.5837 -0.2724  0.6450  1.0303
-2.1176  0.4471 -0.6392  0.9384  0.3027  1.9140 -1.5967 -1.0490 -0.2089  0.5063
 1.0783 -0.6258  0.9459 -0.6033 -0.0314  0.7018  0.8269  0.6919  0.4013  1.0714
-0.9004  0.0024 -0.0179 -0.1326  0.3081  1.1460  0.0185  1.4954  0.2014 -0.2712
-0.5057  0.2954 -1.0129 -0.9123  0.0898  0.8723  1.6862  1.2572 -1.7590 -0.1758
-0.6932  0.1002  0.3838  0.5473 -1.0893 -1.0464 -0.5607 -0.1591 -0.6771 -0.9848
 1.8307  2.3463  0.0054 -0.5591  0.0550  0.3098 -2.1258 -0.2598  0.3487 -1.5464
 0.8535  0.1206 -0.4500  0.8920  0.2864 -0.2940  0.5223 -0.7716 -0.0437 -0.0108
 0.9363 -1.0407  0.6356 -0.2074  0.6470 -1.2425  0.5359 -0.6989  0.2533  0.1810
-0.9317 -1.3047 -0.4859 -1.6161 -0.7858 -0.6521  0.4788 -1.4859  1.2957  0.2860
 0.3352 -2.6214 -0.0975 -0.8456 -0.0528  0.8662 -0.0318  0.7159 -1.0067 -1.1036
-0.9792  1.4351  0.3460 -1.1480 -1.3038  0.4588  0.0625 -0.2993  1.0850 -0.0650
[torch.FloatTensor of size 12x10]
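
Note that .view(12, -1) asks for 12 rows and lets PyTorch infer the remaining dimension (120 elements / 12 rows = 10 columns). Any reshape that preserves the total number of elements works; a quick sketch:

In [ ]:
x = torch.randn(2, 3, 4, 5)      # 120 elements in total
print(x.view(-1).size())         # torch.Size([120]): flattened to 1-D
print(x.view(6, 20).size())      # torch.Size([6, 20])
print(x.view(2, -1, 5).size())   # torch.Size([2, 12, 5]): the -1 is inferred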

In [21]:
data = autograd.Variable(torch.randn(2, 2))
print(data)
print(F.relu(data))
print(F.sigmoid(data))


Variable containing:
 0.1125 -0.8350
 1.1583  0.1519
[torch.FloatTensor of size 2x2]

Variable containing:
 0.1125  0.0000
 1.1583  0.1519
[torch.FloatTensor of size 2x2]

Variable containing:
 0.5281  0.3026
 0.7610  0.5379
[torch.FloatTensor of size 2x2]


In [25]:
print(data.view(-1))
print(F.softmax(data.view(-1)))
print(F.log_softmax(data.view(-1)))


Variable containing:
 0.1125
-0.8350
 1.1583
 0.1519
[torch.FloatTensor of size 4]

Variable containing:
 0.1896
 0.0735
 0.5396
 0.1972
[torch.FloatTensor of size 4]

Variable containing:
-1.6627
-2.6102
-0.6169
-1.6233
[torch.FloatTensor of size 4]
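
Softmax turns the scores into a probability distribution (non-negative entries that sum to 1), and log_softmax is just the elementwise log of that distribution. A quick sanity check along those lines:

In [ ]:
probs = F.softmax(data.view(-1))
print(probs.sum())       # sums to 1
print(torch.log(probs))  # matches F.log_softmax(data.view(-1)) up to numerics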

Logistic Regression with Bag of Words


In [3]:
data = [("me gusta comer en la cafeteria".split(), "SPANISH"),
        ("Give it to me".split(), "ENGLISH"),
        ("No creo que sea una buena idea".split(), "SPANISH"),
        ("No it is not a good idea to get lost at sea".split(), "ENGLISH")]

test_data = [("Yo creo que si".split(), "SPANISH"),
             ("it is lost on me".split(), "ENGLISH")]

# word_to_ix maps each word in the vocab to a unique integer, which will be its
# index into the bag-of-words vector
word_to_ix = {}
for sent, _ in data + test_data:
    for word in sent:
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)
print(word_to_ix)

VOCAB_SIZE = len(word_to_ix)
NUM_LABELS = 2


class BoWClassifier(nn.Module):  # inheriting from nn.Module!

    def __init__(self, num_labels, vocab_size):
        # calls the init function of nn.Module.  Don't get confused by the
        # syntax; just always do it in an nn.Module
        super(BoWClassifier, self).__init__()

        # Define the parameters that you will need.  In this case, we need A and b,
        # the parameters of the affine mapping.
        # Torch defines nn.Linear(), which provides the affine map.
        # Make sure you understand why the input dimension is vocab_size
        # and the output is num_labels!
        self.linear = nn.Linear(vocab_size, num_labels)

        # NOTE: the log softmax non-linearity does not have parameters,
        # so we don't need to define anything for it here

    def forward(self, bow_vec):
        # Pass the input through the linear layer,
        # then pass that through log_softmax.
        # Many non-linearities and other functions are in torch.nn.functional
        return F.log_softmax(self.linear(bow_vec))
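
# In equation form, the classifier computes log softmax(A x + b): x is the
# bag-of-words count vector, A is a num_labels x vocab_size weight matrix,
# and b is a bias vector of length num_labels (both live inside nn.Linear).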


def make_bow_vector(sentence, word_to_ix):
    vec = torch.zeros(len(word_to_ix))
    for word in sentence:
        vec[word_to_ix[word]] += 1
    return vec.view(1, -1)


def make_target(label, label_to_ix):
    return torch.LongTensor([label_to_ix[label]])


model = BoWClassifier(NUM_LABELS, VOCAB_SIZE)

# The model knows its parameters.  The first output below is A, the second is b.
# Whenever you assign a component to a class attribute in the __init__ function
# of a module, as we did with the line
# self.linear = nn.Linear(...)
# PyTorch's nn.Module machinery registers that component, so your module
# (in this case, BoWClassifier) knows about the nn.Linear's parameters.
for param in model.parameters():
    print(param)

# To run the model, pass in a BoW vector wrapped in an autograd.Variable
sample = data[0]
bow_vector = make_bow_vector(sample[0], word_to_ix)
log_probs = model(autograd.Variable(bow_vector))
print(log_probs)


{'me': 0, 'gusta': 1, 'comer': 2, 'en': 3, 'la': 4, 'cafeteria': 5, 'Give': 6, 'it': 7, 'to': 8, 'No': 9, 'creo': 10, 'que': 11, 'sea': 12, 'una': 13, 'buena': 14, 'idea': 15, 'is': 16, 'not': 17, 'a': 18, 'good': 19, 'get': 20, 'lost': 21, 'at': 22, 'Yo': 23, 'si': 24, 'on': 25}
Parameter containing:

Columns 0 to 9 
 0.1064 -0.0789 -0.1880 -0.0021  0.0524 -0.0224  0.0976  0.1302 -0.0006  0.0327
 0.0048  0.1533  0.1226 -0.1184  0.0441 -0.1841  0.0870  0.1481 -0.0816  0.0892

Columns 10 to 19 
-0.1079 -0.1862 -0.1184  0.0821  0.1022 -0.0920 -0.1298 -0.0927 -0.1615 -0.1371
 0.1639  0.0160  0.0842 -0.1446  0.0167 -0.0339 -0.1404  0.1093 -0.0497  0.0329

Columns 20 to 25 
 0.0727  0.0721  0.1778  0.1242 -0.1946 -0.0643
 0.0683 -0.1245 -0.0228  0.1279 -0.0259 -0.1548
[torch.FloatTensor of size 2x26]

Parameter containing:
1.00000e-02 *
  4.6192
 -8.4888
[torch.FloatTensor of size 2]

Variable containing:
-0.7051 -0.6813
[torch.FloatTensor of size 1x2]
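
The model hasn't been trained yet, so these log probabilities are close to uniform. To read them as ordinary probabilities, exponentiate the log-softmax output (a quick check, not part of the original run):

In [ ]:
probs = torch.exp(log_probs)
print(probs)   # roughly [0.49, 0.51] for the output above; the row sums to 1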


In [4]:
label_to_ix = {"SPANISH": 0, "ENGLISH": 1}

In [5]:
# Run on test data before we train, just to see a before-and-after
for instance, label in test_data:
    bow_vec = autograd.Variable(make_bow_vector(instance, word_to_ix))
    log_probs = model(bow_vec)
    print(log_probs)

# Print the matrix column corresponding to "creo"
print(next(model.parameters())[:, word_to_ix["creo"]])

loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

# Usually you want to pass over the training data several times.
# 100 epochs is far more than you would use on a real dataset, but real
# datasets have more than two instances.  Somewhere between 5 and 30 epochs
# is usually reasonable.
for epoch in range(100):
    for instance, label in data:
        # Step 1. Remember that PyTorch accumulates gradients.
        # We need to clear them out before each instance.
        model.zero_grad()

        # Step 2. Make our BoW vector, and wrap the target (an integer) in a
        # Variable. For example, if the target is SPANISH, we wrap the
        # integer 0. The loss function then knows that the 0th element of the
        # log probabilities is the log probability corresponding to SPANISH.
        bow_vec = autograd.Variable(make_bow_vector(instance, word_to_ix))
        target = autograd.Variable(make_target(label, label_to_ix))

        # Step 3. Run our forward pass.
        log_probs = model(bow_vec)

        # Step 4. Compute the loss, gradients, and update the parameters by
        # calling optimizer.step()
        loss = loss_function(log_probs, target)
        loss.backward()
        optimizer.step()

for instance, label in test_data:
    bow_vec = autograd.Variable(make_bow_vector(instance, word_to_ix))
    log_probs = model(bow_vec)
    print(log_probs)

# The weight corresponding to Spanish goes up, and the one for English goes down!
print(next(model.parameters())[:, word_to_ix["creo"]])


Variable containing:
-0.9837 -0.4683
[torch.FloatTensor of size 1x2]

Variable containing:
-0.4694 -0.9818
[torch.FloatTensor of size 1x2]

Variable containing:
-0.1079
 0.1639
[torch.FloatTensor of size 2]

Variable containing:
-0.2071 -1.6761
[torch.FloatTensor of size 1x2]

Variable containing:
-2.3088 -0.1047
[torch.FloatTensor of size 1x2]

Variable containing:
 0.3455
-0.2895
[torch.FloatTensor of size 2]
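
To turn these log probabilities into an actual label prediction, take the index of the larger entry and map it back to a label. A minimal sketch; the ix_to_label dictionary is not part of the original code:

In [ ]:
ix_to_label = {ix: label for label, ix in label_to_ix.items()}

for instance, label in test_data:
    bow_vec = autograd.Variable(make_bow_vector(instance, word_to_ix))
    log_probs = model(bow_vec)
    scores = log_probs.data.view(-1)                    # the two log probabilities
    predicted_ix = 0 if scores[0] > scores[1] else 1    # argmax over the labels
    print(" ".join(instance), "->", ix_to_label[predicted_ix])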


In [ ]: