In [2]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
torch.manual_seed(10)
Out[2]:
In [3]:
V = torch.Tensor([1., 2., 3.])
M = torch.Tensor([[1., 2., 3.], [4.,5.,6.]])
In [13]:
torch.randn((2, 3, 4, 5)).view(12, -1)
Out[13]:
In [21]:
data = autograd.Variable(torch.randn(2, 2))
print(data)
print(F.relu(data))
print(F.sigmoid(data))
In [25]:
print(data.view(-1))
print(F.softmax(data.view(-1)))
print(F.log_softmax(data.view(-1)))
In [3]:
data = [("me gusta comer en la cafeteria".split(), "SPANISH"),
("Give it to me".split(), "ENGLISH"),
("No creo que sea una buena idea".split(), "SPANISH"),
("No it is not a good idea to get lost at sea".split(), "ENGLISH")]
test_data = [("Yo creo que si".split(), "SPANISH"),
("it is lost on me".split(), "ENGLISH")]
# word_to_ix maps each word in the vocab to a unique integer, which will be its
# index into the Bag of words vector
word_to_ix = {}
for sent, _ in data + test_data:
for word in sent:
if word not in word_to_ix:
word_to_ix[word] = len(word_to_ix)
print(word_to_ix)
VOCAB_SIZE = len(word_to_ix)
NUM_LABELS = 2
class BoWClassifier(nn.Module): # inheriting from nn.Module!
def __init__(self, num_labels, vocab_size):
# calls the init function of nn.Module. Dont get confused by syntax,
# just always do it in an nn.Module
super(BoWClassifier, self).__init__()
# Define the parameters that you will need. In this case, we need A and b,
# the parameters of the affine mapping.
# Torch defines nn.Linear(), which provides the affine map.
# Make sure you understand why the input dimension is vocab_size
# and the output is num_labels!
self.linear = nn.Linear(vocab_size, num_labels)
# NOTE! The non-linearity log softmax does not have parameters! So we don't need
# to worry about that here
def forward(self, bow_vec):
# Pass the input through the linear layer,
# then pass that through log_softmax.
# Many non-linearities and other functions are in torch.nn.functional
return F.log_softmax(self.linear(bow_vec))
def make_bow_vector(sentence, word_to_ix):
vec = torch.zeros(len(word_to_ix))
for word in sentence:
vec[word_to_ix[word]] += 1
return vec.view(1, -1)
def make_target(label, label_to_ix):
return torch.LongTensor([label_to_ix[label]])
model = BoWClassifier(NUM_LABELS, VOCAB_SIZE)
# the model knows its parameters. The first output below is A, the second is b.
# Whenever you assign a component to a class variable in the __init__ function
# of a module, which was done with the line
# self.linear = nn.Linear(...)
# Then through some Python magic from the Pytorch devs, your module
# (in this case, BoWClassifier) will store knowledge of the nn.Linear's parameters
for param in model.parameters():
print(param)
# To run the model, pass in a BoW vector, but wrapped in an autograd.Variable
sample = data[0]
bow_vector = make_bow_vector(sample[0], word_to_ix)
log_probs = model(autograd.Variable(bow_vector))
print(log_probs)
In [4]:
label_to_ix = {"SPANISH": 0, "ENGLISH": 1}
In [5]:
# Run on test data before we train, just to see a before-and-after
for instance, label in test_data:
bow_vec = autograd.Variable(make_bow_vector(instance, word_to_ix))
log_probs = model(bow_vec)
print(log_probs)
# Print the matrix column corresponding to "creo"
print(next(model.parameters())[:, word_to_ix["creo"]])
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)
# Usually you want to pass over the training data several times.
# 100 is much bigger than on a real data set, but real datasets have more than
# two instances. Usually, somewhere between 5 and 30 epochs is reasonable.
for epoch in range(100):
for instance, label in data:
# Step 1. Remember that Pytorch accumulates gradients.
# We need to clear them out before each instance
model.zero_grad()
# Step 2. Make our BOW vector and also we must wrap the target in a
# Variable as an integer. For example, if the target is SPANISH, then
# we wrap the integer 0. The loss function then knows that the 0th
# element of the log probabilities is the log probability
# corresponding to SPANISH
bow_vec = autograd.Variable(make_bow_vector(instance, word_to_ix))
target = autograd.Variable(make_target(label, label_to_ix))
# Step 3. Run our forward pass.
log_probs = model(bow_vec)
# Step 4. Compute the loss, gradients, and update the parameters by
# calling optimizer.step()
loss = loss_function(log_probs, target)
loss.backward()
optimizer.step()
for instance, label in test_data:
bow_vec = autograd.Variable(make_bow_vector(instance, word_to_ix))
log_probs = model(bow_vec)
print(log_probs)
# Index corresponding to Spanish goes up, English goes down!
print(next(model.parameters())[:, word_to_ix["creo"]])
In [ ]: