In [26]:
    
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
torch.manual_seed(1)
print(f'Torch Version {torch.__version__}')
assert tuple(int(v) for v in torch.__version__.split('.')[:2]) >= (0, 4)  # requires torch >= 0.4
print(f'Cuda is available: {torch.cuda.is_available()}')
print(f"CUDA_VISIBLE_DEVICES: {os.environ.get('CUDA_VISIBLE_DEVICES', '<not set>')}")
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
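As an optional check (illustrative, not part of the original cell), a tensor created with device=device should report the device selected above:

probe = torch.zeros(1, device=device)  # illustrative: confirm tensors land on the chosen device
print(probe.device)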
    
    
In [3]:
    
def prepare_sequence(seq, to_ix):
    return torch.tensor([to_ix[w] for w in seq], dtype=torch.long, device=device)
training_data = [
    ("The dog ate the apple".split(), ["DET", "NN", "V", "DET", "NN"]),
    ("Everybody read that book".split(), ["NN", "V", "DET", "NN"])
]
word_to_ix = {}
for sent, tags in training_data:
    for word in sent:
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)
tag_to_ix = {"DET": 0, "NN": 1, "V": 2}
uniq_chars = set(ch for seq, _ in training_data for word in seq for ch in word)  # nested comprehension reads left to right: sentence -> word -> character
char_to_ix = {ch: ix for ix, ch in enumerate(sorted(uniq_chars))}
print(word_to_ix)
print(len(char_to_ix), char_to_ix)
# These will usually be more like 32 or 64 dimensional.
# We will keep them small, so we can see how the weights change as we train.
EMBEDDING_DIM = 6
HIDDEN_DIM = 6
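As a quick illustrative check (an addition, not in the original cell), the mappings above can be exercised on the first training sentence; the expected shapes follow from prepare_sequence as defined here.

sample_sent, sample_tags = training_data[0]        # illustrative sanity check
print(prepare_sequence(sample_sent, word_to_ix))   # word indices, one per token -> shape (5,)
print(prepare_sequence(sample_tags, tag_to_ix))    # tag indices aligned with the words
print([char_to_ix[ch] for ch in sample_sent[0]])   # character indices for "The"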
    
    
In [30]:
    
class CharWordLSTMTagger(nn.Module):
    '''
    Character + Word Embedding based LSTM Tagger
    '''
    def __init__(self, embedding_dim, hidden_dim, word2ix, char2ix, tag2ix):
        super(CharWordLSTMTagger, self).__init__()
        self.num_dirs = 1    # 1 = unidirectional, 2 = bidirectional
        self.num_layers = 2  # number of stacked LSTM layers
        self.hidden_dim = hidden_dim
        self.char2ix = char2ix
        self.word2ix = word2ix
        self.tag2ix = tag2ix
        self.ix2tag = {v: k for k,v in tag2ix.items()}
        vocab_size, alphabet_size, tagset_size = len(word2ix), len(char2ix), len(tag2ix)
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.char_embeddings = nn.Embedding(alphabet_size, embedding_dim)
        self.char_lstm = nn.LSTM(embedding_dim, hidden_dim,
                                 num_layers=self.num_layers,
                                 bidirectional=self.num_dirs == 2)
        self.char_hidden = self.init_hidden() # Hidden state for char LSTM
        # The word-level LSTM takes, at each time step, the word embedding
        # concatenated with the char LSTM's final output, and produces hidden
        # states of dimensionality hidden_dim.
        self.lstm = nn.LSTM(embedding_dim + self.num_dirs * hidden_dim, hidden_dim,
                            num_layers=self.num_layers,
                            bidirectional=self.num_dirs == 2)
        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(self.num_dirs * hidden_dim, tagset_size)
        self.hidden = self.init_hidden()
        
    def init_hidden(self):
        # Before we've done anything, we dont have any hidden state.
        # Refer to the Pytorch documentation to see exactly
        # why they have this dimensionality.
        # The axes semantics are (num_layers*directions, minibatch_size, hidden_dim)
        return (torch.zeros(self.num_layers * self.num_dirs, 1, self.hidden_dim, device=device),
                torch.zeros(self.num_layers * self.num_dirs, 1, self.hidden_dim, device=device))
    def forward(self, sentence):
        word_reprs = []
        for word in sentence:
            word_idx = torch.tensor([self.word2ix[word]], dtype=torch.long, device=device)
            word_embed = self.word_embeddings(word_idx)
            self.char_hidden = self.init_hidden()
            ch_ixs = [self.char2ix[ch] for ch in word]
            ch_seq = torch.tensor(ch_ixs, dtype=torch.long, device=device)
            ch_embeds = self.char_embeddings(ch_seq)
            ch_repr, self.char_hidden = self.char_lstm(ch_embeds.view(len(word), 1, -1), self.char_hidden)
            word_repr = torch.cat((word_embed, ch_repr[-1]), dim=1)  # last time step of the char LSTM output
            word_reprs.append(word_repr)
        embeds = torch.cat(word_reprs)
        #print(len(sentence), embeds.size())
        lstm_out, self.hidden = self.lstm(embeds.view(len(sentence), 1, -1), self.hidden)
        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        tag_scores = F.log_softmax(tag_space, dim=1)  # normalize over the tag dimension, not the sequence
        return tag_scores
    
    def tag(self, sentence):
        vals, idx = self(sentence).max(dim=1)
        return [self.ix2tag[i.item()] for i in idx]
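The forward pass should return one row of log-probabilities per word and one column per tag. The snippet below is a small illustrative shape check using a throwaway, untrained instance (an addition, not part of the original notebook; it consumes a little RNG state, so later initializations will differ slightly).

check = CharWordLSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, word_to_ix, char_to_ix, tag_to_ix).to(device)  # throwaway instance
with torch.no_grad():
    scores = check(training_data[0][0])
print(scores.size())  # expected: torch.Size([5, 3]) = (sentence length, tagset size)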
    
In [33]:
    
tagger = CharWordLSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, word_to_ix, char_to_ix, tag_to_ix)
tagger.to(device)
loss_func = nn.NLLLoss()
optimizer = optim.SGD(params=tagger.parameters(), lr=0.5)
# Scores before training: the model is untrained, so these are essentially random.
with torch.no_grad():
    print(tagger(training_data[0][0]))
for e in range(200):  # toy data: a couple hundred epochs is enough to memorize it
    for seq, tags in training_data:
        y_gold = prepare_sequence(tags, tag_to_ix)
        tagger.zero_grad()
        tagger.hidden = tagger.init_hidden()
        
        y_pred = tagger(seq)
        loss = loss_func(y_pred, y_gold)
        
        loss.backward()
        optimizer.step()
xseq = training_data[0][0]
print(xseq)
with torch.no_grad():  # inference only, no gradient tracking needed
    print(tagger.tag(xseq))
    print(tagger(xseq))
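As a follow-up check (an addition, not part of the original run), the tagger can also be applied to the second training sentence and compared with its gold tags; on this two-sentence toy set the model usually memorizes both.

xseq2, gold2 = training_data[1]
with torch.no_grad():
    pred2 = tagger.tag(xseq2)
print(list(zip(xseq2, pred2, gold2)))  # (word, predicted tag, gold tag) triples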
    
    