This is the sixth (and final) toy example from Jason Brownlee's Long Short Term Memory Networks with Python. We will train a network to generate rectangles. Per section 11.2 of the book:
We can frame the problem of generating random shapes as a sequence generation problem. We can take drawing a rectangle as a sequence of 4 points in two-dimensional space, traversed in clockwise order. So to generate a rectangle given by the points [BL, BR, TR, TL], the input sequence is [BL, BR, TR] and the corresponding output sequence at these timesteps is [BR', TR', TL'].
Each point can be taken as one timestep, with the x and y axes representing separate features. Starting from (0, 0), the task is to draw the remaining 3 points of the rectangle with consistent widths and heights. We will frame this as a one-coordinate generation problem, i.e., a one-to-one sequence prediction problem: given a coordinate, predict the next coordinate; then, given the coordinate predicted at the last timestep, predict the next one, and so on.
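To make the framing concrete, here is a minimal sketch (using made-up coordinates for a unit square, not code from the book) of how one rectangle becomes a set of one-to-one (input, output) pairs:

# unit square as a point sequence starting at the origin
BL, BR, TR, TL = (0., 0.), (1., 0.), (1., 1.), (0., 1.)
X = [BL, BR, TR]  # input coordinate at each timestep
Y = [BR, TR, TL]  # expected output coordinate at the same timestep
for x, y in zip(X, Y):
    print(x, "->", y)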
In [1]:
from __future__ import division, print_function
from sklearn.metrics import accuracy_score, confusion_matrix
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
import matplotlib as mat
import matplotlib.pyplot as plt
import os
import shutil
%matplotlib inline
In [2]:
DATA_DIR = "../../data"
MODEL_FILE = os.path.join(DATA_DIR, "torch-11-shape-gen.model")
TRAIN_SIZE = 25000
# model expects 1 timestep of input with (x, y) as features
# each batch is one rectangle: 3 input => 3 output coordinates
# we train for TRAIN_SIZE epochs, one rectangle per epoch
SEQ_LENGTH = 1
EMBED_SIZE = 2
BATCH_SIZE = 3
LEARNING_RATE = 1e-3
Coordinates corresponding to random rectangles are generated by rule for training the network. Data is generated in (num_rectangles, batch_size, sequence_length, num_features) format. This is because we are training the network to generate one coordinate at a time, so each batch corresponds to a single rectangle (3 moves), and each training epoch consumes one rectangle.
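As a quick illustration of the shapes involved (a sketch with a hand-built array, not part of the training code): each rectangle yields 3 points of 2 features each, and np.expand_dims adds the singleton timestep axis.

xseq = np.array([[(0, 0), (1, 0), (1, 1)]], dtype=np.float32)
print(xseq.shape)                     # (1, 3, 2): 1 rectangle, 3 points, 2 features
print(np.expand_dims(xseq, 2).shape)  # (1, 3, 1, 2): adds the sequence_length axis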
In [3]:
def draw_rectangles(points_seq):
    """ Draw a list of rectangles, each given as a list of (x, y) points. """
    fig, ax = plt.subplots()
    polygons = []
    for points in points_seq:
        polygons.append(mat.patches.Polygon(points, closed=True, fill=True))
    patches = mat.collections.PatchCollection(polygons, cmap=mat.cm.jet, alpha=0.4)
    colors = 100 * np.random.rand(len(polygons))
    patches.set_array(np.array(colors))
    ax.add_collection(patches)
    plt.show()

def random_rectangle():
    """ Generate a rectangle anchored at the origin with random width and height. """
    bl = (0, 0)
    br = (np.random.random(), bl[1])
    tr = (br[0], np.random.random())
    tl = (0, tr[1])
    return [bl, br, tr, tl]

rectangles = []
for i in range(5):
    rectangles.append(random_rectangle())
draw_rectangles(rectangles)
In [4]:
def generate_data(num_recs):
    """ Generate input and output coordinate sequences for num_recs rectangles. """
    xseq, yseq = [], []
    for i in range(num_recs):
        rect = random_rectangle()
        xseq.append(np.array(rect[0:-1]))  # input: [BL, BR, TR]
        yseq.append(np.array(rect[1:]))    # output: [BR, TR, TL]
    # add the singleton timestep axis: (num_recs, 3, 2) -> (num_recs, 3, 1, 2)
    X = np.expand_dims(np.array(xseq, dtype=np.float32), 2)
    Y = np.expand_dims(np.array(yseq, dtype=np.float32), 2)
    return X, Y

Xtrain, Ytrain = generate_data(TRAIN_SIZE)
print(Xtrain.shape, Ytrain.shape)
In [5]:
class ShapeGenerator(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(ShapeGenerator, self).__init__()
        self.hidden_dim = hidden_dim
        # network layers
        self.lstm = nn.LSTM(input_dim, hidden_dim, 1, batch_first=True)
        self.fcn = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # randomly initialized hidden and cell states for each forward pass
        if torch.cuda.is_available():
            h = (Variable(torch.randn(1, x.size(0), self.hidden_dim).cuda()),
                 Variable(torch.randn(1, x.size(0), self.hidden_dim).cuda()))
        else:
            h = (Variable(torch.randn(1, x.size(0), self.hidden_dim)),
                 Variable(torch.randn(1, x.size(0), self.hidden_dim)))
        x, h = self.lstm(x, h)
        x = self.fcn(x)
        return x

model = ShapeGenerator(EMBED_SIZE, 10, EMBED_SIZE)
if torch.cuda.is_available():
    model.cuda()
print(model)

# size debugging
print("--- size debugging ---")
inp = Variable(torch.randn(BATCH_SIZE, SEQ_LENGTH, EMBED_SIZE))
if torch.cuda.is_available():
    inp = inp.cuda()
outp = model(inp)
print(outp.size())
In [6]:
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
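The targets are continuous (x, y) coordinates rather than class labels, so this is a regression problem and mean squared error is the natural loss. As an illustrative check (toy values, not from the training data), nn.MSELoss averages the squared differences over all elements:

pred = Variable(torch.FloatTensor([[0.5, 0.0]]))
true = Variable(torch.FloatTensor([[0.6, 0.0]]))
print(loss_fn(pred, true))  # ((0.5 - 0.6)**2 + 0**2) / 2 = 0.005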
In [7]:
history = []
total_loss, prev_loss = 0., None
for epoch in range(TRAIN_SIZE):
    # each epoch consumes one rectangle: a batch of 3 one-step sequences
    Xbatch_data = Xtrain[epoch]
    Ybatch_data = Ytrain[epoch]
    Xbatch = Variable(torch.from_numpy(Xbatch_data).float())
    Ybatch = Variable(torch.from_numpy(Ybatch_data).float())
    if torch.cuda.is_available():
        Xbatch = Xbatch.cuda()
        Ybatch = Ybatch.cuda()

    # initialize gradients
    optimizer.zero_grad()

    # forward
    Ybatch_ = model(Xbatch)
    loss = loss_fn(Ybatch_, Ybatch)
    loss_data = loss.data[0]
    total_loss += loss_data

    # backward
    loss.backward()
    optimizer.step()

    if epoch > 0 and (epoch % (TRAIN_SIZE // 10) == 0 or epoch == TRAIN_SIZE - 1):
        cum_avg_loss = total_loss / epoch
        history.append((epoch, cum_avg_loss))
        # checkpoint whenever the cumulative average loss improves
        if prev_loss is None or prev_loss > cum_avg_loss:
            torch.save(model.state_dict(), MODEL_FILE)
        prev_loss = cum_avg_loss
        print("Iteration {:5d}, cum. avg. loss: {:.3f}".format(epoch, cum_avg_loss))
In [8]:
plt.title("Loss")
epochs = [x[0] for x in history]
losses = [x[1] for x in history]
plt.plot(epochs, losses, color="r", label="train")
plt.legend(loc="best")
plt.show()
In [9]:
saved_model = ShapeGenerator(EMBED_SIZE, 10, EMBED_SIZE)
saved_model.load_state_dict(torch.load(MODEL_FILE))
if torch.cuda.is_available():
    saved_model.cuda()
In [12]:
coord_seqs = []
for rect in range(5):
    Xgen = None
    coord_seq = [(0., 0.)]
    for seq in range(3):
        if Xgen is None:
            # seed the sequence with the fixed bottom-left corner (0, 0)
            Xgen_data = np.zeros((1, 1, 2), dtype=np.float32)
            Xgen = Variable(torch.from_numpy(Xgen_data).float())
            if torch.cuda.is_available():
                Xgen = Xgen.cuda()
        else:
            # feed the previous prediction back in as the next input
            Xgen = Ygen
        Ygen = saved_model(Xgen)
        if torch.cuda.is_available():
            Ygen_data = Ygen.cpu().data.numpy()
        else:
            Ygen_data = Ygen.data.numpy()
        coord_seq.append((Ygen_data[0, 0, 0], Ygen_data[0, 0, 1]))
    coord_seqs.append(coord_seq)
draw_rectangles(coord_seqs)
In [13]:
os.remove(MODEL_FILE)