In [12]:
    
import numpy as np
import matplotlib.pyplot as plt
# Toy data: temperature readings (x) and a binary fever flag (y).
x = [1, 5, 10, 10, 25, 50, 70, 75, 100]
y = [0, 0, 0, 0, 0, 1, 1, 1, 1]
# One random colour value per point, only used to tint the scatter markers.
colors = np.random.rand(len(x))

# Degree-1 least-squares fit, drawn over the distinct x values.
distinct_x = np.unique(x)
fitted_line = np.poly1d(np.polyfit(x, y, 1))
plt.plot(distinct_x, fitted_line(distinct_x))

plt.ylabel('Fever')
plt.xlabel('Temperature')
plt.scatter(x, y, c=colors, alpha=0.5)
plt.show()
    
    
In [13]:
    
import math
print(- math.log(1 - 0.00001))
print(- math.log(1 - 0.99999))
    
    
In [14]:
    
print(math.log(0.999999))
print(math.log(0.000001))
    
    
In [15]:
    
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable
    
In [37]:
    
train_dataset = dsets.MNIST(root='./data',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)
    
In [38]:
    
type(train_dataset)
    
    Out[38]:
In [39]:
    
len(train_dataset)
    
    Out[39]:
In [40]:
    
type(train_dataset[0])
    
    Out[40]:
In [41]:
    
train_dataset[0][0].size()
    
    Out[41]:
In [42]:
    
train_dataset[0][1]
    
    Out[42]:
In [43]:
    
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
    
In [44]:
    
train_dataset[0][0].numpy().shape
    
    Out[44]:
In [45]:
    
show_img = train_dataset[1][0].numpy().reshape(28, 28)
    
In [46]:
    
show_img.shape
    
    Out[46]:
In [47]:
    
plt.imshow(show_img, cmap='gray')
    
    Out[47]:
    
In [48]:
    
train_dataset[1][1]
    
    Out[48]:
In [49]:
    
test_dataset = dsets.MNIST(root='./data',
                           train=False,  # test data
                           transform=transforms.ToTensor())
    
In [50]:
    
len(test_dataset)
    
    Out[50]:
In [51]:
    
type(test_dataset[0])
    
    Out[51]:
In [52]:
    
test_dataset[0][0].size()
    
    Out[52]:
In [53]:
    
show_img = test_dataset[0][0].numpy().reshape(28, 28)
    
In [54]:
    
plt.imshow(show_img, cmap='gray')
    
    Out[54]:
    
In [55]:
    
test_dataset[0][1]
    
    Out[55]:
In [56]:
    
len(train_dataset)
    
    Out[56]:
In [57]:
    
batch_size = 100
n_iters = 3000
    
In [58]:
    
num_epochs = n_iters / (len(train_dataset) / batch_size)
num_epochs = int(num_epochs)
num_epochs
    
    Out[58]:
In [59]:
    
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
    
In [60]:
    
train_loader
    
    Out[60]:
In [62]:
    
import collections.abc
# The abstract base classes live in collections.abc; the old aliases on the
# `collections` package were removed in Python 3.10.
isinstance(train_loader, collections.abc.Iterable)
    
    Out[62]:
In [70]:
    
import collections.abc
# Use collections.abc.Iterable: the `collections.Iterable` alias was removed in Python 3.10.
isinstance([1, 2, 3], collections.abc.Iterable)
    
    Out[70]:
In [65]:
    
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)
    
In [66]:
    
import collections.abc
# Use collections.abc.Iterable: the `collections.Iterable` alias was removed in Python 3.10.
isinstance(test_loader, collections.abc.Iterable)
    
    Out[66]:
In [71]:
    
img_1 = np.ones((28, 28))
img_2 = np.ones((28, 28))
lst = [img_1, img_2]
    
In [73]:
    
for i in lst:
    print(i.shape)
    
    
In [87]:
    
class LogisticRegressionModel(nn.Module):
    """Single linear layer mapping input features to per-class scores.

    Args:
        input_dim: Number of features in each input row.
        output_dim: Number of scores produced per row.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.linear = nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, x):
        """Return the raw output of the linear layer (no softmax is applied here)."""
        return self.linear(x)
    
In [88]:
    
train_dataset[0][0].size()
    
    Out[88]:
In [89]:
    
input_dim = 28 * 28
output_dim = 10
model = LogisticRegressionModel(input_dim, output_dim)
    
In [90]:
    
model
    
    Out[90]:
In [91]:
    
criterion = nn.CrossEntropyLoss()
    
In [92]:
    
learning_rate = 0.001
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    
In [94]:
    
print(model.parameters())
    
    
In [96]:
    
len(list(model.parameters()))
    
    Out[96]:
In [100]:
    
list(model.parameters())[0].size()
    
    Out[100]:
In [103]:
    
list(model.parameters())[1].size()
    
    Out[103]:
In [106]:
    
# Train for `num_epochs` passes over `train_loader` and report accuracy on
# `test_loader` every 500 optimizer steps.
iteration = 0  # named `iteration` so the builtin iter() is not shadowed
for epoch in range(num_epochs):
    for images, labels in train_loader:
        # Flatten every image into a 784-element row for the linear layer.
        images = images.view(-1, 28 * 28)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        iteration += 1

        if iteration % 500 == 0:
            correct = 0
            total = 0
            # Evaluation needs no gradients, so skip building the autograd graph.
            with torch.no_grad():
                for test_images, test_labels in test_loader:
                    test_outputs = model(test_images.view(-1, 28 * 28))
                    # Predicted class = index of the largest score in each row.
                    _, predicted = torch.max(test_outputs, 1)
                    total += test_labels.size(0)
                    # .item() keeps `correct` a Python int, so the percentage
                    # below is ordinary float arithmetic.
                    correct += (predicted == test_labels).sum().item()
            accuracy = 100 * correct / total

            # loss is a 0-dim tensor; loss.item() replaces the unsupported loss.data[0].
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iteration, loss.item(), accuracy))
    
    
In [118]:
    
# Compare the model's prediction with the true label for one image of the
# first test batch.
sample_index = 2
for images, labels in test_loader:
    with torch.no_grad():
        outputs = model(images.view(-1, 28 * 28))
    _, predicted = torch.max(outputs, dim=1)

    print('PREDICTION')
    print(predicted[sample_index])

    print('LABEL SIZE')
    print(labels.size())

    # The heading now names the index that is actually printed (it used to say 0).
    print('LABEL FOR IMAGE {}'.format(sample_index))
    print(labels[sample_index])

    # Only the first batch is inspected, so do not run the model on the rest.
    break
    
    
In [119]:
    
# Accuracy (in percent) of `model` over the whole test set.
correct = 0
total = 0
with torch.no_grad():  # inference only: no autograd graph needed
    for images, labels in test_loader:
        outputs = model(images.view(-1, 28 * 28))
        _, predicted = torch.max(outputs, dim=1)
        total += labels.size(0)
        # .item() turns the 0-dim count tensor into a Python int, so the
        # division below cannot be truncated by integer tensor arithmetic.
        correct += (predicted == labels).sum().item()
accuracy = 100 * correct / total
print(accuracy)
    
    
In [121]:
    
import numpy as np
a = np.ones((10))
print(a)
b = np.ones((10))
print(b)
print(a == b)
print((a == b).sum())
    
    
In [122]:
    
save_model = False
if save_model is True:
    torch.save(model.state_dict(), 'awesome_model.pkl')
    
In [127]:
    
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable
train_dataset = dsets.MNIST(root='./data',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)
test_dataset = dsets.MNIST(root='./data',
                           train=False,
                           transform=transforms.ToTensor())
batch_size = 100
n_iters = 3000
num_epochs = n_iters / (len(train_dataset) / batch_size)
num_epochs = int(num_epochs)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)
class LogisticRegressionModel(nn.Module):
    """Single linear layer mapping input features to per-class scores.

    Args:
        input_size: Number of features in each input row.
        num_classes: Number of scores produced per row.
    """

    def __init__(self, input_size, num_classes):
        super(LogisticRegressionModel, self).__init__()
        # Size the layer from the constructor arguments; the previous version
        # read the notebook globals `input_dim` / `output_dim` instead.
        self.linear = nn.Linear(input_size, num_classes)

    def forward(self, x):
        """Return the raw output of the linear layer (no softmax is applied here)."""
        out = self.linear(x)
        return out
input_dim = 28 * 28
output_dim = 10
model = LogisticRegressionModel(input_dim, output_dim)

# Choose the device once so the model, inputs and labels always live together.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

criterion = nn.CrossEntropyLoss()
learning_rate = 0.001
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Train for `num_epochs` passes over `train_loader` and report accuracy on
# `test_loader` every 500 optimizer steps.
iteration = 0  # named `iteration` so the builtin iter() is not shadowed
for epoch in range(num_epochs):
    for images, labels in train_loader:
        # Flatten every image into a 784-element row for the linear layer.
        images = images.view(-1, 28 * 28).to(device)
        # Labels must follow the images: the old CUDA branch left them on the
        # CPU, so the loss received tensors on two different devices.
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        iteration += 1

        if iteration % 500 == 0:
            correct = 0
            total = 0
            # Evaluation needs no gradients, so skip building the autograd graph.
            with torch.no_grad():
                for test_images, test_labels in test_loader:
                    test_images = test_images.view(-1, 28 * 28).to(device)
                    test_labels = test_labels.to(device)
                    test_outputs = model(test_images)
                    # Predicted class = index of the largest score in each row.
                    _, predicted = torch.max(test_outputs, 1)
                    total += test_labels.size(0)
                    # .item() copies the count back as a Python int.
                    correct += (predicted == test_labels).sum().item()
            accuracy = 100 * correct / total

            # loss is a 0-dim tensor; loss.item() replaces the unsupported loss.data[0].
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iteration, loss.item(), accuracy))
    
    
In [128]:
    
import torch
import torch.nn as nn
    
In [129]:
    
class LogisticRegressionModel(nn.Module):
    """Single linear layer mapping input features to per-class scores.

    Args:
        input_size: Number of features in each input row.
        num_classes: Number of scores produced per row.
    """

    def __init__(self, input_size, num_classes):
        super(LogisticRegressionModel, self).__init__()
        # Size the layer from the constructor arguments; the previous version
        # read the notebook globals `input_dim` / `output_dim` instead.
        self.linear = nn.Linear(input_size, num_classes)

    def forward(self, x):
        """Return the raw output of the linear layer (no softmax is applied here)."""
        out = self.linear(x)
        return out
    
In [130]:
    
input_dim = 28 * 28
output_dim = 10
model = LogisticRegressionModel(input_dim, output_dim)
    
In [131]:
    
model
    
    Out[131]:
In [132]:
    
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable
    
In [133]:
    
train_dataset = dsets.MNIST(root='./data',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)
    
In [134]:
    
test_dataset = dsets.MNIST(root='./data',
                           train=False,
                           transform=transforms.ToTensor())
    
In [135]:
    
len(train_dataset)
    
    Out[135]:
In [136]:
    
len(test_dataset)
    
    Out[136]:
In [139]:
    
batch_size = 100
n_iters = 3000
num_epochs = int(n_iters / (len(train_dataset) / batch_size))
    
In [140]:
    
num_epochs
    
    Out[140]:
In [141]:
    
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
    
In [142]:
    
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)
    
In [145]:
    
len(train_loader)
    
    Out[145]:
In [180]:
    
class FeedforwardNeuralNetModel(nn.Module):
    """Feed-forward classifier with one ReLU hidden layer.

    Args:
        input_size: Number of features in each input row.
        hidden_size: Width of the hidden layer.
        num_classes: Number of scores produced per row.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # Linear -> ReLU -> Linear, registered in the order they are applied.
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return the raw scores from fc2 (no softmax is applied here)."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)
    
In [181]:
    
input_dim = 28 * 28
hidden_dim = 100
output_dim = 10
model = FeedforwardNeuralNetModel(input_dim, hidden_dim, output_dim)
    
In [182]:
    
model
    
    Out[182]:
In [183]:
    
criterion = nn.CrossEntropyLoss()
    
In [184]:
    
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    
In [185]:
    
# Train for `num_epochs` passes over `train_loader` and report accuracy on
# `test_loader` every 500 optimizer steps.
iteration = 0  # named `iteration` so the builtin iter() is not shadowed
for epoch in range(num_epochs):
    for images, labels in train_loader:
        # Flatten every image into a 784-element row for the first linear layer.
        images = images.view(-1, 28 * 28)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        iteration += 1

        if iteration % 500 == 0:
            correct = 0
            total = 0
            # Evaluation needs no gradients, so skip building the autograd graph.
            with torch.no_grad():
                for test_images, test_labels in test_loader:
                    test_outputs = model(test_images.view(-1, 28 * 28))
                    # Predicted class = index of the largest score in each row.
                    _, predicted = torch.max(test_outputs, 1)
                    total += test_labels.size(0)
                    # .item() keeps `correct` a Python int.
                    correct += (predicted == test_labels).sum().item()
            accuracy = 100 * correct / total

            # loss is a 0-dim tensor; loss.item() replaces the unsupported loss.data[0].
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iteration, loss.item(), accuracy))
    
    
In [186]:
    
class FeedforwardNeuralNetModel(nn.Module):
    """Feed-forward classifier with two ReLU hidden layers of equal width.

    Args:
        input_size: Number of features in each input row.
        hidden_size: Width of both hidden layers.
        num_classes: Number of scores produced per row.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # Hidden layer 1
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu1 = nn.ReLU()
        # Hidden layer 2
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=hidden_size)
        self.relu2 = nn.ReLU()
        # Readout layer
        self.fc3 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return the raw scores from fc3 (no softmax is applied here)."""
        hidden = self.relu1(self.fc1(x))
        hidden = self.relu2(self.fc2(hidden))
        return self.fc3(hidden)
    
In [188]:
    
input_dim = 28 * 28
hidden_dim = 100
output_dim = 10
model = FeedforwardNeuralNetModel(input_dim, hidden_dim, output_dim)
print(model)

criterion = nn.CrossEntropyLoss()
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Train for `num_epochs` passes over `train_loader` and report accuracy on
# `test_loader` every 500 optimizer steps.
iteration = 0  # named `iteration` so the builtin iter() is not shadowed
for epoch in range(num_epochs):
    for images, labels in train_loader:
        # Flatten every image into a 784-element row for the first linear layer.
        images = images.view(-1, 28 * 28)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        iteration += 1

        if iteration % 500 == 0:
            correct = 0
            total = 0
            # Evaluation needs no gradients, so skip building the autograd graph.
            with torch.no_grad():
                for test_images, test_labels in test_loader:
                    test_outputs = model(test_images.view(-1, 28 * 28))
                    # Predicted class = index of the largest score in each row.
                    _, predicted = torch.max(test_outputs, 1)
                    total += test_labels.size(0)
                    # .item() keeps `correct` a Python int.
                    correct += (predicted == test_labels).sum().item()
            accuracy = 100 * correct / total

            # loss is a 0-dim tensor; loss.item() replaces the unsupported loss.data[0].
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iteration, loss.item(), accuracy))
    
    
In [189]:
    
class FeedforwardNeuralNetModel(nn.Module):
    """Feed-forward classifier with three ReLU hidden layers of equal width.

    Args:
        input_size: Number of features in each input row.
        hidden_size: Width of every hidden layer.
        num_classes: Number of scores produced per row.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # Hidden layer 1
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu1 = nn.ReLU()
        # Hidden layer 2
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=hidden_size)
        self.relu2 = nn.ReLU()
        # Hidden layer 3
        self.fc3 = nn.Linear(in_features=hidden_size, out_features=hidden_size)
        self.relu3 = nn.ReLU()
        # Readout layer
        self.fc4 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return the raw scores from fc4 (no softmax is applied here)."""
        hidden = x
        # Apply each (linear, activation) pair in registration order.
        for linear, activation in ((self.fc1, self.relu1),
                                   (self.fc2, self.relu2),
                                   (self.fc3, self.relu3)):
            hidden = activation(linear(hidden))
        return self.fc4(hidden)
    
In [193]:
    
input_dim = 28 * 28
hidden_dim = 100
output_dim = 10
model = FeedforwardNeuralNetModel(input_dim, hidden_dim, output_dim)
print(model)

# Choose the device once so the model, inputs and labels always live together.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

criterion = nn.CrossEntropyLoss()
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Train for `num_epochs` passes over `train_loader` and report accuracy on
# `test_loader` every 500 optimizer steps.
iteration = 0  # named `iteration` so the builtin iter() is not shadowed
for epoch in range(num_epochs):
    for images, labels in train_loader:
        # Flatten every image into a 784-element row for the first linear layer.
        images = images.view(-1, 28 * 28).to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        iteration += 1

        if iteration % 500 == 0:
            correct = 0
            total = 0
            # Evaluation needs no gradients, so skip building the autograd graph.
            with torch.no_grad():
                for test_images, test_labels in test_loader:
                    test_images = test_images.view(-1, 28 * 28).to(device)
                    test_labels = test_labels.to(device)
                    test_outputs = model(test_images)
                    # Predicted class = index of the largest score in each row.
                    _, predicted = torch.max(test_outputs, 1)
                    total += test_labels.size(0)
                    # .item() copies the count back as a Python int.
                    correct += (predicted == test_labels).sum().item()
            accuracy = 100 * correct / total

            # loss is a 0-dim tensor; loss.item() replaces the unsupported loss.data[0].
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iteration, loss.item(), accuracy))
    
    
In [194]:
    
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable
    
In [195]:
    
train_dataset = dsets.MNIST(root='./data',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)
test_dataset = dsets.MNIST(root='./data',
                           train=False,
                           transform=transforms.ToTensor())
    
In [203]:
    
train_dataset.train_data.size()
    
    Out[203]:
In [207]:
    
train_dataset.train_labels.size()
    
    Out[207]:
In [205]:
    
test_dataset.test_data.size()
    
    Out[205]:
In [208]:
    
test_dataset.test_labels.size()
    
    Out[208]:
In [210]:
    
batch_size = 100
n_iters = 3000
num_epochs = int(n_iters / (len(train_dataset) / batch_size))
    
In [211]:
    
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)
    
In [214]:
    
class CNNModel(nn.Module):
    """Two conv/ReLU/max-pool stages followed by one linear readout layer.

    Both convolutions use kernel 5, stride 1, padding 2, so only the two 2x2
    pools shrink the feature map. fc1 expects 32 * 7 * 7 features, which
    matches 28x28 single-channel inputs (28 -> 14 -> 7).
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1 -> 16 channels
        self.cnn1 = nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)

        # Stage 2: 16 -> 32 channels
        self.cnn2 = nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2)
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)

        # Readout: flattened feature maps -> 10 scores
        self.fc1 = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        """Return 10 raw scores per input sample."""
        features = x
        for layer in (self.cnn1, self.relu1, self.maxpool1,
                      self.cnn2, self.relu2, self.maxpool2):
            features = layer(features)

        # Keep the batch dimension and flatten everything else for fc1.
        flat = features.view(features.size(0), -1)
        return self.fc1(flat)
    
In [215]:
    
model = CNNModel()
    
In [216]:
    
model
    
    Out[216]:
In [223]:
    
len(list(model.parameters()))
    
    Out[223]:
In [222]:
    
list(model.parameters())[0].size()
    
    Out[222]:
In [224]:
    
list(model.parameters())[1].size()
    
    Out[224]:
In [225]:
    
list(model.parameters())[2].size()
    
    Out[225]:
In [226]:
    
list(model.parameters())[3].size()
    
    Out[226]:
In [227]:
    
list(model.parameters())[4].size()
    
    Out[227]:
In [228]:
    
list(model.parameters())[5].size()
    
    Out[228]:
In [217]:
    
criterion = nn.CrossEntropyLoss()
    
In [230]:
    
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Train for `num_epochs` passes over `train_loader` and report accuracy on
# `test_loader` every 500 optimizer steps. Batches are fed to the model in the
# shape the loader yields (no flattening).
iteration = 0  # named `iteration` so the builtin iter() is not shadowed
for epoch in range(num_epochs):
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        iteration += 1

        if iteration % 500 == 0:
            correct = 0
            total = 0
            # Evaluation needs no gradients, so skip building the autograd graph.
            with torch.no_grad():
                for test_images, test_labels in test_loader:
                    test_outputs = model(test_images)
                    # Predicted class = index of the largest score in each row.
                    _, predicted = torch.max(test_outputs, 1)
                    total += test_labels.size(0)
                    # .item() keeps `correct` a Python int.
                    correct += (predicted == test_labels).sum().item()
            accuracy = 100 * correct / total

            # loss is a 0-dim tensor; loss.item() replaces the unsupported loss.data[0].
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iteration, loss.item(), accuracy))
    
    
In [231]:
    
class CNNModel(nn.Module):
    """Two conv/ReLU/average-pool stages followed by one linear readout layer.

    Both convolutions use kernel 5, stride 1, padding 2, so only the two 2x2
    pools shrink the feature map. fc1 expects 32 * 7 * 7 features, which
    matches 28x28 single-channel inputs (28 -> 14 -> 7).
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1 -> 16 channels
        self.cnn1 = nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2)
        self.relu1 = nn.ReLU()
        self.avgpool1 = nn.AvgPool2d(kernel_size=2)

        # Stage 2: 16 -> 32 channels
        self.cnn2 = nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2)
        self.relu2 = nn.ReLU()
        self.avgpool2 = nn.AvgPool2d(kernel_size=2)

        # Readout: flattened feature maps -> 10 scores
        self.fc1 = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        """Return 10 raw scores per input sample."""
        features = x
        for layer in (self.cnn1, self.relu1, self.avgpool1,
                      self.cnn2, self.relu2, self.avgpool2):
            features = layer(features)

        # Keep the batch dimension and flatten everything else for fc1.
        flat = features.view(features.size(0), -1)
        return self.fc1(flat)
model = CNNModel()
criterion = nn.CrossEntropyLoss()
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Train for `num_epochs` passes over `train_loader` and report accuracy on
# `test_loader` every 500 optimizer steps. Batches are fed to the model in the
# shape the loader yields (no flattening).
iteration = 0  # named `iteration` so the builtin iter() is not shadowed
for epoch in range(num_epochs):
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        iteration += 1

        if iteration % 500 == 0:
            correct = 0
            total = 0
            # Evaluation needs no gradients, so skip building the autograd graph.
            with torch.no_grad():
                for test_images, test_labels in test_loader:
                    test_outputs = model(test_images)
                    # Predicted class = index of the largest score in each row.
                    _, predicted = torch.max(test_outputs, 1)
                    total += test_labels.size(0)
                    # .item() keeps `correct` a Python int.
                    correct += (predicted == test_labels).sum().item()
            accuracy = 100 * correct / total

            # loss is a 0-dim tensor; loss.item() replaces the unsupported loss.data[0].
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iteration, loss.item(), accuracy))
    
    
In [233]:
    
class CNNModel(nn.Module):
    """Two unpadded conv/ReLU/max-pool stages followed by one linear readout layer.

    With kernel 5 and padding 0 each convolution trims 4 pixels per spatial
    dimension, and each 2x2 pool halves it. fc1 expects 32 * 4 * 4 features,
    which matches 28x28 single-channel inputs (28 -> 24 -> 12 -> 8 -> 4).
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1 -> 16 channels, no padding
        self.cnn1 = nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=0)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)

        # Stage 2: 16 -> 32 channels, no padding
        self.cnn2 = nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=0)
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)

        # Readout: flattened feature maps -> 10 scores
        self.fc1 = nn.Linear(32 * 4 * 4, 10)

    def forward(self, x):
        """Return 10 raw scores per input sample."""
        features = x
        for layer in (self.cnn1, self.relu1, self.maxpool1,
                      self.cnn2, self.relu2, self.maxpool2):
            features = layer(features)

        # Keep the batch dimension and flatten everything else for fc1.
        flat = features.view(features.size(0), -1)
        return self.fc1(flat)
model = CNNModel()

# Choose the device once so the model, inputs and labels always live together.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

criterion = nn.CrossEntropyLoss()
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Train for `num_epochs` passes over `train_loader` and report accuracy on
# `test_loader` every 500 optimizer steps.
iteration = 0  # named `iteration` so the builtin iter() is not shadowed
for epoch in range(num_epochs):
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        iteration += 1

        if iteration % 500 == 0:
            correct = 0
            total = 0
            # Evaluation needs no gradients, so skip building the autograd graph.
            with torch.no_grad():
                for test_images, test_labels in test_loader:
                    test_images = test_images.to(device)
                    test_labels = test_labels.to(device)
                    test_outputs = model(test_images)
                    # Predicted class = index of the largest score in each row.
                    _, predicted = torch.max(test_outputs, 1)
                    total += test_labels.size(0)
                    # .item() copies the count back as a Python int.
                    correct += (predicted == test_labels).sum().item()
            accuracy = 100 * correct / total

            # loss is a 0-dim tensor; loss.item() replaces the unsupported loss.data[0].
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iteration, loss.item(), accuracy))
    
    
In [1]:
    
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable
    
In [2]:
    
train_dataset = dsets.MNIST(root='./data',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)
test_dataset = dsets.MNIST(root='./data',
                           train=False,
                           transform=transforms.ToTensor())
    
In [3]:
    
print(train_dataset.train_data.size())
print(train_dataset.train_labels.size())
print(test_dataset.test_data.size())
print(test_dataset.test_labels.size())
    
    
In [45]:
    
batch_size = 100
n_iters = 3000
num_epochs = int(n_iters / (len(train_dataset) / batch_size))
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)
    
In [79]:
    
class RNNModel(nn.Module):
    """tanh RNN followed by a linear readout of the last time step.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the RNN hidden state.
        layer_dim: Number of stacked RNN layers.
        output_dim: Number of scores produced per sequence.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(RNNModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim

        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first=True, nonlinearity='tanh')
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return the readout of the final time step for a (batch, seq, input_dim) input."""
        # h0 is ordered (layer, batch, hidden). It is created with the dtype and
        # device of x so the model also works after .double() or .cuda().
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim,
                         dtype=x.dtype, device=x.device)
        # With batch_first=True the output is ordered (batch, seq, features).
        out, _ = self.rnn(x, h0)
        # Only the last time step feeds the classifier.
        out = self.fc(out[:, -1, :])
        return out
    
In [80]:
    
input_dim = 28
hidden_dim = 100
layer_dim = 2
output_dim = 10
    
In [81]:
    
model = RNNModel(input_dim, hidden_dim, layer_dim, output_dim)
    
In [82]:
    
model
    
    Out[82]:
In [83]:
    
criterion = nn.CrossEntropyLoss()
    
In [84]:
    
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    
In [71]:
    
len(list(model.parameters()))
    
    Out[71]:
In [72]:
    
list(model.parameters())[0].size()
    
    Out[72]:
In [73]:
    
list(model.parameters())[2].size()
    
    Out[73]:
In [74]:
    
list(model.parameters())[1].size()
    
    Out[74]:
In [75]:
    
list(model.parameters())[3].size()
    
    Out[75]:
In [76]:
    
list(model.parameters())[4].size()
    
    Out[76]:
In [77]:
    
list(model.parameters())[5].size()
    
    Out[77]:
In [85]:
    
# Each image is fed to the model as `seq_dim` time steps of `input_dim` features.
seq_dim = 28

# Train for `num_epochs` passes over `train_loader` and report accuracy on
# `test_loader` every 500 optimizer steps.
iteration = 0  # named `iteration` so the builtin iter() is not shadowed
for epoch in range(num_epochs):
    for images, labels in train_loader:
        images = images.view(-1, seq_dim, input_dim)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        iteration += 1

        if iteration % 500 == 0:
            correct = 0
            total = 0
            # Evaluation needs no gradients, so skip building the autograd graph.
            with torch.no_grad():
                for test_images, test_labels in test_loader:
                    test_outputs = model(test_images.view(-1, seq_dim, input_dim))
                    # Predicted class = index of the largest score in each row.
                    _, predicted = torch.max(test_outputs, 1)
                    total += test_labels.size(0)
                    # .item() keeps `correct` a Python int.
                    correct += (predicted == test_labels).sum().item()
            accuracy = 100 * correct / total

            # loss is a 0-dim tensor; loss.item() replaces the unsupported loss.data[0].
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iteration, loss.item(), accuracy))
    
    
In [88]:
    
class LSTMModel(nn.Module):
    """LSTM followed by a linear readout of the last time step.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the LSTM hidden and cell states.
        layer_dim: Number of stacked LSTM layers.
        output_dim: Number of scores produced per sequence.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(LSTMModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)

        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return the readout of the final time step for a (batch, seq, input_dim) input."""
        # Initial hidden and cell states are ordered (layer, batch, hidden). They
        # take the dtype and device of x so the model also works after
        # .double() or .cuda().
        state_shape = (self.layer_dim, x.size(0), self.hidden_dim)
        h0 = torch.zeros(state_shape, dtype=x.dtype, device=x.device)  # hidden state
        c0 = torch.zeros(state_shape, dtype=x.dtype, device=x.device)  # cell state

        # One LSTM step per element of the sequence dimension.
        out, _ = self.lstm(x, (h0, c0))

        # out is (batch, seq, hidden_dim); only the last time step is classified.
        out = self.fc(out[:, -1, :])
        return out
    
In [89]:
    
input_dim = 28
hidden_dim = 100
layer_dim = 1
output_dim = 10
model = LSTMModel(input_dim, hidden_dim, layer_dim, output_dim)
criterion = nn.CrossEntropyLoss()
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    
In [90]:
    
len(list(model.parameters()))
    
    Out[90]:
In [91]:
    
for i in range(len(list(model.parameters()))):
    print(list(model.parameters())[i].size())
    
    
In [92]:
    
# Each image is fed to the model as `seq_dim` time steps of `input_dim` features.
seq_dim = 28

# Train for `num_epochs` passes over `train_loader` and report accuracy on
# `test_loader` every 500 optimizer steps.
iteration = 0  # named `iteration` so the builtin iter() is not shadowed
for epoch in range(num_epochs):
    for images, labels in train_loader:
        images = images.view(-1, seq_dim, input_dim)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        iteration += 1

        if iteration % 500 == 0:
            correct = 0
            total = 0
            # Evaluation needs no gradients, so skip building the autograd graph.
            with torch.no_grad():
                for test_images, test_labels in test_loader:
                    test_outputs = model(test_images.view(-1, seq_dim, input_dim))
                    # Predicted class = index of the largest score in each row.
                    _, predicted = torch.max(test_outputs, 1)
                    total += test_labels.size(0)
                    # .item() keeps `correct` a Python int.
                    correct += (predicted == test_labels).sum().item()
            accuracy = 100 * correct / total

            # loss is a 0-dim tensor; loss.item() replaces the unsupported loss.data[0].
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iteration, loss.item(), accuracy))
    
    
In [ ]: