In [1]:
##########################
### DATASET
##########################

import numpy as np

data = np.genfromtxt('toydata.txt', delimiter='\t')
X, y = data[:, :2], data[:, 2]
y = y.astype(np.int)

print('Class label counts:', np.bincount(y))
print('X.shape:', X.shape)
print('y.shape:', y.shape)

# Shuffling & train/test split
shuffle_idx = np.arange(y.shape[0])
shuffle_rng = np.random.RandomState(123)
shuffle_rng.shuffle(shuffle_idx)
X, y = X[shuffle_idx], y[shuffle_idx]

X_train, X_test = X[shuffle_idx[:70]], X[shuffle_idx[70:]]
y_train, y_test = y[shuffle_idx[:70]], y[shuffle_idx[70:]]

# Normalize (mean zero, unit variance)
mu, sigma = X_train.mean(axis=0), X_train.std(axis=0)
X_train = (X_train - mu) / sigma
X_test = (X_test - mu) / sigma


Class label counts: [50 50]
X.shape: (100, 2)
y.shape: (100,)

Low-level implementation with manual gradients


In [2]:
import torch
import torch.nn.functional as F


def custom_where(cond, x_1, x_2):
    return (cond * x_1) + ((1-cond) * x_2)


class LogisticRegression1():
    def __init__(self, num_features):
        self.num_features = num_features
        self.weights = torch.zeros(num_features, 1)
        self.bias = torch.zeros(1)

    def forward(self, x):
        linear = torch.add(torch.mm(x, self.weights), self.bias)
        probas = self._sigmoid(linear)
        return probas
        
    def backward(self, x, y):  
        probas = self.forward(x)
        errors = y - probas.view(-1)
        return errors
            
    def predict_labels(self, x):
        probas = self.forward(x)
        labels = custom_where(probas >= .5, 1, 0)
        return labels    
            
    def evaluate(self, x, y):
        labels = self.predict_labels(x).float()
        accuracy = torch.sum(labels.view(-1) == y) / y.size()[0]
        return accuracy
    
    def _sigmoid(self, z):
        return 1. / (1. + torch.exp(-z))
    
    def _logit_cost(self, y, proba):
        tmp1 = torch.mm(-y.view(1, -1), torch.log(proba))
        tmp2 = torch.mm((1 - y).view(1, -1), torch.log(1 - proba))
        return tmp1 - tmp2
    
    def train(self, x, y, num_epochs, learning_rate=0.01):
        for e in range(num_epochs):
            
            print('Epoch: %03d' % (e+1), end="")
            errors = self.backward(x, y)
            neg_grad = torch.mm(x.transpose(0, 1), errors.view(-1, 1))
            self.weights += learning_rate * neg_grad
            self.bias += learning_rate * torch.sum(errors)
            print(' | Train ACC: %.3f' % self.evaluate(x, y), end="")
            print(' | Cost: %.3f' % self._logit_cost(y, self.forward(x)))

In [3]:
logr = LogisticRegression1(num_features=2)
X_train_tensor, y_train_tensor = torch.Tensor(X_train), torch.Tensor(y_train)
logr.train(X_train_tensor, y_train_tensor, num_epochs=10, learning_rate=0.1)

print('\nModel parameters:')
print('  Weights: %s' % logr.weights)
print('  Bias: %s' % logr.bias)


Epoch: 001 | Train ACC: 0.971 | Cost: 4.986
Epoch: 002 | Train ACC: 0.971 | Cost: 4.469
Epoch: 003 | Train ACC: 0.971 | Cost: 4.114
Epoch: 004 | Train ACC: 0.971 | Cost: 3.862
Epoch: 005 | Train ACC: 0.971 | Cost: 3.673
Epoch: 006 | Train ACC: 0.971 | Cost: 3.525
Epoch: 007 | Train ACC: 0.971 | Cost: 3.404
Epoch: 008 | Train ACC: 0.971 | Cost: 3.301
Epoch: 009 | Train ACC: 0.971 | Cost: 3.210
Epoch: 010 | Train ACC: 0.986 | Cost: 3.128

Model parameters:
  Weights: 
 3.5362
 3.0126
[torch.FloatTensor of size 2x1]

  Bias: 
-1.4038
[torch.FloatTensor of size 1]


In [4]:
X_test_tensor, y_test_tensor = torch.Tensor(X_test), torch.Tensor(y_test)

test_acc = logr.evaluate(X_test_tensor, y_test_tensor)
print('Test set accuracy: %.3f%%' % (test_acc*100))


Test set accuracy: 100.000%

Low-level implementation using autograd


In [5]:
dtype = torch.FloatTensor
# dtype = torch.cuda.FloatTensor # Uncomment this to run on GPU

In [6]:
import torch
import torch.nn.functional as F
from torch.autograd import Variable


def custom_where(cond, x_1, x_2):
    return (cond * x_1) + ((1-cond) * x_2)


class LogisticRegression2():
    def __init__(self, num_features):
        self.num_features = num_features
        self.weights = Variable(torch.zeros(num_features, 1).type(dtype),
                                requires_grad=True)
        self.bias = Variable(torch.zeros(1).type(dtype),
                             requires_grad=True)

    def forward(self, x):
        linear = torch.add(torch.mm(x, self.weights), self.bias)
        probas = self._sigmoid(linear)
        return probas
                    
    def predict_labels(self, x):
        probas = self.forward(x)
        labels = custom_where((probas >= .5).float(), 1, 0)
        return labels    
            
    def evaluate(self, x, y):
        labels = self.predict_labels(x)
        accuracy = (torch.sum(labels.view(-1) == y.view(-1))).float() / y.size()[0]
        return accuracy
    
    def _sigmoid(self, z):
        return 1. / (1. + torch.exp(-z))
    
    def _logit_cost(self, y, proba):
        tmp1 = torch.mm(-y.view(1, -1), torch.log(proba))
        tmp2 = torch.mm((1 - y).view(1, -1), torch.log(1 - proba))
        return tmp1 - tmp2
    
    def train(self, x, y, num_epochs, learning_rate=0.01):
        
        x_var = Variable(x.type(dtype), requires_grad=False)
        y_var = Variable(y.type(dtype), requires_grad=False)
        
        for e in range(num_epochs):
            
            print('Epoch: %03d' % (e+1), end="")
            proba = self.forward(x_var)
            cost = self._logit_cost(y_var, proba)
            cost.backward()
            
            self.weights.data -= learning_rate * self.weights.grad.data
            self.bias.data -= learning_rate * self.bias.grad.data
            
            print(' | Train ACC: %.3f' % self.evaluate(x_var, y_var), end="")
            print(' | Cost: %.3f' % self._logit_cost(y_var, self.forward(x_var)))
            
            self.weights.grad.data.zero_()
            self.bias.grad.data.zero_()

In [7]:
logr = LogisticRegression2(num_features=2)
X_train_tensor, y_train_tensor = torch.Tensor(X_train), torch.Tensor(y_train)
logr.train(X_train_tensor, y_train_tensor, num_epochs=10, learning_rate=0.1)

print('\nModel parameters:')
print('  Weights: %s' % logr.weights)
print('  Bias: %s' % logr.bias)


Epoch: 001 | Train ACC: 0.971 | Cost: 4.986
Epoch: 002 | Train ACC: 0.971 | Cost: 4.469
Epoch: 003 | Train ACC: 0.971 | Cost: 4.114
Epoch: 004 | Train ACC: 0.971 | Cost: 3.862
Epoch: 005 | Train ACC: 0.971 | Cost: 3.673
Epoch: 006 | Train ACC: 0.971 | Cost: 3.525
Epoch: 007 | Train ACC: 0.971 | Cost: 3.404
Epoch: 008 | Train ACC: 0.971 | Cost: 3.301
Epoch: 009 | Train ACC: 0.971 | Cost: 3.210
Epoch: 010 | Train ACC: 0.986 | Cost: 3.128

Model parameters:
  Weights: Variable containing:
 3.5362
 3.0126
[torch.FloatTensor of size 2x1]

  Bias: Variable containing:
-1.4038
[torch.FloatTensor of size 1]


In [8]:
X_test_var = Variable(torch.Tensor(X_test).type(dtype), requires_grad=False)
y_test_var = Variable(torch.Tensor(y_test).type(dtype), requires_grad=False)

test_acc = logr.evaluate(X_test_var, y_test_var)
print('Test set accuracy: %.2f%%' % (test_acc*100))


Test set accuracy: 100.00%

High-level Module API


In [9]:
class LogisticRegression3(torch.nn.Module):

    def __init__(self, num_features):
        super(LogisticRegression3, self).__init__()
        self.linear = torch.nn.Linear(num_features, 1)
        # initialize weights to zeros here:
        self.linear.weight.data.zero_()
        self.linear.bias.data.zero_()
        
    def forward(self, x):
        logits = self.linear(x)
        probas = F.sigmoid(logits)
        return probas

model = LogisticRegression3(num_features=2)

In [10]:
cost_fn = torch.nn.BCELoss(size_average=False)
# set size_average=False to match the manual approach
#   since there, we didn't normalize the loss by the 
#   number of training examples

optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

In [11]:
def comp_accuracy(label_var, pred_probas):
    pred_labels = custom_where((pred_probas > 0.5).float(), 1, 0).view(-1)
    acc = torch.sum(pred_labels == label_var.view(-1)).float() / label_var.size(0)
    return acc


num_epochs = 10

X_train_var = Variable(torch.Tensor(X_train), requires_grad=False)
y_train_var = Variable(torch.Tensor(y_train), requires_grad=False).view(-1, 1)


for epoch in range(num_epochs):

    out = model(X_train_var)
    cost = cost_fn(out, y_train_var)
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    print('Epoch: %03d' % (epoch + 1), end="")
    pred_probas = model(X_train_var)
    acc = comp_accuracy(y_train_var, pred_probas)
    print(' | Train ACC: %.3f' % acc, end="")
    print(' | Cost: %.3f' % cost_fn(pred_probas, y_train_var))


    
print('\nModel parameters:')
print('  Weights: %s' % model.linear.weight.data)
print('  Bias: %s' % model.linear.bias.data)


Epoch: 001 | Train ACC: 0.971 | Cost: 4.986
Epoch: 002 | Train ACC: 0.971 | Cost: 4.469
Epoch: 003 | Train ACC: 0.971 | Cost: 4.114
Epoch: 004 | Train ACC: 0.971 | Cost: 3.862
Epoch: 005 | Train ACC: 0.971 | Cost: 3.673
Epoch: 006 | Train ACC: 0.971 | Cost: 3.525
Epoch: 007 | Train ACC: 0.971 | Cost: 3.404
Epoch: 008 | Train ACC: 0.971 | Cost: 3.301
Epoch: 009 | Train ACC: 0.971 | Cost: 3.210
Epoch: 010 | Train ACC: 0.986 | Cost: 3.128

Model parameters:
  Weights: 
 3.5362  3.0126
[torch.FloatTensor of size 1x2]

  Bias: 
-1.4038
[torch.FloatTensor of size 1]


In [12]:
X_test_var = Variable(torch.Tensor(X_test).type(dtype), requires_grad=False)
y_test_var = Variable(torch.Tensor(y_test).type(dtype), requires_grad=False)

pred_probas = model(X_test_var)
test_acc = comp_accuracy(y_test_var, pred_probas)

print('Test set accuracy: %.2f%%' % (test_acc*100))


Test set accuracy: 100.00%