Linear regression from scratch

Generating random data


In [1]:
from __future__ import print_function
import matplotlib.pyplot as plt
from tqdm import tqdm
import mxnet as mx
from mxnet import gluon

In [2]:
data_ctx = mx.cpu()
model_ctx = mx.cpu()
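
Both contexts point at the CPU. If a GPU is available (and the installed MXNet build supports CUDA), a sketch like the following would place the model there instead; it assumes mx.context.num_gpus() exists in your MXNet version:

# Hypothetical alternative: prefer the first GPU when one is present
model_ctx = mx.gpu(0) if mx.context.num_gpus() > 0 else mx.cpu()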

In [3]:
num_inputs = 2
num_outputs = 1
num_examples = 10000

In [4]:
w1_true = 2
w2_true = -3.4
b_true = 4.2

In [5]:
# Defining the ground-truth function whose parameters we want to recover
def real_fn(X):
    # y = 2 * x1 - 3.4 * x2 + 4.2
    return w1_true * X[:, 0] + w2_true * X[:, 1] + b_true

In [6]:
# Generating random X
X = mx.nd.random_normal(shape=(num_examples, num_inputs),
                        ctx=data_ctx)

In [7]:
# Generating Gaussian noise with standard deviation 0.1
noise = 0.1 * mx.nd.random_normal(shape=(num_examples, ), ctx=data_ctx)
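
X and noise are drawn fresh on every run, so the exact numbers below will differ between executions. To make the draws reproducible, MXNet's global seed could be set before these two cells, e.g.:

# Optional: fix the global random seed before generating X and noise
mx.random.seed(42)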

In [8]:
noise.shape


Out[8]:
(10000,)

In [9]:
# Generating Y
y = real_fn(X) + noise

In [10]:
print(X[0])
print(y[0])


[2.2122064 0.7740038]
<NDArray 2 @cpu(0)>

[6.053678]
<NDArray 1 @cpu(0)>

In [11]:
# Recomputing y[0] without the noise term (w2_true is already negative,
# so it is added, not subtracted)
print(w1_true * X[0, 0] + w2_true * X[0, 1] + b_true)


[5.9928]
<NDArray 1 @cpu(0)>

Data iterator


In [12]:
# Defining batch_size
batch_size = 4

In [13]:
# Creating a data iterator
train_data = gluon.data.DataLoader(gluon.data.ArrayDataset(X, y),
                                   batch_size=batch_size, 
                                   shuffle=True)
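
ArrayDataset pairs each row of X with the corresponding entry of y; DataLoader then handles batching and shuffling. As a quick illustration (not part of the original run), indexing the dataset returns one (features, target) tuple:

# Indexing an ArrayDataset yields a single (x, y) pair
dataset = gluon.data.ArrayDataset(X, y)
print(dataset[0])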

In [14]:
# Getting a single batch
for i, (data, label) in enumerate(train_data):
    print(data, label)
    break


[[-0.03586762 -0.72321445]
 [ 0.39837354  1.3839029 ]
 [-0.05032287 -0.19343433]
 [-0.44942716  1.4952304 ]]
<NDArray 4x2 @cpu(0)> 
[ 6.5313764   0.44909072  4.9427285  -1.7898526 ]
<NDArray 4 @cpu(0)>

In [15]:
# With shuffle=True, each new iteration over the data yields different batches
for i, (data, label) in enumerate(train_data):
    print(data, label)
    break


[[-0.38888022 -0.5888279 ]
 [-0.04182246  0.17726438]
 [ 0.34541106 -0.33166552]
 [ 1.3457807   0.4406432 ]]
<NDArray 4x2 @cpu(0)> 
[5.398409  3.646068  6.02336   5.6694674]
<NDArray 4 @cpu(0)>

In [16]:
# 10000 samples batched 4 at a time yields 2500 batches
for i, (data, label) in enumerate(train_data):
    pass
print(i + 1)


2500
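
The counting loop is only a demonstration; Gluon's DataLoader also reports the number of batches directly:

print(len(train_data))  # 2500, without iterating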

Defining the model

Defining model parameters


In [17]:
w = mx.nd.random_normal(shape=(num_inputs, num_outputs), 
                        ctx=model_ctx)
b = mx.nd.random_normal(shape=num_outputs, 
                        ctx=model_ctx)
params = [w, b]

In [18]:
w.shape


Out[18]:
(2, 1)

In [19]:
b.shape


Out[19]:
(1,)

In [20]:
# Attaching gradients
for param in params:
    param.attach_grad()
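
attach_grad() allocates a gradient buffer of the same shape as each parameter (initialized to zeros), which autograd fills in during backward(). A quick check:

print(w.grad.shape)  # (2, 1), same shape as w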

In [21]:
# Defining network
def net(X):
    return mx.nd.dot(X, w) + b
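
A shape sanity check (not part of the original run): a batch of 4 examples maps to 4 predictions, one per row:

print(net(X[:4]).shape)  # (4, 1)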

In [22]:
# Defining the loss function
def square_loss(yhat, y):
    return mx.nd.mean((yhat - y) ** 2)
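
This is the mean squared error over the batch. For example, predictions [1, 2] against targets [0, 0] give ((1 - 0)**2 + (2 - 0)**2) / 2 = 2.5:

# Tiny worked example of the loss
print(square_loss(mx.nd.array([1, 2]), mx.nd.array([0, 0])))  # [2.5]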

In [23]:
# Defining Stochastic Gradient Descent
def SGD(params, lr):
    for param in params:
        param[:] = param - lr * param.grad
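
The in-place assignment param[:] = ... overwrites the values without replacing the NDArray, so the gradient buffer attached earlier remains valid. As a sanity check on autograd, the gradient of the mean squared loss with respect to w has the closed form 2/N * X^T (Xw + b - y); a minimal sketch comparing it with w.grad, assuming the shapes defined above:

Xb = X[:4]
yb = y[:4].reshape((-1, 1))
with mx.autograd.record():
    loss = square_loss(net(Xb), yb)
loss.backward()
# Analytic gradient of the mean squared loss w.r.t. w
analytic = 2.0 / 4 * mx.nd.dot(Xb.T, mx.nd.dot(Xb, w) + b - yb)
print(w.grad - analytic)  # should be numerically zero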

Training


In [24]:
# Defining training parameters
epochs = 10
learning_rate = .0001

In [25]:
num_batches = num_examples / batch_size
num_batches


Out[25]:
2500.0

In [26]:
for e in range(epochs):
    cumulative_loss = 0
    # Batch training
    for i, (data, label) in tqdm(enumerate(train_data), ascii=True):
        data = data.as_in_context(model_ctx)
        # Reshape labels to (batch_size, 1) to match the network's output
        label = label.as_in_context(model_ctx).reshape((-1, 1))
        with mx.autograd.record():
            output = net(data)
            loss = square_loss(output, label)
        loss.backward()
        # Applying the parameter update
        SGD(params, learning_rate)
        cumulative_loss += loss.asscalar()
    print('Epoch: {}'.format(e))
    print(cumulative_loss / num_batches)


2500it [00:04, 575.26it/s]
Epoch: 0
17.787389125978947
2500it [00:04, 565.95it/s]
Epoch: 1
6.528129469433427
2500it [00:04, 564.98it/s]
Epoch: 2
2.399974269490689
2500it [00:04, 507.98it/s]
Epoch: 3
0.8864629945185035
2500it [00:04, 524.35it/s]
Epoch: 4
0.3314312654912472
2500it [00:04, 537.91it/s]
Epoch: 5
0.1278467336665839
2500it [00:04, 557.15it/s]
Epoch: 6
0.05314170874669216
2500it [00:04, 500.69it/s]
Epoch: 7
0.025746813264163212
2500it [00:04, 532.79it/s]
Epoch: 8
0.01570879513991531
2500it [00:04, 519.32it/s]
Epoch: 9
0.012016978109814226
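
The loss plateaus near 0.012 rather than zero. That is expected: the targets carry additive noise with standard deviation 0.1, so the best achievable mean squared loss is the noise variance, 0.1 ** 2 = 0.01, and the model is essentially at that floor.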


In [27]:
print('True values:')
print(w1_true)
print(w2_true)
print(b_true)


True values:
2
-3.4
4.2

In [28]:
w1_predicted = params[0][0]
w2_predicted = params[0][1]
b_predicted = params[1][0]

In [29]:
print('Predicted values:')
print(w1_predicted.asscalar())
print(w2_predicted.asscalar())
print(b_predicted.asscalar())


Predicted values:
1.9835123
-3.3783004
4.1766834
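
matplotlib was imported at the top but never used; a closing sketch (not part of the original run) visualizes how closely the predictions track the targets:

# Predicted vs. true targets on a 200-example slice
yhat = net(X[:200]).asnumpy().flatten()
plt.scatter(y[:200].asnumpy(), yhat, s=5)
plt.xlabel('true y')
plt.ylabel('predicted y')
plt.title('Learned fit vs. targets')
plt.show()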