In [1]:
%matplotlib inline
from fastai.basics import *
In [2]:
n = 100
x = torch.ones(n,2)
x[:,0].uniform_(-1.,1)
x[:5]
Out[2]:
In [6]:
a = tensor(3.,2); a
Out[6]:
In [7]:
y = x@a + torch.rand(n)
In [8]:
plt.scatter(x[:,0],y);
In [9]:
def mse(y_hat, y): return ((y_hat - y)**2).mean()
In [10]:
a = tensor(-1.,1)
In [11]:
y_hat = x@a
In [16]:
plt.scatter(x[:,0],y), plt.scatter(x[:,0],y_hat);
In [17]:
mse(y_hat, y)
Out[17]:
In [18]:
a = nn.Parameter(a); a
Out[18]:
In [19]:
def update():
    y_hat = x@a
    loss = mse(y, y_hat)
    if t % 10 == 0: print(loss)   # t and lr are globals set by the training loop below
    loss.backward()               # compute the gradient of the loss w.r.t. a
    with torch.no_grad():
        a.sub_(lr * a.grad)       # take a gradient descent step
        a.grad.zero_()            # clear the gradient for the next iteration
In [20]:
lr = 1e-1
for t in range(100): update()
In [23]:
plt.scatter(x[:,0],y), plt.scatter(x[:,0], x@a);
In [24]:
a
Out[24]:
In [25]:
from matplotlib import animation, rc
rc('animation', html='html5')
In [36]:
a = nn.Parameter(tensor(-1.,1))
fig = plt.figure()
plt.scatter(x[:,0], y, c='orange')
line, = plt.plot(x[:,0], x@a)
plt.close()
In [37]:
def animate(i):
    update()
    line.set_ydata(x@a)   # redraw the fitted line after each update
    return line,
In [38]:
animation.FuncAnimation(fig, animate, np.arange(0,100), interval=20)
Out[38]:
The only difference between SGD and what we did here (plain gradient descent) is minibatches. We calculated the loss on the whole dataset at every iteration. SGD instead grabs a minibatch at random, calculates the loss on just that batch, updates the weights, and moves on to the next minibatch.
So in update, the lines:
y_hat = x@a
loss = mse(y, y_hat)
instead look more like:
y_hat = x[rand_idx]@a
loss = mse(y[rand_idx], y_hat)
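Putting that together, a minimal minibatch-SGD version of the training step might look like the sketch below (not from the notebook: the batch size bs and the per-epoch shuffling are assumptions made here for illustration).
# Sketch of minibatch SGD over the same x, y, a, lr; bs and the shuffling are assumptions.
bs = 10
def update_sgd():
    idxs = torch.randperm(n)          # shuffle the example indices each epoch
    for i in range(0, n, bs):
        rand_idx = idxs[i:i+bs]       # one random minibatch
        y_hat = x[rand_idx]@a
        loss = mse(y[rand_idx], y_hat)
        loss.backward()
        with torch.no_grad():
            a.sub_(lr * a.grad)       # update the weights after every minibatch, not every epoch
            a.grad.zero_()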
The architecture is the mathematical function whose parameters you're fitting (a bunch of matrix products). The architecture and the model are almost the same thing.
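For example (a hedged sketch, not from the lesson; the hidden width and names are invented here), a one-hidden-layer architecture is just two matrix products with a nonlinearity in between:
# Hypothetical one-hidden-layer architecture; the hidden width (10) is made up.
w1, w2 = torch.randn(2, 10), torch.randn(10, 1)
def simple_net(xb): return torch.relu(xb @ w1) @ w2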