First, we implement a gradient descent algorithm for simple linear regression as a reference, using only numpy.
In [3]:
%matplotlib inline
import matplotlib.pylab as plt
import time
from IPython import display
import numpy as np
#y = np.array([7.46, 6.77, 12.74, 7.11, 7.81, 8.84, 6.08, 5.39, 8.15, 6.42, 5.73])
y = np.array([8.04, 6.95, 7.58, 8.81, 8.33, 9.96, 7.24, 4.26, 10.84, 4.82, 5.68])
#y = np.array([9.14, 8.14, 8.74, 8.77, 9.26, 8.10, 6.13, 3.10, 9.13, 7.26, 4.74])
x = np.array([10., 8., 13., 9., 11., 14., 6., 4., 12., 7., 5.])
N = len(x)
# Design matrix
A = np.vstack((np.ones(N), x)).T
# Learning rate
eta = 0.01
# initial parameters
w = np.array([2., 1.])
for epoch in range(10):
    # Error
    err = y - A.dot(w)
    # Mean squared error
    E = np.sum(err**2)/N
    # Gradient
    dE = -2.*A.T.dot(err)/N
    if epoch % 1 == 0:
        print(epoch, ':', E)
        # print(w)
    # Perform one descent step
    w = w - eta*dE
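For this simple linear model the optimum is also available in closed form, so a rough sanity check (a minimal sketch reusing A, y and w from above; after enough epochs the two estimates should roughly agree) is to compare against the normal-equations solution:
In [ ]:
# Illustrative check: closed-form least-squares solution vs. gradient descent
w_star = np.linalg.solve(A.T.dot(A), A.T.dot(y))
print('closed form  :', w_star)
print('grad descent :', w)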
In [7]:
eta = 0.0005
w = np.array([2., 1.])
f = A.dot(w)
fig = plt.figure(figsize=(5,5))
ax = fig.gca()
ax.set_xlim((4,14))
ax.set_ylim((4,14))
ln = plt.Line2D(xdata=x, ydata=f, linestyle='-',linewidth=2)
ax.add_line(ln)
plt.plot(x,y,'bo', alpha=0.5, markersize=5)
for epoch in range(30):
    f = A.dot(w)
    err = y - f
    ln.set_xdata(x)
    ln.set_ydata(f)
    E = np.sum(err**2)/N
    dE = -2.*A.T.dot(err)/N
    if epoch % 1 == 0:
        print(epoch, ':', E)
        # print(w)
    w = w - eta*dE
    display.clear_output(wait=True)
    display.display(plt.gcf())
    time.sleep(0.1)
In [19]:
%matplotlib inline
import matplotlib.pylab as plt
import numpy as np
import torch
import torch.autograd
from torch.autograd import Variable
x = np.array([10., 8., 13., 9., 11., 14., 6., 4., 12., 7., 5.])
yy = np.array([8.04, 6.95, 7.58, 8.81, 8.33, 9.96, 7.24, 4.26, 10.84, 4.82, 5.68])
y = Variable(torch.DoubleTensor(yy))
# Set up the feature (Vandermonde) matrix
N = len(x)
degree = 3
xx = np.vstack([np.power(x, i) for i in range(degree+1)]).T
# Create the system matrix
A = Variable(torch.from_numpy(xx).double())
w = Variable(torch.randn(degree+1).double(), requires_grad=True)
# learning rate
eta = 0.00000005
for epoch in range(1000):
    ## Compute the forward pass
    f = torch.matmul(A, w)
    #print(f)
    E = torch.sum((y-f)**2)/N
    if epoch % 10000 == 0:
        print(epoch, ':', E.data[0])
    # Compute the gradients by automated differentiation
    E.backward()
    # For each adjustable parameter,
    # move along the negative gradient direction
    w.data.add_(-eta * w.grad.data)
    #print(w.grad.data)
    # Reset the gradients, as otherwise they are accumulated in w.grad
    w.grad.zero_()
print(epoch, ':', E.data[0])
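As a quick check (a minimal sketch reusing A, y, w and N from the cell above), the gradient computed by autograd can be compared against the hand-derived expression $-2A^\top(y-f)/N$ used in the numpy version:
In [ ]:
# Illustrative check: autograd gradient vs. the hand-derived formula
f = torch.matmul(A, w)
E = torch.sum((y - f)**2)/N
E.backward()
print(w.grad.data)
print((-2. * torch.matmul(A.t(), y - f) / N).data)
w.grad.zero_()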
In [20]:
plt.plot(x, y.data.numpy().squeeze(),'o')
x2 = np.arange(3,15,0.5)
xx = np.vstack([np.power(x2, i) for i in range(degree+1)]).T
AA = Variable(torch.from_numpy(xx).double())
f = torch.matmul(AA, w)
plt.plot(x2, f.data.numpy(),'r-')
plt.show()
In [5]:
import torch
import torch.autograd
from torch.autograd import Variable
## The rows correspond to examples and the columns to features.
## There is only one feature so the Tensors are actually just vectors
x = torch.FloatTensor([[10, 8, 13, 9, 11, 14, 6, 4, 12, 7, 5]]).transpose_(0,1)
#yy = [7.46, 6.77, 12.74, 7.11, 7.81, 8.84, 6.08, 5.39, 8.15, 6.42, 5.73]
yy = [8.04, 6.95, 7.58, 8.81, 8.33, 9.96, 7.24, 4.26, 10.84, 4.82, 5.68]
y = torch.FloatTensor([yy]).transpose_(0,1)
# This is a linear unit that implements the function f(x) = weight*x + bias
f = torch.nn.Linear(1, 1, bias=True)
# Set w_1
f.weight.data = torch.FloatTensor([[1.]])
# Set w_0
f.bias.data = torch.FloatTensor([[2.]])
# learning rate
eta = 0.01
# This is the error function E(f, y) = (1/N) \sum_{i=1}^N (f_i-y_i)^2
EuclidianLoss = torch.nn.MSELoss(size_average=True)
for epoch in range(10):
    ## Compute the forward pass
    E = EuclidianLoss(f(Variable(x)), Variable(y))
    if epoch % 1 == 0:
        print(epoch, ':', E.data[0])
        # print(f.bias.data.numpy())
        # print(f.weight.data.numpy())
    # Compute the gradients by automated differentiation
    E.backward()
    # For each adjustable parameter,
    # move along the negative gradient direction
    for param in f.parameters():
        param.data.add_(-eta * param.grad.data)
    # Reset the gradients, as otherwise they are accumulated in param.grad
    f.zero_grad()
#print('Weights')
#print(f.weight.data, f.bias.data)
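The explicit loop over f.parameters() can also be delegated to an optimizer object. A minimal sketch of the same descent using torch.optim.SGD, assuming the same f, x, y and eta as above and continuing from the current parameters:
In [ ]:
import torch.optim as optim
# The optimizer performs the gradient step and the gradient reset for us
opt = optim.SGD(f.parameters(), lr=eta)
for epoch in range(10):
    E = EuclidianLoss(f(Variable(x)), Variable(y))
    opt.zero_grad()      # reset accumulated gradients
    E.backward()         # compute gradients by automated differentiation
    opt.step()           # move along the negative gradient direction
    print(epoch, ':', E.data[0])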
In [6]:
import torch
import torch.autograd
from torch.autograd import Variable
## The rows correspond to examples and the columns to features.
## Here the features are powers of x, so each example has degree+1 features.
x = np.array([10., 8., 13., 9., 11., 14., 6., 4., 12., 7., 5.])
#yy = [7.46, 6.77, 12.74, 7.11, 7.81, 8.84, 6.08, 5.39, 8.15, 6.42, 5.73]
yy = [8.04, 6.95, 7.58, 8.81, 8.33, 9.96, 7.24, 4.26, 10.84, 4.82, 5.68]
y = torch.DoubleTensor([yy]).transpose_(0,1)
# Set up the feature (Vandermonde) matrix
N = len(x)
degree = 2
xx = np.vstack([np.power(x, i) for i in range(degree+1)]).T
A = torch.from_numpy(xx)
# This is a linear unit that implements f(x) = weight*x (bias=False; the constant term comes from the first column of the feature matrix)
f = torch.nn.Linear(degree+1, 1, bias=False).double()
# learning rate
eta = 0.00005
# This is the error function E(f, y) = (1/N) \sum_{i=1}^N (f_i-y_i)^2
EuclidianLoss = torch.nn.MSELoss(size_average=True)
for epoch in range(10000):
    ## Compute the forward pass
    E = EuclidianLoss(f(Variable(A)), Variable(y))
    if epoch % 1000 == 0:
        print(epoch, ':', E.data[0])
        # print(f.weight.data.numpy())
    # Compute the gradients by automated differentiation
    E.backward()
    # For each adjustable parameter,
    # move along the negative gradient direction
    for param in f.parameters():
        param.data.add_(-eta * param.grad.data)
    # Reset the gradients, as otherwise they are accumulated in param.grad
    f.zero_grad()
print(epoch,':',E.data[0])
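The learned coefficients can be compared against a closed-form polynomial fit; a minimal sketch using np.polyfit (the two should roughly agree once gradient descent has converged):
In [ ]:
# np.polyfit returns coefficients with the highest power first,
# so reverse them to match the [1, x, x^2, ...] column ordering of A
coeffs = np.polyfit(x, np.array(yy), deg=degree)[::-1]
print('polyfit      :', coeffs)
print('grad descent :', f.weight.data.numpy().squeeze())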
In [7]:
plt.plot(x, y.numpy(),'o')
x2 = np.arange(3,15,0.5)
xx = np.vstack([np.power(x2, i) for i in range(degree+1)]).T
A = torch.from_numpy(xx)
plt.plot(x2, f(Variable(A)).data.numpy(),'r-')
plt.show()
In [8]:
import torch
from torch.autograd import Variable
x = Variable(torch.ones(2, 2), requires_grad=True)
print(x)
y = x + 2
print(y)
z = y * y * 3
out = z.mean()
print(z, out)
In [210]:
out.backward()
print(x.grad)
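Checking this by hand: with $z_i = 3(x_i+2)^2$ and $\text{out} = \tfrac{1}{4}\sum_i z_i$, the chain rule gives $\partial\,\text{out}/\partial x_i = \tfrac{3}{2}(x_i+2)$, which is $4.5$ for every entry since $x_i = 1$; this should match the printed gradient.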
Calculating and plotting the derivative of a function using autodiff
In [22]:
%matplotlib inline
import matplotlib.pylab as plt
x = Variable(torch.arange(-5,5,0.2), requires_grad=True)
#print(x)
#y = torch.sum(torch.sigmoid(0.3*x))
y = torch.sum(x*torch.cos(x)**2)
#print(y)
#plt.plot(x.data.numpy(), y.data.numpy() )
#plt.show()
y.backward()
plt.plot(x.data.numpy(), x.grad.data.numpy() )
plt.show()
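For this particular function the derivative is also easy to obtain by hand, $\frac{d}{dx}\,x\cos^2 x = \cos^2 x - x\sin 2x$, so a minimal sketch (reusing x and its gradient from above, and numpy imported as np) can overlay the analytic curve on the autograd result:
In [ ]:
xn = x.data.numpy()
plt.plot(xn, x.grad.data.numpy(), label='autograd')
plt.plot(xn, np.cos(xn)**2 - xn*np.sin(2*xn), 'k--', label='analytic')
plt.legend()
plt.show()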
In [7]:
import torch
from torch.autograd import Variable
x_1 = Variable( torch.FloatTensor([0.5]) , requires_grad=True)
x_2 = Variable( torch.FloatTensor([3.5]) , requires_grad=True)
u1 = x_1 ** 2
u2 = 0.5 * u1
u3 = x_2 ** 2
u4 = -0.25 * u3
u5 = u2 + u4
u6 = u5 + 3
u7 = torch.sin(u6)
u8 = 2 * x_1
u9 = u8 + 1
u10 = torch.exp(x_2)
u11 = -1 * u10
u12 = u9 + u11
u13 = torch.cos(u12)
f = u7 * u13
print(f.data)
In [46]:
f.backward()
print(x_1.grad.data)
print(x_2.grad.data)
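These gradients can also be verified numerically. A minimal sketch using central finite differences on the same function $f(x_1, x_2) = \sin(0.5x_1^2 - 0.25x_2^2 + 3)\cos(2x_1 + 1 - e^{x_2})$, assuming numpy is imported as np:
In [ ]:
def fn(a, b):
    return np.sin(0.5*a**2 - 0.25*b**2 + 3) * np.cos(2*a + 1 - np.exp(b))

h = 1e-5
print((fn(0.5+h, 3.5) - fn(0.5-h, 3.5)) / (2*h))  # should approximate x_1.grad
print((fn(0.5, 3.5+h) - fn(0.5, 3.5-h)) / (2*h))  # should approximate x_2.grad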
In [49]:
u7.backward()
In [50]:
x_1.grad.data
Out[50]:
In [32]:
x_1 = Variable(torch.FloatTensor([-4]), requires_grad=True)
x_2 = Variable(torch.FloatTensor([3]), requires_grad=True)
f = x_1**2 + 2*x_2**2
print(f.data)
f.backward()
print(x_1.grad.data)
print(x_2.grad.data)
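By hand, $\nabla f = (2x_1,\ 4x_2)$, which evaluates to $(-8,\ 12)$ at $(x_1, x_2) = (-4,\ 3)$, so the printed gradients can be checked directly.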
In [ ]:
A = torch.DoubleTensor([[1,2,3],[4,5,6]])
T = torch.rand([3,5,2])
u = torch.DoubleTensor([[7],[8]])
w = torch.rand([5,3]).double()
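A few illustrative shape checks for these tensors (a minimal sketch; the expected sizes are noted in the comments):
In [ ]:
print(A.size())                        # torch.Size([2, 3])
print(torch.matmul(A, w.t()).size())   # (2,3) x (3,5) -> torch.Size([2, 5])
print(torch.matmul(u.t(), A).size())   # (1,2) x (2,3) -> torch.Size([1, 3])
print(T.size())                        # torch.Size([3, 5, 2])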
PyTorch has several loss functions.
It's good to know a few:
$$E(f, y) = \frac{1}{N} \sum_{i=1}^N (f_i - y_i)^2$$
EuclidianLoss = torch.nn.MSELoss(size_average=True)
CE_Loss = torch.nn.CrossEntropyLoss(reduce=False)
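As a minimal check (with hypothetical example tensors f_ex and y_ex), MSELoss reproduces the mean-squared-error formula above:
In [ ]:
f_ex = Variable(torch.FloatTensor([1., 2., 3.]))
y_ex = Variable(torch.FloatTensor([0., 2., 5.]))
# ((1-0)^2 + (2-2)^2 + (3-5)^2) / 3 = 5/3
print(torch.nn.MSELoss(size_average=True)(f_ex, y_ex).data[0])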
In [8]:
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import scipy as sc
import pandas as pd
import torch
from torch.autograd import Variable
In [13]:
# Generate sz[0] example score vectors with sz[1] entries each
# (one score per class, so there are sz[1] classes)
sz = (3,5)
th = np.random.randn(*sz)
# Generate random targets
c = np.random.choice(range(sz[1]),size=sz[0])
inp = Variable(torch.FloatTensor(th), requires_grad=True)
target = Variable(torch.LongTensor(c), requires_grad=False)
In [14]:
CE_Loss = torch.nn.CrossEntropyLoss(reduce=False)
E = CE_Loss(inp, target)
print(E)
#E.backward()
In [15]:
from functools import reduce
for i, j in enumerate(c):
    res = -th[i, j] + reduce(np.logaddexp, th[i, :])
    print(res)
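Per example $i$ with target class $c_i$, this computes the cross-entropy of the softmax scores, $E_i = -\theta_{i,c_i} + \log\sum_j e^{\theta_{i,j}}$, where reduce(np.logaddexp, ...) evaluates the log-sum-exp term; the printed values should match the entries of E from CrossEntropyLoss above.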