Outline
In [0]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error, log_loss
from tqdm import tqdm_notebook
from sklearn.preprocessing import OneHotEncoder
from sklearn.datasets import make_blobs
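Note: `tqdm_notebook` is deprecated in recent versions of tqdm. If the import above warns or fails, the alias below (available in newer tqdm releases) should be a drop-in replacement.
In [0]:
# On newer tqdm versions, use this alias instead of the deprecated tqdm_notebook:
# from tqdm.notebook import tqdm as tqdm_notebook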
In [0]:
class SigmoidNeuron:

    def __init__(self):
        self.w = None
        self.b = None

    def perceptron(self, x):
        return np.dot(x, self.w.T) + self.b

    def sigmoid(self, x):
        return 1.0/(1.0 + np.exp(-x))

    def grad_w_mse(self, x, y):
        y_pred = self.sigmoid(self.perceptron(x))
        return (y_pred - y) * y_pred * (1 - y_pred) * x

    def grad_b_mse(self, x, y):
        y_pred = self.sigmoid(self.perceptron(x))
        return (y_pred - y) * y_pred * (1 - y_pred)

    def grad_w_ce(self, x, y):
        y_pred = self.sigmoid(self.perceptron(x))
        if y == 0:
            return y_pred * x
        elif y == 1:
            return -1 * (1 - y_pred) * x
        else:
            raise ValueError("y should be 0 or 1")

    def grad_b_ce(self, x, y):
        y_pred = self.sigmoid(self.perceptron(x))
        if y == 0:
            return y_pred
        elif y == 1:
            return -1 * (1 - y_pred)
        else:
            raise ValueError("y should be 0 or 1")

    def fit(self, X, Y, epochs=1, learning_rate=1, initialise=True, loss_fn="mse", display_loss=False):
        # initialise w, b
        if initialise:
            self.w = np.random.randn(1, X.shape[1])
            self.b = 0

        if display_loss:
            loss = {}

        for i in tqdm_notebook(range(epochs), total=epochs, unit="epoch"):
            dw = 0
            db = 0
            for x, y in zip(X, Y):
                if loss_fn == "mse":
                    dw += self.grad_w_mse(x, y)
                    db += self.grad_b_mse(x, y)
                elif loss_fn == "ce":
                    dw += self.grad_w_ce(x, y)
                    db += self.grad_b_ce(x, y)

            m = X.shape[1]  # note: m is the number of features (2), not the batch size;
                            # it just rescales the learning rate by a constant
            self.w -= learning_rate * dw/m
            self.b -= learning_rate * db/m

            if display_loss:
                Y_pred = self.sigmoid(self.perceptron(X))
                if loss_fn == "mse":
                    loss[i] = mean_squared_error(Y, Y_pred)
                elif loss_fn == "ce":
                    loss[i] = log_loss(Y, Y_pred)

        if display_loss:
            plt.plot(list(loss.values()))
            plt.xlabel('Epochs')
            if loss_fn == "mse":
                plt.ylabel('Mean Squared Error')
            elif loss_fn == "ce":
                plt.ylabel('Log Loss')
            plt.show()

    def predict(self, X):
        Y_pred = []
        for x in X:
            y_pred = self.sigmoid(self.perceptron(x))
            Y_pred.append(y_pred)
        return np.array(Y_pred)
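As a quick sanity check of the analytic gradients above, one can compare `grad_w_mse` against a finite-difference approximation on a single point. A minimal sketch: the names `sn_check`, `x_c`, `y_c` and the step `eps` are made up for illustration, and the loss used here is ½·(ŷ−y)², which is the convention `grad_w_mse` actually differentiates.
In [0]:
# finite-difference check of grad_w_mse on one sample
sn_check = SigmoidNeuron()
sn_check.w = np.random.randn(1, 2)
sn_check.b = 0.1
x_c, y_c = np.array([0.5, -1.0]), 1
eps = 1e-6
analytic = sn_check.grad_w_mse(x_c, y_c).ravel()
numeric = np.zeros(2)
for j in range(2):
    w0 = sn_check.w[0, j]
    sn_check.w[0, j] = w0 + eps
    lp = 0.5 * (sn_check.sigmoid(sn_check.perceptron(x_c)) - y_c) ** 2
    sn_check.w[0, j] = w0 - eps
    lm = 0.5 * (sn_check.sigmoid(sn_check.perceptron(x_c)) - y_c) ** 2
    sn_check.w[0, j] = w0
    numeric[j] = (lp - lm).item() / (2 * eps)
print(analytic)
print(numeric)  # should agree with the analytic gradient to several decimals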
In [0]:
my_cmap = matplotlib.colors.LinearSegmentedColormap.from_list("", ["red","yellow","green"])
In [0]:
np.random.seed(0)
In [0]:
data, labels = make_blobs(n_samples=1000, centers=4, n_features=2, random_state=0)
print(data.shape, labels.shape)
In [0]:
plt.scatter(data[:,0], data[:,1], c=labels, cmap=my_cmap)
plt.show()
In [0]:
labels_orig = labels
labels = np.mod(labels_orig, 2)
In [0]:
plt.scatter(data[:,0], data[:,1], c=labels, cmap=my_cmap)
plt.show()
In [0]:
X_train, X_val, Y_train, Y_val = train_test_split(data, labels, stratify=labels, random_state=0)
print(X_train.shape, X_val.shape)
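Because `stratify=labels` was passed, the class proportions should be (nearly) identical in the two splits; a quick check:
In [0]:
# verify that stratification preserved the class balance
print(np.bincount(Y_train) / len(Y_train))
print(np.bincount(Y_val) / len(Y_val))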
In [0]:
sn = SigmoidNeuron()
sn.fit(X_train, Y_train, epochs=1000, learning_rate=0.5, display_loss=True)
In [0]:
Y_pred_train = sn.predict(X_train)
Y_pred_binarised_train = (Y_pred_train >= 0.5).astype("int").ravel()
Y_pred_val = sn.predict(X_val)
Y_pred_binarised_val = (Y_pred_val >= 0.5).astype("int").ravel()
accuracy_train = accuracy_score(Y_pred_binarised_train, Y_train)
accuracy_val = accuracy_score(Y_pred_binarised_val, Y_val)
print("Training accuracy", round(accuracy_train, 2))
print("Validation accuracy", round(accuracy_val, 2))
In [0]:
plt.scatter(X_train[:,0], X_train[:,1], c=Y_pred_binarised_train, cmap=my_cmap, s=15*(np.abs(Y_pred_binarised_train-Y_train)+.2))
plt.show()
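A single sigmoid neuron can only produce a linear decision boundary, which is why its accuracy plateaus on this non-linearly-separable labelling. A contour plot over a mesh grid makes this visible (a sketch; the grid step 0.1 and the ±1 padding are arbitrary choices):
In [0]:
# plot the (linear) decision boundary of the trained sigmoid neuron
xx, yy = np.meshgrid(np.arange(X_train[:, 0].min() - 1, X_train[:, 0].max() + 1, 0.1),
                     np.arange(X_train[:, 1].min() - 1, X_train[:, 1].max() + 1, 0.1))
zz = sn.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
plt.contourf(xx, yy, zz, cmap=my_cmap, alpha=0.3)
plt.scatter(X_train[:, 0], X_train[:, 1], c=Y_train, cmap=my_cmap, s=15)
plt.show()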
In [0]:
class FirstFFNetwork:

    def __init__(self):
        # 2 inputs -> hidden layer of 2 sigmoid units -> 1 sigmoid output
        self.w1 = np.ones((2, 2))
        self.w2 = np.ones((2, 1))
        self.b1 = np.zeros((1, 2))
        self.b2 = 0

    def sigmoid(self, x):
        return 1.0/(1.0 + np.exp(-x))

    def grad_sigmoid(self, x):
        # derivative of the sigmoid, written in terms of its output
        return x*(1-x)

    def forward_pass(self, x):
        # scalar version of the same network, for reference:
        #   a1 = w1*x1 + w2*x2 + b1;  h1 = sigmoid(a1)
        #   a2 = w3*x1 + w4*x2 + b2;  h2 = sigmoid(a2)
        #   a3 = w5*h1 + w6*h2 + b3;  h3 = sigmoid(a3)
        # here W1 = [[w1, w3], [w2, w4]] and W2 = [w5, w6]^T
        self.a1 = np.matmul(x, self.w1) + self.b1        # (n,2) x (2,2) -> (n,2)
        self.h1 = self.sigmoid(self.a1)
        self.a2 = np.matmul(self.h1, self.w2) + self.b2  # (n,2) x (2,1) -> (n,1); layer 2 takes h1, not x
        self.h2 = self.sigmoid(self.a2)
        return self.h2

    def grad(self, x, y):
        self.forward_pass(x)
        y = y.reshape(-1, 1)
        # output layer
        dA2 = (self.h2 - y) * self.grad_sigmoid(self.h2)       # (n,1)
        self.dW2 = dA2 * self.h1                               # (n,2): per-sample gradients for w2
        self.dB2 = dA2                                         # (n,1)
        # hidden layer: chain back through w2
        dA1 = dA2 * self.w2.T * self.grad_sigmoid(self.h1)     # (n,2)
        self.dW1 = np.hstack((dA1 * x[:, 0:1],
                              dA1 * x[:, 1:2]))                # (n,4): per-sample gradients for w1, flattened
        self.dB1 = dA1                                         # (n,2)
        # fully vectorised alternative (used by the generic network below):
        #   dA2 = H2 - Y;  dW2 = H1.T @ dA2;  dB2 = dA2.sum(axis=0)
        #   dA1 = (dA2 @ W2.T) * grad_sigmoid(H1)
        #   dW1 = X.T @ dA1;  dB1 = dA1.sum(axis=0)

    def fitFF(self, X, Y):
        # debugging helper: run a single forward pass and return the output shape
        y_pred = self.forward_pass(X)
        return y_pred.shape

    def fit(self, X, Y, epochs=1, learning_rate=1, initialise=True, display_loss=False):
        # initialise w, b
        if initialise:
            self.w1 = np.ones((2, 2))
            self.w2 = np.ones((2, 1))
            self.b1 = np.zeros((1, 2))
            self.b2 = 0

        if display_loss:
            loss = {}

        for i in tqdm_notebook(range(epochs), total=epochs, unit="epoch"):
            self.grad(X, Y)
            # average the per-sample gradients over the batch;
            # dW1 is (n,4), so the column means are unflattened back into a (2,2) matrix
            avg = np.mean(self.dW1, axis=0)
            dw1 = np.array([[avg[0], avg[1]], [avg[2], avg[3]]])
            dw2 = np.mean(self.dW2, axis=0).reshape(2, 1)
            db1 = np.mean(self.dB1, axis=0).reshape(1, 2)
            db2 = np.mean(self.dB2)

            self.w1 -= learning_rate * dw1
            self.w2 -= learning_rate * dw2
            self.b1 -= learning_rate * db1
            self.b2 -= learning_rate * db2

            if display_loss:
                Y_pred = self.predict(X)
                loss[i] = mean_squared_error(Y_pred, Y)

        if display_loss:
            plt.plot(list(loss.values()))
            plt.xlabel('Epochs')
            plt.ylabel('Mean Squared Error')
            plt.show()

        print(self.w1)
        print(self.w2)
        print(self.b1)
        print(self.b2)

    def predict(self, X):
        Y_pred = self.forward_pass(X)
        return np.array(Y_pred)
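A quick shape check of the forward pass on a small batch confirms the matrix dimensions annotated in the comments above (a sketch; `ffn_dbg` is a throwaway instance):
In [0]:
# sanity-check the shapes flowing through FirstFFNetwork
ffn_dbg = FirstFFNetwork()
out = ffn_dbg.forward_pass(X_train[:5])
print(ffn_dbg.a1.shape, ffn_dbg.h1.shape, ffn_dbg.a2.shape, out.shape)
# expected: (5, 2) (5, 2) (5, 1) (5, 1)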
In [0]:
ffn = FirstFFNetwork()
ffn.fit(X_train, Y_train, epochs=2000, learning_rate=0.01, display_loss=True)
In [0]:
Y_pred_train = ffn.predict(X_train)
Y_pred_binarised_train = (Y_pred_train >= 0.5).astype("int").ravel()
Y_pred_val = ffn.predict(X_val)
Y_pred_binarised_val = (Y_pred_val >= 0.5).astype("int").ravel()
accuracy_train = accuracy_score(Y_pred_binarised_train, Y_train)
accuracy_val = accuracy_score(Y_pred_binarised_val, Y_val)
print("Training accuracy", round(accuracy_train, 2))
print("Validation accuracy", round(accuracy_val, 2))
In [0]:
plt.scatter(X_train[:,0], X_train[:,1], c=Y_pred_binarised_train, cmap=my_cmap, s=15*(np.abs(Y_pred_binarised_train-Y_train)+.2))
plt.show()
In [0]:
class FFSNNetwork:

    def __init__(self, n_inputs, hidden_sizes=[2]):
        self.nx = n_inputs
        self.ny = 1
        self.nh = len(hidden_sizes)
        self.sizes = [self.nx] + hidden_sizes + [self.ny]

        self.W = {}
        self.B = {}
        for i in range(self.nh+1):
            self.W[i+1] = np.random.randn(self.sizes[i], self.sizes[i+1])
            self.B[i+1] = np.zeros((1, self.sizes[i+1]))

    def sigmoid(self, x):
        return 1.0/(1.0 + np.exp(-x))

    def forward_pass(self, x):
        self.A = {}
        self.H = {}
        self.H[0] = x.reshape(1, -1)
        for i in range(self.nh+1):
            self.A[i+1] = np.matmul(self.H[i], self.W[i+1]) + self.B[i+1]
            self.H[i+1] = self.sigmoid(self.A[i+1])
        return self.H[self.nh+1]

    def grad_sigmoid(self, x):
        return x*(1-x)

    def grad(self, x, y):
        self.forward_pass(x)
        self.dW = {}
        self.dB = {}
        self.dH = {}
        self.dA = {}
        L = self.nh + 1
        self.dA[L] = (self.H[L] - y)
        for k in range(L, 0, -1):
            self.dW[k] = np.matmul(self.H[k-1].T, self.dA[k])
            self.dB[k] = self.dA[k]
            self.dH[k-1] = np.matmul(self.dA[k], self.W[k].T)
            self.dA[k-1] = np.multiply(self.dH[k-1], self.grad_sigmoid(self.H[k-1]))

    def fit(self, X, Y, epochs=1, learning_rate=1, initialise=True, display_loss=False):
        # initialise w, b
        if initialise:
            for i in range(self.nh+1):
                self.W[i+1] = np.random.randn(self.sizes[i], self.sizes[i+1])
                self.B[i+1] = np.zeros((1, self.sizes[i+1]))

        if display_loss:
            loss = {}

        for e in tqdm_notebook(range(epochs), total=epochs, unit="epoch"):
            dW = {}
            dB = {}
            for i in range(self.nh+1):
                dW[i+1] = np.zeros((self.sizes[i], self.sizes[i+1]))
                dB[i+1] = np.zeros((1, self.sizes[i+1]))
            for x, y in zip(X, Y):
                self.grad(x, y)
                for i in range(self.nh+1):
                    dW[i+1] += self.dW[i+1]
                    dB[i+1] += self.dB[i+1]

            m = X.shape[1]  # note: m is the number of features, not the batch size;
                            # it just rescales the learning rate by a constant
            for i in range(self.nh+1):
                self.W[i+1] -= learning_rate * dW[i+1] / m
                self.B[i+1] -= learning_rate * dB[i+1] / m

            if display_loss:
                Y_pred = self.predict(X)
                loss[e] = mean_squared_error(Y_pred, Y)

        if display_loss:
            plt.plot(list(loss.values()))
            plt.xlabel('Epochs')
            plt.ylabel('Mean Squared Error')
            plt.show()

    def predict(self, X):
        Y_pred = []
        for x in X:
            y_pred = self.forward_pass(x)
            Y_pred.append(y_pred)
        return np.array(Y_pred).squeeze()
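The `sizes` list fully determines the weight and bias shapes. For instance, a network with two inputs and hidden layers of 2 and 3 units implies `sizes = [2, 2, 3, 1]` (a sketch; `net_dbg` is a throwaway instance):
In [0]:
# inspect the parameter shapes implied by sizes = [2, 2, 3, 1]
net_dbg = FFSNNetwork(2, [2, 3])
for k in sorted(net_dbg.W):
    print("layer", k, "W:", net_dbg.W[k].shape, "B:", net_dbg.B[k].shape)
# expected: (2, 2)/(1, 2), (2, 3)/(1, 3), (3, 1)/(1, 1)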
In [0]:
ffsnn = FFSNNetwork(2, [2, 3])
ffsnn.fit(X_train, Y_train, epochs=1000, learning_rate=.001, display_loss=True)
In [0]:
Y_pred_train = ffsnn.predict(X_train)
Y_pred_binarised_train = (Y_pred_train >= 0.5).astype("int").ravel()
Y_pred_val = ffsnn.predict(X_val)
Y_pred_binarised_val = (Y_pred_val >= 0.5).astype("int").ravel()
accuracy_train = accuracy_score(Y_pred_binarised_train, Y_train)
accuracy_val = accuracy_score(Y_pred_binarised_val, Y_val)
print("Training accuracy", round(accuracy_train, 2))
print("Validation accuracy", round(accuracy_val, 2))
In [0]:
plt.scatter(X_train[:,0], X_train[:,1], c=Y_pred_binarised_train, cmap=my_cmap, s=15*(np.abs(Y_pred_binarised_train-Y_train)+.2))
plt.show()
In [0]:
class FFSN_MultiClass:

    def __init__(self, n_inputs, n_outputs, hidden_sizes=[3]):
        self.nx = n_inputs
        self.ny = n_outputs
        self.nh = len(hidden_sizes)
        self.sizes = [self.nx] + hidden_sizes + [self.ny]

        self.W = {}
        self.B = {}
        for i in range(self.nh+1):
            self.W[i+1] = np.random.randn(self.sizes[i], self.sizes[i+1])
            self.B[i+1] = np.zeros((1, self.sizes[i+1]))

    def sigmoid(self, x):
        return 1.0/(1.0 + np.exp(-x))

    def softmax(self, x):
        exps = np.exp(x - np.max(x))  # subtract the max for numerical stability
        return exps / np.sum(exps)

    def forward_pass(self, x):
        self.A = {}
        self.H = {}
        self.H[0] = x.reshape(1, -1)
        for i in range(self.nh):
            self.A[i+1] = np.matmul(self.H[i], self.W[i+1]) + self.B[i+1]
            self.H[i+1] = self.sigmoid(self.A[i+1])
        self.A[self.nh+1] = np.matmul(self.H[self.nh], self.W[self.nh+1]) + self.B[self.nh+1]
        self.H[self.nh+1] = self.softmax(self.A[self.nh+1])
        return self.H[self.nh+1]

    def predict(self, X):
        Y_pred = []
        for x in X:
            y_pred = self.forward_pass(x)
            Y_pred.append(y_pred)
        return np.array(Y_pred).squeeze()

    def grad_sigmoid(self, x):
        return x*(1-x)

    def cross_entropy(self, label, pred):
        # pick out the predicted probability of the true class for each sample
        yl = np.multiply(pred, label)
        yl = yl[yl != 0]
        yl = -np.log(yl)
        return np.mean(yl)

    def grad(self, x, y):
        self.forward_pass(x)
        self.dW = {}
        self.dB = {}
        self.dH = {}
        self.dA = {}
        L = self.nh + 1
        # for softmax + cross-entropy the output-layer error is simply (prediction - one-hot label)
        self.dA[L] = (self.H[L] - y)
        for k in range(L, 0, -1):
            self.dW[k] = np.matmul(self.H[k-1].T, self.dA[k])
            self.dB[k] = self.dA[k]
            self.dH[k-1] = np.matmul(self.dA[k], self.W[k].T)
            self.dA[k-1] = np.multiply(self.dH[k-1], self.grad_sigmoid(self.H[k-1]))

    def fit(self, X, Y, epochs=100, initialize=True, learning_rate=0.01, display_loss=False):
        if display_loss:
            loss = {}

        if initialize:
            for i in range(self.nh+1):
                self.W[i+1] = np.random.randn(self.sizes[i], self.sizes[i+1])
                self.B[i+1] = np.zeros((1, self.sizes[i+1]))

        for epoch in tqdm_notebook(range(epochs), total=epochs, unit="epoch"):
            dW = {}
            dB = {}
            for i in range(self.nh+1):
                dW[i+1] = np.zeros((self.sizes[i], self.sizes[i+1]))
                dB[i+1] = np.zeros((1, self.sizes[i+1]))
            for x, y in zip(X, Y):
                self.grad(x, y)
                for i in range(self.nh+1):
                    dW[i+1] += self.dW[i+1]
                    dB[i+1] += self.dB[i+1]

            m = X.shape[1]  # same feature-count scaling as above, folded into the learning rate
            for i in range(self.nh+1):
                self.W[i+1] -= learning_rate * (dW[i+1]/m)
                self.B[i+1] -= learning_rate * (dB[i+1]/m)

            if display_loss:
                Y_pred = self.predict(X)
                loss[epoch] = self.cross_entropy(Y, Y_pred)

        if display_loss:
            plt.plot(list(loss.values()))
            plt.xlabel('Epochs')
            plt.ylabel('Cross Entropy')
            plt.show()
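Two quick properties worth verifying: each softmax output sums to 1, and `cross_entropy` agrees with the usual −log(p_true) averaged over samples. A sketch with toy values (`mc_dbg` and the probabilities below are made up for illustration):
In [0]:
# softmax output sums to 1; cross_entropy matches -log(p_true) averaged over samples
mc_dbg = FFSN_MultiClass(2, 4, [2, 3])
print(mc_dbg.forward_pass(np.array([0.5, -0.5])).sum())  # should be ~1.0
label = np.array([[0, 1, 0], [1, 0, 0]])
pred = np.array([[0.2, 0.7, 0.1], [0.5, 0.3, 0.2]])
print(mc_dbg.cross_entropy(label, pred), np.mean([-np.log(0.7), -np.log(0.5)]))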
In [0]:
X_train, X_val, Y_train, Y_val = train_test_split(data, labels_orig, stratify=labels_orig, random_state=0)
print(X_train.shape, X_val.shape, labels_orig.shape)
In [0]:
enc = OneHotEncoder()
# 0 -> (1, 0, 0, 0), 1 -> (0, 1, 0, 0), 2 -> (0, 0, 1, 0), 3 -> (0, 0, 0, 1)
y_OH_train = enc.fit_transform(np.expand_dims(Y_train,1)).toarray()
y_OH_val = enc.transform(np.expand_dims(Y_val,1)).toarray()  # reuse the encoder fitted on the training labels
print(y_OH_train.shape, y_OH_val.shape)
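The encoding can be inverted with `np.argmax`, which is exactly how predictions are mapped back to class indices further below; a quick round-trip check:
In [0]:
# one-hot round-trip: argmax recovers the original class indices
print(Y_train[:5])
print(y_OH_train[:5])
print(np.argmax(y_OH_train[:5], axis=1))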
In [0]:
ffsn_multi = FFSN_MultiClass(2,4,[2,3])
ffsn_multi.fit(X_train,y_OH_train,epochs=2000,learning_rate=.005,display_loss=True)
In [0]:
Y_pred_train = ffsn_multi.predict(X_train)
Y_pred_train = np.argmax(Y_pred_train,1)
Y_pred_val = ffsn_multi.predict(X_val)
Y_pred_val = np.argmax(Y_pred_val,1)
accuracy_train = accuracy_score(Y_pred_train, Y_train)
accuracy_val = accuracy_score(Y_pred_val, Y_val)
print("Training accuracy", round(accuracy_train, 2))
print("Validation accuracy", round(accuracy_val, 2))
In [0]:
plt.scatter(X_train[:,0], X_train[:,1], c=Y_pred_train, cmap=my_cmap, s=15*(np.abs(np.sign(Y_pred_train-Y_train))+.1))
plt.show()
In [0]:
from sklearn.datasets import make_moons, make_circles
In [0]:
data, labels = make_moons(n_samples=1000, random_state=0, noise=0.15)
print(data.shape, labels.shape)
In [0]:
plt.scatter(data[:,0], data[:,1], c=labels, cmap=my_cmap)
plt.show()
In [0]:
data, labels = make_circles(n_samples=1000, random_state=0, noise=0.2, factor=0.3)
print(data.shape, labels.shape)
In [0]:
plt.scatter(data[:,0], data[:,1], c=labels, cmap=my_cmap)
plt.show()
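Neither of these datasets is linearly separable, so the same FFSNNetwork class can be reused on them. A minimal sketch for the circles data; the hidden size and hyperparameters are untuned guesses borrowed from the blob experiment above, and the `_c` names are made up for illustration:
In [0]:
# train the existing binary network on the circles data (untuned hyperparameters)
X_train_c, X_val_c, Y_train_c, Y_val_c = train_test_split(data, labels, stratify=labels, random_state=0)
ffsnn_c = FFSNNetwork(2, [4])
ffsnn_c.fit(X_train_c, Y_train_c, epochs=1000, learning_rate=0.001, display_loss=True)
print("Validation accuracy", accuracy_score((ffsnn_c.predict(X_val_c) >= 0.5).astype(int), Y_val_c))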