In [2]:
from __future__ import division
import numpy as np
import itertools
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from tabulate import tabulate
In [3]:
# Symbolic partial differentiation -- examples
from sympy import symbols, diff
x, y, z = symbols('x y z', real=True)
f = 4*x*y + x**3 + z**8*y
diff(f, x)
Out[3]:
3*x**2 + 4*y
In [4]:
theta0, theta1, theta2, theta3, theta4, x0, x1 = symbols('theta0, theta1, theta2, theta3, theta4, x0, x1', real=True)
g = theta0*x0 + theta1*x1 + theta2*x1**2 + theta3*x1**3 + theta4*x1**(2.5)
diff(g, theta4)
Out[4]:
x1**2.5
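As a quick check (not part of the original cells), the symbolic derivative can be evaluated numerically with sympy.lambdify; the input value below is illustrative.

from sympy import lambdify

# d g / d theta4 = x1**2.5; turn it into a plain numeric function of x1
dg_dtheta4 = diff(g, theta4)
dg_fn = lambdify(x1, dg_dtheta4, 'numpy')
print(dg_fn(2.0))   # 2.0**2.5 ≈ 5.657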
In [5]:
# Take a data set and split it into training, cross-validation, and test data sets
def splitDataSet(data, trainRatio, valRatio):
    # Randomly shuffle the data, then split it into training (trainRatio of all data),
    # cross-validation (valRatio), and test (the remainder) sets.
    # NOTE: sklearn's train_test_split does something similar, but not quite the same.
    dataArr = np.array(data)
    dataDim = dataArr.shape
    np.random.shuffle(dataArr)
    # Work out how many rows go into each split
    num_train_rows = int(dataDim[0] * trainRatio)
    num_val_rows = int(dataDim[0] * valRatio)
    num_test_rows = dataDim[0] - (num_train_rows + num_val_rows)
    # Training data
    X_train = dataArr[0:num_train_rows, 0].reshape((num_train_rows, 1))
    y_train = dataArr[0:num_train_rows, 1].reshape((num_train_rows, 1))
    # Cross-validation data
    X_val = dataArr[num_train_rows:num_train_rows + num_val_rows, 0].reshape((num_val_rows, 1))
    y_val = dataArr[num_train_rows:num_train_rows + num_val_rows, 1].reshape((num_val_rows, 1))
    # Test data: the rows left over after the training and validation rows
    X_test = dataArr[num_train_rows + num_val_rows:, 0].reshape((num_test_rows, 1))
    y_test = dataArr[num_train_rows + num_val_rows:, 1].reshape((num_test_rows, 1))
    return [X_train, y_train, X_val, y_val, X_test, y_test]
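A minimal usage sketch for splitDataSet (the synthetic two-column data is illustrative, not from the original notebook):

# 100 (x, y) pairs with a noisy linear relationship
demo_data = np.column_stack((np.arange(100), 3 * np.arange(100) + np.random.randn(100)))
X_train, y_train, X_val, y_val, X_test, y_test = splitDataSet(demo_data, 0.6, 0.2)
print(X_train.shape, X_val.shape, X_test.shape)   # (60, 1) (20, 1) (20, 1)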
In [6]:
def computeCost(X, y, theta):
    # Cost of being wrong, calculated over the entire data set.
    # Assumes that X has a first column of 1s added to it to simplify the notation.
    # Therefore X is an m x n matrix (np.matrix) and theta is an n x 1 matrix.
    # costPerOutput is an m x 1 matrix where each element is the squared error for
    # one input/output pair given this value of theta.
    costPerOutput = np.power(((X * theta) - y), 2)
    # totalCost is the cost over the entire dataset
    totalCost = np.sum(costPerOutput)
    # The cost of getting it wrong is 1/(2m) of the totalCost (normalized cost)
    cost = totalCost / (2 * len(X))
    return cost
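For reference, this is the usual squared-error cost J(theta) = 1/(2m) * sum((X*theta - y)^2). A small hand-checkable example (the numbers are illustrative):

# Two samples, one feature plus the bias column of 1s
X_cost = np.matrix([[1.0, 1.0],
                    [1.0, 2.0]])
y_cost = np.matrix([[2.0], [4.0]])
theta_cost = np.matrix([[0.0], [1.0]])            # predictions: [1, 2]
print(computeCost(X_cost, y_cost, theta_cost))    # ((1-2)**2 + (2-4)**2) / (2*2) = 1.25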
In [7]:
# Implement Gradient Descent
def gradientDescent(X, y, theta, alpha, iters):
    # NOTE: X must already have a column of 1s added to it
    # X is an m x n matrix, y is an m x 1 matrix, theta is an n x 1 matrix
    # Keep track of the per-parameter error sums and the cost at every iteration
    sumError = np.zeros(shape=(len(theta), 1))
    sumErrorNorm = np.zeros(shape=(len(theta), 1))
    temp = np.matrix(np.zeros(theta.shape))
    cost = np.zeros(iters)
    for i in range(iters):
        # Error vector for the current theta
        error = (X * theta) - y
        for j in range(len(theta)):
            # Multiply the error vector by the appropriate column of the X matrix and sum it
            sumError[j] = np.sum(np.multiply(error, X[:, j]))
            # Normalize the sumError using alpha and m
            sumErrorNorm[j] = np.divide(np.multiply(sumError[j], alpha), len(X))
            temp[j, 0] = theta[j, 0] - sumErrorNorm[j]
        theta = temp
        cost[i] = computeCost(X, y, theta)
    return theta, cost
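A short end-to-end sketch of how computeCost and gradientDescent fit together; the data, learning rate, and iteration count below are illustrative.

# Fit y ≈ 2 + 3x on synthetic data
m = 50
x_raw = np.linspace(0, 1, m)
y_raw = 2 + 3 * x_raw + 0.1 * np.random.randn(m)
X_gd = np.matrix(np.column_stack((np.ones(m), x_raw)))   # prepend the column of 1s
y_gd = np.matrix(y_raw.reshape((m, 1)))
theta_start = np.matrix(np.zeros((2, 1)))
theta_fit, cost_hist = gradientDescent(X_gd, y_gd, theta_start, alpha=0.5, iters=500)
print(theta_fit.T)              # should approach [[2, 3]]
plt.plot(cost_hist)             # cost should decrease steadily
plt.xlabel('Iteration')
plt.ylabel('Cost')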
In [8]:
from matplotlib.colors import ListedColormap
In [9]:
# Visualize the decision boundary
# From Sebastian Raschka
def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
    # Set up marker generator and color map
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])
    # Plot the decision surface on a grid covering the data with a small margin
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    fig, ax = plt.subplots(figsize=(8, 5))
    plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())
    plt.xlabel('Exam 1 Score')
    plt.ylabel('Exam 2 Score')
    plt.title('Classifying the Data')
    # Plot all samples, one marker/color per class
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1], alpha=0.8,
                    c=cmap(idx), marker=markers[idx], label=cl)
    return plt
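A hedged usage sketch pairing plot_decision_regions with a scikit-learn classifier; the LogisticRegression import and the synthetic two-feature data are assumptions, not part of the original cells.

from sklearn.linear_model import LogisticRegression   # assumed to be available

# Illustrative two-feature data standing in for the exam-score dataset
X_cls = np.random.uniform(30, 100, size=(100, 2))
y_cls = (X_cls[:, 0] + X_cls[:, 1] > 130).astype(int)
clf = LogisticRegression().fit(X_cls, y_cls)
# Coarser grid resolution since the scores span roughly 30-100
plot_decision_regions(X_cls, y_cls, classifier=clf, resolution=0.25).show()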
In [10]:
# Define the sigmoid function or transformation
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
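A quick sanity check (illustrative): sigmoid maps 0 to 0.5 and applies elementwise to arrays.

print(sigmoid(0))                        # 0.5
print(sigmoid(np.array([-10, 0, 10])))   # ~[4.5e-05, 0.5, 0.99995]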