In [705]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# import data
data = pd.read_csv('turnstile_data_master_with_weather.csv', nrows=1000)
In [715]:
# initial data vectors
x = data[['Hour', 'maxpressurei', 'maxdewpti', 'mindewpti', 'meandewpti', 'meanpressurei', 'mintempi', 'maxtempi']]
y = data['ENTRIESn_hourly'].values
# training sample size
m = len(y)
# proper vector shape
y.shape = (m,1)
# Add UNIT to features using dummy variables
dummy_units = pd.get_dummies(data['UNIT'], prefix='unit')
x = x.join(dummy_units)
# cast to a plain float matrix (get_dummies returns booleans in newer pandas)
x = x.values.astype(float)
# number of features
n = len(x[0,:])
print(n)
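As a quick aside on the dummy-variable step, here is a minimal, self-contained sketch (toy UNIT values, not the turnstile data) of what pd.get_dummies produces:
In [ ]:
# toy illustration of one-hot (dummy) encoding: each distinct value
# becomes its own indicator column (0/1, or True/False in newer pandas)
units = pd.Series(['R001', 'R002', 'R001', 'R003'])
print(pd.get_dummies(units, prefix='unit'))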
In [707]:
# Feature Scaling
# mean normalization, vectorized over all columns (left disabled)
#xbar = np.mean(x, axis=0)
#s_x = np.std(x, axis=0)
#x = (x - xbar) / s_x
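If the normalization were enabled, a standalone vectorized version would look like the sketch below (toy matrix; the zero-variance guard matters because constant dummy columns can appear in a small sample):
In [ ]:
# standalone sketch of vectorized mean normalization on a toy matrix
toy = np.array([[1.0, 200.0],
                [2.0, 400.0],
                [3.0, 600.0]])
xbar = toy.mean(axis=0)
s = toy.std(axis=0)
s[s == 0] = 1.0            # avoid division by zero for constant columns
print((toy - xbar) / s)    # each column now has mean 0 and unit std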
In [708]:
# design matrix
X = np.hstack((np.ones((m,1)),x))
# theta parameters
theta = np.zeros(((n+1),1))
#gradient descent, number of iterations
iterations = 1000
# learning rates to try
alpha = [-0.3, -0.1, -0.03, -0.01, -0.003, -0.001, 0, 0.0001, 0.0003, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3]
a_index = 7
In [709]:
# cost function: vectorized implementation
def J():
    return (1.0/(2.0*m)) * ((X.dot(theta) - y).T).dot(X.dot(theta) - y)
# delta-vector function for the partial derivatives of J
def deltas():
    # vectorized form of the per-feature sums: (1/m) * X^T (X.theta - y)
    return ((1.0/m) * (X.dot(theta) - y).T.dot(X)).T
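In equation form, the two functions above implement the least-squares cost and its gradient; deltas() returns the transpose of the row vector $\frac{1}{m}(X\theta - y)^{\top}X$, which equals the column vector $\frac{1}{m}X^{\top}(X\theta - y)$:

$$J(\theta) = \frac{1}{2m}(X\theta - y)^{\top}(X\theta - y), \qquad \nabla_{\theta}J = \frac{1}{m}X^{\top}(X\theta - y)$$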
In [710]:
# store cost function values for plotting
J_values = np.zeros((iterations,1))
# gradient descent
for iteration in range(iterations):
    theta = theta - (alpha[a_index] * deltas())
    J_values[iteration] = J()
# visualize the cost function (2-D)
# print every 10th cost value to confirm J is decreasing
print(J_values[0::10])
cost_x = np.arange(iterations)
cost_x.shape = (iterations,1)
plt.plot(cost_x,J_values)
plt.title("Learning Rate: " + str(alpha[a_index]))
plt.xlabel('iterations')
plt.ylabel(r"$J(\theta)$")
plt.show()
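The analytic gradient in deltas() can be sanity-checked against a centered finite-difference approximation of J (a standard check; eps here is an arbitrary choice):
In [ ]:
# numerical gradient check at the current theta
eps = 1e-4
analytic = deltas().ravel()
numeric = np.zeros(n + 1)
for j in range(n + 1):
    theta[j] += eps
    J_plus = J()[0, 0]
    theta[j] -= 2 * eps
    J_minus = J()[0, 0]
    theta[j] += eps                      # restore theta
    numeric[j] = (J_plus - J_minus) / (2 * eps)
print(np.max(np.abs(analytic - numeric)))   # should be close to zero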
In [711]:
# calculate r^2 (after gradient descent)
r_squared = 1 - np.sum((np.square(y - np.dot(X, theta))))/np.sum(np.square(y - np.mean(y)))
print(r_squared)
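If scikit-learn is available (an assumption; it is not imported above), the hand-computed value can be cross-checked:
In [ ]:
# cross-check r^2 with scikit-learn (assumes sklearn is installed)
from sklearn.metrics import r2_score
print(r2_score(y, X.dot(theta)))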
In [712]:
# Normal Equation (an alternative to gradient descent)
normal_eq_theta = (np.linalg.pinv(X.T.dot(X)).dot(X.T)).dot(y)
#print(normal_eq_theta)
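The closed-form solution computed here is the normal equation; pinv gives the pseudoinverse, so the computation stays well-defined even if $X^{\top}X$ is singular (e.g., from redundant dummy columns):

$$\theta = (X^{\top}X)^{-1}X^{\top}y$$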
In [713]:
# calculate r^2 (after normal equation)
r_squared = 1 - np.sum((np.square(y - np.dot(X, normal_eq_theta))))/np.sum(np.square(y - np.mean(y)))
print(r_squared)
In [714]:
# matrix operations
a = np.array([[1, 2],
              [1, 4],
              [1, 3]])
b = np.array([[5],
              [2],
              [4]])
c = np.array([[0],
              [0]])
#b * a
print(b.shape)
print(a.shape)
print(b.T.dot(a), (b.T.dot(a)).shape, (b.T.dot(a)).T.shape)
np.sum(b.T.dot(a))
Out[714]:
41