In [705]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# load the first 1,000 rows of the turnstile/weather dataset
data = pd.read_csv('turnstile_data_master_with_weather.csv', nrows=1000)

In [715]:
# initial feature matrix and target vector
x = data[['Hour', 'maxpressurei', 'maxdewpti', 'mindewpti', 'meandewpti', 'meanpressurei', 'mintempi', 'maxtempi']]
y = data['ENTRIESn_hourly'].values

# training sample size
m = len(y)

# reshape the target into an m x 1 column vector
y = y.reshape(m, 1)

# Add UNIT to features using dummy variables
dummy_units = pd.get_dummies(data['UNIT'], prefix='unit')
x = x.join(dummy_units)
x = x.values

# number of features
n = x.shape[1]
print n


171
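
As a toy illustration (values invented, not from the CSV), pd.get_dummies expands a categorical column into one indicator column per distinct value; the 163 UNIT indicator columns are what push the feature count to 171 here:

units = pd.Series(['R001', 'R002', 'R001'])
print pd.get_dummies(units, prefix='unit')
# two columns, unit_R001 and unit_R002, with a 1 marking each row's unit
# (the indicator dtype varies across pandas versions)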

In [707]:
# Feature scaling: mean normalization (left disabled; the runs below use raw features)
# Each column is centered by its mean and divided by its standard deviation,
# so the normalization must happen inside the per-column loop.
#for j in xrange(0, n):
#    xbar = np.mean(x[:, j])
#    s_x = np.std(x[:, j])
#    x[:, j] = (x[:, j] - xbar) / s_x
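
A vectorized sketch of the same normalization (the zero-variance guard is an addition for safety, in case a dummy column happens to be constant within this 1,000-row sample):

def scale_features(x):
    # column-wise mean normalization: (x[:, j] - mean_j) / std_j
    mu = np.mean(x, axis=0)
    sigma = np.std(x, axis=0)
    sigma[sigma == 0] = 1.0  # leave constant columns unscaled
    return (x - mu) / sigma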

In [708]:
# design matrix: prepend a column of ones for the intercept term
X = np.hstack((np.ones((m,1)), x))

# parameter vector, initialized to zeros
theta = np.zeros(((n+1),1))

# number of gradient descent iterations
iterations = 1000

# candidate learning rates; a_index selects the one used below
# (only the positive values make sense for descent)
alpha = [-0.3, -0.1, -0.03, -0.01, -0.003, -0.001, 0, 0.0001, 0.0003, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3]
a_index = 7

In [709]:
# cost function: vectorized implementation
def J():
    return (1.0/(2.0*m)) * (((X.dot(theta)) - y).T).dot(X.dot(theta) - y)

# gradient vector: vectorized implementation
# (replaces a per-feature summation loop; the final .T turns the
#  1 x (n+1) row of partial derivatives into an (n+1) x 1 column)
def deltas():
    return ((1.0/m) * (X.dot(theta) - y).T.dot(X)).T
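
In matrix form, these two functions compute the standard squared-error cost and its gradient; the trailing .T in deltas() reshapes the row of partial derivatives so it matches the shape of theta:

$$J(\theta) = \frac{1}{2m}\,(X\theta - y)^T(X\theta - y), \qquad \nabla_\theta J(\theta) = \frac{1}{m}\,X^T(X\theta - y)$$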

In [710]:
# store cost function values for plotting
J_values = np.zeros((iterations,1))

# gradient descent
for iteration in xrange(0,iterations):
    theta = theta - (alpha[a_index] * deltas())
    J_values[iteration] = J()
    
# visualize the cost function: print every 10th value, then plot J against iteration
print J_values[0::10]
cost_x = np.arange(iterations)
plt.plot(cost_x, J_values)
plt.title("Learning Rate: " + str(alpha[a_index]))
plt.xlabel('iterations')
plt.ylabel(r"$J(\theta)$")
plt.show()


[[ 1960751.59740465]
 [ 1822296.38616408]
 [ 1791292.83955494]
 [ 1763087.48724464]
 [ 1737427.70660213]
 [ 1714083.68213878]
 [ 1692846.3462248 ]
 [ 1673525.50596202]
 [ 1655948.13916357]
 [ 1639956.84417335]
 [ 1625408.42963659]
 [ 1612172.63158663]
 [ 1600130.94635378]
 [ 1589175.56883975]
 [ 1579208.42664518]
 [ 1570140.30139665]
 [ 1561890.02940074]
 [ 1554383.77446343]
 [ 1547554.36635974]
 [ 1541340.69902665]
 [ 1535687.1830875 ]
 [ 1530543.24780276]
 [ 1525862.88798489]
 [ 1521604.25181801]
 [ 1517729.26588941]
 [ 1514203.29407337]
 [ 1510994.82721118]
 [ 1508075.20080694]
 [ 1505418.33820999]
 [ 1503000.51698292]
 [ 1500800.15636197]
 [ 1498797.62390569]
 [ 1496975.05959941]
 [ 1495316.21583964]
 [ 1493806.31186474]
 [ 1492431.90132777]
 [ 1491180.75182478]
 [ 1490041.73529947]
 [ 1489004.72834208]
 [ 1488060.52148945]
 [ 1487200.73671339]
 [ 1486417.75235837]
 [ 1485704.63485576]
 [ 1485055.07660306]
 [ 1484463.33945128]
 [ 1483924.20329446]
 [ 1483432.91930044]
 [ 1482985.16736403]
 [ 1482577.01740134]
 [ 1482204.89413843]
 [ 1481865.54507893]
 [ 1481556.01136347]
 [ 1481273.60126   ]
 [ 1481015.86604742]
 [ 1480780.57807642]
 [ 1480565.71081096]
 [ 1480369.42067174]
 [ 1480190.03051872]
 [ 1480026.01462492]
 [ 1479875.98500674]
 [ 1479738.67898847]
 [ 1479612.94788941]
 [ 1479497.74673229]
 [ 1479392.12488097]
 [ 1479295.21752319]
 [ 1479206.2379224 ]
 [ 1479124.470369  ]
 [ 1479049.26376804]
 [ 1478980.02580574]
 [ 1478916.2176427 ]
 [ 1478857.34908623]
 [ 1478802.97419848]
 [ 1478752.68730109]
 [ 1478706.11934055]
 [ 1478662.93458169]
 [ 1478622.82759968]
 [ 1478585.5205436 ]
 [ 1478550.76064704]
 [ 1478518.31796349]
 [ 1478487.9833061 ]
 [ 1478459.56637355]
 [ 1478432.89404499]
 [ 1478407.80882902]
 [ 1478384.16745263]
 [ 1478361.8395776 ]
 [ 1478340.70663269]
 [ 1478320.66075132]
 [ 1478301.60380517]
 [ 1478283.44652501]
 [ 1478266.10770086]
 [ 1478249.51345443]
 [ 1478233.59657719]
 [ 1478218.29592823]
 [ 1478203.55588648]
 [ 1478189.32585238]
 [ 1478175.55979448]
 [ 1478162.21583712]
 [ 1478149.25588518]
 [ 1478136.64528277]
 [ 1478124.35250276]]
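
A sketch for comparing several of the candidate rates on one plot (reusing X, y, m, n, and iterations from above; the three rates shown are picked from the alpha list, and on these unscaled features the larger ones may well diverge, which is itself informative):

for a in [0.0001, 0.0003, 0.001]:
    th = np.zeros(((n+1), 1))
    costs = np.zeros(iterations)
    for it in xrange(iterations):
        th = th - a * ((1.0/m) * (X.dot(th) - y).T.dot(X)).T
        costs[it] = (1.0/(2.0*m)) * ((X.dot(th) - y).T).dot(X.dot(th) - y)
    plt.plot(costs, label='alpha = ' + str(a))
plt.legend()
plt.xlabel('iterations')
plt.ylabel(r"$J(\theta)$")
plt.show()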

In [711]:
# calculate r^2 (after gradient descent) 
r_squared = 1 - np.sum((np.square(y - np.dot(X, theta))))/np.sum(np.square(y - np.mean(y)))
print r_squared


0.207739199797
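
For reference, the quantity computed above is the coefficient of determination, comparing the model's squared residuals against a predict-the-mean baseline:

$$R^2 = 1 - \frac{\sum_i (y_i - \hat{y}_i)^2}{\sum_i (y_i - \bar{y})^2}, \qquad \hat{y} = X\theta$$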

In [712]:
# Normal Equation (an alternative to gradient descent)
normal_eq_theta = (np.linalg.pinv(X.T.dot(X)).dot(X.T)).dot(y)
#print normal_eq_theta
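
This is the closed-form least-squares solution. The pseudoinverse (pinv) is used rather than a plain inverse because the design matrix is rank-deficient here: the intercept column equals the sum of the full set of UNIT dummy columns, so X^T X is singular.

$$\theta = (X^T X)^{+}\, X^T y$$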

In [713]:
# calculate r^2 (after normal equation)
r_squared = 1 - np.sum((np.square(y - np.dot(X, normal_eq_theta))))/np.sum(np.square(y - np.mean(y)))
print r_squared


0.581030010829
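
The gap between the two fits (r^2 of 0.21 from gradient descent vs. 0.58 from the normal equation) is expected: the cost curve above is still decreasing at iteration 1000, so descent with alpha = 0.0001 has not yet converged, and running it on unscaled features (the normalization cell is disabled) slows it further. The normal equation yields the exact least-squares fit in one step.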

In [714]:
# scratch cell: sanity checks on matrix shapes and dot products
# (mirrors the (X.dot(theta) - y).T.dot(X) step used in deltas() above)

a = np.array([[1, 2],
              [1, 4],
              [1, 3]])
b = np.array([[5],
              [2],
              [4]])
print b.shape
print a.shape
print b.T.dot(a), (b.T.dot(a)).shape, (b.T.dot(a)).T.shape
np.sum(b.T.dot(a))


(3, 1)
(3, 2)
[[11 30]] (1, 2) (2, 1)
Out[714]:
41