In [1]:
from imports import *
import avg_clf_train
import import_data
from theano import *
from theano import tensor as T
%matplotlib inline
In [2]:
'''
X = np.array([[0,0,1],
              [0,1,1],
              [1,0,1],
              [1,1,1]])
print X.shape
y = np.array([[0,0,1,1]]).T
print y.shape
'''
Out[2]:
In [3]:
'''
X = T.dmatrix()
w = T.dvector()
y = T.dvector()
Xw = T.dot(X, w)
sigmoid = 1 / (1 + T.exp(-Xw))
# function inputs must be the free symbolic variables, not the intermediate Xw
layer1 = function([X, w], sigmoid)
'''
Out[3]:
In [4]:
# ticker symbols come from the CSV filenames in quandl_data/
tickers = [filename[:-4] for filename in os.listdir('quandl_data')]
stock_df, prediction_df = import_data.import_data(tickers)
print stock_df.shape
stock_df.tail()
Out[4]:
In [5]:
y = stock_df['label'].values
y = y.reshape(y.shape[0], 1)
stock_df = stock_df.drop('label', axis=1)
X = stock_df.values
print X.shape, y.shape
In [6]:
# one-hot encode the 0/1 labels as (2, 1) column vectors
new_y = []
for i in xrange(y.shape[0]):
    if y[i] == 0:
        new_y.append(np.array([[1],[0]]))
    elif y[i] == 1:
        new_y.append(np.array([[0],[1]]))
y = new_y
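The same encoding can be done without the branch; a small sketch using np.eye (here `labels` stands in for the original 0/1 label vector, which the loop above has already replaced):
In [ ]:
# equivalent vectorized one-hot encoding (sketch on toy labels)
labels = np.array([0, 1, 1, 0])
one_hot = [np.eye(2)[lab].reshape(2, 1) for lab in labels]
print one_hot[0]   # matches np.array([[1],[0]]) from the loop above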
In [7]:
scaler = MinMaxScaler()
X = scaler.fit_transform(X)
X
Out[7]:
In [8]:
def add_bias(matrix):
    """Prepend a bias term: a leading 1 for column vectors,
    a column of ones for 2-D design matrices."""
    if matrix.ndim == 1:
        matrix = matrix.reshape(matrix.shape[0], 1)
    if matrix.shape[1] == 1:
        matrix = np.insert(matrix, 0, 1, axis=0)
    else:
        matrix = np.hstack((np.ones((matrix.shape[0], 1)), matrix))
    return matrix
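A quick shape check of add_bias on the three input forms it handles (toy arrays only):
In [ ]:
# toy shapes: 1-D vector, column vector, and design matrix
print add_bias(np.array([1.0, 2.0])).shape                 # (3, 1): leading 1 prepended
print add_bias(np.array([[1.0], [2.0]])).shape             # (3, 1)
print add_bias(np.array([[1.0, 2.0], [3.0, 4.0]])).shape   # (2, 3): ones column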
In [9]:
def g(z):
    # elementwise sigmoid activation
    return np.true_divide(1, (1 + np.exp(-z)))
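Backprop below leans on the sigmoid's derivative, g'(z) = g(z) * (1 - g(z)); a quick finite-difference check:
In [ ]:
# numerical check of g'(z) = g(z) * (1 - g(z)) at z = 0.5
z, eps = 0.5, 1e-6
print np.true_divide(g(z + eps) - g(z - eps), 2 * eps), g(z) * (1 - g(z))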
In [40]:
def create_thetas(L, s):
    """Randomly initialize one weight matrix per layer transition;
    theta_l has shape (s[l+1], s[l]+1) to absorb the bias unit."""
    # seed once, not inside the loop (re-seeding each iteration with the
    # same value would give every layer identical draws)
    np.random.seed(42)
    thetas = []
    # skip last (output) layer
    for l in xrange(L-1):
        theta = np.random.standard_normal((s[l+1], s[l]+1))
        thetas.append(theta)
    return thetas
In [308]:
def J(m, K, thetas, h_thetas, y):
    """Regularized cross-entropy cost:
    -(1/m) * sum_i sum_k [y*log(h) + (1-y)*log(1-h)]
    + (lambda/2m) * sum of squared non-bias weights."""
    summations = 0
    for i in xrange(m):
        for k in xrange(K):
            summations += y[i][k] * np.log(h_thetas[i][k]) + (1 - y[i][k]) * np.log(1 - h_thetas[i][k])
    left_J = np.true_divide(summations, -m)
    # accumulate (not overwrite) the squared weights, skipping the bias column 0
    theta_sums = 0
    for l in xrange(L-1):
        for j in xrange(s[l+1]):
            for i in xrange(1, s[l]+1):
                theta_sums += thetas[l][j][i]**2
    right_J = np.true_divide(lambda_reg, (2.0*m)) * theta_sums
    return left_J + right_J
In [430]:
def forward_propagation(L, x, thetas):
    # a1: the input as a column vector (X already carries its bias term)
    a = [x.reshape(x.shape[0], 1)]
    for l in xrange(L-1):
        a_l = a[l]
        # layers past the first need a bias unit prepended
        if l != 0:
            a_l = add_bias(a_l)
        # z2, z3, z4, ...
        z = thetas[l].dot(a_l)
        # a2, a3, a4, ...
        a_l = g(z)
        # keep a_l for the next iteration
        a.append(a_l)
    return a
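A shape sanity check on toy data before wiring in the real matrices (the toy thetas below are placeholders):
In [ ]:
# toy network: 3 inputs (bias included), one hidden layer of 2 units, 2 outputs
toy_thetas = [np.ones((2, 3)), np.ones((2, 3))]
toy_a = forward_propagation(3, np.array([1.0, 0.5, -0.5]), toy_thetas)
print [a_l.shape for a_l in toy_a]   # [(3, 1), (2, 1), (2, 1)]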
In [13]:
X = add_bias(X)
In [39]:
# number of features (minus bias)
n = X.shape[1]-1
# number of output classes
K = 2
# number of layers
L = 4
# number of units in each layer
s = [n if (l < (L-1)) else K for l in xrange(L)]
# create thetas based on L and s
thetas = create_thetas(L, s)
[theta.shape for theta in thetas]
Out[39]:
In [15]:
#indices = np.random.choice(X.shape[0], 200)
#input_X = X[indices,:]
#output_y = y[indices, :]
#print input_X.shape, output_y.shape
In [418]:
def back_propagation(L, thetas, a, y_i, Deltas):
    deltas = []
    # output layer error: delta_L = a_L - y
    deltas.append(a[L-1] - y_i)
    # walk backwards; a[l] holds the activations of layer l+1
    for l in xrange(L-2, 0, -1):
        a_l = a[l]
        # skip bias weights (column 0)
        theta_l_T = thetas[l][:,1:].T
        # elementwise: g'(z) = a * (1 - a)
        g_prime = np.multiply(a_l, (1 - a_l))
        d = np.multiply(theta_l_T.dot(deltas[0]), g_prime)
        deltas.insert(0, d)
    # accumulate gradients: Delta_l += delta_(l+1) . a_l^T
    for l in xrange(len(deltas)):
        # hidden activations need their bias unit back to match theta's shape
        a_l = a[l] if l == 0 else add_bias(a[l])
        Deltas[l] += deltas[l].dot(a_l.T)
    return Deltas
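A finite-difference check against J is a standard way to gain confidence in these gradients; a minimal sketch for a single non-bias weight on the first training example (note J includes the regularization term, which the raw Deltas do not):
In [ ]:
# numerical gradient check sketch for the single weight thetas[0][0][1]
eps = 1e-4
theta_plus = [t.copy() for t in thetas]
theta_minus = [t.copy() for t in thetas]
theta_plus[0][0][1] += eps
theta_minus[0][0][1] -= eps
h_plus = [forward_propagation(L, X[0], theta_plus)[L-1]]
h_minus = [forward_propagation(L, X[0], theta_minus)[L-1]]
print np.true_divide(J(1, K, theta_plus, h_plus, y) - J(1, K, theta_minus, h_minus, y), 2 * eps)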
In [431]:
lambda_reg = 100
h_thetas = []
Deltas = []
for theta in thetas:
    Deltas.append(np.zeros(theta.shape))
print "Delta shapes:", [Delta.shape for Delta in Deltas]
D = []
# m = 1: a single-example smoke test of the forward/backward pass
m = 1
# accumulate gradients over the m training examples
for i in xrange(m):
    a = forward_propagation(L, X[i], thetas)
    h_theta_i = a[L-1]
    h_thetas.append(h_theta_i)
    Deltas = back_propagation(L, thetas, a, y[i], Deltas)
#for l in xrange(L-1):
#    # regularize only the non-bias columns when averaging
#    reg_D = np.true_divide(Deltas[l][:,1:] + (lambda_reg * thetas[l][:,1:]), m)
#    noreg_D = np.true_divide(Deltas[l][:,0], m)
#    D[l] = np.hstack((noreg_D.reshape(-1, 1), reg_D))
print "cost:", J(m, K, thetas, h_thetas, y)
In [19]:
print [theta.shape for theta in thetas]
thetas
Out[19]:
In [20]:
# rows with no label yet are the ones to predict
pred_df = prediction_df[prediction_df['label'].isnull()]
In [21]:
pred_tickers = pred_df['ticker'].unique()
In [22]:
pred_X = pred_df.drop(['ticker','label'], axis=1).values
print pred_X.shape
print pred_X[0]
In [23]:
pred_X = add_bias(pred_X)
pred_X[0]
Out[23]:
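One thing to double-check before predicting: X was min-max scaled before training, but pred_X was not, so presumably the prediction features should pass through the same fitted scaler (before the bias column is added). A sketch:
In [ ]:
# sketch: run pred_X through the scaler fitted on the training features,
# then re-add the bias column
pred_X = scaler.transform(pred_df.drop(['ticker','label'], axis=1).values)
pred_X = add_bias(pred_X)
pred_X[0]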
In [24]:
# forward-propagate each prediction row through the trained network
# (assumes one prediction row per ticker, matching pred_tickers)
h_preds = []
for i in xrange(pred_X.shape[0]):
    a = forward_propagation(L, pred_X[i], thetas)
    h_preds.append(a[L-1])
    print str(i).rjust(2), str(pred_tickers[i]).rjust(4), np.round(a[L-1][0][0], 2)
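y_predictions is never defined in the notebook; a minimal sketch that thresholds the two-unit outputs collected in h_preds above into 0/1 class labels:
In [ ]:
# sketch: predict class 1 when its output unit beats the class-0 unit
y_predictions = [int(h[1][0] > h[0][0]) for h in h_preds]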
In [25]:
positive_tickers = []
for i in xrange(len(pred_tickers)):
    print i, pred_tickers[i], y_predictions[i]
    if y_predictions[i] == 1:
        positive_tickers.append(pred_tickers[i])
In [ ]:
for ticker in positive_tickers:
    past_days = 100
    oc = prediction_df[prediction_df['ticker'] == ticker]["OC%"][-past_days:]
    num_days = oc.shape[0]
    day_range = np.arange(num_days)
    plt.plot(day_range, oc, alpha=0.5)
    plt.plot(day_range, [0.05 for x in day_range], color='r')
    plt.title("{0} (previous {1} days)".format(ticker, num_days))
    plt.show()
    print "\t", ticker, "{}-day freq probability:".format(past_days), np.true_divide(np.sum(oc.values > 0.05), past_days)
    print "~"*50, "\n"
In [ ]:
'''
#errors = []
iterations = 1
for i in xrange(iterations):
#plt.plot([x for x in xrange(iterations)], errors)
#plt.show()
'''
In [ ]:
'''
X = np.array([[1,2,3,4],
              [1,3,4,5],
              [1,6,7,8],
              [1,5,4,3],
              [1,2,3,3]])
print X.shape
theta1 = np.array([[2,2,3,4],
                   [4,3,4,5],
                   [6,6,7,8]])
print theta1.T
print theta1.shape
print X.dot(theta1.T)
print X.dot(theta1.T).shape
'''