In [13]:
import numpy as np
from numpy.linalg import pinv, inv
from solver import solve
In [6]:
# Fix the global NumPy seed so the random matrices below are reproducible.
np.random.seed(3)
# x is 3x4 and y is 2x4; they share their second dimension (4).
# Presumably each column is one sample — confirm against solver.solve.
x = np.random.rand(3,4)
y = np.random.rand(2,4)
# NOTE(review): solve comes from the local `solver` module; from how A and b are
# used in this notebook it presumably fits y ~ A.dot(x) + b — confirm.
A, b = solve(x, y)
In [9]:
# If y = Z.dot(x) + b then Z.dot(x) = y - b, so right-multiplying (y - b) by the
# pseudo-inverse of x gives the least-squares estimate of Z.
# Presumably this is meant to be compared with the A returned by solve — confirm.
(y - b).dot(pinv(x))
Out[9]:
In [11]:
pinv(x)
Out[11]:
In [14]:
# Explicit right pseudo-inverse x^T (x x^T)^-1. It equals pinv(x) only when x has
# full row rank (x is 3x4 here), so this serves as a cross-check of pinv(x).
x.transpose().dot(inv(x.dot(x.transpose())))
Out[14]:
In [ ]:
from __future__ import absolute_import, division, print_function
from tensorflow import keras
from tensorflow.keras import layers
import bisect
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pathlib
import seaborn as sns
import tensorflow as tf
In [ ]:
from solver import solve, solve_logistic, sigmoid
In [ ]:
ls /Users/felipe/bitcoin/data/ | grep training
In [ ]:
def read_data(dates, data_dir='/Users/felipe/bitcoin/data'):
    """Load and concatenate the per-day training CSV files.

    Each file is expected at ``<data_dir>/<date>-training.csv`` and is read
    with its ``time`` column parsed as dates and used as the index.

    Parameters
    ----------
    dates : iterable
        Date identifiers (e.g. ``20190515``); each one is formatted into the
        file name with ``str.format``.
    data_dir : str or path-like, optional
        Directory that holds the CSV files. Defaults to the location that was
        previously hard-coded, so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        The rows of all files, concatenated in the order of ``dates``.

    Raises
    ------
    ValueError
        If ``dates`` is empty.
    """
    dates = list(dates)
    if not dates:
        # pd.concat([]) would raise a ValueError with a far less helpful message.
        raise ValueError('read_data needs at least one date')
    base_dir = pathlib.Path(data_dir)
    frames = [
        pd.read_csv(base_dir / '{}-training.csv'.format(date),
                    index_col='time',
                    parse_dates=True)
        for date in dates
    ]
    return pd.concat(frames)
In [ ]:
# Split by date: fit on three consecutive days, evaluate on a later day.
# (An earlier variant drew a random 80/20 sample from a single frame instead.)
train_dataset = read_data([20190515, 20190516, 20190517])
test_dataset = read_data([20190521])
# Summary statistics of the training frame, one row per column.
train_stats = train_dataset.describe().T
In [ ]:
# The two PnL columns are the targets; every remaining column is a feature.
y_cols = ['longPnlAvg', 'shortPnlAvg']
x_cols = [col for col in train_dataset.columns if col not in y_cols]
# Labels are extracted first because train_dataset/test_dataset are rebound
# to plain feature arrays right after. Everything is transposed so that rows
# are columns of the frame and columns are samples.
# NOTE: this cell is not idempotent; re-running it fails because the rebound
# arrays no longer have `.columns`.
train_labels = train_dataset[y_cols].values.T
test_labels = test_dataset[y_cols].values.T
train_dataset = train_dataset[x_cols].values.T
test_dataset = test_dataset[x_cols].values.T
In [ ]:
A, b = solve(train_dataset, train_labels)
def predict(dataset, A, b):
    """Apply the affine map to ``dataset`` and return ``A.dot(dataset) + b``."""
    projected = A.dot(dataset)
    return projected + b
In [ ]:
train_predictions = predict(train_dataset, A, b)
In [ ]:
def get_good_x(labels, predictions, max_pnl_diff=8.15):
    """Return the indices (along axis 1) whose predictions are all close to the labels.

    An index is "good" when, for every entry along axis 0, the absolute
    difference between prediction and label is at most ``max_pnl_diff``.
    ``labels`` and ``predictions`` must have the same shape.
    """
    assert labels.shape == predictions.shape
    within_tolerance = np.abs(predictions - labels) <= max_pnl_diff
    # Good only if every entry along axis 0 is within tolerance.
    all_within = np.all(within_tolerance, axis=0)
    return np.where(all_within)[0]
In [ ]:
good_x = get_good_x(train_labels, train_predictions)
In [ ]:
In [ ]:
# Indicator row vector over the training samples: 1 at the "good" indices, 0 elsewhere.
Yg = np.zeros((1, train_dataset.shape[1]))
Yg[0, good_x] = 1.
In [ ]:
In [ ]:
# NOTE(review): solve is called without arguments here, while the other calls in
# this notebook pass (x, y). Presumably a fit of Yg on train_dataset was intended
# (via solve or solve_logistic) — confirm and fill in the arguments.
Ag, bg = solve()
In [ ]:
test_dataset[:, good_x].shape
In [ ]:
# Element-wise absolute difference between test predictions and labels.
# NOTE(review): test_predictions is only assigned in a later cell, so this line
# fails on a fresh top-to-bottom run; it also rebinds the name `y`.
y = np.abs(test_predictions - test_labels)
In [ ]:
del yh
In [ ]:
np.sum(y[0] * y[1])
In [ ]:
# NOTE(review): is_good is neither defined nor imported in the visible part of
# this notebook — presumably a leftover from an earlier version; confirm or remove.
y, yh = is_good(test_labels, test_predictions)
In [ ]:
test_dataset.mean()
In [ ]:
# Linear-model predictions for the test set (same formula as predict()).
test_predictions = A.dot(test_dataset) + b
# True vs. predicted values, with both arrays flattened into one point cloud.
plt.scatter(test_labels.flatten(), test_predictions.flatten())
plt.xlabel('True Values')
plt.ylabel('Predictions')
plt.axis('equal')
plt.axis('square')
# Make both axes start at 0 while keeping their current upper limits.
plt.xlim([0,plt.xlim()[1]])
plt.ylim([0,plt.ylim()[1]])
# Reference diagonal y = x: points on this line are exact predictions.
# Assigning to _ keeps the returned artist list out of the cell output.
_ = plt.plot([-100, 100], [-100, 100])
In [ ]:
# Signed prediction error (prediction minus label), flattened into one vector.
error = test_predictions.flatten() - test_labels.flatten()
plt.hist(error, bins = 25)
plt.xlabel("Prediction Error")
# Assigning to _ keeps the returned label object out of the cell output.
_ = plt.ylabel("Count")
In [ ]:
max(error), min(error)
In [ ]:
max(error), min(error)
In [ ]:
test_predictions
In [ ]:
# The product of a prediction and its label is negative exactly when the two
# have opposite signs.
sign_error = test_predictions.flatten() * test_labels.flatten()
# Fraction of (prediction, label) pairs whose signs disagree.
sum(sign_error < 0) / len(sign_error)
In [ ]:
# test_labels is a plain NumPy array (built with `.values.transpose()`), so it
# has no `.as_matrix()`; that method has also been removed from pandas itself.
# Both arrays are laid out as (target, sample): row 0 follows 'longPnlAvg' and
# row 1 follows 'shortPnlAvg' (the order of y_cols), so the per-target series
# are rows, not columns.
# NOTE(review): mapping long -> "buy" and short -> "sell" keeps the original
# index order (0 = buy, 1 = sell) — confirm that this is the intended meaning.
compare = pd.DataFrame({'buy_pred': test_predictions[0],
                        'buy_label': test_labels[0],
                        'sell_pred': test_predictions[1],
                        'sell_label': test_labels[1]})
In [ ]:
(compare['buy_label'].corr(compare['buy_pred']),
compare['sell_label'].corr(compare['sell_pred']))
In [ ]:
len(compare[(compare['buy_label'] >= 5) & (compare['buy_pred'] >= 5)])/ len(compare)
In [ ]:
# tf.trainable_variables()
In [ ]:
# Table of absolute first-layer weights, sorted by the 'buyPnl' column.
# NOTE(review): `model` is not defined in the visible notebook, and test_dataset
# was rebound to a NumPy array earlier (no `.keys()`), so this cell cannot run
# as written. It looks like a leftover from a Keras version of the analysis —
# confirm or remove.
pd.DataFrame(model.get_weights()[0])\
.set_index(test_dataset.keys())\
.rename(columns={0: 'buyPnl', 1: 'sellPnl'})\
.apply(lambda x: abs(x))\
.sort_values(by='buyPnl')
In [ ]:
def solve_logistic(x, y, learning_rate=0.1, max_iters=1000, tol=1.e-10, patience=10):
    """Fit A and b by gradient descent so that sigmoid(A.dot(x) + b) approximates y.

    Parameters
    ----------
    x : array of shape (Nx, Nt)
        Inputs; the second axis must match that of ``y``.
    y : array of shape (Ny, Nt)
        Targets compared against the sigmoid output.
    learning_rate : float
        Step size applied to the summed (not averaged) gradients.
    max_iters : int
        Upper bound on the number of iterations.
    tol : float
        Threshold for the loss-based stop, floor for the parameter magnitudes
        in the relative-gradient measure, and slack in the stall test.
    patience : int
        Number of consecutive non-improving iterations tolerated before
        stopping; must be positive.

    Returns
    -------
    A : array of shape (Ny, Nx)
    b : array of shape (Ny, 1)
    r : list of [J, d_err]
        Loss and relative gradient size recorded per iteration.

    NOTE(review): this definition shadows the solve_logistic imported from
    `solver` earlier in the notebook — confirm which one is meant to be used.
    NOTE(review): indentation was lost in this export; the nesting below (the
    `else` paired with the stall test, the final `r.append` at loop level) is
    the most natural reading — confirm against the original notebook.
    """
    assert x.shape[1] == y.shape[1]
    assert patience > 0
    Nx = x.shape[0]
    Ny = y.shape[0]
    Nt = x.shape[1]
    # Parameters start at zero.
    A = np.zeros(Nx * Ny).reshape(Ny, Nx)
    b = np.zeros(Ny).reshape(Ny, 1)
    # Column of ones: used to tile b across samples and to sum residuals over samples.
    ones = np.ones(Nt).reshape(Nt, 1)
    r = []
    k = 0
    # NOTE(review): is_diverging is assigned below but never read.
    is_diverging = False
    patience_left = patience
    while k < max_iters:
        S = sigmoid(A.dot(x) + b.dot(ones.transpose()))
        # Residual between the current output and the targets.
        R = S - y
        # Gradients are summed over all Nt samples (no division by Nt).
        dA = R.dot(x.transpose())
        db = R.dot(ones)
        A -= learning_rate * dA
        b -= learning_rate * db
        # Cross-entropy evaluated with S, i.e. with the parameters from before
        # this iteration's update.
        J = np.sum(-y * np.log(S) - (1. - y) * np.log(1 - S))
        A_max = np.max(np.abs(A))
        b_max = np.max(np.abs(b))
        # Largest gradient entry relative to the largest parameter entry
        # (floored at tol to avoid dividing by zero).
        dA_err = np.max(np.abs(dA)) / max(A_max, tol)
        db_err = np.max(np.abs(db)) / max(b_max, tol)
        d_err = max(dA_err, db_err)
        k += 1
        if J <= tol:
            r.append([J, d_err])
            break
        # Stall test: the previously recorded d_err is smaller than the current
        # one plus tol, i.e. d_err did not drop by at least tol.
        if r and r[-1][1] < d_err + tol:
            is_diverging = True
            patience_left -= 1
            if patience_left == 0:
                r.append([J, d_err])
                break
        else:
            is_diverging = False
            patience_left = patience
        r.append([J, d_err])
    return A, b, r
In [ ]:
help(np.linalg.norm)
In [ ]:
# Synthetic check: build targets from known parameters (A_e, b_e), then see
# whether solve_logistic recovers them.
# NOTE: this rebinds x, y, A and b from the earlier cells.
A_e = np.array([[10.0, 0.0]])
b_e = np.array([[0.5]])
x = np.array([
    [-1.0, 0.0, 1.0, 2.0],
    [0.0, 1.0, 2.0, 3.0],
])
y = sigmoid(A_e.dot(x) + b_e)
A, b, r = solve_logistic(x, y, learning_rate=1, max_iters=10**6, tol=1e-14)
def J(A, b, x, y):
    """Norm of the element-wise cross-entropy between sigmoid(A.dot(x) + b) and y.

    The per-entry terms are reduced with ``np.linalg.norm`` (its default norm),
    not with a plain sum.
    """
    # sigmoid is assumed to be a pure function, so it is evaluated once and reused.
    p = sigmoid(A.dot(x) + b)
    entry_losses = -y * np.log(p) - (1. - y) * np.log(1 - p)
    return np.linalg.norm(entry_losses)
def R(A, b, x, y):
    """Residual sigmoid(A.dot(x) + b) - y, with b tiled across the columns of x."""
    n_cols = x.shape[1]
    # Right-multiplying by a row of ones repeats b once per column of x.
    tiled_bias = b.dot(np.ones((1, n_cols)))
    return sigmoid(A.dot(x) + tiled_bias) - y
In [ ]:
J(A, b, x, y) - J(A_e, b_e, x, y)
In [ ]:
np.linalg.norm(R(A, b, x, y), 'fro')
In [ ]:
np.linalg.norm(R(A_e, b_e, x, y), 'fro')
In [ ]:
A, A_e
In [ ]:
b, b_e
In [ ]:
In [ ]:
np.amax(x, 0)
In [ ]:
np.vectorize(max)(x, 0.)
In [ ]:
np.max(np.abs(x))
In [ ]: