In [13]:
import numpy as np
from numpy.linalg import pinv, inv
from solver import solve

In [6]:
np.random.seed(3)
x = np.random.rand(3,4)
y = np.random.rand(2,4)
A, b = solve(x, y)

In [9]:
# A.dot(x) = y - b, so A can be recovered with the pseudoinverse of x
(y - b).dot(pinv(x))


Out[9]:
array([[ 14.7325907 ,  -4.83187832,  -6.8645464 ],
       [-18.4885509 ,   5.70767008,   8.79673898]])
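
Sanity check, assuming solve returns the least-squares A and b: the matrix recovered via the pseudoinverse above should match A from solve.

In [ ]:
np.allclose(A, (y - b).dot(pinv(x)))  # expect True if the fit is exact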

In [11]:
pinv(x)


Out[11]:
array([[ 0.79307392,  0.45717429, -1.61648149],
       [-1.58384314,  1.16240731,  1.8113318 ],
       [ 3.68827256, -1.84737309, -2.76015822],
       [ 1.19773412, -1.05232617,  0.80380748]])

In [14]:
x.transpose().dot(inv(x.dot(x.transpose())))


Out[14]:
array([[ 0.79307392,  0.45717429, -1.61648149],
       [-1.58384314,  1.16240731,  1.8113318 ],
       [ 3.68827256, -1.84737309, -2.76015822],
       [ 1.19773412, -1.05232617,  0.80380748]])
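
The two results agree because this x has full row rank, so the Moore–Penrose pseudoinverse reduces to the right inverse x.T.dot(inv(x.dot(x.T))):

In [ ]:
np.allclose(pinv(x), x.transpose().dot(inv(x.dot(x.transpose()))))  # expect True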

In [ ]:
from __future__ import absolute_import, division, print_function

from tensorflow import keras
from tensorflow.keras import layers
import bisect
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pathlib
import seaborn as sns
import tensorflow as tf

In [ ]:
from solver import solve, solve_logistic, sigmoid

In [ ]:
!ls /Users/felipe/bitcoin/data/ | grep training

In [ ]:
def read_data(dates):
    """Read and concatenate the per-day training CSVs for the given dates."""
    frames = []
    for date in dates:
        tmp = pd.read_csv('/Users/felipe/bitcoin/data/{}-training.csv'.format(date),
                          index_col='time',
                          parse_dates=True)
        frames.append(tmp)
    return pd.concat(frames)

In [ ]:
#train_dataset = t.sample(frac=0.8,random_state=0)
#test_dataset = t.drop(train_dataset.index)
train_dataset = read_data([20190515, 20190516, 20190517])
test_dataset = read_data([20190521])
train_stats = train_dataset.describe().transpose()
# train_stats

In [ ]:
x_cols = [c for c in train_dataset.columns if c not in ('longPnlAvg', 'shortPnlAvg')]
y_cols = ['longPnlAvg', 'shortPnlAvg']

# Arrange everything as (rows = variables, columns = samples), the layout solve expects
train_labels = train_dataset[y_cols].values.transpose()
test_labels = test_dataset[y_cols].values.transpose()
train_dataset = train_dataset[x_cols].values.transpose()
test_dataset = test_dataset[x_cols].values.transpose()

In [ ]:
A, b = solve(train_dataset, train_labels)
def predict(dataset, A, b):
    return A.dot(dataset) + b
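
solver.solve itself is not shown in this notebook. A minimal sketch of the fit it presumably performs, as least squares with an intercept via an augmented design (solve_lstsq is a hypothetical name):

In [ ]:
def solve_lstsq(x, y):
    """Least-squares A, b for A.dot(x) + b ~= y (x: features x samples, y: outputs x samples)."""
    Nt = x.shape[1]
    x_aug = np.vstack([x, np.ones((1, Nt))])  # append a row of ones for the intercept
    W = y.dot(pinv(x_aug))                    # shape (Ny, Nx + 1)
    return W[:, :-1], W[:, -1:]               # split into A and the intercept column b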

In [ ]:
train_predictions = predict(train_dataset, A, b)

In [ ]:
def get_good_x(labels, predictions, max_pnl_diff=8.15):
    """Return the column indices where every output is within max_pnl_diff of its label."""
    assert labels.shape == predictions.shape
    y = np.abs(predictions - labels) <= max_pnl_diff
    y = np.prod(y, axis=0)  # logical AND across the output rows
    return np.where(y > 0)[0]
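
A toy check of get_good_x: a column only qualifies when every row is within max_pnl_diff.

In [ ]:
labels_demo = np.array([[1., 2., 3.],
                        [1., 2., 3.]])
preds_demo = np.array([[1., 2., 30.],
                       [1., 20., 3.]])
get_good_x(labels_demo, preds_demo, max_pnl_diff=1.)  # expect array([0])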

In [ ]:
good_x = get_good_x(train_labels, train_predictions)

In [ ]:
# 0/1 indicator row: 1 where the linear fit was "good", 0 elsewhere
Yg = np.zeros((1, train_dataset.shape[1]))
Yg[:, good_x] = 1.

In [ ]:
Ag, bg = solve(train_dataset, Yg)  # assumed arguments: fit the 0/1 goodness indicator

In [ ]:
test_dataset[:, good_x].shape

In [ ]:
y = np.abs(test_predictions - test_labels)  # requires test_predictions from the cell below

In [ ]:
np.sum(y[0] * y[1])

In [ ]:
y, yh = is_good(test_labels, test_predictions)  # is_good is assumed to come from solver; it is not defined in this notebook

In [ ]:
test_dataset.mean()

In [ ]:
test_predictions = A.dot(test_dataset) + b

plt.scatter(test_labels.flatten(), test_predictions.flatten())
plt.xlabel('True Values')
plt.ylabel('Predictions')
plt.axis('equal')
plt.axis('square')
plt.xlim([0, plt.xlim()[1]])
plt.ylim([0, plt.ylim()[1]])
_ = plt.plot([-100, 100], [-100, 100])

In [ ]:
error = test_predictions.flatten() - test_labels.flatten()
plt.hist(error, bins=25)
plt.xlabel("Prediction Error")
_ = plt.ylabel("Count")

In [ ]:
max(error), min(error)

In [ ]:
test_predictions

In [ ]:
sign_error = test_predictions.flatten() * test_labels.flatten()
np.mean(sign_error < 0)  # fraction of predictions with the wrong sign

In [ ]:
compare = pd.DataFrame({'buy_pred': test_predictions[0],
                        'buy_label': test_labels[0],
                        'sell_pred': test_predictions[1],
                        'sell_label': test_labels[1]})

In [ ]:
(compare['buy_label'].corr(compare['buy_pred']),
compare['sell_label'].corr(compare['sell_pred']))

In [ ]:
# Fraction of samples where both the label and the prediction are at least 5
len(compare[(compare['buy_label'] >= 5) & (compare['buy_pred'] >= 5)]) / len(compare)
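
For context, the unconditional base rate of buy_label >= 5, against which the joint rate above can be compared:

In [ ]:
len(compare[compare['buy_label'] >= 5]) / len(compare)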

In [ ]:
# tf.trainable_variables()

In [ ]:
# Requires a fitted keras model from the tf.keras flow; x_cols supplies the feature names.
pd.DataFrame(model.get_weights()[0])\
.set_index(pd.Index(x_cols))\
.rename(columns={0: 'buyPnl', 1: 'sellPnl'})\
.abs()\
.sort_values(by='buyPnl')

In [ ]:
def solve_logistic(x, y, learning_rate=0.1, max_iters=1000, tol=1.e-10, patience=10):
    """Fit y = sigmoid(A.dot(x) + b) by full-batch gradient descent on the cross-entropy."""
    assert x.shape[1] == y.shape[1]
    assert patience > 0
    Nx = x.shape[0]   # number of features
    Ny = y.shape[0]   # number of outputs
    Nt = x.shape[1]   # number of samples
    A = np.zeros((Ny, Nx))
    b = np.zeros((Ny, 1))
    ones = np.ones((Nt, 1))
    r = []            # history of [cost, relative gradient size]
    patience_left = patience
    k = 0
    while k < max_iters:
        S = sigmoid(A.dot(x) + b.dot(ones.transpose()))
        R = S - y     # residual; also the gradient of the cross-entropy w.r.t. the logits
        dA = R.dot(x.transpose())
        db = R.dot(ones)
        A -= learning_rate * dA
        b -= learning_rate * db
        J = np.sum(-y * np.log(S) - (1. - y) * np.log(1. - S))  # cost before the update
        A_max = np.max(np.abs(A))
        b_max = np.max(np.abs(b))
        dA_err = np.max(np.abs(dA)) / max(A_max, tol)
        db_err = np.max(np.abs(db)) / max(b_max, tol)
        d_err = max(dA_err, db_err)
        k += 1
        if J <= tol:  # converged on the cost
            r.append([J, d_err])
            break
        if r and r[-1][1] < d_err + tol:  # gradient stopped shrinking: possibly diverging
            patience_left -= 1
            if patience_left == 0:
                r.append([J, d_err])
                break
        else:
            patience_left = patience
            r.append([J, d_err])

    return A, b, r
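
solve_logistic relies on sigmoid, imported from solver earlier; the standard logistic function it is assumed to implement:

In [ ]:
# Assumed definition of solver.sigmoid: the standard logistic function.
def sigmoid(z):
    return 1. / (1. + np.exp(-z))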

In [ ]:
help(np.linalg.norm)

Testing whether the $\theta$ theory is valid: generate $y$ from known parameters $A_e$, $b_e$ and check that solve_logistic recovers them.


In [ ]:
A_e = np.array([[10., 0.]])  # exact generating parameters
b_e = np.array([[.5]])
x = np.array([[-1., 0., 1., 2.],
              [0., 1., 2., 3.]])
y = sigmoid(A_e.dot(x) + b_e)

A, b, r = solve_logistic(x, y, learning_rate=1, max_iters=1000000, tol=1.e-14)

def J(A, b, x, y):
    """Cross-entropy cost of (A, b) on (x, y)."""
    J_ = -y * np.log(sigmoid(A.dot(x) + b)) - (1. - y) * np.log(1. - sigmoid(A.dot(x) + b))
    return np.linalg.norm(J_)

def R(A, b, x, y):
    """Residual sigmoid(A.dot(x) + b) - y."""
    Nt = x.shape[1]
    return sigmoid(A.dot(x) + b.dot(np.ones(Nt).reshape(1, Nt))) - y

In [ ]:
J(A, b, x, y) - J(A_e, b_e, x, y)

In [ ]:
np.linalg.norm(R(A, b, x, y), 'fro')

In [ ]:
np.linalg.norm(R(A_e, b_e, x, y), 'fro')

In [ ]:
A, A_e

In [ ]:
b, b_e

In [ ]:
np.amax(x, 0)  # column-wise maximum

In [ ]:
np.maximum(x, 0.)  # elementwise max against 0 (idiomatic form of np.vectorize(max))

In [ ]:
np.max(np.abs(x))  # largest absolute entry
