In [1]:
import sys
sys.path.insert(0, '../models')
In [2]:
import pandas as pd
import numpy as np
In [3]:
from sklearn.decomposition import PCA
In [4]:
%matplotlib inline
In [5]:
import matplotlib.pyplot as plt
In [6]:
from UFCNN1 import draw_model, print_nodes_shapes, save_neuralnet, ufcnn_model_concat, ufcnn_model_sum, ufcnn_model, \
prepare_tradcom_classification, train_and_predict_classification, get_tradcom_normalization
In [7]:
from UFCNN1 import standardize_inputs, standardize_columns, ufcnn_model_seq, print_layers_shapes
In [8]:
from keras.optimizers import SGD
In [9]:
sgd = SGD(lr=0.0005, decay=1e-6, momentum=0.9, nesterov=True)
In [10]:
model_concat = ufcnn_model_concat(regression = False, output_dim=3, features=32,
loss="categorical_crossentropy", sequence_length=500, optimizer=sgd )
In [11]:
print_nodes_shapes(model_concat)
In [11]:
model_concat_regression = ufcnn_model_concat(regression = True, output_dim=1, features=32,
loss="mse", sequence_length=500, optimizer=sgd )
In [12]:
print_nodes_shapes(model_concat_regression)
In [9]:
model_concat = ufcnn_model(regression = False, output_dim=3, features=32,
loss="categorical_crossentropy", sequence_length=500, optimizer=sgd, mode='concat' )
In [10]:
print_nodes_shapes(model_concat)
In [10]:
draw_model(model_concat)
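Alongside the plot, the model can be persisted; a minimal sketch using the standard Keras serialization API (file names are illustrative, and the repo's own save_neuralnet helper may be preferable):
In [ ]:
# Sketch: save architecture as JSON and weights as HDF5 (file names illustrative)
with open('ufcnn_concat.json', 'w') as f:
    f.write(model_concat.to_json())
model_concat.save_weights('ufcnn_concat_weights.h5', overwrite=True)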
In [ ]:
model_sum = ufcnn_model_sum(regression = False, output_dim=3, features=32,
loss="categorical_crossentropy", sequence_length=500, optimizer=sgd )
In [14]:
model_sum = ufcnn_model(regression = False, output_dim=3, features=32,
loss="categorical_crossentropy", sequence_length=500, optimizer=sgd, mode='sum' )
In [15]:
print_nodes_shapes(model_sum)
In [30]:
Xdf = pd.read_csv('../models/training_data_large/prod_data_20130729v.txt', sep=" ", index_col=0, header=None)
In [12]:
ydf = pd.read_csv('../models/training_data_large/signal_20130729v.csv', index_col=0, names=['signal'])
In [13]:
ydf
Out[13]:
In [14]:
Xdf.describe()
Out[14]:
In [11]:
indicators = Xdf[list(range(6, 33))]  # select the 27 indicator columns (6-32)
In [15]:
indicators = indicators.sub(indicators.mean())  # center each column to zero mean
In [16]:
indicators = indicators.div(indicators.std())  # scale each column to unit variance
In [17]:
indicators.describe()
Out[17]:
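The two cells above implement column-wise z-scoring; wrapped as a helper (a sketch, not part of UFCNN1), the same transform is:
In [ ]:
# Sketch: column-wise z-score standardization for a DataFrame (illustrative helper)
def zscore(df):
    """Center each column to zero mean and scale it to unit variance."""
    return df.sub(df.mean()).div(df.std())

# equivalent to the two steps above:
# indicators = zscore(Xdf[list(range(6, 33))])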
In [53]:
plt.figure(figsize=(18,12))
plt.plot(indicators[-5000:][[6, 7, 8, 9, 10]])
Out[53]:
In [40]:
pca = PCA(n_components=15)
pca.fit(indicators.values)
Out[40]:
In [41]:
pca.explained_variance_ratio_
Out[41]:
In [33]:
pca.explained_variance_
Out[33]:
In [42]:
np.sum(pca.explained_variance_ratio_)
Out[42]:
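Rather than fixing n_components at 15, the cumulative explained-variance ratio can be used to choose it (a sketch; the 0.99 threshold is illustrative):
In [ ]:
# Sketch: smallest number of components reaching an illustrative 99% variance threshold
cumvar = np.cumsum(pca.explained_variance_ratio_)
n_keep = min(int(np.searchsorted(cumvar, 0.99)) + 1, len(cumvar))
print(n_keep, cumvar[n_keep - 1])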
In [44]:
pca.components_.shape
Out[44]:
In [45]:
pca_ind = pca.transform(indicators.values)
In [52]:
plt.figure(figsize=(18,12))
plt.plot(pca_ind[-5000:, 0:5])
Out[52]:
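The fidelity of the 15-component projection can be checked by inverting it (a sketch):
In [ ]:
# Sketch: mean squared reconstruction error of the PCA projection
reconstructed = pca.inverse_transform(pca_ind)
print(np.mean((indicators.values - reconstructed) ** 2))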
In [79]:
pca_ind_df = pd.DataFrame(data=pca_ind, index=indicators.index)
In [78]:
Xdf.shape
Out[78]:
In [97]:
Xdf_std = Xdf.sub(Xdf.mean())
Xdf_std = Xdf_std.div(Xdf_std.std())
In [99]:
Xdf_std.describe()
Out[99]:
In [107]:
ba_df_std = Xdf[[2, 4]].sub(Xdf[[2, 4]].mean(axis=1).mean())
ba_df_std = ba_df_std.div(ba_df_std.std())
In [108]:
ba_df_std.describe()
Out[108]:
In [110]:
plt.figure(figsize=(18,12))
plt.plot(Xdf[0:1000][[2, 4]])
plt.plot(ydf.add(4512)[0:1000])
Out[110]:
In [121]:
plt.figure(figsize=(18,12))
#plt.plot(Xdf[0:10000][[2, 4]])
#plt.plot(ydf.add(4516)[0:10000])
plt.plot(ba_df_std.add(4520)[0:10000][[2, 4]])
#plt.plot(Xdf_std.add(4520)[0:10000][[2, 4]])
#plt.plot(Xdf.div(Xdf.std()).add(4510)[0:10000][[3, 5]])
#plt.plot(pca_ind_df.add(4505)[0:10000][[0,]])
#plt.plot(indicators.add(4505)[0:10000][[6,]])
Out[121]:
In [15]:
(mean, std) = get_tradcom_normalization(filename = '../models/training_data_large/prod_data_20130729v.txt', mean = None, std = None)
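Assuming the returned mean and std align with Xdf's columns, applying them is a one-liner (a sketch; the exact return types are defined in UFCNN1):
In [ ]:
# Sketch: apply the precomputed statistics (assumes mean/std broadcast across Xdf's columns)
Xdf_norm = Xdf.sub(mean).div(std)
Xdf_norm.describe()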
In [34]:
ba_std = standardize_columns(Xdf[[2, 4]])
In [35]:
ba_std.describe()
Out[35]:
In [35]:
Xdf_std = standardize_inputs(source='../models/training_data_large/prod_data_20130729v.txt', colgroups = [[2, 4], [3, 5]])
In [36]:
Xdf_std = standardize_inputs(source=Xdf, colgroups = [[2, 4], [3, 5]])
In [15]:
Xdf_std.describe()
Out[15]:
In [18]:
type(Xdf_std)
Out[18]:
In [15]:
(X, y) = prepare_tradcom_classification(training = True, sequence_length = 500, features = 4, output_dim = 3, filename='../models/training_data_large/prod_data_20130729v.txt')
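A quick sanity check on the returned tensors; the (samples, timesteps, features) layout is assumed from the arguments above:
In [ ]:
# Sketch: inspect the tensor layout returned by prepare_tradcom_classification
print(X.shape, y.shape)  # expected: (samples, 500, 4) and (samples, 500, 3)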
In [17]:
plt.figure(figsize=(18,6))
plt.plot(X[-24000][:, 0:2]+3)
plt.plot(y[-24000][:, 0:2])
Out[17]:
In [11]:
(X, y) = prepare_tradcom_classification(training = True, sequence_length = 5000, features = 4, output_dim = 3, filename='../models/training_data_large/prod_data_20130729v.txt')
In [13]:
plt.figure(figsize=(18,6))
plt.plot(X[-20000][:, 0:2]+3)
plt.plot(y[-20000][:, 0:2])
Out[13]:
In [12]:
(X, y) = prepare_tradcom_classification(training = True, stack=False, sequence_length = 500, features = 4, output_dim = 3, filename='../models/training_data_large/prod_data_20130729v.txt')
In [29]:
X.shape
Out[29]:
In [13]:
X = X.reshape((1, X.shape[0], X.shape[1]))  # add a batch dimension: (1, timesteps, features)
In [14]:
X.shape
Out[14]:
In [30]:
y.shape
Out[30]:
In [15]:
y = y.reshape((1, y.shape[0], y.shape[1]))  # add a batch dimension: (1, timesteps, output_dim)
In [16]:
y.shape
Out[16]:
In [10]:
model = ufcnn_model_concat(day_data_length=50000, regression = False, output_dim=3, features=4,
loss="categorical_crossentropy", sequence_length=500, optimizer=sgd )
In [11]:
print_nodes_shapes(model)
In [17]:
history = model.fit({'input': X, 'output': y},
verbose=2,
nb_epoch=1,
shuffle=False,
batch_size=1)
In [19]:
history.params
Out[19]:
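Besides params, the history object records per-epoch metrics under the standard Keras key 'loss'; a sketch for plotting them:
In [ ]:
# Sketch: plot the per-epoch training loss collected by fit()
plt.figure(figsize=(18, 6))
plt.plot(history.history['loss'])
plt.xlabel('epoch')
plt.ylabel('training loss')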
In [16]:
model = ufcnn_model_seq(regression = False, output_dim=3, features=4,
loss="categorical_crossentropy", sequence_length=500, optimizer=sgd )
In [17]:
print_layers_shapes(model)
In [23]:
history = model.fit(X, y,
verbose=2,
nb_epoch=1,
batch_size=1
)
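Once trained, the sequential model's predictions keep the (batch, timesteps, classes) layout; a minimal sketch using the standard predict API:
In [ ]:
# Sketch: per-time-step class probabilities and their argmax
proba = model.predict(X, batch_size=1)
print(proba.shape)            # expected: (1, timesteps, 3)
print(proba.argmax(axis=-1))  # predicted class per time step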