In [1]:
import numpy as np
from sklearn import svm, cross_validation
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import h5py
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import sys
sys.path.append("../")
In [2]:
# Data to use
Ndata = 10000
Nside = 30

# Load the results database
file_location = '../results_database/text_wall_street_columns_30_Ndata20.hdf5'

# Load the letter sequence so targets can be aligned with the data samples
text_directory = '../data/wall_street_letters_30.npy'
letters_sequence = np.load(text_directory)
Nletters = len(letters_sequence)
symbols = set(letters_sequence)

# Each letter spans Nside consecutive samples, so sample i maps to letter i // Nside
targets = np.array([letters_sequence[i // Nside] for i in range(Ndata)])
print(list(enumerate(targets[0:40])))
In [3]:
# Containers for the prediction scores
scores_mixed = []
scores_indp = []

# Lag values to sweep over: 2, 4, ..., 16
max_lags = np.arange(2, 17, 2)

# Nexa parameters
Ntime_clusters = 20
Nembedding = 3
In [4]:
def lda_accuracy(run_name, parameters_string):
    """Return the LDA test accuracy (%) for one run stored in the results file.

    Opens the HDF5 file read-only and closes it via the context manager
    (the original left two file handles open per loop iteration), loads
    the softmax code vectors, trains LinearDiscriminantAnalysis on 90% of
    the first Ndata samples and scores it on the held-out 10%.
    """
    with h5py.File(file_location, 'r') as f:
        nexa = f[run_name + parameters_string]
        code_vectors_softmax = np.array(nexa['code-vectors-softmax'])
    X = code_vectors_softmax[:Ndata]
    y = targets
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=0.10)
    clf_linear = LinearDiscriminantAnalysis()
    clf_linear.fit(X_train, y_train)
    return clf_linear.score(X_test, y_test) * 100.0


for max_lag in max_lags:
    print(max_lag)
    Nspatial_clusters = max_lag
    # Group path inside the HDF5 file: /<spatial>-<time>-<embedding>
    parameters_string = '/' + str(Nspatial_clusters)
    parameters_string += '-' + str(Ntime_clusters)
    parameters_string += '-' + str(Nembedding)
    # Scores for the mixed runs
    scores_mixed.append(lda_accuracy('/test' + str(max_lag), parameters_string))
    # Scores for the independent runs
    scores_indp.append(lda_accuracy('/indep' + str(max_lag), parameters_string))
In [5]:
# Accuracy as a function of the maximum lag, inclusive policy
fig, ax = plt.subplots(figsize=(16, 12))
ax.plot(max_lags, scores_indp, 'o-', lw=2, markersize=10, label='independent')
ax.plot(max_lags, scores_mixed, 'o-', lw=2, markersize=10, label='mixed')
ax.set(ylim=(0, 105), ylabel='Accuracy', xlabel='Max lags',
       title='Accuracy vs Max Lags (Inclusive Policy)')
ax.legend()
Out[5]:
In [6]:
# Data to use
Ndata = 10000
Nside = 30

# Containers for the prediction scores
scores_mixed = []
scores_indp = []

# Lag values to sweep over: 2, 4, ..., 16
max_lags = np.arange(2, 17, 2)

# Nexa parameters
Ntime_clusters = 20
Nembedding = 3

# Load the results database
file_location = '../results_database/text_wall_street_columns_30_Ndata20.hdf5'

# Load the letter sequence (targets are rebuilt per lag in the next cell)
text_directory = '../data/wall_street_letters_30.npy'
letters_sequence = np.load(text_directory)
Nletters = len(letters_sequence)
symbols = set(letters_sequence)
In [7]:
def lda_accuracy_exclusive(run_name, parameters_string, aux, y):
    """Return the LDA test accuracy (%) for one run under the exclusive policy.

    Only samples whose offset inside an Nside-wide letter window is < aux
    (i.e. not contaminated by the lag overhang) are kept before training.
    The HDF5 file is opened read-only and closed via the context manager
    (the original left two file handles open per loop iteration).
    """
    with h5py.File(file_location, 'r') as f:
        nexa = f[run_name + parameters_string]
        code_vectors_softmax = np.array(nexa['code-vectors-softmax'])
    # Take only the samples you need (offset within the window < aux);
    # the window width was a hard-coded 30 — use Nside (== 30 here) instead
    keep = np.arange(Ndata * 3)
    keep = keep[(keep % Nside) < aux]
    code_vectors_softmax = code_vectors_softmax[keep, :]
    X = code_vectors_softmax[:Ndata]
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=0.10)
    clf_linear = LinearDiscriminantAnalysis()
    clf_linear.fit(X_train, y_train)
    return clf_linear.score(X_test, y_test) * 100.0


for max_lag in max_lags:
    # Under the exclusive policy each letter spans only aux valid samples
    aux = Nside - max_lag
    targets = np.array([letters_sequence[i // aux] for i in range(Ndata)])
    print(max_lag)
    Nspatial_clusters = max_lag
    # Group path inside the HDF5 file: /<spatial>-<time>-<embedding>
    parameters_string = '/' + str(Nspatial_clusters)
    parameters_string += '-' + str(Ntime_clusters)
    parameters_string += '-' + str(Nembedding)
    # Scores for the mixed runs
    scores_mixed.append(
        lda_accuracy_exclusive('/test' + str(max_lag), parameters_string, aux, targets))
    # Scores for the independent runs
    scores_indp.append(
        lda_accuracy_exclusive('/indep' + str(max_lag), parameters_string, aux, targets))
In [8]:
# Accuracy as a function of the maximum lag, exclusive policy
fig, ax = plt.subplots(figsize=(16, 12))
ax.plot(max_lags, scores_indp, 'o-', lw=2, markersize=10, label='independent')
ax.plot(max_lags, scores_mixed, 'o-', lw=2, markersize=10, label='mixed')
ax.set(ylim=(0, 105), ylabel='Accuracy', xlabel='Max lags',
       title='Accuracy vs Max Lags (Exclusive Policy)')
ax.legend()
Out[8]:
In [9]:
# Data to use
Ndata = 10000
Nside = 30

# Load the semi-constant-Ndata results database
file_location = '../results_database/text_wall_street_columns_30_semi_constantNdata.hdf5'

# Load the letter sequence so targets can be aligned with the data samples
text_directory = '../data/wall_street_letters_30.npy'
letters_sequence = np.load(text_directory)
Nletters = len(letters_sequence)
symbols = set(letters_sequence)

# Each letter spans Nside consecutive samples, so sample i maps to letter i // Nside
targets = np.array([letters_sequence[i // Nside] for i in range(Ndata)])
print(list(enumerate(targets[0:40])))
In [10]:
# Containers for the prediction scores
scores_mixed = []
scores_indp = []

# Lag values to sweep over: 2, 4, ..., 16
max_lags = np.arange(2, 17, 2)

# Nexa parameters (Ntime_clusters is derived from the lag in the next cell)
Nembedding = 3
In [11]:
def lda_accuracy_semi(run_name, parameters_string):
    """Return the LDA test accuracy (%) for one run in the semi-constant database.

    Opens the HDF5 file read-only and closes it via the context manager
    (the original left two file handles open per loop iteration), loads
    the softmax code vectors, trains LinearDiscriminantAnalysis on 90% of
    the first Ndata samples and scores it on the held-out 10%.
    """
    with h5py.File(file_location, 'r') as f:
        nexa = f[run_name + parameters_string]
        code_vectors_softmax = np.array(nexa['code-vectors-softmax'])
    X = code_vectors_softmax[:Ndata]
    y = targets
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=0.10)
    clf_linear = LinearDiscriminantAnalysis()
    clf_linear.fit(X_train, y_train)
    return clf_linear.score(X_test, y_test) * 100.0


for max_lag in max_lags:
    print('lag', max_lag)
    Nspatial_clusters = max_lag
    # Keep spatial x time budget roughly constant: fewer time clusters for larger lags
    Ntime_clusters = 60 // max_lag
    print('Ndata_clusters', Ntime_clusters)
    # Group path inside the HDF5 file: /<spatial>-<time>-<embedding>
    parameters_string = '/' + str(Nspatial_clusters)
    parameters_string += '-' + str(Ntime_clusters)
    parameters_string += '-' + str(Nembedding)
    # Scores for the mixed runs
    scores_mixed.append(lda_accuracy_semi('/test' + str(max_lag), parameters_string))
    # Scores for the independent runs
    scores_indp.append(lda_accuracy_semi('/indep' + str(max_lag), parameters_string))
In [12]:
# Accuracy as a function of the maximum lag, inclusive policy (semi-constant Ndata)
fig, ax = plt.subplots(figsize=(16, 12))
ax.plot(max_lags, scores_indp, 'o-', lw=2, markersize=10, label='independent')
ax.plot(max_lags, scores_mixed, 'o-', lw=2, markersize=10, label='mixed')
ax.set(ylim=(0, 105), ylabel='Accuracy', xlabel='Max lags',
       title='Accuracy vs Max Lags (Inclusive Policy)')
ax.legend()
Out[12]:
In [13]:
# Data to use
Ndata = 10000
Nside = 30

# Containers for the prediction scores
scores_mixed = []
scores_indp = []

# Lag values to sweep over: 2, 4, ..., 16
max_lags = np.arange(2, 17, 2)

# Nexa parameters (Ntime_clusters is derived from the lag in the next cell)
Nembedding = 3

# Load the semi-constant-Ndata results database
file_location = '../results_database/text_wall_street_columns_30_semi_constantNdata.hdf5'

# Load the letter sequence (targets are rebuilt per lag in the next cell)
text_directory = '../data/wall_street_letters_30.npy'
letters_sequence = np.load(text_directory)
Nletters = len(letters_sequence)
symbols = set(letters_sequence)
In [14]:
def lda_accuracy_exclusive_semi(run_name, parameters_string, aux, y):
    """Return the LDA test accuracy (%) for one semi-constant run, exclusive policy.

    Only samples whose offset inside an Nside-wide letter window is < aux
    (i.e. not contaminated by the lag overhang) are kept before training.
    The HDF5 file is opened read-only and closed via the context manager
    (the original left two file handles open per loop iteration).
    """
    with h5py.File(file_location, 'r') as f:
        nexa = f[run_name + parameters_string]
        code_vectors_softmax = np.array(nexa['code-vectors-softmax'])
    # Take only the samples you need (offset within the window < aux);
    # the window width was a hard-coded 30 — use Nside (== 30 here) instead
    keep = np.arange(Ndata * 3)
    keep = keep[(keep % Nside) < aux]
    code_vectors_softmax = code_vectors_softmax[keep, :]
    X = code_vectors_softmax[:Ndata]
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=0.10)
    clf_linear = LinearDiscriminantAnalysis()
    clf_linear.fit(X_train, y_train)
    return clf_linear.score(X_test, y_test) * 100.0


for max_lag in max_lags:
    # Under the exclusive policy each letter spans only aux valid samples;
    # take the corresponding letters as targets
    aux = Nside - max_lag
    targets = np.array([letters_sequence[i // aux] for i in range(Ndata)])
    print('lags', max_lag)
    Nspatial_clusters = max_lag
    # Keep spatial x time budget roughly constant: fewer time clusters for larger lags
    Ntime_clusters = 60 // max_lag
    print('Ndata clusters', Ntime_clusters)
    # Group path inside the HDF5 file: /<spatial>-<time>-<embedding>
    parameters_string = '/' + str(Nspatial_clusters)
    parameters_string += '-' + str(Ntime_clusters)
    parameters_string += '-' + str(Nembedding)
    # Scores for the mixed runs
    scores_mixed.append(
        lda_accuracy_exclusive_semi('/test' + str(max_lag), parameters_string, aux, targets))
    # Scores for the independent runs
    scores_indp.append(
        lda_accuracy_exclusive_semi('/indep' + str(max_lag), parameters_string, aux, targets))
In [15]:
# Accuracy as a function of the maximum lag, exclusive policy (semi-constant Ndata)
fig, ax = plt.subplots(figsize=(16, 12))
ax.plot(max_lags, scores_indp, 'o-', lw=2, markersize=10, label='independent')
ax.plot(max_lags, scores_mixed, 'o-', lw=2, markersize=10, label='mixed')
ax.set(ylim=(0, 105), ylabel='Accuracy', xlabel='Max lags',
       title='Accuracy vs Max Lags (Exclusive Policy)')
ax.legend()
Out[15]:
In [ ]:
# (removed stray expression `g` — an undefined name that raised NameError
# and broke Restart-and-Run-All)