In [56]:
import numpy as np
import h5py
from sklearn import svm, cross_validation
from sklearn.naive_bayes import MultinomialNB
In [57]:
# Where to read from: run_name is the leading part of the key that is later
# looked up inside the HDF5 file found at file_location.
run_name = '/low-resolution'
file_location = '../results_database/text_wall_street.hdf5'
In [58]:
# Nexa parameters that identify which stored run to read
Nspatial_clusters = 5
Ntime_clusters = 15
Nembedding = 3

# Key suffix of the form '/<spatial>-<time>-<embedding>'
parameters_string = '/' + '-'.join(
    map(str, (Nspatial_clusters, Ntime_clusters, Nembedding)))

# Open the results file read-only and pick the entry for this run.
# NOTE(review): f is not closed in this part of the notebook; objects taken
# from `nexa` are indexed in later cells, so it has to stay open until then.
f = h5py.File(file_location, 'r')
nexa = f[run_name + parameters_string]
In [59]:
# Pull the stored entries out of the selected run.
# The four code-vector variants are used as alternative feature sets below.
code_vectors_winner = nexa['code-vectors-winner']
code_vectors_softmax = nexa['code-vectors-softmax']
code_vectors_distance = nexa['code-vectors-distance']
code_vectors = nexa['code-vectors']
# Time entry stored alongside the code vectors
time = nexa['time']
In [60]:
# Load the letter sequence that serves as the prediction target.
# (text_directory is the path of the .npy file itself.)
text_directory = '../data/wall_street_letters.npy'
letters_sequence = np.load(text_directory)

# Distinct symbols present in the sequence, and its total length
symbols = set(letters_sequence)
Nletters = len(letters_sequence)
In [61]:
# Experiment parameters
# delay: offset between a code vector's index and the index of its target letter
# N: upper index bound; N - delay (code vector, letter) pairs are used below
delay = 5
N = 5000
In [62]:
# Features: winner code vectors; target: the letter `delay` positions later
# (row i of X is paired with letters_sequence[i + delay]).
X = code_vectors_winner[:(N-delay)]
y = letters_sequence[delay:N]
# Hold out 10% for testing. random_state is fixed so the partition, and
# therefore the scores printed below, are the same on every run.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    X, y, test_size=0.10, random_state=0)
In [63]:
# Linear-kernel SVM on the current split; score is test accuracy in percent
clf = svm.SVC(kernel='linear', C=1.0, cache_size=200).fit(X_train, y_train)
score = 100 * clf.score(X_test, y_test)
print('SVM linear score', score)

# Same split, RBF kernel
clf_rbf = svm.SVC(kernel='rbf', C=1.0, cache_size=200).fit(X_train, y_train)
score = 100 * clf_rbf.score(X_test, y_test)
print('SVM RBF score', score)

# Multinomial naive Bayes on the same split
clf_b = MultinomialNB().fit(X_train, y_train)
score = 100 * clf_b.score(X_test, y_test)
print('Multinomial score', score)
In [64]:
# Features: softmax code vectors; target: the letter `delay` positions later
# (row i of X is paired with letters_sequence[i + delay]).
X = code_vectors_softmax[:(N-delay)]
y = letters_sequence[delay:N]
# Hold out 10% for testing. random_state is fixed so the partition, and
# therefore the scores printed below, are the same on every run.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    X, y, test_size=0.10, random_state=0)
In [65]:
# Linear-kernel SVM on the current split; score is test accuracy in percent
clf = svm.SVC(kernel='linear', C=1.0, cache_size=200).fit(X_train, y_train)
score = 100 * clf.score(X_test, y_test)
print('SVM linear score', score)

# Same split, RBF kernel
clf_rbf = svm.SVC(kernel='rbf', C=1.0, cache_size=200).fit(X_train, y_train)
score = 100 * clf_rbf.score(X_test, y_test)
print('SVM RBF score', score)

# Multinomial naive Bayes on the same split
clf_b = MultinomialNB().fit(X_train, y_train)
score = 100 * clf_b.score(X_test, y_test)
print('Multinomial score', score)
In [66]:
# Features: plain code vectors; target: the letter `delay` positions later
# (row i of X is paired with letters_sequence[i + delay]).
X = code_vectors[:(N-delay)]
y = letters_sequence[delay:N]
# Hold out 10% for testing. random_state is fixed so the partition, and
# therefore the scores printed below, are the same on every run.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    X, y, test_size=0.10, random_state=0)
In [67]:
# Linear-kernel SVM on the current split; score is test accuracy in percent
clf = svm.SVC(kernel='linear', C=1.0, cache_size=200).fit(X_train, y_train)
score = 100 * clf.score(X_test, y_test)
print('SVM linear score', score)

# Same split, RBF kernel
clf_rbf = svm.SVC(kernel='rbf', C=1.0, cache_size=200).fit(X_train, y_train)
score = 100 * clf_rbf.score(X_test, y_test)
print('SVM RBF score', score)

# Multinomial naive Bayes on the same split
clf_b = MultinomialNB().fit(X_train, y_train)
score = 100 * clf_b.score(X_test, y_test)
print('Multinomial score', score)
In [68]:
# Features: distance code vectors; target: the letter `delay` positions later
# (row i of X is paired with letters_sequence[i + delay]).
X = code_vectors_distance[:(N-delay)]
y = letters_sequence[delay:N]
# Hold out 10% for testing. random_state is fixed so the partition, and
# therefore the scores printed below, are the same on every run.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    X, y, test_size=0.10, random_state=0)
In [69]:
# Linear-kernel SVM on the current split; score is test accuracy in percent
clf = svm.SVC(kernel='linear', C=1.0, cache_size=200).fit(X_train, y_train)
score = 100 * clf.score(X_test, y_test)
print('SVM linear score', score)

# Same split, RBF kernel
clf_rbf = svm.SVC(kernel='rbf', C=1.0, cache_size=200).fit(X_train, y_train)
score = 100 * clf_rbf.score(X_test, y_test)
print('SVM RBF score', score)

# Multinomial naive Bayes on the same split
clf_b = MultinomialNB().fit(X_train, y_train)
score = 100 * clf_b.score(X_test, y_test)
print('Multinomial score', score)
In [70]:
from sklearn import preprocessing
In [71]:
# Standardised winner code vectors -> letter `delay` positions later
# (row i of X is paired with letters_sequence[i + delay]).
X = code_vectors_winner[:(N-delay)]
y = letters_sequence[delay:N]
# Split BEFORE scaling, with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    X, y, test_size=0.10, random_state=0)
# Mean/variance are estimated on the training rows only and then applied to
# both parts; scaling the full matrix first would let test-set statistics
# leak into the training features.
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Linear-kernel SVM; score is test accuracy in percent
clf = svm.SVC(C=1.0, cache_size=200, kernel='linear')
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test) * 100
print('SVM linear score', score)

# RBF-kernel SVM on the same standardised split
clf_rbf = svm.SVC(C=1.0, cache_size=200, kernel='rbf')
clf_rbf.fit(X_train, y_train)
score = clf_rbf.score(X_test, y_test) * 100
print('SVM RBF score', score)
In [72]:
# Standardised softmax code vectors -> letter `delay` positions later
# (row i of X is paired with letters_sequence[i + delay]).
X = code_vectors_softmax[:(N-delay)]
y = letters_sequence[delay:N]
# Split BEFORE scaling, with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    X, y, test_size=0.10, random_state=0)
# Mean/variance are estimated on the training rows only and then applied to
# both parts; scaling the full matrix first would let test-set statistics
# leak into the training features.
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Linear-kernel SVM; score is test accuracy in percent
clf = svm.SVC(C=1.0, cache_size=200, kernel='linear')
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test) * 100
print('SVM linear score', score)

# RBF-kernel SVM on the same standardised split
clf_rbf = svm.SVC(C=1.0, cache_size=200, kernel='rbf')
clf_rbf.fit(X_train, y_train)
score = clf_rbf.score(X_test, y_test) * 100
print('SVM RBF score', score)
In [73]:
# Standardised distance code vectors -> letter `delay` positions later
# (row i of X is paired with letters_sequence[i + delay]).
X = code_vectors_distance[:(N-delay)]
y = letters_sequence[delay:N]
# Split BEFORE scaling, with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    X, y, test_size=0.10, random_state=0)
# Mean/variance are estimated on the training rows only and then applied to
# both parts; scaling the full matrix first would let test-set statistics
# leak into the training features.
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Linear-kernel SVM; score is test accuracy in percent
clf = svm.SVC(C=1.0, cache_size=200, kernel='linear')
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test) * 100
print('SVM linear score', score)

# RBF-kernel SVM on the same standardised split
clf_rbf = svm.SVC(C=1.0, cache_size=200, kernel='rbf')
clf_rbf.fit(X_train, y_train)
score = clf_rbf.score(X_test, y_test) * 100
print('SVM RBF score', score)
In [74]:
# Unscaled distance code vectors -> letter `delay` positions later.
# NOTE(review): this repeats the earlier unscaled distance-vector experiment
# (without the naive Bayes step); presumably kept for side-by-side comparison
# with the standardised run above -- confirm it is still needed.
X = code_vectors_distance[:(N-delay)]
y = letters_sequence[delay:N]
# Hold out 10% for testing. random_state is fixed so the partition, and
# therefore the printed scores, are the same on every run.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    X, y, test_size=0.10, random_state=0)

# Linear-kernel SVM; score is test accuracy in percent
clf = svm.SVC(C=1.0, cache_size=200, kernel='linear')
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test) * 100
print('SVM linear score', score)

# RBF-kernel SVM on the same split
clf_rbf = svm.SVC(C=1.0, cache_size=200, kernel='rbf')
clf_rbf.fit(X_train, y_train)
score = clf_rbf.score(X_test, y_test) * 100
print('SVM RBF score', score)
In [ ]: