In [1]:
from __future__ import print_function
In [2]:
%matplotlib inline
In [3]:
import numpy as np
import matplotlib.pyplot as plt
In [4]:
# Read the five arrays out of the archive in one pass while the file is open;
# leaving the `with` block closes the .npz handle.
with np.load("data_files.npz") as data:
    X_train, Y_train, X_test, Y_test, X_comp = (
        data[key] for key in ("X_train", "Y_train", "X_test", "Y_test", "X_comp")
    )
# Drop the (now closed) archive handle from the notebook namespace.
del data
Work in floating point this time: cast the raw arrays to float64 before computing the spectra.
In [5]:
# Promote all three feature matrices to float64 in a single statement.
X_train, X_test, X_comp = (
    np.float64(signals) for signals in (X_train, X_test, X_comp)
)
In [6]:
def convert_to_spectra(X):
    """Convert each time-domain row of ``X`` into a one-sided amplitude spectrum.

    For every row the FFT is computed, the DC term (bin 0) is discarded and
    only bins ``1 .. ceil(n / 2) - 1`` are kept, where ``n`` is the row
    length (for even ``n`` the Nyquist bin ``n / 2`` is therefore excluded).
    The magnitudes are scaled by ``2 / n``.

    Parameters
    ----------
    X : iterable of 1-D array_like
        One signal per row (e.g. a 2-D array of shape ``(n_rows, n)``).

    Returns
    -------
    numpy.ndarray
        Array with one spectrum per input row; each spectrum has
        ``ceil(n / 2) - 1`` entries.
    """
    out = []
    for row in X:
        xfft = np.fft.fft(row)
        n = len(xfft)
        # np.ceil returns a float; slice bounds must be integers, so cast
        # explicitly (a float bound raises TypeError on current NumPy).
        half_n = int(np.ceil(n / 2.0))
        xfft = (2.0 / n) * xfft[1:half_n]
        out.append(np.abs(xfft))
    return np.array(out)
In [7]:
# Turn each raw signal matrix into its per-row amplitude spectrum.
X_train_spectra, X_test_spectra, X_comp_spectra = (
    convert_to_spectra(signals) for signals in (X_train, X_test, X_comp)
)
In [8]:
# Sanity check: (number of training rows, number of spectrum bins kept per row).
X_train_spectra.shape
Out[8]:
In [9]:
# Spectrum of the first training row. Labelled so the figure stands on its
# own; plt.show() renders it without echoing the Line2D repr.
plt.plot(X_train_spectra[0])
plt.title("Training sample 0: amplitude spectrum")
plt.xlabel("Frequency bin (DC term removed)")
plt.ylabel("Amplitude")
plt.show()
Out[9]:
In [10]:
def moving_average(X, n=3):
    """Smooth every row of ``X`` with an ``n``-point moving average.

    Only fully covered windows are returned, so a row of length ``L``
    yields ``L - n + 1`` values (an empty row when ``L < n``). The input
    is not modified.

    Parameters
    ----------
    X : iterable of 1-D array_like
        One sequence per row (e.g. a 2-D array).
    n : int, optional
        Window length; must be at least 1. Defaults to 3.

    Returns
    -------
    numpy.ndarray
        Array holding one smoothed row per input row.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1.
    """
    # A non-positive window would either fail with an opaque broadcasting
    # error (n == 0) or silently produce meaningless values (n < 0).
    if n < 1:
        raise ValueError("n must be a positive integer, got %r" % (n,))

    ret = []
    for row in X:
        # Running sum; np.cumsum allocates a new array, so `row` is untouched.
        csum = np.cumsum(row)
        # Subtract the sum that ended n steps earlier -> sum over each window.
        csum[n:] = csum[n:] - csum[:-n]
        # Divide by float(n) so integer input is not floor-divided on Python 2.
        ret.append(csum[n - 1:] / float(n))
    return np.array(ret)
In [11]:
# Smooth every spectrum with a 5-point moving average (each row loses 4 bins).
# NOTE: the names are rebound to their own smoothed versions, so running this
# cell a second time smooths the data again.
X_train_spectra, X_test_spectra, X_comp_spectra = (
    moving_average(spectra, n=5)
    for spectra in (X_train_spectra, X_test_spectra, X_comp_spectra)
)
In [12]:
# Same training row as before, after the 5-point moving average. Labelled so
# the figure stands on its own; plt.show() avoids echoing the Line2D repr.
plt.plot(X_train_spectra[0])
plt.title("Training sample 0: spectrum after 5-point moving average")
plt.xlabel("Smoothed frequency bin")
plt.ylabel("Amplitude")
plt.show()
Out[12]:
In [13]:
# Value range of each split (train, test, comp), printed ahead of the
# int16 cast in the next cell.
for spectra in (X_train_spectra, X_test_spectra, X_comp_spectra):
    print(spectra.min(), spectra.max())
In [14]:
# Narrow the smoothed spectra to int16 (fractional parts are truncated).
# A value outside the int16 range, or a NaN, would not survive the cast
# intact, so fail loudly instead of silently producing corrupted features.
_int16 = np.iinfo(np.int16)
for _name, _arr in (
    ("X_train_spectra", X_train_spectra),
    ("X_test_spectra", X_test_spectra),
    ("X_comp_spectra", X_comp_spectra),
):
    # Strict bounds one step outside the range: 32767.9 still truncates to a
    # valid int16. NaN fails both comparisons and is rejected as well.
    if _arr.size and not (
        _int16.min - 1 < _arr.min() and _arr.max() < _int16.max + 1
    ):
        raise ValueError(
            "%s has values outside the int16 range [%d, %d]"
            % (_name, _int16.min, _int16.max)
        )

X_train_spectra = np.int16(X_train_spectra)
X_test_spectra = np.int16(X_test_spectra)
X_comp_spectra = np.int16(X_comp_spectra)
In [15]:
# Fold the held-out test split into the training set so the model is fitted
# on every labelled sample.
# NOTE(review): from here on X_train_spectra / Y_train contain the test rows,
# so any metric later computed on X_test_spectra / Y_test is measured on data
# the model has seen and will be optimistic.
# NOTE: not idempotent - re-running this cell appends the test rows again.
X_train_spectra = np.vstack([X_train_spectra, X_test_spectra])
Y_train = np.concatenate([Y_train, Y_test])
In [16]:
# One sample per row: the label in column 0, followed by the spectrum bins.
# Everything is written as integers.
for_google = np.c_[Y_train, X_train_spectra]
np.savetxt(
    "X_train_spectra_ave_goog_everything.csv",
    for_google,
    fmt='%i',
    delimiter=",",
)
In [17]:
# Sanity check: (number of exported rows, 1 label column + spectrum bins).
for_google.shape
Out[17]:
In [18]:
# Shapes of every array used below. X_train_spectra / Y_train already include
# the test rows that were appended in the merge cell above.
for arr in (X_train_spectra, Y_train, X_test_spectra, Y_test, X_comp_spectra):
    print(arr.shape)
In [19]:
from sklearn.ensemble import RandomForestClassifier

# Fully grown forest of 100 trees with a fixed seed for reproducibility.
# min_samples_split must be an int >= 2 (or a float fraction) in current
# scikit-learn; the previous value of 1 raises an error there. A node needs
# at least two samples to be split at all, so 2 grows the same trees.
model = RandomForestClassifier(
    n_estimators=100,
    max_depth=None,
    min_samples_split=2,
    random_state=0,
    verbose=True,
)
model.fit(X_train_spectra, Y_train)
Out[19]:
In [20]:
# Mean accuracy of the fitted model on the test split.
# NOTE(review): the test rows were merged into the training data before the
# model was fitted, so this number is measured on samples the model has
# already seen and is not an estimate of generalisation accuracy.
my_score = model.score(X_test_spectra,Y_test)
print(my_score)
In [21]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
In [22]:
# Predicted labels for the test split (rows the model was also fitted on).
Y_pred = model.predict(X_test_spectra)
In [23]:
# Fraction of test labels predicted correctly, computed from Y_pred.
accuracy_score(Y_test, Y_pred)
Out[23]:
In [24]:
# Per-class text summary of the test-split predictions.
print(classification_report(Y_test, Y_pred))
In [25]:
# Confusion matrix with the class order pinned to [0, 1].
# NOTE(review): assumes the labels are exactly 0 and 1 - confirm against Y_test.
confusion_matrix(Y_test, Y_pred, labels=[0, 1])
Out[25]:
In [26]:
# Predict labels for X_comp, the split that was loaded without labels
# (presumably the competition / submission set - confirm).
Y_comp = model.predict(X_comp_spectra)
In [27]:
# Write the predicted labels as integers to a CSV file.
np.savetxt(
    "sklearn_spectra_ave_everything.csv",
    np.array(Y_comp, dtype=int),
    fmt='%i',
    delimiter=",",
)
In [26]: