In [1]:

    
from __future__ import print_function
import numpy as np
import googleprediction

Google's API library is set up to work well with command-line applications. This notebook mimics some of that behavior.

Initialize Model



In [2]:

    
# Build the predictor object that every later cell talks to.
# NOTE(review): the four positional arguments look like (project id, training CSV
# inside a storage bucket, model name, OAuth client-secrets file) — confirm
# against the signature of googleprediction.GooglePredictor.
model = googleprediction.GooglePredictor(
    "myproject",
    "mybucket/X_train_spectra_ave_goog.csv",
    "tswift_fft_ave",
    "client_secrets.json")

List Available Models



In [ ]:

    
# Presumably lists the models that already exist for this project — verify
# against the googleprediction module.
model.list()

Fit Model to Training Data in the Storage Bucket



In [ ]:

    
# Request training with model type 'CLASSIFICATION'.
# NOTE(review): presumably trains on the bucket CSV given to the constructor — confirm.
model.fit('CLASSIFICATION')

List Model Features



In [ ]:

    
# Display whatever parameters/feature description the predictor reports for the model.
model.get_params()

Load Data Files for Testing



In [8]:

    
# Pull the five arrays out of the .npz archive in one go; the context manager
# closes the file as soon as they have been read.
with np.load("data_files.npz") as archive:
    X_train, Y_train, X_test, Y_test, X_comp = (
        archive[key]
        for key in ('X_train', 'Y_train', 'X_test', 'Y_test', 'X_comp')
    )
# The archive handle is closed and no longer needed in the notebook namespace.
del archive



In [9]:

    
# Promote all three feature matrices to float64.
X_train, X_test, X_comp = (
    np.float64(features) for features in (X_train, X_test, X_comp)
)

Compute Frequency Spectra for Input Features



In [10]:

    
def convert_to_spectra(X):
    """Return the one-sided amplitude spectrum of every row of ``X``.

    For each row the FFT is computed, the DC term (bin 0) is dropped, bins
    ``1 .. ceil(n / 2) - 1`` are kept, scaled by ``2 / n`` and converted to
    magnitudes, where ``n`` is the row length.

    Parameters
    ----------
    X : iterable of 1-D array-like
        One signal per row.

    Returns
    -------
    numpy.ndarray
        One row of magnitudes per input row; a row of length ``n`` yields
        ``ceil(n / 2) - 1`` values.
    """
    out = []
    for row in X:
        xfft = np.fft.fft(row)
        n = len(xfft)
        # Integer ceiling of n / 2. The previous np.ceil(n / 2.0) produced a
        # float, which NumPy rejects as a slice bound (TypeError).
        half_n = (n + 1) // 2
        xfft = (2.0 / n) * xfft[1:half_n]
        out.append(np.abs(xfft))
    return np.array(out)



In [11]:

    
# Convert each raw feature matrix into per-row magnitude spectra.
X_train_spectra, X_test_spectra, X_comp_spectra = map(
    convert_to_spectra, (X_train, X_test, X_comp)
)

Smooth the Spectra



In [12]:

    
def moving_average(X, n=3):
    """Smooth every row of ``X`` with a length-``n`` moving average.

    Only complete windows are averaged, so a row of length ``m`` yields
    ``m - n + 1`` values (none when ``m < n``). The input rows are not
    modified.

    Parameters
    ----------
    X : iterable of 1-D array-like
        One sequence per row.
    n : int, optional
        Window length; must be at least 1. Defaults to 3.

    Returns
    -------
    numpy.ndarray
        One smoothed row per input row.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive integer, got %r" % (n,))
    ret = []
    for row in X:
        # cumsum allocates a fresh array, so the caller's data stays untouched.
        running = np.cumsum(row)
        # From index n on, subtract the sum that ended n steps earlier; entry i
        # (i >= n - 1) then holds the sum of the n values ending at i.
        running[n:] = running[n:] - running[:-n]
        # Divide by a float so integer rows get a true average rather than
        # being floor-divided (Python 2 semantics for int / int).
        ret.append(running[n - 1:] / float(n))
    return np.array(ret)



In [13]:

    
# Replace each spectra matrix with its 5-point moving average.
# Note: the names are rebound, so re-running this cell smooths a second time.
X_train_spectra, X_test_spectra, X_comp_spectra = (
    moving_average(spectra, n=5)
    for spectra in (X_train_spectra, X_test_spectra, X_comp_spectra)
)



In [14]:

    
# Cast the smoothed spectra to 16-bit integers.
# NOTE(review): presumably this matches the integer format of the training CSV
# uploaded for the model — confirm; values outside the int16 range would not survive.
X_train_spectra, X_test_spectra, X_comp_spectra = map(
    np.int16, (X_train_spectra, X_test_spectra, X_comp_spectra)
)



In [15]:

    
# Show the shape of the test labels, for comparison with the prediction count later.
Y_test.shape









    Out[15]:





(6720,)

Predict on the Test Set



In [16]:

    
# Request predictions for the int16 test spectra and print them.
# NOTE(review): predict presumably returns one label per input row — confirm
# against the googleprediction module.
out = model.predict(X_test_spectra)
print(out)









    



=======================
Making some predictions
=======================
[u'1' u'0' u'1' ..., u'1' u'1' u'1']



In [17]:

    
# The predictions print as unicode strings (e.g. u'1'); convert them to int16
# so they can be compared numerically with the labels, then show the shape.
out = np.int16(out)
out.shape









    Out[17]:





(6720,)

Compute Performance on the Test Set



In [18]:

    
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix



In [19]:

    
# Per-class precision / recall / F1; Y_test is cast to int16 like the predictions.
print(classification_report(np.int16(Y_test), out))









    



             precision    recall  f1-score   support

          0       0.96      0.93      0.94      3381
          1       0.93      0.96      0.94      3339

avg / total       0.94      0.94      0.94      6720



In [20]:

    
# Y_test is passed as the true labels and out as the predictions; in
# scikit-learn's layout rows are true classes and columns predicted classes,
# both ordered [0, 1].
confusion_matrix(Y_test, out, labels=[0, 1])









    Out[20]:





array([[3141,  240],
       [ 145, 3194]])



In [21]:

    
# Fraction of test predictions that equal the corresponding label in Y_test.
print(accuracy_score(Y_test, out))









    



0.942708333333

Save Predictions



In [22]:

    
# Write the predictions as integers to a comma-delimited CSV file.
np.savetxt(
    "gpapi_test_pred_fft.csv",
    np.array(out, dtype=int),
    fmt='%i',
    delimiter=',',
)



In [ ]: