In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os, re, sys, time

# sklearn stuff
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.externals import joblib  # note: newer sklearn versions use plain "import joblib"

# keras stuff
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Conv2D, MaxPooling2D, Flatten
from keras.optimizers import SGD
from keras.metrics import binary_accuracy
from keras import callbacks

# scipy stuff
from scipy.interpolate import interp1d
from scipy.stats import mode

# preprocessing stuff
from preprocessingTR import *

%matplotlib inline


Using Theano backend.

In [2]:
print('Loading model')
t0 = time.time()
model = keras.models.load_model('./Models/ConvNetC.h5')
t1 = time.time()
print('Loaded model in %.2fs' % (t1 - t0))


Loading model
Loaded model in 39.37s
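
Most of those 39 s is likely Theano building the compute graph rather than reading the HDF5 file. A throwaway predict call (a sketch, assuming the (3, 300, 1) input shape used later) forces any remaining lazy compilation before real inference is timed:

In [ ]:
# warm-up: trigger lazy compilation of the predict function
_ = model.predict(np.zeros((1, 3, 300, 1)))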

In [43]:
import multiprocessing.dummy as mp  # same Pool API as multiprocessing, backed by threads
import multiprocessing

a = 'who there'  # module-level value captured by the worker

def do_print(s):
    # write one small file per work item (I/O-bound, so threads suffice)
    with open('./tst/%s.txt' % s, 'w') as f:
        f.write('%s %s' % (a, s))

os.makedirs('./tst', exist_ok=True)  # make sure the output directory exists

t0 = time.time()
p = mp.Pool(multiprocessing.cpu_count())
p.map(do_print, range(1000))
p.close()
p.join()
t1 = time.time()
print('Finished in %.2fs' % (t1-t0))


Finished in 1.54s

In [36]:
# serial baseline for comparison
t0 = time.time()
for i in range(1000):
    do_print(i)
t1 = time.time()
print('Finished in %.2fs' % (t1-t0))


Finished in 2.77s
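
Threads roughly halve the wall time here: multiprocessing.dummy exposes the same Pool API as multiprocessing but backs it with threads, which is enough for I/O-bound work like these file writes even under the GIL. The same comparison with the standard concurrent.futures API (a sketch, reusing do_print and the ./tst/ directory from above):

In [ ]:
# thread pool via concurrent.futures (equivalent sketch)
from concurrent.futures import ThreadPoolExecutor

t0 = time.time()
with ThreadPoolExecutor(max_workers=multiprocessing.cpu_count()) as ex:
    list(ex.map(do_print, range(1000)))
print('Finished in %.2fs' % (time.time() - t0))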

In [18]:
for layer in model.layers:
    print(layer.name, '\t', layer.output_shape)


conv2d_1 	 (None, 3, 300, 128)
conv2d_2 	 (None, 3, 300, 128)
conv2d_3 	 (None, 3, 300, 128)
max_pooling2d_1 	 (None, 1, 30, 128)
flatten_1 	 (None, 3840)
dense_1 	 (None, 512)
dropout_1 	 (None, 512)
dense_2 	 (None, 512)
dropout_2 	 (None, 512)
dense_3 	 (None, 3)
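
The Flatten width follows from the pooled map: 1 × 30 × 128 = 3840 values per example. A quick sanity check (assuming the layer name printed above):

In [ ]:
# the Flatten width is the product of the pooled map's dimensions
pooled = model.get_layer('max_pooling2d_1').output_shape[1:]  # (1, 30, 128)
print(np.prod(pooled))  # 3840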

In [12]:
# `mp` here refers to the max-pooling layer object (this cell ran before
# the multiprocessing import in In [43] rebound the name)
mp.get_output_shape_at(0)


Out[12]:
(None, 1, 30, 128)

In [13]:
mp.output_shape


Out[13]:
(None, 1, 30, 128)

In [16]:
mp.name


Out[16]:
'max_pooling2d_1'
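
Looking the layer up by name avoids leaning on the `mp` binding, which the multiprocessing cell reuses:

In [ ]:
# fetch the pooling layer by name rather than a loose variable
pool_layer = model.get_layer('max_pooling2d_1')
print(pool_layer.name, pool_layer.output_shape)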


In [3]:
user_lookup = {0: 'other', 1: 'Mukund', 2: 'Frank'}
user2netid = {0: 'other', 1:'ms2666', 2: 'fc249'}

In [4]:
def load_data(xpath='./Data_test/processed/full.pickle'):
    return pd.read_pickle(xpath)

def scale_data(df, model_dir='./Models/'):
    """
    Scale data
    """
    xTe = df.values
    ss = joblib.load(model_dir + 'ss.pkl')
    xTe = ss.transform(xTe)

    return xTe

In [5]:
u_dict = generate_udict('./Data_test/')

In [140]:
u_dict


Out[140]:
{0: [0]}

In [180]:
# preprocess and save the test data
preprocess_and_save(u_dict, dir_name='./Data_test/processed/', begin_idx=150)


Processing user 0, trial 0, 0 remaining
FAILED

In [181]:
merge_incremental(base='./Data_test/processed/')


full.pickle not found... Building now

In [182]:
# load and scale the test data
data = load_data()
xTe = scale_data(data)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-182-a1e535666701> in <module>()
      1 # load and scale the test data
      2 data = load_data()
----> 3 xTe = scale_data(data)

<ipython-input-4-474e93f8545a> in scale_data(df, model_dir)
      8     xTe = df.values
      9     ss = joblib.load(model_dir + 'ss.pkl')
---> 10     xTe = ss.transform(xTe)
     11 
     12     return xTe

/home/ubicomp/anaconda3/lib/python3.5/site-packages/sklearn/preprocessing/data.py in transform(self, X, y, copy)
    644         X = check_array(X, accept_sparse='csr', copy=copy,
    645                         ensure_2d=False, warn_on_dtype=True,
--> 646                         estimator=self, dtype=FLOAT_DTYPES)
    647 
    648         if X.ndim == 1:

/home/ubicomp/anaconda3/lib/python3.5/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
    414                              " minimum of %d is required%s."
    415                              % (n_samples, shape_repr, ensure_min_samples,
--> 416                                 context))
    417 
    418     if ensure_min_features > 0 and array.ndim == 2:

ValueError: Found array with 0 sample(s) (shape=(0, 0)) while a minimum of 1 is required by StandardScaler.
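
The error follows from the FAILED preprocessing run above: full.pickle was rebuilt from zero trials, so load_data returns an empty frame and StandardScaler rejects the (0, 0) array. (The (34, 900) xTe inspected below is presumably left over from an earlier, successful run.) A guard makes the failure explicit (a sketch):

In [ ]:
# fail fast when preprocessing produced no rows (hypothetical guard)
data = load_data()
if data.empty:
    raise RuntimeError('full.pickle is empty; rerun preprocess_and_save')
xTe = scale_data(data)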

In [183]:
xTe.shape


Out[183]:
(34, 900)

In [172]:
# overlay the scaled windows to eyeball the test inputs
plt.figure(figsize=(15, 4))
for row in xTe:
    plt.plot(row)



In [173]:
xTe_conv = xTe.reshape(-1, 3, 300, 1)
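
The 900 flat features per window unpack into the (3, 300, 1) input the convnet expects, presumably three sensor axes by 300 time samples by one channel:

In [ ]:
# sanity check on the reshape (one window per row of xTe)
assert xTe_conv.shape == (xTe.shape[0], 3, 300, 1)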

In [174]:
preds = model.predict(xTe_conv)

In [175]:
# majority vote: modal class over the per-window argmax predictions
user_lookup[mode(preds.argmax(axis=1)).mode[0]]


Out[175]:
'Frank'
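
Each window votes with its argmax class and the modal class names the user. The vote breakdown behind that decision:

In [ ]:
# per-class vote counts over the test windows
votes = preds.argmax(axis=1)
print(np.bincount(votes, minlength=3))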

In [190]:
fig, ax = plt.subplots()
fig.set_figheight(8)
fig.set_figwidth(3)
ax.bar([0, 1, 2], preds.mean(axis=0))
ax.set_xticks([0,1,2])
ax.set_xticklabels(['Other', 'Mukund', 'Frank'])
ax.title.set_text('Softmax Probabilities')
fig.savefig('./Data_test/results.png')
plt.show()




In [7]:
# placeholder curves (random data) to prototype the training-results figure
loss = np.random.rand(100)
accuracy = np.random.rand(100)
x = np.arange(100)

In [24]:
fig, axarr = plt.subplots(2, 1, sharex=True)
fig.set_figwidth(10)
fig.set_figheight(6)

axarr[0].plot(x, loss)
axarr[0].set_ylabel('Loss')

axarr[1].plot(x, accuracy)
axarr[1].set_xlabel('Epoch')
axarr[1].set_ylabel('Accuracy')

fig.savefig('./Data/train_results.png')



In [18]:
# random placeholder counts to prototype the confusion-matrix plot
conf = np.random.randint(0, 10, (3, 3))

In [25]:
plt.figure(figsize=(6,6))
heatmap = plt.pcolor(conf, cmap=plt.cm.Blues, alpha=0.8, edgecolors='k')



In [26]:
plt.pcolor?

In [61]:
fig, ax = plt.subplots()
fig.suptitle('Confusion Matrix', fontsize = 20)
fig.subplots_adjust(top=0.8)
ax.matshow(conf, cmap=plt.cm.Blues)
ax.set_ylabel('Actual')
ax.set_xlabel('Predicted')
ax.xaxis.set_label_position('top')

# annotate each cell with its count (integer format, since conf holds counts)
for (i, j), z in np.ndenumerate(conf):
    ax.text(j, i, '{:d}'.format(z), ha='center', va='center',
            bbox=dict(boxstyle='round', facecolor='white', edgecolor='0.3'))
fig.savefig('./Data/confmat.png')



In [52]:
from sklearn.metrics import confusion_matrix

In [53]:
y = np.random.randint(0, 3, 10)
yhat = y.copy()

In [54]:
# flip five randomly chosen labels to the next class (mod 3)
for i in range(5):
    idx = np.random.randint(0, 10)
    yhat[idx] = (yhat[idx] + 1) % 3

In [59]:
confusion_matrix(y, yhat)


Out[59]:
array([[3, 2, 0],
       [0, 2, 1],
       [2, 0, 0]])
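
Rows are actual classes and columns predicted, so the diagonal counts agreements: 3 + 2 + 0 = 5 correct out of 10, matching the five perturbed labels. The same tally programmatically:

In [ ]:
# diagonal entries are agreements; everything off-diagonal was perturbed
cm = confusion_matrix(y, yhat)
print(cm.trace(), 'correct of', cm.sum())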

In [60]:
for a,b in zip(y, yhat):
    print(a, b, a==b)


2 0 False
1 1 True
0 0 True
0 1 False
1 2 False
2 0 False
1 1 True
0 0 True
0 1 False
0 0 True
