In [1]:
from __future__ import print_function, unicode_literals, absolute_import, division
from six.moves import range, zip, map, reduce, filter
In [2]:
import numpy as np
import matplotlib.pyplot as plt
from IPython import display
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
In [3]:
import seaborn as sns
sns.set_style('whitegrid')
plt.rc('figure', figsize=(7.0, 5.0))
In [29]:
import keras
from keras import backend as K
from keras.models import Sequential, Model
from keras.layers import Input, Dense, Activation
from keras.optimizers import Adam
from keras.callbacks import LambdaCallback
from keras.utils import np_utils
In [5]:
def plot_callback(func, p=20):
    # build a Keras callback that redraws func() every p epochs during training
    def plot_epoch_end(epoch, logs):
        if epoch == 0 or (epoch+1) % p == 0:
            plt.clf(); func(); plt.title('epoch %d' % (epoch+1))
            display.clear_output(wait=True); display.display(plt.gcf())
    def clear(*args):
        plt.clf()
    return LambdaCallback(on_epoch_end=plot_epoch_end, on_train_end=clear)
In [6]:
def plot_loss_acc(hist):
    # plot training (and, if present, validation) loss and accuracy curves
    plt.figure(figsize=(15,4))
    if len(hist.params['metrics']) == 2:  # only 'loss' and 'acc' recorded, i.e. no validation data
        plt.subplot(121); plt.semilogy(hist.epoch, hist.history['loss'])
        plt.xlabel('epoch'); plt.ylabel('loss'); plt.legend(['train'], loc='upper right')
        plt.subplot(122); plt.plot(hist.epoch, hist.history['acc'])
        plt.xlabel('epoch'); plt.ylabel('accuracy'); plt.legend(['train'], loc='lower right')
    else:
        plt.subplot(121); plt.semilogy(hist.epoch, hist.history['loss'], hist.epoch, hist.history['val_loss'])
        plt.xlabel('epoch'); plt.ylabel('loss'); plt.legend(['train','test'], loc='upper right')
        plt.subplot(122); plt.plot(hist.epoch, hist.history['acc'], hist.epoch, hist.history['val_acc'])
        plt.xlabel('epoch'); plt.ylabel('accuracy'); plt.legend(['train','test'], loc='lower right')
In [7]:
iris = sns.load_dataset("iris")
iris.sample(10)
Out[7]:
(10 random rows of the iris table: sepal_length, sepal_width, petal_length, petal_width, species)
In [8]:
sns.pairplot(iris, hue='species');
In [9]:
def label_encode(arr):
    # map each distinct class name to an integer id (0, 1, 2, ...)
    uniques, ids = np.unique(arr, return_inverse=True)
    return ids
In [10]:
classes = ('setosa', 'versicolor', 'virginica')
labels = label_encode(classes)
for i,c in enumerate(classes):
    print('%10s → %d' % (c, labels[i]))
In [11]:
def onehot_encode(arr):
    # map each distinct class name to a one-hot row vector
    uniques, ids = np.unique(arr, return_inverse=True)
    return np_utils.to_categorical(ids, len(uniques))
In [12]:
classes = ('setosa', 'versicolor', 'virginica')
onehot = onehot_encode(classes)
for i,c in enumerate(classes):
    print('%10s → [%d,%d,%d]' % (c, onehot[i,0], onehot[i,1], onehot[i,2]))
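To go back from a one-hot row to its class name, take the index of its single 1 with np.argmax (a small sketch using the classes and onehot defined above):
In [ ]:
# decoding: argmax recovers the integer label, which indexes back into classes
for row in onehot:
    print(classes[np.argmax(row)])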
In [13]:
data = iris
feature_name = 'petal_length'
data = data[[feature_name,'species']]
In [14]:
X = data.values[:,0]
y = label_encode(data.values[:,1])
y_oh = onehot_encode(data.values[:,1])
N = len(y)
In [15]:
R = np.linspace(X.min()-1, X.max()+1, 100)   # dense grid over the feature range
Xp = np.zeros(X.shape[0]) - .1               # vertical offset for plotting the data points
Rp = np.zeros(R.shape[0]) - .2               # vertical offset for plotting the grid predictions
def plot_all(model=None):
    plt.scatter(X, Xp, c=y, cmap='jet')
    plt.xlabel(feature_name)
    if model is not None:
        prob = model.predict(R)              # class probabilities on the grid
        yhat = np.argmax(prob, axis=1)       # predicted class = most probable one
        plt.scatter(R, Rp, c=yhat)
        plt.plot(R, prob)
        plt.legend(['p("%s")' % s for s in classes], loc='upper center', frameon=False, ncol=3)
    plt.xlim(X.min()-1.5, X.max()+1.5)
    plt.ylim(-.4, 1.2)
plot_all()
In [16]:
model = Sequential()
model.add(Dense(16, input_shape=(1,)))
model.add(Activation('tanh'))
model.add(Dense(3))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
In [17]:
hist = model.fit(X, y_oh, batch_size=5, epochs=300, verbose=0,
                 callbacks=[plot_callback(lambda: plot_all(model))])
In [18]:
plot_loss_acc(hist)
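As a quick check (not part of the original notebook output), the trained single-feature model can be queried for a few petal lengths; the predicted class is the argmax of the softmax output, exactly as in plot_all:
In [ ]:
# hypothetical probe values in cm; the model above expects a single feature
probe = np.array([[1.5], [4.5], [6.0]])
prob = model.predict(probe)   # one row of class probabilities per probe value
for x, p in zip(probe[:,0], prob):
    print('petal_length = %.1f → %s (p = %.2f)' % (x, classes[np.argmax(p)], p.max()))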
Use all features and split the dataset into train and test subsets:
In [19]:
N = iris.shape[0] # number of data points / table rows
data = iris.sample(N,replace=False) # shuffle data
X = data.values[:,0:4]
y_oh = onehot_encode(data.values[:,4])
N_train = N//2 # random 50/50 train/test split
X_train, y_train = X[:N_train], y_oh[:N_train]
X_test, y_test = X[N_train:], y_oh[N_train:]
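The 50/50 split is purely random, so a quick sanity check (not in the original notebook) that all three species occur in the training half:
In [ ]:
# count how often each class appears in the training half of the random split
train_labels = np.argmax(y_train, axis=1)
print(dict(zip(*np.unique(train_labels, return_counts=True))))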
In [20]:
model = Sequential()
model.add(Dense(16, input_shape=(4,)))
model.add(Activation('tanh'))
model.add(Dense(3))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
In [21]:
hist = model.fit(X_train, y_train, validation_data=(X_test,y_test), epochs=200, verbose=0, batch_size=5)
In [22]:
plot_loss_acc(hist)
In [23]:
loss, accuracy = model.evaluate(X_train, y_train, verbose=0)
print('train set: loss = %.5f, accuracy = %.5f' % (loss,accuracy))
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print('test set: loss = %.5f, accuracy = %.5f' % (loss,accuracy))
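Beyond the aggregate accuracy, predicted and true class indices can be compared sample by sample (a small sketch, reusing np.argmax as in plot_all) to see how many test samples are misclassified:
In [ ]:
# indices of misclassified test samples
y_pred = np.argmax(model.predict(X_test), axis=1)
y_true = np.argmax(y_test, axis=1)
errors = np.where(y_pred != y_true)[0]
print('%d of %d test samples misclassified' % (len(errors), len(y_true)))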
In [24]:
N_train = 20 # only 20 of 150 samples for training, rest for testing
X_train, y_train = X[:N_train], y_oh[:N_train]
X_test, y_test = X[N_train:], y_oh[N_train:]
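With only 20 training samples drawn at random, one species can easily be under-represented or even missing; the same class-count check as above (a small sketch, not in the original notebook):
In [ ]:
# class counts in the 20-sample training set
print(dict(zip(*np.unique(np.argmax(y_train, axis=1), return_counts=True))))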
In [25]:
model = Sequential()
model.add(Dense(16, input_shape=(4,)))
model.add(Activation('tanh'))
model.add(Dense(16))
model.add(Activation('tanh'))
model.add(Dense(16))
model.add(Activation('tanh'))
model.add(Dense(3))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
In [26]:
hist = model.fit(X_train, y_train, validation_data=(X_test,y_test), epochs=1000, verbose=0, batch_size=5)
In [27]:
plot_loss_acc(hist)
In [28]:
loss, accuracy = model.evaluate(X_train, y_train, verbose=0)
print('train set: loss = %.5f, accuracy = %.5f' % (loss,accuracy))
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print('test set: loss = %.5f, accuracy = %.5f' % (loss,accuracy))
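The gap between train and test accuracy illustrates overfitting. One way to quantify it from the recorded history (a small sketch using the hist object above, not part of the original notebook) is to locate the epoch with the best validation accuracy:
In [ ]:
# epoch at which validation accuracy peaked, versus the total number of epochs trained
best = int(np.argmax(hist.history['val_acc']))
print('best val_acc = %.3f at epoch %d (of %d)' %
      (hist.history['val_acc'][best], best + 1, len(hist.epoch)))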