Neural networks can learn complex interactions among features, which makes them remarkably powerful classifiers. Even a small network with a single hidden layer can noticeably outperform logistic regression on the Iris data. Let's use Keras running on top of TensorFlow to see a neural network classifier in action.
In [83]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegressionCV
from sklearn import datasets
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.utils import np_utils
In [8]:
iris = datasets.load_iris()
iris_df = pd.DataFrame(data=np.c_[iris['data'], iris['target']],
                       columns=iris['feature_names'] + ['target'])
In [9]:
iris_df.head()
Out[9]:
Targets 0, 1, 2 correspond to three species: setosa, versicolor, and virginica.
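If you want to double-check that mapping, the loaded dataset object carries the species names. A quick sketch, using the iris object loaded above:

dict(enumerate(iris['target_names']))
# {0: 'setosa', 1: 'versicolor', 2: 'virginica'}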
In [17]:
sns.pairplot(iris_df, hue="target")
Out[17]:
In [22]:
X = iris_df.values[:, :4]
Y = iris_df.values[:, 4]
In [23]:
train_X, test_X, train_Y, test_Y = train_test_split(X, Y, train_size=0.5, random_state=0)
In [24]:
lr = LogisticRegressionCV()
lr.fit(train_X, train_Y)
Out[24]:
In [26]:
print("Accuracy = {:.2f}".format(lr.score(test_X, test_Y)))
In [27]:
# Let's encode the output as a one-hot vector,
# since this is what the network produces
def one_hot_encode_object_array(arr):
    '''One-hot encode a numpy array of objects (e.g. strings)'''
    uniques, ids = np.unique(arr, return_inverse=True)
    return np_utils.to_categorical(ids, len(uniques))
train_y_ohe = one_hot_encode_object_array(train_Y)
test_y_ohe = one_hot_encode_object_array(test_Y)
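As a quick sanity check (a minimal sketch, assuming the function above is in scope), the encoder turns each class label into a length-3 indicator vector:

one_hot_encode_object_array(np.array([0., 1., 2., 1.]))
# -> a (4, 3) array with a single 1 in the column of each label's class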
In [28]:
model = Sequential()
# single hidden layer with 16 units, taking the four iris features as input
model.add(Dense(16, input_shape=(4,)))
model.add(Activation("sigmoid"))
In [29]:
# define output layer
model.add(Dense(3))
# softmax is used here because there are three classes (sigmoid is only suited to a two-class output)
model.add(Activation("softmax"))
In [30]:
# define loss function and optimization
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
What's happening here? compile configures training: categorical cross-entropy as the loss (the standard choice for one-hot, multi-class targets), the Adam optimizer to minimize it, and accuracy as the metric to report.
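To make that loss concrete, here is a small numpy sketch (an illustration, not Keras internals): for a single example, categorical cross-entropy is the negative log of the probability the model assigns to the true class.

y_true = np.array([0., 1., 0.])            # one-hot target: class 1
y_pred = np.array([0.2, 0.7, 0.1])         # softmax output from the model
loss = -np.sum(y_true * np.log(y_pred))    # = -log(0.7), about 0.36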
In [32]:
model.fit(train_X, train_y_ohe, epochs=100, batch_size=1, verbose=0)
Out[32]:
In [33]:
loss, accuracy = model.evaluate(test_X, test_y_ohe, verbose=0)
print("Accuracy = {:.2f}".format(accuracy))
Nice! That's 14% more accurate than logistic regression. Still, you always have to wonder whether we're overfitting...
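One quick way to check (a sketch, not something run above; you would rebuild the model first so training starts from scratch) is to hold out part of the training data as a validation set and compare the two accuracy curves. If training accuracy keeps climbing while validation accuracy stalls, we're overfitting.

history = model.fit(train_X, train_y_ohe, epochs=100, batch_size=1,
                    validation_split=0.2, verbose=0)
# history.history holds per-epoch training and validation accuracy
# (the exact key names vary by Keras version)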
In [34]:
# same architecture as before, but trained with plain SGD instead of Adam for comparison
stochastic_net = Sequential()
stochastic_net.add(Dense(16, input_shape=(4,)))
stochastic_net.add(Activation("sigmoid"))
stochastic_net.add(Dense(3))
stochastic_net.add(Activation("softmax"))
stochastic_net.compile(optimizer="sgd", loss="categorical_crossentropy", metrics=["accuracy"])
In [35]:
stochastic_net.fit(train_X, train_y_ohe, epochs=100, batch_size=1, verbose=0)
Out[35]:
In [36]:
loss, accuracy = stochastic_net.evaluate(test_X, test_y_ohe, verbose=0)
print("Accuracy = {:.2f}".format(accuracy))
Based on Mike Williams's "Getting Started with Deep Learning" on Safari Books Online.
Based on https://github.com/wxs/keras-mnist-tutorial/blob/master/MNIST%20in%20Keras.ipynb
In [32]:
from keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
In [33]:
# show sample data
for i in range(9):
    plt.subplot(3, 3, i+1)
    plt.imshow(X_train[i], cmap='gray', interpolation='none')
    plt.title("Class {}".format(y_train[i]))
In [34]:
X_train.shape
Out[34]:
In [84]:
X_train = X_train.reshape(60000, 784)
X_test = X_test.reshape(10000, 784)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
# without scaling, the network performs very poorly (~40% accuracy)
X_train /= 255
X_test /= 255
Now we have an input vector of size 784, with one entry per pixel.
In [76]:
Y_train = np_utils.to_categorical(y_train, 10)
Y_test = np_utils.to_categorical(y_test, 10)
In [77]:
model = Sequential()
In [78]:
# Hidden Layer
model.add(Dense(512, input_shape=(784,)))
# use a rectified linear unit (ReLU) as the activation:
# basically the line y = x for x ≥ 0, and 0 otherwise
model.add(Activation("relu"))
Note: you can also add a dropout layer (with a rate of, say, 0.2) to prevent overfitting, as sketched below.
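A minimal sketch of what that would look like (model_with_dropout is a hypothetical name, not the model built in this notebook): a Dropout layer after the hidden activation zeroes a random 20% of its outputs on each training step.

from keras.layers import Dropout  # assumes a Keras version that exposes Dropout here

model_with_dropout = Sequential()
model_with_dropout.add(Dense(512, input_shape=(784,)))
model_with_dropout.add(Activation("relu"))
model_with_dropout.add(Dropout(0.2))  # randomly drop 20% of activations during training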
In [79]:
# Output
model.add(Dense(10))
model.add(Activation("softmax"))
In [80]:
model.compile(loss='categorical_crossentropy',
              optimizer='adam', metrics=["accuracy"])
In [81]:
model.fit(X_train, Y_train,
          batch_size=128, epochs=4,
          verbose=1)
Out[81]:
In [82]:
loss, accuracy = model.evaluate(X_test, Y_test, verbose=1)
print("Accuracy = {:.2f}".format(accuracy))
This network with a single hidden layer is 98% accurate. How amazing!