In [1]:
import math
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from IPython import display
from sklearn import metrics
In [2]:
%matplotlib inline
tf.logging.set_verbosity(tf.logging.ERROR)
import warnings; warnings.simplefilter('ignore')
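The cells below involve randomness in two places: the example picked for display and the classifiers' weight initialization, so exact numbers will vary between runs. A minimal sketch of pinning the seeds, assuming this TF version's tf.set_random_seed behaves as a graph-level seed (the seed value 0 is arbitrary):

np.random.seed(0)      # fixes the random example chosen below
tf.set_random_seed(0)  # fixes TensorFlow's weight initialization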
In [3]:
mnist_raw = pd.read_csv("/data/mnist/mnist_train_small.csv", sep=",", header=None)
In [4]:
mnist_raw.tail()
Out[4]:
In [5]:
mnist_raw.head()
Out[5]:
In [6]:
# Sanity check: per-row sum of values, scaled by the max pixel value.
mnist_raw.sum(1).divide(255)
Out[6]:
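The row sums above are just a coarse sanity check that every image carries nonzero pixel data. A quick sketch of inspecting the raw layout directly, assuming the usual encoding of this file (one label column followed by 784 pixel columns in [0, 255]):

print(mnist_raw.shape)  # expect (20000, 785): one label column plus 28*28 pixels
pixels = mnist_raw.loc[:, 1:].values
print(pixels.min(), pixels.max())  # expect 0 and 255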
In [7]:
def parse_labels_and_features(dataset):
  """Given the dataset, pulls out the label column as a Series, and pulls out
  the features as a pandas DataFrame. This is a good place to scale or
  transform the features if needed."""
  # The labels are in column zero.
  labels = dataset[0]
  # We'll pick out the remaining columns to use as features.
  #
  # Scale the data to [0,1] by dividing out the max value, 255.
  #
  # Note: DataFrame.loc uses label-based indexing, which includes both ends
  # of ranges.
  features = dataset.loc[:, 1:784] / 255
  return labels, features
# Note that this data is given to us in random order, so we can just take head
# and tail to create training and validation splits.
training_labels, training_features = parse_labels_and_features(
    mnist_raw.head(15000))
validation_labels, validation_features = parse_labels_and_features(
    mnist_raw.tail(5000))
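A quick check that the split came out as intended; this just restates the shapes and the [0, 1] scaling done by parse_labels_and_features:

assert training_features.shape == (15000, 784)
assert validation_features.shape == (5000, 784)
assert float(training_features.values.max()) <= 1.0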
In [8]:
# Show a random example and its corresponding label.
rand_example = np.random.randint(0, len(training_features))
plt.matshow(
    training_features.iloc[rand_example].values.reshape(28, 28))
plt.title(
    "A randomly chosen example. Label: %i" % training_labels[rand_example])
plt.show()
In [9]:
BATCH_SIZE = 100 #@param
LEARNING_RATE = 0.03 #@param
STEPS = 1000 #@param
periods = 10
steps_per_period = STEPS // periods
from sklearn.metrics import confusion_matrix
import seaborn as sns
feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(
    training_features)
classifier = tf.contrib.learn.LinearClassifier(
    feature_columns=feature_columns,
    n_classes=10,
    optimizer=tf.train.AdagradOptimizer(learning_rate=LEARNING_RATE),
    gradient_clip_norm=5.0
)
training_errors = []
validation_errors = []
print "Training model..."
# Note that we track the model's error using the log loss function.
# This is not to be confused with the loss function that
# LinearClassifier uses internally for training.
print "LogLoss:"
for period in range (0, periods):
classifier.fit(
training_features,
training_labels,
steps=steps_per_period,
batch_size=BATCH_SIZE
)
predictions_training = list(classifier.predict_proba(
training_features, as_iterable=True))
predictions_validation = list(classifier.predict_proba(
validation_features, as_iterable=True))
log_loss_training = metrics.log_loss(
training_labels, predictions_training)
log_loss_validation = metrics.log_loss(
validation_labels, predictions_validation)
training_errors.append(log_loss_training)
validation_errors.append(log_loss_validation)
print " period %02d : %3.2f" % (period, log_loss_training)
final_predictions = list(classifier.predict(
validation_features, as_iterable=True))
accuracy_validation = metrics.accuracy_score(
validation_labels, final_predictions)
print "Final accuracy (on validation data): %0.2f" % accuracy_validation
In [10]:
plt.figure(figsize=(18, 6))
plt.subplot(1, 2, 1)
# Output a graph of loss metrics over periods.
plt.ylabel("LogLoss")
plt.xlabel("Periods")
plt.title("LogLoss vs. Periods")
plt.plot(training_errors, label="training")
plt.plot(validation_errors, label="validation")
plt.legend()
# Compute confusion matrix
plt.subplot(1, 2, 2)
cm = confusion_matrix(validation_labels, final_predictions)
# Normalize the confusion matrix by row (i.e., by the number of samples
# in each class).
cm_normalized = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis]
# Plot confusion matrix
ax = sns.heatmap(cm_normalized, cmap="bone_r")
ax.set_aspect(1)
plt.title("Confusion matrix")
plt.ylabel("True label")
plt.xlabel("Predicted label")
plt.show()
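The normalized confusion matrix also answers "which digits get confused most?" directly. A small sketch reusing cm_normalized from above to pull out the worst off-diagonal entry:

confusions = cm_normalized.copy()
np.fill_diagonal(confusions, 0)  # ignore correct predictions
true_d, pred_d = np.unravel_index(confusions.argmax(), confusions.shape)
print("Most frequent confusion: true %d predicted as %d (%.1f%% of that class)"
      % (true_d, pred_d, 100 * confusions[true_d, pred_d]))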
In [11]:
BATCH_SIZE = 100
HIDDEN_UNITS = [100, 100]
LEARNING_RATE = 0.05
STEPS = 1000
periods = 5
steps_per_period = STEPS // periods
from sklearn.metrics import confusion_matrix
import seaborn as sns
feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(
    training_features)
classifier = tf.contrib.learn.DNNClassifier(
    feature_columns=feature_columns,
    n_classes=10,
    hidden_units=HIDDEN_UNITS,
    optimizer=tf.train.AdagradOptimizer(learning_rate=LEARNING_RATE),
    gradient_clip_norm=5.0
)
training_errors = []
validation_errors = []
print "Training model..."
# Note that we track the model's error using the function called
# logistic loss. This is not to be confused with the loss function.
# LinearClassifier defines its own loss function internally.
print "LogLoss:"
for period in range (0, periods):
classifier.fit(
training_features,
training_labels,
steps=steps_per_period,
batch_size=BATCH_SIZE
)
predictions_training = list(classifier.predict_proba(
training_features, as_iterable=True))
predictions_validation = list(classifier.predict_proba(
validation_features, as_iterable=True))
log_loss_training = metrics.log_loss(
training_labels, predictions_training)
log_loss_validation = metrics.log_loss(
validation_labels, predictions_validation)
training_errors.append(log_loss_training)
validation_errors.append(log_loss_validation)
print " period %02d : %3.2f" % (period, log_loss_training)
final_predictions = list(classifier.predict(
validation_features, as_iterable=True))
accuracy_validation = metrics.accuracy_score(
validation_labels, final_predictions)
print "Final accuracy (on validation data): %0.2f" % accuracy_validation
In [12]:
plt.figure(figsize=(18, 6))
plt.subplot(1, 2, 1)
# Output a graph of loss metrics over periods.
plt.ylabel("LogLoss")
plt.xlabel("Periods")
plt.title("LogLoss vs. Periods")
plt.plot(training_errors, label="training")
plt.plot(validation_errors, label="validation")
plt.legend()
# Compute confusion matrix
plt.subplot(1, 2, 2)
cm = confusion_matrix(validation_labels, final_predictions)
# Normalize the confusion matrix by row (i.e., by the number of samples
# in each class).
cm_normalized = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis]
# Plot confusion matrix
ax = sns.heatmap(cm_normalized, cmap="bone_r")
ax.set_aspect(1)
plt.title("Confusion matrix")
plt.ylabel("True label")
plt.xlabel("Predicted label")
plt.show()
In [13]:
# Note that this cell requires that a DNNClassifier called
# "classifier" has already been trained.
weights0 = classifier.weights_[0]
print("weights0 shape:", weights0.shape)
num_nodes = weights0.shape[1]
num_rows = int(math.ceil(num_nodes / 10.0))
fig, axes = plt.subplots(num_rows, 10, figsize=(20, 2 * num_rows))
for coef, ax in zip(weights0.T, axes.ravel()):
  ax.matshow(coef.reshape(28, 28), cmap=plt.cm.pink)
  ax.set_xticks(())
  ax.set_yticks(())
plt.show()
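Beyond eyeballing the images, one cheap summary of the first layer is the L2 norm of each hidden unit's incoming weights; units with near-zero norm contribute little. A minimal sketch over the same weights0:

norms = np.linalg.norm(weights0, axis=0)  # one norm per hidden unit
print("weight norms: min %.3f, max %.3f" % (norms.min(), norms.max()))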