In [1]:
from sklearn import datasets
In [2]:
digits = datasets.load_digits()
data = digits.data
labels = digits.target
data.shape, labels.shape
Out[2]:
((1797, 64), (1797,))
In [3]:
from sklearn.preprocessing import OneHotEncoder
In [4]:
one_hot = OneHotEncoder(sparse=False, dtype=int)
labels = labels.reshape(-1, 1) # convert (n,) -> (n,1)
labels = one_hot.fit_transform(labels)
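Each digit label is now a row of ten indicators with a single 1 at the class index. As a quick sanity check (not part of the original run; the exact array formatting may differ by NumPy/scikit-learn version), the fitted encoder can be applied to a single label:
import numpy as np
print(one_hot.transform(np.array([[3]])))  # expected: [[0 0 0 1 0 0 0 0 0 0]]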
In [5]:
from sklearn.model_selection import train_test_split
In [6]:
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)
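With test_size=0.2 the 1797 samples should split into roughly 1437 for training and 360 for testing; a quick check (not from the original notebook):
X_train.shape, X_test.shape  # expected: roughly ((1437, 64), (360, 64))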
In [7]:
import tensorflow as tf
create variables and input nodes
In [8]:
x_dim = X_train.shape[1]
y_dim = y_train.shape[1]
W = tf.Variable(tf.random_normal([x_dim, y_dim], stddev=0.01), name='weights')
b = tf.Variable(tf.zeros([y_dim]), name="biases")
X = tf.placeholder(tf.float32, [None, x_dim], name='X')
y = tf.placeholder(tf.float32, [None, y_dim], name='y')
wire the model
In [9]:
# scope operations to keep the graph nicely composed
with tf.name_scope('logits'):
    logits = tf.matmul(X, W) + b
with tf.name_scope('softmax'):
    softmax = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y, name='softmax')
with tf.name_scope('loss'):
    loss = tf.reduce_mean(softmax, name='loss')
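softmax_cross_entropy_with_logits fuses the softmax and the cross-entropy into one numerically stable op, producing one loss value per example, which reduce_mean then averages over the batch. A rough NumPy sketch of the per-example computation, with a hypothetical helper name, just for intuition:
import numpy as np

def softmax_xent(logits_row, one_hot_row):
    shifted = logits_row - logits_row.max()           # subtract the max for numerical stability
    probs = np.exp(shifted) / np.exp(shifted).sum()   # softmax probabilities
    return -(one_hot_row * np.log(probs)).sum()       # cross-entropy against the one-hot label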
create optimizer
In [10]:
lr = 0.05 # learning rate
with tf.name_scope('train'):
    optimizer = tf.train.AdamOptimizer(lr)
    # Create a variable to track the global step.
    global_step = tf.Variable(0, name='global_step', trainable=False)
    # Use the optimizer to apply the gradients that minimize the loss
    # (and also increment the global step counter) as a single training step.
    train_op = optimizer.minimize(loss, global_step=global_step)
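For reference, minimize() is shorthand for computing the gradients and then applying them; a roughly equivalent form (a sketch only, not meant to replace the cell above) would be:
grads_and_vars = optimizer.compute_gradients(loss)
train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)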
create evaluation and prediction ops
In [11]:
with tf.name_scope('evaluation'):
    correct = tf.equal(tf.argmax(y, 1), tf.argmax(logits, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name='accuracy')
    prediction_op = tf.argmax(logits, 1)
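The accuracy op simply compares the argmax of the predicted logits with the argmax of the one-hot labels. A NumPy analogue (hypothetical helper, shown only to make the computation explicit):
import numpy as np

def np_accuracy(logits_batch, one_hot_batch):
    return (logits_batch.argmax(axis=1) == one_hot_batch.argmax(axis=1)).mean()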
add debug outputs
In [12]:
with tf.name_scope('summary'):
    tf.scalar_summary(loss.op.name, loss)
    tf.scalar_summary(accuracy.op.name, accuracy)
    summary_op = tf.merge_all_summaries()
In [13]:
with tf.name_scope('init'):
    init_op = tf.initialize_all_variables()
In [14]:
from tqdm import tqdm
In [15]:
!rm -rf ./logs  # clear TensorBoard logs from previous runs
In [16]:
train_writer = tf.train.SummaryWriter('./logs/train')
test_writer = tf.train.SummaryWriter('./logs/test')
In [17]:
batch_size = 100
n_epochs = 2000
i = 0
with tf.Session() as sess:
    train_writer.add_graph(sess.graph)
    sess.run(init_op) # init all variables
    for e in tqdm(range(n_epochs)):
        for start in range(0, len(X_train), batch_size):
            end = start + batch_size
            batch = {X: X_train[start:end], y: y_train[start:end]}
            _, summary_val = sess.run([train_op, summary_op], feed_dict=batch)
            train_writer.add_summary(summary_val, i)
            i += 1
        summary_val = sess.run(summary_op, feed_dict={X: X_test, y: y_test})
        test_writer.add_summary(summary_val, i)
        i += 1
    acc = sess.run(accuracy, feed_dict={X: X_test, y: y_test})
    print('accuracy:', acc)
    train_writer.flush()
    test_writer.flush()
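If the actual class predictions are needed (not just the accuracy), prediction_op can be evaluated the same way; the lines below are a sketch and would have to run inside the with tf.Session() block above, before the session closes:
preds = sess.run(prediction_op, feed_dict={X: X_test})
print('predicted digits:', preds[:10])
print('true digits:     ', y_test.argmax(axis=1)[:10])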
Run tensorboard --logdir=./logs/ to see the debug output
In [18]:
from sklearn.linear_model import LogisticRegression
In [19]:
# for a fair comparison we 'disable' regularization by setting C to a large value (1e5)
logreg = LogisticRegression(C=1e5, fit_intercept=True, random_state=42,
                            solver='lbfgs', max_iter=20, multi_class='multinomial')
In [20]:
# convert the one-hot rows back to integer class labels for scikit-learn
y_train_categorical = y_train.dot(one_hot.active_features_).astype(int)
y_test_categorical = y_test.dot(one_hot.active_features_).astype(int)
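Because every one-hot row contains exactly one 1 and all ten digit classes are present, the same integer labels could also be recovered with argmax; an equivalent alternative (sketch):
y_train_categorical = y_train.argmax(axis=1)
y_test_categorical = y_test.argmax(axis=1)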
In [21]:
logreg.fit(X_train, y_train_categorical)
Out[21]:
In [22]:
lr_predictions = logreg.predict(X_test)
acc = (lr_predictions == y_test_categorical).mean()
print('accuracy:', acc)