----- IMPORTANT ------
The code presented here assumes that you're running TensorFlow v1.3.0 or higher. Since that version has not been released yet, the easiest way to run these examples is to upgrade your installation to TensorFlow's nightly build.
To do that, go here and then execute:
pip install --ignore-installed --upgrade <URL for the right binary for your machine>
For example, for a CPU-only Linux machine running Python 2:
pip install --upgrade https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-1.2.1-cp27-none-linux_x86_64.whl
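After installing, it's worth confirming which version you ended up with (a quick sanity check; any nightly at v1.3.0 or higher should work):
$> python -c "import tensorflow as tf; print(tf.__version__)"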
1) Simple Linear Regression with low-level TensorFlow
2) Simple Linear Regression with a canned estimator
3) Playing with real data: linear regressor and DNN
4) Building a custom estimator to classify handwritten digits (MNIST)
In [ ]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
# tensorflow
import tensorflow as tf
print('Expected TensorFlow version is v1.3.0 or higher')
print('Your TensorFlow version:', tf.__version__)
# data manipulation
import numpy as np
import pandas as pd
# visualization
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
matplotlib.rcParams['figure.figsize'] = [12,8]
In [ ]:
def make_noisy_data(m=0.1, b=0.3, n=100):
    x = np.random.randn(n)
    noise = np.random.normal(scale=0.01, size=len(x))
    y = m * x + b + noise
    return x, y
Create training data
In [ ]:
x_train, y_train = make_noisy_data()
Plot the training data
In [ ]:
plt.plot(x_train, y_train, 'b.')
In [ ]:
# input and output
x = tf.placeholder(shape=[None], dtype=tf.float32, name='x')
y_label = tf.placeholder(shape=[None], dtype=tf.float32, name='y_label')
# variables
W = tf.Variable(tf.random_normal([1]), name='W')  # weight
b = tf.Variable(tf.random_normal([1]), name='b')  # bias
# actual model
y = W * x + b
In [ ]:
loss = tf.reduce_mean(tf.square(y - y_label))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
train = optimizer.minimize(loss)
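For intuition, the loss above is just the mean squared error over the batch. Here is the same quantity computed with NumPy (a small sketch; w0 and b0 are hypothetical starting values, not read from the model):
In [ ]:
w0, b0 = 0.0, 0.0  # hypothetical parameter values
mse = np.mean((w0 * x_train + b0 - y_train) ** 2)
print('MSE at w0=0, b0=0:', mse)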
In [ ]:
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)  # initialize variables
    for i in range(100):  # train for 100 steps
        sess.run(train, feed_dict={x: x_train, y_label: y_train})

    x_plot = np.linspace(-3, 3, 101)  # evenly spaced numbers over [-3, 3]
    # use the trained model to predict values over the plotting range
    y_plot = sess.run(y, feed_dict={x: x_plot})

    # save the final weight and bias
    final_W = sess.run(W)
    final_b = sess.run(b)
In [ ]:
plt.scatter(x_train, y_train)
plt.plot(x_plot, y_plot, 'g')
In [ ]:
print('W:', final_W, 'expected: 0.1')
print('b:', final_b, 'expected: 0.3')
In [ ]:
x_dict = {'x': x_train}
train_input = tf.estimator.inputs.numpy_input_fn(x_dict, y_train,
shuffle=True,
num_epochs=None) # repeat forever
In [ ]:
features = [tf.feature_column.numeric_column('x')] # because x is a real number
In [ ]:
estimator = tf.estimator.LinearRegressor(features)
estimator.train(train_input, steps = 1000)
In [ ]:
x_test_dict = {'x': np.linspace(-5, 5, 11)}
data_source = tf.estimator.inputs.numpy_input_fn(x_test_dict, shuffle=False)
predictions = list(estimator.predict(data_source))
preds = [p['predictions'][0] for p in predictions]
for p in predictions:
    print(p['predictions'])
In [ ]:
plt.scatter(x_train, y_train)
plt.plot(x_test_dict['x'], preds, 'g')
The Adult dataset is from the Census Bureau; the task is to predict whether a given adult makes more than $50,000 a year based on attributes such as education, hours of work per week, etc.
But the code presented here is easily applicable to any CSV dataset that fits in memory.
More about the data here
In [ ]:
census_train_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data'
census_train_path = tf.contrib.keras.utils.get_file('census.train', census_train_url)
census_test_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test'
census_test_path = tf.contrib.keras.utils.get_file('census.test', census_test_url)
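To get a feel for the raw format, print the first line of the downloaded file (a small sanity check, nothing model-specific):
In [ ]:
# peek at the first raw line of the training CSV
with open(census_train_path) as f:
    print(f.readline())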
In [ ]:
column_names = [
'age', 'workclass', 'fnlwgt', 'education', 'education-num',
'marital-status', 'occupation', 'relationship', 'race', 'sex',
'capital-gain', 'capital-loss', 'hours-per-week', 'native-country',
'income'
]
census_train = pd.read_csv(census_train_path, index_col=False, names=column_names)
census_test = pd.read_csv(census_test_path, index_col=False, names=column_names,
                          skiprows=1)  # adult.test starts with a non-data header line
census_train_label = census_train.pop('income') == " >50K"
census_test_label = census_test.pop('income') == " >50K."  # test labels carry a trailing period
In [ ]:
census_train.head(10)
In [ ]:
census_train_label[:20]
In [ ]:
train_input = tf.estimator.inputs.pandas_input_fn(
census_train,
census_train_label,
shuffle=True,
batch_size = 32, # process 32 examples at a time
num_epochs=None,
)
In [ ]:
test_input = tf.estimator.inputs.pandas_input_fn(
census_test,
census_test_label,
shuffle=True,
num_epochs=1)
In [ ]:
features, labels = train_input()  # the input_fn returns a (features dict, labels tensor) pair
features
In [ ]:
features = [
    tf.feature_column.numeric_column('hours-per-week'),
    tf.feature_column.bucketized_column(tf.feature_column.numeric_column('education-num'),
                                        list(range(25))),
    # the raw CSV values are capitalized and keep a leading space, e.g. " Male"
    tf.feature_column.categorical_column_with_vocabulary_list('sex', [' Male', ' Female']),
    tf.feature_column.categorical_column_with_hash_bucket('native-country', 1000),
]
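If you're curious what bucketized_column actually produces, here's a minimal, self-contained sketch (the demo_* names are made up for illustration): it one-hot encodes a numeric value by the bucket it falls into.
In [ ]:
demo_features = {'education-num': tf.constant([[13.0]])}
demo_column = tf.feature_column.bucketized_column(
    tf.feature_column.numeric_column('education-num'), list(range(25)))
demo_tensor = tf.feature_column.input_layer(demo_features, [demo_column])
with tf.Session() as sess:
    print(sess.run(demo_tensor))  # a one-hot row with a 1 in the bucket containing 13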
In [ ]:
estimator = tf.estimator.LinearClassifier(features, model_dir='census/linear', n_classes=2)
In [ ]:
estimator.train(train_input, steps=5000)
In [ ]:
estimator.evaluate(test_input)
In [ ]:
features = [
    tf.feature_column.numeric_column('education-num'),
    tf.feature_column.numeric_column('hours-per-week'),
    tf.feature_column.numeric_column('age'),
    tf.feature_column.indicator_column(
        tf.feature_column.categorical_column_with_vocabulary_list('sex', [' Male', ' Female'])),
    tf.feature_column.embedding_column(  # now using an embedding!
        tf.feature_column.categorical_column_with_hash_bucket('native-country', 1000), 10)
]
In [ ]:
estimator = tf.estimator.DNNClassifier(hidden_units=[20,20],
feature_columns=features,
n_classes=2,
model_dir='census/dnn')
In [ ]:
estimator.train(train_input, steps=5000)
In [ ]:
estimator.evaluate(test_input)
In [ ]:
def census_input_fn(path):
    def input_fn():
        dataset = (
            tf.contrib.data.TextLineDataset(path)  # one record per CSV line
            .map(csv_decoder)                      # parse each line into a dict of tensors
            .shuffle(buffer_size=100)
            .batch(32)
            .repeat())
        columns = dataset.make_one_shot_iterator().get_next()
        income = tf.equal(columns.pop('income'), " >50K")
        return columns, income
    return input_fn
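To see the map/shuffle/batch/repeat chain in isolation, here is the same Dataset pattern applied to a toy in-memory dataset (a minimal sketch assuming the TF 1.3 tf.contrib.data API; the toy names are made up):
In [ ]:
toy = tf.contrib.data.Dataset.from_tensor_slices(tf.constant([1, 2, 3, 4]))
toy = toy.map(lambda v: v * 2).shuffle(buffer_size=4).batch(2).repeat()
next_batch = toy.make_one_shot_iterator().get_next()
with tf.Session() as sess:
    print(sess.run(next_batch))  # e.g. an array of two doubled values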
In [ ]:
csv_defaults = collections.OrderedDict([
('age',[0]),
('workclass',['']),
('fnlwgt',[0]),
('education',['']),
('education-num',[0]),
('marital-status',['']),
('occupation',['']),
('relationship',['']),
('race',['']),
('sex',['']),
('capital-gain',[0]),
('capital-loss',[0]),
('hours-per-week',[0]),
('native-country',['']),
('income',['']),
])
In [ ]:
def csv_decoder(line):
    # decode_csv needs a list of default values, one per column
    parsed = tf.decode_csv(line, list(csv_defaults.values()))
    return dict(zip(csv_defaults.keys(), parsed))
In [ ]:
tf.reset_default_graph()
census_input = census_input_fn(census_train_path)
training_batch = census_input()
In [ ]:
with tf.Session() as sess:
    features, high_income = sess.run(training_batch)
In [ ]:
print(features['education'])
In [ ]:
print(features['age'])
In [ ]:
print(high_income)
In [ ]:
train,test = tf.contrib.keras.datasets.mnist.load_data()
x_train,y_train = train
x_test,y_test = test
mnist_train_input = tf.estimator.inputs.numpy_input_fn({'x':np.array(x_train, dtype=np.float32)},
np.array(y_train,dtype=np.int32),
shuffle=True,
num_epochs=None)
mnist_test_input = tf.estimator.inputs.numpy_input_fn({'x':np.array(x_test, dtype=np.float32)},
np.array(y_test,dtype=np.int32),
shuffle=True,
num_epochs=1)
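One optional tweak, not part of the original flow: the raw pixel values arrive in [0, 255], and scaling them to [0, 1] often makes training better conditioned. If you want to try it, run this and then re-run the input-function cell above:
In [ ]:
# optional: scale pixels to [0, 1]; rerun the input-function cell afterwards
x_train = x_train.astype(np.float32) / 255.0
x_test = x_test.astype(np.float32) / 255.0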
In [ ]:
estimator = tf.estimator.LinearClassifier([tf.feature_column.numeric_column('x',shape=784)],
n_classes=10,
model_dir="mnist/linear")
estimator.train(mnist_train_input, steps = 10000)
In [ ]:
estimator.evaluate(mnist_test_input)
$> tensorboard --logdir mnist/DNN
In [ ]:
estimator = tf.estimator.DNNClassifier(hidden_units=[256],
feature_columns=[tf.feature_column.numeric_column('x',shape=784)],
n_classes=10,
model_dir="mnist/DNN")
estimator.train(mnist_train_input, steps = 10000)
In [ ]:
estimator.evaluate(mnist_test_input)
In [ ]:
# Parameters
BATCH_SIZE = 128
STEPS = 10000
In [ ]:
def build_cnn(input_layer, mode):
    with tf.name_scope("conv1"):
        conv1 = tf.layers.conv2d(inputs=input_layer, filters=32, kernel_size=[5, 5],
                                 padding='same', activation=tf.nn.relu)
    with tf.name_scope("pool1"):
        pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)  # 28x28 -> 14x14
    with tf.name_scope("conv2"):
        conv2 = tf.layers.conv2d(inputs=pool1, filters=64, kernel_size=[5, 5],
                                 padding='same', activation=tf.nn.relu)
    with tf.name_scope("pool2"):
        pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)  # 14x14 -> 7x7
    with tf.name_scope("dense"):
        pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
        dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
    with tf.name_scope("dropout"):
        is_training_mode = mode == tf.estimator.ModeKeys.TRAIN  # only drop units while training
        dropout = tf.layers.dropout(inputs=dense, rate=0.4, training=is_training_mode)
    logits = tf.layers.dense(inputs=dropout, units=10)  # one logit per digit class
    return logits
In [ ]:
def model_fn(features, labels, mode):
    # Describe the model
    input_layer = tf.reshape(features['x'], [-1, 28, 28, 1])
    tf.summary.image('mnist_input', input_layer)
    logits = build_cnn(input_layer, mode)

    # Generate predictions
    classes = tf.argmax(input=logits, axis=1)
    predictions = {
        'classes': classes,
        'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
    }
    if mode == tf.estimator.ModeKeys.PREDICT:
        # Return an EstimatorSpec object
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    with tf.name_scope('loss'):
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)
        loss = tf.reduce_sum(loss)
        tf.summary.scalar('loss', loss)

    with tf.name_scope('accuracy'):
        accuracy = tf.cast(tf.equal(tf.cast(classes, tf.int32), labels), tf.float32)
        accuracy = tf.reduce_mean(accuracy)
        tf.summary.scalar('accuracy', accuracy)

    # Configure the training op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=tf.train.get_global_step(),
            learning_rate=1e-4,
            optimizer='Adam')
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions,
                                          loss=loss, train_op=train_op)

    # Configure the accuracy metric for evaluation
    eval_metric_ops = {
        'accuracy': tf.metrics.accuracy(labels=labels, predictions=classes)
    }
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions,
                                      loss=loss, eval_metric_ops=eval_metric_ops)
In [ ]:
# create the estimator
run_config = tf.contrib.learn.RunConfig(model_dir='mnist/CNN')
estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)
# train for 10000 steps
estimator.train(input_fn=mnist_train_input, steps=STEPS)
# evaluate
estimator.evaluate(input_fn=mnist_test_input)
# predict
preds = estimator.predict(input_fn=mnist_test_input)
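predict returns a generator that yields one dict per example, with the 'classes' and 'probabilities' keys defined in model_fn. A small usage sketch:
In [ ]:
import itertools
for p in itertools.islice(preds, 3):  # look at the first 3 predictions
    print('predicted class:', p['classes'])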
In [ ]:
# Run an experiment
from tensorflow.contrib.learn.python.learn import learn_runner
# Enable TensorFlow logs
tf.logging.set_verbosity(tf.logging.INFO)
In [ ]:
# create the experiment
def experiment_fn(run_config, hparams):
    # create the estimator
    estimator = tf.estimator.Estimator(model_fn=model_fn,
                                       config=run_config)
    return tf.contrib.learn.Experiment(
        estimator,
        train_input_fn=mnist_train_input,
        eval_input_fn=mnist_test_input,
        train_steps=STEPS
    )

# run the experiment
learn_runner.run(experiment_fn, run_config=run_config)
$> tensorboard --logdir mnist/CNN