In [1]:
import tensorflow as tf
import numpy as np
def rand01(digit):
    # Add some random noise to a bit while always keeping the value between 0 and 1
    s = abs(np.random.normal(0.0, 0.05))
    if digit == 0:
        noise = digit + s
    else:
        noise = digit - s
    return noise
### Training examples
### All input combinations of XNOR (note: Y is the XNOR truth table, not XOR)
X = [[0, 0], [0, 1], [1, 0], [1, 1]]
Y = [[1], [0], [0], [1]]
# Add some random noise to our inputs. Useful if we use the tanh activation function.
add_noise = np.vectorize(rand01)
X = add_noise(X)
Y = add_noise(Y)
# Neural Network Parameters
N_STEPS = 200000  # total training steps
N_EPOCH = 5000    # print progress every N_EPOCH steps
N_TRAINING = len(X)
N_INPUT_NODES = 2
N_HIDDEN_NODES = 5
N_OUTPUT_NODES = 1
ACTIVATION = 'tanh' # sigmoid or tanh
COST = 'ACE' # MSE or ACE
LEARNING_RATE = 0.05
if __name__ == '__main__':
    ### Create placeholders for variables and define the network structure: ###
    ### a feed-forward, 3-layer neural network.                             ###
    x_ = tf.placeholder(tf.float32, shape=[N_TRAINING, N_INPUT_NODES], name="x-input")
    y_ = tf.placeholder(tf.float32, shape=[N_TRAINING, N_OUTPUT_NODES], name="y-input")

    theta1 = tf.Variable(tf.random_uniform([N_INPUT_NODES, N_HIDDEN_NODES], -1, 1), name="theta1")
    theta2 = tf.Variable(tf.random_uniform([N_HIDDEN_NODES, N_OUTPUT_NODES], -1, 1), name="theta2")
    bias1 = tf.Variable(tf.zeros([N_HIDDEN_NODES]), name="bias1")
    bias2 = tf.Variable(tf.zeros([N_OUTPUT_NODES]), name="bias2")
    if ACTIVATION == 'sigmoid':
        ### Use a sigmoid activation function ###
        layer1 = tf.sigmoid(tf.matmul(x_, theta1) + bias1)
        output = tf.sigmoid(tf.matmul(layer1, theta2) + bias2)
    else:
        ### Use a tanh activation function ###
        layer1 = tf.tanh(tf.matmul(x_, theta1) + bias1)
        output = tf.tanh(tf.matmul(layer1, theta2) + bias2)
        # Rescale the tanh output from [-1, 1] to [0, 1] so the
        # cross-entropy cost below stays well defined.
        output = tf.add(output, 1)
        output = tf.multiply(output, 0.5)
if COST == "MSE":
# Mean Squared Estimate - the simplist cost function (MSE)
cost = tf.reduce_mean(tf.square(Y - output))
train_step = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(cost)
else:
# Average Cross Entropy - better behaviour and learning rate
cost = - tf.reduce_mean( (y_ * tf.log(output)) + (1 - y_) * tf.log(1.0 - output) )
train_step = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(cost)
    init = tf.global_variables_initializer()  # initialize_all_variables is deprecated
    sess = tf.Session()
    sess.run(init)

    for i in range(N_STEPS):
        sess.run(train_step, feed_dict={x_: X, y_: Y})
        if i % N_EPOCH == 0:
            print('Step ', i)
            print('Inference ', sess.run(output, feed_dict={x_: X, y_: Y}))
            print('Cost ', sess.run(cost, feed_dict={x_: X, y_: Y}))
            # print('op: ', sess.run(output))
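Once training finishes, the learned mapping can be checked against the clean truth table. The cell below is a minimal sketch (it assumes `sess`, `output`, `x_`, `y_`, `X`, and `Y` from the cell above are still live): it thresholds the network's real-valued inferences at 0.5 by rounding and compares them to the de-noised targets.
In [2]:
# Minimal sanity check, assuming the session and tensors from the previous
# cell are still in scope. Round the real-valued inferences to hard 0/1
# predictions and compare them to the de-noised XNOR targets.
predictions = np.round(sess.run(output, feed_dict={x_: X, y_: Y}))
targets = np.round(Y)  # strip the injected noise back off
print('Predictions ', predictions.ravel())
print('Targets     ', targets.ravel())
print('Accuracy    ', np.mean(predictions == targets))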
In [3]:
#!/usr/bin/env PYTHONIOENCODING="utf-8" python
"""
A simple neural network learning the XNOR function
"""
import tensorflow as tf

sess = tf.InteractiveSession()

# Desired input/output mapping of the XNOR function:
x_ = [[0, 0], [0, 1], [1, 0], [1, 1]]  # input
# labels = [1, 0, 0, 1]  # XNOR output, one-hot encoded below
expect = [[0, 1], [1, 0], [1, 0], [0, 1]]  # ONE-HOT REPRESENTATION: class [1,0]==0, [0,1]==1
# x = tf.Variable(x_)  # (alternative: bake the inputs into the graph as a Variable)
x = tf.placeholder("float", [None, 2])   # inputs are fed in at run time via feed_dict
y_ = tf.placeholder("float", [None, 2])  # two output classes
number_hidden_nodes = 20 # 20 outputs to create some room for negatives and positives
W = tf.Variable(tf.random_uniform([2, number_hidden_nodes], -.01, .01))
b = tf.Variable(tf.random_uniform([number_hidden_nodes], -.01, .01))
hidden = tf.nn.relu(tf.matmul(x,W) + b) # first layer.
# XNOR (like XOR) is not linearly separable, so it is the simplest
# function that requires a two-layer network.
W2 = tf.Variable(tf.random_uniform([number_hidden_nodes, 2], -.1, .1))
b2 = tf.Variable(tf.zeros([2]))
hidden2 = tf.matmul(hidden, W2)  # + b2 (b2 is defined but left unused here)
y = tf.nn.softmax(hidden2)
# Define loss and optimizer
cross_entropy = -tf.reduce_sum(y_*tf.log(y))
train_step = tf.train.GradientDescentOptimizer(0.2).minimize(cross_entropy)
# Train
tf.global_variables_initializer().run()  # initialize_all_variables is deprecated
for step in range(1000):
    feed_dict = {x: x_, y_: expect}  # feed the net with our inputs and desired outputs
    e, a = sess.run([cross_entropy, train_step], feed_dict)
    if e < 1:
        break  # early stopping yay
    print("step %d : entropy %s" % (step, e))  # error/loss should decrease over time
# Test the trained model
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))  # argmax along dim 1
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))   # [True, False, True, True] -> [1,0,1,1] -> 0.75
print("accuracy %s" % (accuracy.eval({x: x_, y_: expect})))

learned_output = tf.argmax(y, 1)
print(learned_output.eval({x: x_}))  # should print the XNOR column: [1 0 0 1]
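As a final sanity check, the same forward pass can be replayed outside the graph. The sketch below assumes the session and the variables `W`, `b`, and `W2` defined above are still live; it reproduces `relu(x·W + b)·W2` in plain NumPy and takes the argmax of the logits directly (softmax is monotonic, so it does not change the argmax).
In [4]:
import numpy as np

# Pull the learned parameters out of the session and replay the forward
# pass in NumPy, mirroring the graph above.
W_v, b_v, W2_v = sess.run([W, b, W2])
inputs = np.array(x_, dtype=np.float32)
hidden_v = np.maximum(inputs @ W_v + b_v, 0.0)  # ReLU layer
logits_v = hidden_v @ W2_v                      # second layer (no bias, as above)
print(np.argmax(logits_v, axis=1))              # should match learned_output above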
In [ ]: