In [1]:
import numpy as np
# Seed NumPy's global RNG with a fixed value. The call is placed ahead of the
# Keras imports; presumably so that weight initialization is repeatable between
# runs -- TODO confirm that the backend actually draws from this RNG.
np.random.seed(100) # fix the RNG state
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import SGD


Using Theano backend.

In [2]:
# XOR problem: the four possible pairs of binary inputs,
# one sample per row and one input bit per column.
X = np.array(
    [(0, 0), (1, 0), (0, 1), (1, 1)],
    dtype=np.float32,
)

In [3]:
# (rows, columns): one row per sample, one column per input dimension
X.shape


Out[3]:
(4, 2)

In [4]:
# XOR targets, listed in the same order as the rows of X:
# the label is 1 exactly when the two input bits differ.
y = np.array([0, 1, 1, 0], dtype=np.float32)

In [13]:
"""
- Sigmoid performs really badly
- tanh/relu are better
- Increasing the number of hidden units should also help, e.g. with 20 hidden units we always get convergence
"""
# Network: 2 inputs -> Dense(2) + ReLU -> Dense(1) + sigmoid.
# The layers are handed to Sequential as a list; they are created and stacked
# in the same order as an equivalent chain of model.add(...) calls.
model = Sequential([
    Dense(2, input_dim=2, bias=True, init="uniform"),
    Activation("relu"),
    Dense(1),
    Activation("sigmoid"),
])

In [14]:
# Stochastic gradient descent with Nesterov momentum and a small
# learning-rate decay.
sgd = SGD(
    lr=0.1,
    momentum=0.9,
    decay=1e-6,
    nesterov=True,
)
# Train against the mean squared error of the network output.
model.compile(loss="mean_squared_error", optimizer=sgd)

In [15]:
"""
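input -> hidden
6 parameters (2 inputs x 2 hidden units + 2 biases; X_0 is the bias term)
X_0 -> h_01 / h_02
X_1 -> h_11 / h_12
X_2 -> h_21 / h_22

hidden -> output
3 parameters (2 hidden units x 1 output + 1 bias; h_0 is the bias term)
h_0 -> y
h_1 -> y
h_2 -> y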
"""
# Print the per-layer table of output shapes and parameter counts.
model.summary()


____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
====================================================================================================
dense_5 (Dense)                  (None, 2)             6           dense_input_3[0][0]              
____________________________________________________________________________________________________
activation_5 (Activation)        (None, 2)             0           dense_5[0][0]                    
____________________________________________________________________________________________________
dense_6 (Dense)                  (None, 1)             3           activation_5[0][0]               
____________________________________________________________________________________________________
activation_6 (Activation)        (None, 1)             0           dense_6[0][0]                    
====================================================================================================
Total params: 9
____________________________________________________________________________________________________

In [16]:
"""
- The higher the number of iterations, the more likely the solution is to converge
- The optimal solution depends on the random initialization
"""
# Train silently for 10000 epochs; batch_size=4 matches the four rows of X.
model.fit(X, y, batch_size=4, nb_epoch=10000, verbose=0)
# Last expression of the cell, so the predictions for X are displayed.
model.predict(X)


Out[16]:
array([[ 0.00434781],
       [ 0.99145859],
       [ 0.99145901],
       [ 0.00434764]], dtype=float32)

In [ ]:


In [ ]: