Learning Internal Representation by Error Propagation

A example implementation of the following classic paper that changed the history of deep learning
Rumelhart, D. E., Hinton, G. E., & Williams, R. J. (1985). Learning internal representations by error propagation (No. ICS-8506). CALIFORNIA UNIV SAN DIEGO LA JOLLA INST FOR COGNITIVE SCIENCE.

Varona-Moya, S., & Cobos, P. L. (2012, September). Analogical inferences in the family trees task: a review. In International Conference on Artificial Neural Networks (pp. 221-228). Springer Berlin Heidelberg.
Paccanaro, A., & Hinton, G. E. (2001). Learning distributed representations of concepts using linear relational embedding. IEEE Transactions on Knowledge and Data Engineering, 13(2), 232-244.

Network structure

Data Creation



In [3]:

    
person_1_input = [[1 if target == person else 0 for target in range(24) ] for person in range(24)]
person_2_output = person_1_input[:] # Data copy - Person 1 is the same data as person 2.

relationship_input = [[1 if target == relationship else 0 for target in range(12) ] for relationship in range(12)]

Relationship Representation



In [4]:

    
# (colin has-father james)
# (colin has-mother victoria)
# (james has-wife victoria) 
# (charlotte has-brother colin)
# (victoria has-brother arthur)
# (charlotte has-uncle arthur) 
# 아래의 리스트는 가족관계도에 있는 관계를 위의 예시와 같은 방법으로 나타낸 것입니다.
# [input_person, relationship, output_person]

triple_relationship = [[0, 3, 1], [0, 4, 3], [0, 5, 4],
                       [1, 2, 0], [1, 4, 3], [1, 5, 4],
                       [2, 2, 3],
                       [3, 3, 2], [3, 0, 0], [3, 1, 1], [3, 9, 4], [3, 10, 10], [3, 11, 11],
                       [4, 2, 5], [4, 0, 0], [4, 1, 1], [4, 5, 3], [4, 4, 10], [4, 5, 11],
                       [5, 3, 4], [5, 0, 6], [5, 1, 7], [5, 9, 9], [5, 4, 10], [5, 5, 11],
                       [6, 3, 7], [6, 4, 5], [6, 5, 8],
                       [7, 2, 6], [7, 4, 5], [7, 5, 8],
                       [8, 2, 9], [8, 0, 6], [8, 1, 7], [8, 8, 5], [8, 10, 10], [8, 11, 11],
                       [9, 3, 8],
                       [10, 0, 5], [10, 1, 4], [10, 9, 11], [10, 6, 3], [10, 7, 8],
                       [11, 0, 5], [11, 1, 4], [11, 8, 10], [11, 6, 3], [11, 7, 8],
                       [12, 3, 13], [12, 4, 15], [12, 5, 16],
                       [13, 2, 12], [13, 4, 15], [13, 5, 16],
                       [14, 2, 15],
                       [15, 3, 14], [15, 0, 12], [15, 1, 13], [15, 9, 16], [15, 10, 22], [15, 11, 23],
                       [16, 2, 17], [16, 0, 12], [16, 1, 15], [16, 5, 15], [16, 4, 22], [16, 5, 23],
                       [17, 3, 16], [17, 0, 18], [17, 1, 19], [17, 9, 21], [17, 4, 22], [17, 5, 23],
                       [18, 3, 19], [18, 4, 17], [18, 5, 20],
                       [19, 2, 18], [19, 4, 17], [19, 5, 20],
                       [20, 2, 21], [20, 0, 18], [20, 1, 19], [20, 8, 17], [20, 10, 22], [8, 11, 23],
                       [21, 3, 20],
                       [22, 0, 17], [22, 1, 16], [22, 9, 23], [22, 6, 15], [22, 7, 20],
                       [23, 0, 17], [23, 1, 16], [23, 8, 22], [23, 6, 15], [23, 7, 20]]

Code



In [1]:

    
import tensorflow as tf
import numpy as np



In [23]:

    
x_data_1 = np.array(person_1_input).astype(np.float32)
x_data_2 = np.array(relationship_input).astype(np.float32)
y_data = np.transpose(triple_relationship)[2].astype(np.float32)


# Weights and bias
W11 = tf.Variable(tf.zeros([24, 6]))
W12 = tf.Variable(tf.zeros([12, 6]))
W21 = tf.Variable(tf.zeros([24, 12]))
W22 = tf.Variable(tf.zeros([12, 12]))
W3 = tf.Variable(tf.zeros([6, 6]))

b11 = tf.Variable(tf.zeros([6]))
b12 = tf.Variable(tf.zeros([6]))
b2 = tf.Variable(tf.zeros([12]))
b3 = tf.Variable(tf.zeros([6]))

# Hypothesis
L11 = tf.sigmoid(tf.matmul(x_data_1, W11) + b11) # 24 by 6 mat
L12 = tf.sigmoid(tf.matmul(x_data_2, W12) + b12) # 12 by 6 mat
L2 = tf.sigmoid(tf.matmul(tf.transpose(L11), W21) + tf.matmul(tf.transpose(L12), W22) + b2) # 6 by 12
hypothesis = tf.nn.softmax(tf.matmul(tf.transpose(L2), W3) + b3)
print(hypothesis)
# hypothesis = tf.nn.tanh(tf.matmul(L2, W3) + b3)
# hypothesis = tf.nn.relu(tf.matmul(L2, W3) + b3)


# Cost function
cost = -tf.reduce_mean( y_data*tf.log(hypothesis)+(1-y_data)* tf.log(1.-hypothesis) )
# cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(Y_pred, Y))

# # Minimize cost.
# # a = tf.Variable(0.1)
# # optimizer = tf.train.GradientDescentOptimizer(a)
# # train = optimizer.minimize(cost)

# # # Initializa all variables.
# # init = tf.initialize_all_variables()


# # # Loop
# # with tf.Session() as sess:
# #     sess.run(init)
    
# #     for step in range(8001):
# #         sess.run(train)
        
# #         if step % 1000 == 0:
# #             print(
# #                 step, 
# #                 sess.run(cost, feed_dict={X:x_data, Y:y_data}), 
# #                 sess.run(W1),
# #                 sess.run(W2)
# #             )
    
# #     # Test model
# #     correct_prediction = tf.equal( tf.floor(hypothesis+0.5), Y)
# #     accuracy = tf.reduce_mean(tf.cast( correct_prediction, "float" ) )
    
# #     # Check accuracy
# #     print( sess.run( [hypothesis, tf.floor(hypothesis+0.5), correct_prediction, accuracy], 
# #                    feed_dict={X:x_data, Y:y_data}) )
# #     print( "Accuracy:", accuracy.eval({X:x_data, Y:y_data}) )









    



Tensor("Softmax_13:0", shape=(12, 6), dtype=float32)

Learning Internal Representation by Error Propagation

Related Paper

Network structure

Data Creation

Relationship Representation

Code