In [1]:
import tensorflow as tf
import scipy.io as sio
import numpy as np

In [2]:
# Load the hand-written digit dataset (Coursera ML exercise 3) from a
# MATLAB file.  Per the shapes printed in the next cell, 'X' is
# (5000, 400): one flattened image per row; 'y' is a (5000, 1) column
# of integer labels in 1..10 (10 presumably encodes the digit 0 --
# TODO confirm against the exercise description).
data = sio.loadmat('ex3data1.mat')
x = data.get('X')       # feature matrix, one example per row
y_temp = data.get('y')  # labels as an (n, 1) column vector
print(x)
print(y_temp)


[[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]]
[[10]
 [10]
 [10]
 ..., 
 [ 9]
 [ 9]
 [ 9]]

In [3]:
# Flatten the (n, 1) label column into a 1-D int array.  ravel() is the
# vectorized equivalent of the original per-row Python loop
# ([i[0] for i in y_temp]) and avoids iterating an ndarray in Python.
y = np.asarray(y_temp).ravel()

print(x.shape)
print(y.shape)
print(y)


(5000, 400)
(5000,)
[10 10 10 ...,  9  9  9]

In [4]:
def vec2mat(vec, nLabel):
    """One-hot encode a vector of 1-based class labels.

    Args:
        vec: sequence of integer labels, each in the range 1..nLabel.
        nLabel: number of distinct classes (width of the output).

    Returns:
        A (len(vec), nLabel) float array where row i holds a single 1
        in column vec[i] - 1 and zeros elsewhere.
    """
    # Vectorized with fancy indexing instead of the original element-wise
    # loop; also drops the Python-2-only xrange.  Handles the empty-input
    # case (an empty index array is a no-op assignment).
    labels = np.asarray(vec, dtype=int)
    ret = np.zeros([len(labels), nLabel])
    ret[np.arange(len(labels)), labels - 1] = 1
    return ret

# One-hot encode the labels and cast both arrays to float32 -- the
# dtype the TensorFlow variables below default to (tf.zeros /
# tf.random_uniform produce float32).
y_vec = vec2mat(y, 10)
y_vec = np.float32(y_vec)
x = np.float32(x)

In [14]:
# Parameters for a two-layer net (400 inputs -> 400 hidden -> 10 classes).
# Weights are laid out for column-vector examples: the forward pass below
# computes matmul(w, x.T), so each example is a column and the (units, 1)
# bias columns broadcast across examples.
b1 = tf.Variable(tf.zeros([400, 1]))
w1 = tf.Variable(tf.random_uniform([400,400], -1.0, 1.0))
b2 = tf.Variable(tf.zeros([10, 1]))
w2 = tf.Variable(tf.random_uniform([10,400], -1.0, 1.0))

In [15]:
# Build the forward pass.  x is baked into the graph as a constant (no
# placeholder), so every training step runs on all 5000 examples.
print x.shape  # Python-2 print statement; this notebook targets Python 2
# Hidden layer: sigmoid activations clipped away from 0.  NOTE(review):
# the clip guards result1, yet tf.log is later applied to the softmax
# output, not to result1 -- verify the clip is placed where intended.
result1 = tf.clip_by_value(tf.sigmoid(tf.matmul(w1, x.T) + b1), 1e-10, 1.0) 
# Output logits, shape (10, n_examples): rows are classes, columns examples.
result2 = tf.matmul(w2, result1) + b2
# NOTE(review): tf.nn.softmax normalizes along the LAST axis, which here
# is the examples axis (size 5000), not the 10 classes -- confirm this
# across-example normalization is intended.
predict = tf.nn.softmax(result2)


(5000, 400)

In [16]:
# Cross-entropy-style loss; y_vec.T is (10, n_examples) to match predict.
# reduce_mean averages over all 10*n entries (a per-example sum over
# classes would differ only by a constant factor of 10).
# NOTE(review): predict comes straight from softmax and is not clipped,
# so tf.log could in principle see a 0 -- the earlier clip protects
# result1, not predict.  Verify numerical stability.
loss = -tf.reduce_mean(y_vec.T * tf.log(predict))

In [17]:
# Plain full-batch gradient descent with a fixed 0.1 learning rate.
train = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

In [ ]:
# initialize_all_variables() is the pre-TF-0.12 initializer API
# (later deprecated in favor of tf.global_variables_initializer()).
init = tf.initialize_all_variables()
sess = tf.Session()  # NOTE(review): session is never closed; acceptable in a notebook
sess.run(init)

# Full-batch training: the data is embedded in the graph, so each
# sess.run(train) processes all 5000 examples.
for step in xrange(15001):  # xrange is Python-2-only
    sess.run(train)
    if step % 500 == 0:
        print(step,  sess.run(loss))  # in Python 2 this prints a tuple


(0, 1.4708897)
(500, 0.85442501)
(1000, 0.7887789)

In [10]:
# Evaluate the trained net on the (training) data.  The fetched array is
# (10, n_examples): axis 0 indexes classes, axis 1 indexes examples.
# NOTE(review): this reuses the name result1, shadowing the graph node
# defined earlier -- rename to avoid confusion on re-runs.
result1= sess.run(predict)
print len(result1[0])        # number of examples (columns)
print np.argmax(result1[0])  # example index with the largest class-1 score
_result = np.argmax(result1, 0) + 1  # per-example predicted label, mapped back to 1..10
print _result


5000
852
[10 10 10 ...,  9  9  7]

In [11]:
# Training-set accuracy: fraction of examples whose predicted label
# matches the ground truth.  Vectorized element-wise comparison replaces
# the original Python-2 xrange loop; np.mean of the boolean array gives
# the same value as averaging the original list of per-example booleans.
correct = y == _result
print(np.mean(correct) * 100)


91.74

In [ ]: