In [1]:
%pylab inline
# %pylab imports numpy and matplotlib.pyplot into the namespace, so names
# like np, exp, dot, log, mean, and imshow are used unqualified below
In [2]:
import os
import urllib

dataset = 'mnist.pkl.gz'

# progress callback for urlretrieve: a = blocks transferred so far,
# b = block size in bytes, c = total file size in bytes
def reporthook(a, b, c):
    print "\rdownloading: %5.1f%%" % (a * b * 100.0 / c),

if not os.path.isfile(dataset):
    origin = "https://github.com/mnielsen/neural-networks-and-deep-learning/raw/master/data/mnist.pkl.gz"
    print "Downloading data from %s" % origin
    urllib.urlretrieve(origin, dataset, reporthook=reporthook)
In [3]:
import gzip
import pickle

# the pickle holds three (images, labels) pairs
with gzip.open(dataset, 'rb') as f:
    train_set, valid_set, test_set = pickle.load(f)
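This notebook runs under Python 2. If the same file is loaded under Python 3, `pickle.load` needs an explicit encoding, because the pickle was written by Python 2; a minimal sketch:

import gzip
import pickle

# Python 3 variant: the byte strings inside the Python 2 pickle must be
# decoded as latin1
with gzip.open('mnist.pkl.gz', 'rb') as f:
    train_set, valid_set, test_set = pickle.load(f, encoding='latin1')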
In [4]:
print "train_set", train_set[0].shape, train_set[1].shape
print "valid_set", valid_set[0].shape, valid_set[1].shape
print "test_set", test_set[0].shape, test_set[1].shape
In [5]:
imshow(train_set[0][0].reshape((28, 28)), cmap="gray")
Out[5]:
[figure: the first training image rendered as a 28x28 grayscale digit]
In [6]:
def show(x, i=[0]):
    # the mutable default argument acts as a persistent figure counter
    plt.figure(i[0])
    imshow(x.reshape((28, 28)), cmap="gray")
    i[0] += 1

for i in range(5):
    print train_set[1][i]
    show(train_set[0][i])
In [7]:
W = np.random.uniform(low=-1, high=1, size=(28*28,10))
b = np.random.uniform(low=-1, high=1, size=10)
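Since this model's loss is convex in W and b, the initialization only affects the starting point, not the reachable optimum; starting from zeros works just as well. A sketch for comparison (W0 and b0 are names introduced here; the rest of the notebook keeps the uniform init above):

import numpy as np

W0 = np.zeros((28 * 28, 10))
b0 = np.zeros(10)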
In [8]:
x = train_set[0][0]
y = train_set[1][0]
In [9]:
Pr = exp(dot(x, W) + b)   # unnormalized class scores
Pr.shape
Out[9]:
(10,)
In [10]:
Pr = Pr / Pr.sum()   # normalize into a probability distribution (softmax)
print Pr
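Exponentiating raw scores can overflow for large values of x·W + b. A numerically stable variant (a sketch; this `softmax` helper is hypothetical and not used in the cells below):

import numpy as np

def softmax(z):
    # subtracting the max before exponentiating prevents overflow;
    # the shared constant cancels in the normalization
    e = np.exp(z - z.max())
    return e / e.sum()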
In [11]:
loss = -log(Pr[y])   # cross-entropy: negative log-probability of the true class
loss
Out[11]:
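For reference, the quantities the next cells differentiate, written out: with scores $s_j = x \cdot W_{:,j} + b_j$,

$$\Pr(j \mid x) = \frac{e^{s_j}}{\sum_k e^{s_k}}, \qquad L = -\log \Pr(y \mid x),$$

and differentiating $L$ gives

$$\frac{\partial L}{\partial b_j} = \Pr(j \mid x) - [j = y], \qquad \frac{\partial L}{\partial W_{mj}} = x_m \bigl( \Pr(j \mid x) - [j = y] \bigr),$$

where $[j = y]$ is 1 if $j = y$ and 0 otherwise. This is exactly the "copy Pr, subtract 1 at index y" pattern implemented below.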
In [12]:
# gradient of the loss w.r.t. b: Pr minus the one-hot vector of y
gradb = Pr.copy()
gradb[y] -= 1
print gradb
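A quick finite-difference check of this gradient (a sketch run in the same namespace, reusing x, y, W, b, loss, and gradb from the cells above; eps is an arbitrary small step chosen here):

eps = 1e-6
for j in range(10):
    b2 = b.copy()
    b2[j] += eps                      # nudge one component of b
    Pr2 = exp(dot(x, W) + b2)
    Pr2 = Pr2 / Pr2.sum()
    numeric = (-log(Pr2[y]) - loss) / eps
    print j, numeric, gradb[j]        # the two columns should nearly agree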
In [14]:
print Pr.shape, x.shape, W.shape
# gradient w.r.t. W: the outer product of x with (Pr - onehot(y))
gradW = dot(x.reshape(784, 1), Pr.reshape(1, 10))
gradW[:, y] -= x
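Equivalently, the two-step construction above is a single outer product of x with Pr minus the one-hot vector of y (a sketch reusing the x, y, Pr, gradW already in scope; onehot and gradW2 are names introduced here):

onehot = np.zeros(10)
onehot[y] = 1
gradW2 = np.outer(x, Pr - onehot)
print abs(gradW2 - gradW).max()       # expect ~0, up to float rounding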
In [15]:
# one gradient-descent step
W -= 0.1 * gradW
b -= 0.1 * gradb
In [16]:
Pr = exp(dot(x, W) + b)
Pr = Pr / Pr.sum()
loss = -log(Pr[y])   # the loss on this sample should now be lower
loss
Out[16]:
In [17]:
W = np.random.uniform(low=-1, high=1, size=(28*28, 10))
b = np.random.uniform(low=-1, high=1, size=10)
score = 0                 # exponential moving average of per-sample accuracy
N = 50000 * 20            # 20 passes over the 50000 training samples
d = 0.001                 # decay rate of the moving average
learning_rate = 1e-2
for i in xrange(N):
    if i % 50000 == 0:
        print i, "%5.3f%%" % (score * 100)
    x = train_set[0][i % 50000]
    y = train_set[1][i % 50000]
    Pr = exp(dot(x, W) + b)
    Pr = Pr / Pr.sum()
    loss = -log(Pr[y])
    # update the moving-average accuracy
    score *= (1 - d)
    if Pr.argmax() == y:
        score += d
    # gradients, as derived above
    gradb = Pr.copy()
    gradb[y] -= 1
    gradW = dot(x.reshape(784, 1), Pr.reshape(1, 10))
    gradW[:, y] -= x
    W -= learning_rate * gradW
    b -= learning_rate * gradb
In [18]:
def compute_Pr(x):
    # batched softmax: x is 2-D, one sample per row
    Pr = exp(dot(x, W) + b)
    return Pr / Pr.sum(axis=1, keepdims=True)

def compute_accuracy(Pr, y):
    return mean(Pr.argmax(axis=1) == y)
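A quick usage check of these helpers, using the W and b left over from the previous training loop (the slice size 5 is arbitrary):

Pr = compute_Pr(valid_set[0][:5])
print Pr.shape, Pr.sum(axis=1)        # (5, 10); each row sums to 1
print compute_accuracy(Pr, valid_set[1][:5])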
In [19]:
W = np.random.uniform(low=-1, high=1, size=(28*28, 10))
b = np.random.uniform(low=-1, high=1, size=10)
score = 0
N = 50000 * 100
batch_size = 500
learning_rate = .7
for i in xrange(0, N, batch_size):
    if i % 100000 == 0:
        x, y = test_set[0], test_set[1]
        test_score = compute_accuracy(compute_Pr(x), y) * 100
        x, y = valid_set[0], valid_set[1]
        valid_score = compute_accuracy(compute_Pr(x), y) * 100
        print i, "%5.2f%%" % test_score, "%5.2f%%" % valid_score
    # randomly pick a mini-batch of training samples
    rndidx = np.random.choice(train_set[0].shape[0], batch_size, replace=False)
    x, y = train_set[0][rndidx], train_set[1][rndidx]
    # compute all the Pr at once
    Pr = compute_Pr(x)
    # average gradient over the batch (inner loop variables renamed so they
    # don't clobber the outer i, which Python 2 comprehensions would leak)
    gradb = Pr.mean(axis=0) - [(y == k).mean() for k in range(10)]
    gradW = dot(x.T, Pr)
    for j in range(batch_size):
        gradW[:, y[j]] -= x[j]
    gradW /= batch_size
    # update W and b
    W -= learning_rate * gradW
    b -= learning_rate * gradb
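The per-example loop above can be eliminated with a one-hot target matrix; a sketch of the equivalent batched gradient, reusing the x, y, Pr of the last mini-batch (Y is a name introduced here):

# one-hot matrix of the batch labels: Y[i, j] = 1 iff y[i] == j
Y = (y[:, None] == np.arange(10)).astype(float)
gradW = dot(x.T, Pr - Y) / batch_size   # same value as the loop + divide above
gradb = (Pr - Y).mean(axis=0)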
In [20]:
x, y = test_set[0], test_set[1]
Pr = compute_Pr(x)
test_score = compute_accuracy(Pr, y)*100
x, y = valid_set[0], valid_set[1]
Pr = compute_Pr(x)
valid_score = compute_accuracy(Pr, y)*100
print "test accuracy %5.2f%%"%test_score, "valid accuracy %5.2f%%"%valid_score
x, y = train_set[0], train_set[1]
Pr = compute_Pr(x)
train_score = compute_accuracy(Pr, y)*100
print "train accuracy %5.2f%%"%train_score
In [21]:
x = test_set[0][:10]
y = test_set[1][:10]
Pr = compute_Pr(x)
print Pr.argmax(axis=1)
print y
for i in range(10):
    show(x[i])
In [22]:
x = test_set[0][:100]
y = test_set[1][:100]
Pr = compute_Pr(x)
y2 = Pr.argmax(axis=1)
# show the misclassified digits: predicted label, true label, image
for i in range(100):
    if y2[i] != y[i]:
        print y2[i], y[i]
        show(x[i])