In [1]:
using Knet, Images
In [2]:
# Index of the device this notebook asks Knet to use; -1 is treated below as "no GPU".
const GPU_ID = 0
# True only when (1) `gpu()` does not report -1, (2) a device was requested, and
# (3) selecting the requested device via `gpu(GPU_ID)` does not report -1.
# `gpu(GPU_ID)` is only called when the first two checks pass.
# NOTE(review): -1 is presumably Knet's "no device" sentinel — confirm in the Knet docs.
const USE_GPU = if gpu() == -1 || GPU_ID == -1
    false
else
    gpu(GPU_ID) != -1
end
Out[2]:
In [3]:
# Element type shared by the model parameters and the latent noise samples.
const T = Float32
# Array type used for parameters and latent samples. It follows USE_GPU (the same
# switch that selects `xtype` for the minibatches) so that the notebook also runs on
# machines without a usable GPU instead of unconditionally requiring KnetArray.
atype = USE_GPU ? KnetArray{T} : Array{T}
Out[3]:
In [4]:
# Load the helper scripts from Knet's data directory. mnist.jl supplies `mnist()`;
# imagenet.jl presumably supplies `make_image_grid` used by the plotting cell —
# TODO confirm.
for script in ("mnist.jl", "imagenet.jl")
    include(Knet.dir("data", script))
end

xtrn, ytrn, xtst, ytst = mnist()

# Container type for the minibatch inputs: KnetArray when running on GPU, otherwise
# whatever array type `mnist()` returned.
xtype = if USE_GPU
    KnetArray
else
    typeof(xtrn)
end
# Internal sanity check: the test inputs must map to the same container type.
@assert xtype == (USE_GPU ? KnetArray : typeof(xtst))

# Training batches of 200 (shuffled) and test batches of 100 (in order); `partial=true`
# is passed for both.
dtrn = minibatch(xtrn, ytrn, 200; xtype=xtype, shuffle=true, partial=true)
dtst = minibatch(xtst, ytst, 100; xtype=xtype, shuffle=false, partial=true)
Out[4]:
In [5]:
"""
    weights(Z_dim, H_dim; atype=Array{T})

Create the parameters of the three two-layer networks used in this notebook and
return them as the tuple `(θd, θg, θr)`.

Each entry is a vector `[W1, b1, W2, b2]` whose elements are converted to `atype`.
Weight matrices come from `xavier`, biases are zero. With `X_dim = 28 * 28`:

- `θd`: `X_dim + Z_dim` inputs, `H_dim` hidden units, 1 output
- `θg`: `Z_dim` inputs, `H_dim` hidden units, `X_dim` outputs
- `θr`: `X_dim` inputs, `H_dim` hidden units, `Z_dim` outputs
"""
function weights(Z_dim, H_dim; atype=Array{T})
    X_dim = 28 * 28

    # Parameters of one `n_in -> H_dim -> n_out` network. The literal is evaluated left
    # to right, so the random draws happen in the order W1, W2.
    mlp(n_in, n_out) = map(p -> convert(atype, p),
                           [xavier(H_dim, n_in),
                            zeros(H_dim),
                            xavier(n_out, H_dim),
                            zeros(n_out)])

    # Built in the order d, g, r so the sequence of random draws stays fixed.
    θd = mlp(X_dim + Z_dim, 1)
    θg = mlp(Z_dim, X_dim)
    θr = mlp(X_dim, Z_dim)
    return θd, θg, θr
end
Out[5]:
In [6]:
"""
    G(θg, z)

Apply the two-layer network stored in `θg = [W1, b1, W2, b2]` to `z`:
affine map, `relu`, affine map, then element-wise `sigm`.
"""
function G(θg, z)
    hidden = relu.(θg[1] * z .+ θg[2])
    return sigm.(θg[3] * hidden .+ θg[4])
end
Out[6]:
In [7]:
"""
    D(θd, x)

Apply the two-layer network stored in `θd = [W1, b1, W2, b2]` to `x`:
affine map, `relu`, `dropout` with probability 0.5, then a final affine map.

The result is returned without a final non-linearity; `loss_d` applies `sigm` itself.
"""
function D(θd, x)
    hidden = relu.(θd[1] * x .+ θd[2])
    # NOTE(review): dropout is requested on every call, including the one made from
    # `report`; whether Knet disables it outside gradient computation depends on the
    # Knet version — confirm.
    hidden = dropout(hidden, 0.5)
    return θd[3] * hidden .+ θd[4]
end
Out[7]:
In [8]:
"""
    R(θr, x)

Apply the two-layer network stored in `θr = [W1, b1, W2, b2]` to `x`:
affine map, `relu`, then a final affine map with no output non-linearity.
"""
function R(θr, x)
    hidden = relu.(θr[1] * x .+ θr[2])
    return θr[3] * hidden .+ θr[4]
end
Out[8]:
In [9]:
"""
    sample_z(Z_dim, X_num)

Draw a `Z_dim × X_num` matrix of standard-normal noise with element type `T` and
convert it to the global array type `atype`.
"""
function sample_z(Z_dim, X_num)
    noise = randn(T, Z_dim, X_num)
    return convert(atype, noise)
end
Out[9]:
In [10]:
"""
    BCE(x, x̂)

Binary cross-entropy between the target `x` and the prediction `x̂`, averaged over
all elements. `eps(T)` is added inside both logarithms so that neither is evaluated
at exactly zero when `x̂` is 0 or 1.
"""
function BCE(x, x̂)
    ϵ = eps(T)
    on_term = x .* log.(x̂ .+ ϵ)
    off_term = (1 .- x) .* log.(1 .- x̂ .+ ϵ)
    return -mean(on_term .+ off_term)
end
Out[10]:
In [11]:
"""
    MSE(x, x̂)

Return the squared error between `x` and `x̂`, summed along the first dimension and
averaged over the remaining entries (i.e. the mean of the per-column sums of squared
differences when the inputs are matrices).
"""
function MSE(x, x̂)
    # Form the difference once instead of twice; under automatic differentiation every
    # repeated subtraction would otherwise be recorded and back-propagated separately.
    δ = x - x̂
    # The mean of the per-column sums equals the total sum divided by the number of
    # columns, i.e. by length(δ) / size(δ, 1). Multiplying before dividing keeps the
    # arithmetic in the element type and avoids the reduction-dimension argument of
    # `sum`, whose spelling differs between Julia versions.
    return sum(δ .* δ) * size(δ, 1) / length(δ)
end
Out[11]:
In [12]:
"""
    loss_d(θd, θg, θr, x, z)

Loss for the network `D`. The pair `(mat(x), R(θr, mat(x)))` is scored against the
target 1 and the pair `(G(θg, z), z)` against the target 0; both scores go through
`sigm` and `BCE`, and the two terms are added.
"""
function loss_d(θd, θg, θr, x, z)
    real_x = mat(x)
    fake_x = G(θg, z)
    real_code = R(θr, real_x)
    # D is evaluated on the real pair first, then on the generated pair.
    p_real = sigm.(D(θd, [real_x; real_code]))
    p_fake = sigm.(D(θd, [fake_x; z]))
    return BCE(1, p_real) + BCE(0, p_fake)
end
# Gradient of `loss_d` with respect to its first argument (AutoGrad's `grad` default).
grad_loss_d = grad(loss_d)
Out[12]:
In [13]:
"""
    loss_g(θg, θd, θr, z)

Loss for the network `G`: the negated mean output of `D` on the pair `(G(θg, z), z)`
plus the `MSE` between `R(θr, G(θg, z))` and `z`.
"""
function loss_g(θg, θd, θr, z)
    generated = G(θg, z)
    recovered = R(θr, generated)
    adversarial = -mean(D(θd, [generated; z]))
    return adversarial + MSE(recovered, z)
end
# Gradient of `loss_g` with respect to its first argument (AutoGrad's `grad` default).
grad_loss_g = grad(loss_g)
Out[13]:
In [14]:
"""
    loss_r(θr, θg, θd, x, z)

Loss for the network `R`: the mean output of `D` on the pair
`(mat(x), R(θr, G(θg, z)))` plus the `MSE` between `R(θr, G(θg, z))` and `z`.
"""
function loss_r(θr, θg, θd, x, z)
    real_x = mat(x)
    generated = G(θg, z)
    recovered = R(θr, generated)
    # NOTE(review): `loss_d` scores `mat(x)` together with `R(θr, mat(x))`, whereas this
    # term pairs `mat(x)` with the code recovered from the generated sample — confirm
    # that this pairing is intended.
    adversarial = mean(D(θd, [real_x; recovered]))
    return adversarial + MSE(recovered, z)
end
# Gradient of `loss_r` with respect to its first argument (AutoGrad's `grad` default).
grad_loss_r = grad(loss_r)
Out[14]:
In [15]:
"""
    report(θg, θd, θr, epoch)

Print the epoch number and the value of `loss_g` on 200 freshly sampled latent
vectors. Reads the global `Z_dim`.
"""
function report(θg, θd, θr, epoch)
    z = sample_z(Z_dim, 200)
    g_loss = loss_g(θg, θd, θr, z)
    println((:epoch, epoch), (:loss_g, g_loss))
end
Out[15]:
In [16]:
# Latent size and hidden-layer width shared by all three networks.
Z_dim, H_dim = 32, 256
# Create the parameters in the array type selected for this session.
θd, θg, θr = weights(Z_dim, H_dim; atype=atype)
Out[16]:
In [17]:
# Number of passes over the training minibatches.
epoch_num = 50

# One set of Adam optimizer states per network, all with the same settings.
opt_d = optimizers(θd, Adam; lr=0.001, beta1=0.7)
opt_g = optimizers(θg, Adam; lr=0.001, beta1=0.7)
opt_r = optimizers(θr, Adam; lr=0.001, beta1=0.7)

# Train the three networks jointly and time the whole run.
@time for epoch in 1:epoch_num
    # The labels `y` are only used to determine the batch size.
    for (x, y) in dtrn
        z = sample_z(Z_dim, length(y))

        # All three gradients are taken against the same, not-yet-updated parameters;
        # the updates are applied only afterwards.
        dθd = grad_loss_d(θd, θg, θr, x, z)
        dθg = grad_loss_g(θg, θd, θr, z)
        dθr = grad_loss_r(θr, θg, θd, x, z)

        update!(θd, dθd, opt_d)
        update!(θg, dθg, opt_g)
        update!(θr, dθr, opt_r)
    end
    report(θg, θd, θr, epoch)
end
In [18]:
"""
    plot_dream_and_thought(θg; gridsize=(5, 5), scale=1.0)

Sample `prod(gridsize)` latent vectors, generate images from them with `G` ("dream"),
pass those images through `R` and `G` again ("thought"), display both batches as image
grids, print the `MSE` between the two batches and return `(x_dream, x_thought)`.

# Keywords
- `gridsize`: `(rows, cols)` of the image grid; their product is the number of samples.
- `scale`: forwarded to `make_image_grid`.

Reads the globals `Z_dim` and `θr`.
"""
function plot_dream_and_thought(θg; gridsize=(5, 5), scale=1.0)
    m, n = gridsize
    nimg = m * n

    # Render one batch (one flattened 28×28 image per column) as a grid of tiles.
    function show_batch(x)
        # Convert the batch to a plain Array once, not once per image.
        host = Array(x)
        tiles = map(i -> reshape(host[:, i], (28, 28, 1)), 1:nimg)
        grid = make_image_grid(tiles; gridsize=gridsize, scale=scale)
        display(colorview(Gray, grid))
    end

    z_dream = sample_z(Z_dim, nimg)
    x_dream = G(θg, z_dream)
    show_batch(x_dream)

    z_thought = R(θr, x_dream)
    x_thought = G(θg, z_thought)
    show_batch(x_thought)

    print("MSE between dream and thought is: $(MSE(x_dream, x_thought)).")
    return x_dream, x_thought
end
Out[18]:
In [19]:
x_dream, x_thought = plot_dream_and_thought(θg; gridsize=(20, 9))
Out[19]:
In [ ]: