This notebook works on a Windows 10 PC with an old GPU: a GeForce GTX 550 Ti.

Run the following command (without Docker) from the root of this project:

jupyter notebook

Then start TensorBoard in a separate terminal, pointed at the log directory that the mxboard SummaryWriter writes to below:

tensorboard --logdir=./dl/logs --host=127.0.0.1 --port=8889

In [1]:
!pip install tensorflow-gpu


Requirement already satisfied: tensorflow-gpu in c:\coding\anaconda3\lib\site-packages (1.8.0)
Requirement already satisfied: termcolor>=1.1.0 in c:\coding\anaconda3\lib\site-packages (from tensorflow-gpu) (1.1.0)
Requirement already satisfied: protobuf>=3.4.0 in c:\coding\anaconda3\lib\site-packages (from tensorflow-gpu) (3.6.0)
Requirement already satisfied: six>=1.10.0 in c:\coding\anaconda3\lib\site-packages (from tensorflow-gpu) (1.11.0)
Requirement already satisfied: absl-py>=0.1.6 in c:\coding\anaconda3\lib\site-packages (from tensorflow-gpu) (0.2.2)
Requirement already satisfied: astor>=0.6.0 in c:\coding\anaconda3\lib\site-packages (from tensorflow-gpu) (0.6.2)
Requirement already satisfied: wheel>=0.26 in c:\coding\anaconda3\lib\site-packages (from tensorflow-gpu) (0.31.1)
Requirement already satisfied: tensorboard<1.9.0,>=1.8.0 in c:\coding\anaconda3\lib\site-packages (from tensorflow-gpu) (1.8.0)
Requirement already satisfied: grpcio>=1.8.6 in c:\coding\anaconda3\lib\site-packages (from tensorflow-gpu) (1.12.1)
Requirement already satisfied: numpy>=1.13.3 in c:\coding\anaconda3\lib\site-packages (from tensorflow-gpu) (1.14.3)
Requirement already satisfied: gast>=0.2.0 in c:\coding\anaconda3\lib\site-packages (from tensorflow-gpu) (0.2.0)
Requirement already satisfied: setuptools in c:\coding\anaconda3\lib\site-packages (from protobuf>=3.4.0->tensorflow-gpu) (39.1.0)
Requirement already satisfied: markdown>=2.6.8 in c:\coding\anaconda3\lib\site-packages (from tensorboard<1.9.0,>=1.8.0->tensorflow-gpu) (2.6.11)
Requirement already satisfied: werkzeug>=0.11.10 in c:\coding\anaconda3\lib\site-packages (from tensorboard<1.9.0,>=1.8.0->tensorflow-gpu) (0.14.1)
Requirement already satisfied: bleach==1.5.0 in c:\coding\anaconda3\lib\site-packages (from tensorboard<1.9.0,>=1.8.0->tensorflow-gpu) (1.5.0)
Requirement already satisfied: html5lib==0.9999999 in c:\coding\anaconda3\lib\site-packages (from tensorboard<1.9.0,>=1.8.0->tensorflow-gpu) (0.9999999)
distributed 1.21.8 requires msgpack, which is not installed.

In [2]:
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
%env CUDA_VISIBLE_DEVICES=0,1


env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=0,1

In [3]:
import os
print(os.environ["CUDA_DEVICE_ORDER"])
print(os.environ["CUDA_VISIBLE_DEVICES"])


PCI_BUS_ID
0,1

In [4]:
import tensorflow as tf


C:\coding\Anaconda3\lib\site-packages\h5py\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters

In [5]:
from tensorflow.python.client import device_lib

device_lib.list_local_devices()


Out[5]:
[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 8034297251726998311]

In [6]:
tf.test.gpu_device_name()


Out[6]:
''
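
TensorFlow reports no GPU here, most likely because the GTX 550 Ti is a Fermi card with compute capability 2.1, below what TensorFlow's prebuilt CUDA 9 binaries support. A quick cross-check (a sketch using the TF 1.x test API, not part of the original run):

import tensorflow as tf

# Returns False when TF cannot see a supported CUDA GPU.
print(tf.test.is_gpu_available())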

In [7]:
!nvcc --version


nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2017 NVIDIA Corporation
Built on Fri_Sep__1_21:08:32_Central_Daylight_Time_2017
Cuda compilation tools, release 9.0, V9.0.176

In [8]:
!pip install mxnet-cu90


Requirement already satisfied: mxnet-cu90 in c:\coding\anaconda3\lib\site-packages (1.2.0)
Requirement already satisfied: requests in c:\coding\anaconda3\lib\site-packages (from mxnet-cu90) (2.18.4)
Requirement already satisfied: numpy in c:\coding\anaconda3\lib\site-packages (from mxnet-cu90) (1.14.3)
Requirement already satisfied: graphviz in c:\coding\anaconda3\lib\site-packages (from mxnet-cu90) (0.8.3)
Requirement already satisfied: chardet<3.1.0,>=3.0.2 in c:\coding\anaconda3\lib\site-packages (from requests->mxnet-cu90) (3.0.4)
Requirement already satisfied: idna<2.7,>=2.5 in c:\coding\anaconda3\lib\site-packages (from requests->mxnet-cu90) (2.6)
Requirement already satisfied: urllib3<1.23,>=1.21.1 in c:\coding\anaconda3\lib\site-packages (from requests->mxnet-cu90) (1.22)
Requirement already satisfied: certifi>=2017.4.17 in c:\coding\anaconda3\lib\site-packages (from requests->mxnet-cu90) (2018.4.16)
distributed 1.21.8 requires msgpack, which is not installed.

In [9]:
# From https://stackoverflow.com/questions/49076092/is-there-a-way-to-check-if-mxnet-uses-my-gpu/49079940#49079940
# https://developer.download.nvidia.com/compute/DCGM/docs/nvidia-smi-367.38.pdf
!"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi" --query-gpu=timestamp,name,pci.bus_id,driver_version,pstate,pcie.link.gen.max,pcie.link.gen.current,temperature.gpu,utilization.gpu,utilization.memory,memory.total,memory.free,memory.used --format=csv


timestamp, name, pci.bus_id, driver_version, pstate, pcie.link.gen.max, pcie.link.gen.current, temperature.gpu, utilization.gpu [%], utilization.memory [%], memory.total [MiB], memory.free [MiB], memory.used [MiB]
2018/06/29 09:51:58.754, GeForce GTX 550 Ti, 00000000:01:00.0, 385.54, P0, [Not Supported], [Not Supported], 42, [Not Supported], [Not Supported], 1024 MiB, 448 MiB, 576 MiB

The [Not Supported] entries are expected: older consumer GeForce cards do not expose PCIe link-generation or utilization counters through NVML, so nvidia-smi cannot report them.

In [10]:
import mxnet as mx

def gpu_device(gpu_number=0):
    """Return mx.gpu(gpu_number) if that device is usable, else None."""
    try:
        # A tiny allocation on the GPU raises MXNetError if the device
        # is unavailable.
        _ = mx.nd.array([1, 2, 3], ctx=mx.gpu(gpu_number))
    except mx.MXNetError:
        return None
    return mx.gpu(gpu_number)

In [11]:
gpu_device()


Out[11]:
gpu(0)

In [12]:
mx.gpu(0)


Out[12]:
gpu(0)

In [13]:
from __future__ import print_function
import numpy as np
import mxnet as mx
from mxnet import nd, autograd, gluon

In [14]:
# Keep both data and model on the CPU for now; uncomment the last line
# to place the model on the GPU instead.
data_ctx = mx.cpu()
model_ctx = mx.cpu()
# model_ctx = mx.gpu(0)

In [15]:
batch_size = 64
num_inputs = 784    # 28 x 28 pixels, flattened
num_outputs = 10    # digit classes 0-9
num_examples = 60000
def transform(data, label):
    # Scale pixels from [0, 255] to [0, 1] and cast labels to float32.
    return data.astype(np.float32)/255, label.astype(np.float32)
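
As a quick sanity check (a sketch, not part of the original run), the transform should scale pixels into [0, 1] and leave each image shaped (28, 28, 1):

sample_data, sample_label = mx.gluon.data.vision.MNIST(train=True, transform=transform)[0]
print(sample_data.shape, sample_data.min().asscalar(), sample_data.max().asscalar())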

In [16]:
train_data = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=True, transform=transform),
                                      batch_size, shuffle=True)
test_data = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=False, transform=transform),
                                     batch_size, shuffle=False)
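
Pulling one batch (a sketch, not part of the original run) confirms the shapes the model will see; the images arrive as (batch_size, 28, 28, 1) and are flattened to 784-dimensional vectors later:

for data, label in train_data:
    print(data.shape, label.shape)  # expect (64, 28, 28, 1) and (64,)
    break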

In [17]:
class MLP(gluon.Block):
    def __init__(self, **kwargs):
        super(MLP, self).__init__(**kwargs)
        with self.name_scope():
            self.dense0 = gluon.nn.Dense(64)
            self.dense1 = gluon.nn.Dense(64)
            self.dense2 = gluon.nn.Dense(10)

    def forward(self, x):
        x = nd.relu(self.dense0(x))
        x = nd.relu(self.dense1(x))
        x = self.dense2(x)
        return x

In [18]:
net = MLP()
net.collect_params().initialize(mx.init.Normal(sigma=.01), ctx=model_ctx)

In [19]:
data = nd.ones((1,784))
net(data.as_in_context(model_ctx))


Out[19]:
[[-5.2642502e-04 -4.8494569e-04 -9.1017238e-05 -1.0700601e-03
   9.5340359e-04  1.2931204e-03 -3.8861975e-04 -6.4619188e-04
   1.3646495e-04 -1.7153830e-03]]
<NDArray 1x10 @cpu(0)>

In [20]:
class MLP(gluon.Block):
    def __init__(self, **kwargs):
        super(MLP, self).__init__(**kwargs)
        with self.name_scope():
            self.dense0 = gluon.nn.Dense(64, activation="relu")
            self.dense1 = gluon.nn.Dense(64, activation="relu")
            self.dense2 = gluon.nn.Dense(10)

    def forward(self, x):
        x = self.dense0(x)
        print("Hidden Representation 1: %s" % x)
        x = self.dense1(x)
        print("Hidden Representation 2: %s" % x)
        x = self.dense2(x)
        print("Network output: %s" % x)
        return x

net = MLP()
net.collect_params().initialize(mx.init.Normal(sigma=.01), ctx=model_ctx)
net(data.as_in_context(model_ctx))


Hidden Representation 1: 
[[0.         0.         0.0257028  0.41763663 0.         0.
  0.         0.         0.         0.03712562 0.16054314 0.35507876
  0.         0.12578698 0.         0.         0.         0.30374664
  0.292567   0.35357708 0.         0.07809136 0.21969806 0.2177984
  0.         0.3457912  0.13206203 0.01624641 0.27534354 0.22952288
  0.2202207  0.         0.00258669 0.06395139 0.68015635 0.
  0.         0.         0.1652459  0.18695295 0.25243065 0.01728743
  0.06471729 0.         0.         0.2552151  0.         0.
  0.03300378 0.33107045 0.6453747  0.04547642 0.         0.
  0.         0.19542485 0.02424754 0.         0.         0.04300808
  0.16542053 0.13203493 0.         0.        ]]
<NDArray 1x64 @cpu(0)>
Hidden Representation 2: 
[[0.0000000e+00 0.0000000e+00 4.8457514e-03 0.0000000e+00 2.4975553e-02
  0.0000000e+00 9.2384806e-03 1.1846514e-02 0.0000000e+00 1.5087268e-02
  0.0000000e+00 1.3427198e-02 1.6015759e-02 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 2.7162414e-02 4.1979598e-05
  0.0000000e+00 1.8946800e-02 3.0578913e-03 0.0000000e+00 0.0000000e+00
  2.7754948e-02 7.5642066e-04 0.0000000e+00 0.0000000e+00 1.9757828e-02
  1.7670706e-02 0.0000000e+00 4.0669916e-03 1.0265570e-02 7.5005908e-03
  1.5555882e-02 0.0000000e+00 0.0000000e+00 0.0000000e+00 2.8156085e-02
  0.0000000e+00 0.0000000e+00 2.0807199e-02 0.0000000e+00 0.0000000e+00
  0.0000000e+00 5.2651879e-04 0.0000000e+00 0.0000000e+00 3.6671013e-02
  1.6886523e-02 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  1.5089142e-02 1.0638590e-02 9.0155248e-03 1.8627236e-02 1.4041221e-02
  0.0000000e+00 0.0000000e+00 0.0000000e+00 1.2555162e-02]]
<NDArray 1x64 @cpu(0)>
Network output: 
[[-1.1785791e-03  1.9014490e-04  8.1118196e-04 -3.8255830e-04
   4.7956721e-04 -1.2719276e-04  3.3852040e-05 -2.3284566e-04
   7.1805023e-04  1.1753932e-03]]
<NDArray 1x10 @cpu(0)>
Out[20]:
[[-1.1785791e-03  1.9014490e-04  8.1118196e-04 -3.8255830e-04
   4.7956721e-04 -1.2719276e-04  3.3852040e-05 -2.3284566e-04
   7.1805023e-04  1.1753932e-03]]
<NDArray 1x10 @cpu(0)>

In [33]:
num_hidden = 64
net = gluon.nn.HybridSequential()
with net.name_scope():
    net.add(gluon.nn.Dense(num_hidden, activation="relu"))
    net.add(gluon.nn.Dense(num_hidden, activation="relu"))
    net.add(gluon.nn.Dense(num_outputs))
# Compile the imperative graph into a symbolic one; this speeds up
# execution and enables graph logging and export later on.
net.hybridize()

In [34]:
net.collect_params().initialize(mx.init.Normal(sigma=.1), ctx=model_ctx)

In [35]:
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
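
By default SoftmaxCrossEntropyLoss applies log-softmax to raw logits internally, so the network's last layer needs no activation. A small check (a sketch, not part of the original run) against a manual computation:

logits = nd.array([[2.0, 0.5, -1.0]])
label = nd.array([0])
manual = -nd.pick(nd.log_softmax(logits), label)  # -log p(true class)
print(softmax_cross_entropy(logits, label), manual)  # the two should match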

In [36]:
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .01})
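
trainer.step(batch_size) rescales the accumulated gradients by 1/batch_size and applies one SGD update. A hand-rolled equivalent (a sketch of plain SGD, ignoring the Trainer's internal bookkeeping) would be:

lr = 0.01
for p in net.collect_params().values():
    p.set_data(p.data() - lr * p.grad() / batch_size)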

In [37]:
def evaluate_accuracy(data_iterator, net):
    acc = mx.metric.Accuracy()
    for i, (data, label) in enumerate(data_iterator):
        # Flatten each 28x28 image into a 784-dimensional vector.
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        output = net(data)
        # The predicted class is the index of the largest logit.
        predictions = nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=label)
    return acc.get()[1]  # get() returns (metric_name, value)

In [38]:
!pip install mxboard


Requirement already satisfied: mxboard in c:\coding\anaconda3\lib\site-packages (0.1.0)
Requirement already satisfied: protobuf>=3.0.0 in c:\coding\anaconda3\lib\site-packages (from mxboard) (3.6.0)
Requirement already satisfied: Pillow in c:\coding\anaconda3\lib\site-packages (from mxboard) (5.1.0)
Requirement already satisfied: six in c:\coding\anaconda3\lib\site-packages (from mxboard) (1.11.0)
Requirement already satisfied: numpy in c:\coding\anaconda3\lib\site-packages (from mxboard) (1.14.3)
Requirement already satisfied: setuptools in c:\coding\anaconda3\lib\site-packages (from protobuf>=3.0.0->mxboard) (39.1.0)
distributed 1.21.8 requires msgpack, which is not installed.

In [39]:
!pip install tensorboard


Requirement already satisfied: tensorboard in c:\coding\anaconda3\lib\site-packages (1.8.0)
Requirement already satisfied: html5lib==0.9999999 in c:\coding\anaconda3\lib\site-packages (from tensorboard) (0.9999999)
Requirement already satisfied: bleach==1.5.0 in c:\coding\anaconda3\lib\site-packages (from tensorboard) (1.5.0)
Requirement already satisfied: protobuf>=3.4.0 in c:\coding\anaconda3\lib\site-packages (from tensorboard) (3.6.0)
Requirement already satisfied: werkzeug>=0.11.10 in c:\coding\anaconda3\lib\site-packages (from tensorboard) (0.14.1)
Requirement already satisfied: six>=1.10.0 in c:\coding\anaconda3\lib\site-packages (from tensorboard) (1.11.0)
Requirement already satisfied: wheel>=0.26; python_version >= "3" in c:\coding\anaconda3\lib\site-packages (from tensorboard) (0.31.1)
Requirement already satisfied: markdown>=2.6.8 in c:\coding\anaconda3\lib\site-packages (from tensorboard) (2.6.11)
Requirement already satisfied: numpy>=1.12.0 in c:\coding\anaconda3\lib\site-packages (from tensorboard) (1.14.3)
Requirement already satisfied: setuptools in c:\coding\anaconda3\lib\site-packages (from protobuf>=3.4.0->tensorboard) (39.1.0)
distributed 1.21.8 requires msgpack, which is not installed.

In [40]:
from mxboard import SummaryWriter
sw = SummaryWriter(logdir='logs', flush_secs=5)
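
flush_secs=5 means buffered records are written to disk every five seconds. A quick smoke test (a sketch, not part of the original run) that TensorBoard can pick up events immediately:

sw.add_scalar(tag='smoke_test', value=1.0, global_step=0)
sw.flush()  # force pending records to disk now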

In [42]:
epochs = 10
smoothing_constant = .01

# Collect parameter names for logging the gradients of parameters in each epoch.
params = net.collect_params()
param_names = params.keys()
global_step = 0

for e in range(epochs):
    cumulative_loss = 0
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)

        sw.add_scalar(tag='cross_entropy', value=loss.mean().asscalar(), global_step=global_step)
        if i == 0:
            # Log the first minibatch of each epoch as images.
            sw.add_image('mnist_first_minibatch', data.reshape((batch_size, 1, 28, 28)), e)
        if e == 0 and i == 0:
            # Log the network graph once, not on every minibatch.
            sw.add_graph(net)

        loss.backward()
        # Log gradient histograms after backward() so they reflect this
        # minibatch's gradients; use names other than `i` to avoid
        # shadowing the minibatch index.
        grads = [p.grad() for p in net.collect_params().values()]
        for g, name in zip(grads, param_names):
            sw.add_histogram(tag=name, values=g, global_step=e, bins=1000)

        global_step += 1
        trainer.step(data.shape[0])
        cumulative_loss += nd.sum(loss).asscalar()

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    sw.add_scalar(tag='accuracy_curves', value=('train_acc', train_accuracy), global_step=e)
    sw.add_scalar(tag='accuracy_curves', value=('valid_acc', test_accuracy), global_step=e)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, cumulative_loss/num_examples, train_accuracy, test_accuracy))


Epoch 0. Loss: 1.2356886094411215, Train_acc 0.8395166666666667, Test_acc 0.8474
Epoch 1. Loss: 0.46565542084376016, Train_acc 0.88465, Test_acc 0.8912
Epoch 2. Loss: 0.3715192502895991, Train_acc 0.901, Test_acc 0.9029
Epoch 3. Loss: 0.32939287207126616, Train_acc 0.91, Test_acc 0.9112
Epoch 4. Loss: 0.30172612600326537, Train_acc 0.91625, Test_acc 0.9184
Epoch 5. Loss: 0.2804558823386828, Train_acc 0.9217333333333333, Test_acc 0.921
Epoch 6. Loss: 0.2626380964756012, Train_acc 0.9262666666666667, Test_acc 0.9252
Epoch 7. Loss: 0.24708882774909338, Train_acc 0.9300333333333334, Test_acc 0.9296
Epoch 8. Loss: 0.23393845278819403, Train_acc 0.9341833333333334, Test_acc 0.9332
Epoch 9. Loss: 0.22262413431803385, Train_acc 0.93545, Test_acc 0.9352
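
Once training is done, the writer should be closed so all pending events reach disk; and since the network was hybridized and run, it can also be exported as a symbol/params pair (a sketch, not part of the original run; the 'mlp' filename prefix is arbitrary):

sw.close()
net.export('mlp')  # writes mlp-symbol.json and mlp-0000.params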

