# In [ ]:  (Jupyter notebook cell prompt left over from export)

    
def weight_variable(shape):
    """Build a weight variable of the given shape.

    The initial value is drawn with ``tf.truncated_normal`` using a standard
    deviation of 0.1.

    Args:
      shape: Shape passed to ``tf.truncated_normal``.

    Returns:
      A ``tf.Variable`` wrapping the initial value.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Build a bias variable of the given shape.

    Every element starts at the constant 0.1.

    Args:
      shape: Shape passed to ``tf.constant``.

    Returns:
      A ``tf.Variable`` wrapping the constant initial value.
    """
    return tf.Variable(tf.constant(0.1, shape=shape))

def variable_summaries(var):
    """Attach a lot of summaries to a Tensor (for TensorBoard visualization).

    Records scalar summaries for the mean, standard deviation, maximum and
    minimum of ``var`` plus a histogram of its values, all grouped under a
    'summaries' name scope.

    Args:
      var: Tensor to summarize.
    """
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean)
        # Only the ops that compute the standard deviation belong in the
        # 'stddev' scope. The summary calls stay one level up so that max,
        # min and histogram are not filed under 'stddev' in TensorBoard.
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        tf.summary.scalar('stddev', stddev)
        tf.summary.scalar('max', tf.reduce_max(var))
        tf.summary.scalar('min', tf.reduce_min(var))
        tf.summary.histogram('histogram', var)
        
def images_to_sprite(data):
    """Tile a batch of images into a single square sprite image.

    Each image is rescaled independently so that its minimum maps to 0 and
    its maximum to 255. The batch is then zero-padded up to the next perfect
    square and the thumbnails are laid out row by row on an n x n grid.

    Args:
      data: NxHxW[x3] array containing the images. A 3-D input is treated as
        grayscale and replicated across three channels.

    Returns:
      A uint8 array of shape (n*H, n*W, 3) with n = ceil(sqrt(N)). Grid cells
      past the N-th image are black, and so is any image whose pixels are all
      equal (it has no range to stretch).
    """
    if data.ndim == 3:
        data = np.tile(data[..., np.newaxis], (1, 1, 1, 3))
    data = data.astype(np.float32)

    # Shape that broadcasts one scalar per image across that image's pixels.
    per_image = (-1,) + (1,) * (data.ndim - 1)

    lowest = data.reshape((data.shape[0], -1)).min(axis=1)
    data = data - lowest.reshape(per_image)
    highest = data.reshape((data.shape[0], -1)).max(axis=1)
    # A constant image has zero range after the shift; dividing by it would
    # produce NaNs (0/0), so divide by 1 instead and leave the image at zero.
    highest[highest == 0] = 1
    data = data / highest.reshape(per_image)
    # Inverting the colors seems to look better for MNIST
    # data = 1 - data

    n = int(np.ceil(np.sqrt(data.shape[0])))
    # Pad only along the batch axis so the batch fills the n x n grid.
    padding = (((0, n ** 2 - data.shape[0]), (0, 0), (0, 0))
               + ((0, 0),) * (data.ndim - 3))
    data = np.pad(data, padding, mode='constant', constant_values=0)

    # Tile the individual thumbnails into an image:
    # (n, n, H, W, C) -> (n, H, n, W, C) -> (n*H, n*W, C).
    grid_axes = (0, 2, 1, 3) + tuple(range(4, data.ndim + 1))
    data = data.reshape((n, n) + data.shape[1:]).transpose(grid_axes)
    data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])
    return (data * 255).astype(np.uint8)

        
class MNIST:
    """One-hidden-layer MNIST classifier instrumented for TensorBoard.

    Constructing an instance loads the data set, builds the TensorFlow graph
    (784 -> 500 -> dropout -> 10), opens the train/test summary writers under
    ``FLAGS.log_dir`` and initializes all variables. ``train`` runs the
    optimization loop; ``tsne`` exports the first hidden layer's
    pre-activations, labels and a sprite image for the TensorBoard embedding
    projector.

    The class reads its settings from the module-level ``FLAGS`` object:
    ``data_dir``, ``fake_data``, ``learning_rate``, ``log_dir``, ``dropout``
    and ``max_steps``.
    """

    def __init__(self):
        """Set up the neural network and the log file for later display"""
        self.mnist = input_data.read_data_sets(FLAGS.data_dir,
                                               one_hot=True,
                                               fake_data=FLAGS.fake_data)
        tf.reset_default_graph()
        config = tf.ConfigProto(log_device_placement=True)
        config.gpu_options.allow_growth = True
        self.sess = tf.InteractiveSession(config=config)

        # Input placeholders: let the graph know the input sizes beforehand.
        with tf.name_scope('input'):
            self.x = tf.placeholder(tf.float32, [None, 784], name='x-input')
            self.y_ = tf.placeholder(tf.float32, [None, 10], name='y-input')

        # Reshape the flat input to 28x28x1 so it can be logged as images.
        with tf.name_scope('input_reshape'):
            self.image_shaped_input = tf.reshape(self.x, [-1, 28, 28, 1])
            tf.summary.image('input', self.image_shaped_input, 10)

        # One hidden layer with input size 784 and output size 500. The
        # pre-activation tensor is kept because tsne() exports it.
        self.hidden1, self.hidden1_na = self.nn_layer(self.x, 784, 500, 'layer1')

        # Dropout layer; the keep probability is fed at run time.
        with tf.name_scope('dropout'):
            self.keep_prob = tf.placeholder(tf.float32)
            tf.summary.scalar('dropout_keep_probability', self.keep_prob)
            self.dropped = tf.nn.dropout(self.hidden1, self.keep_prob)

        # Do not apply softmax activation yet. Instead, use
        # tf.nn.softmax_cross_entropy_with_logits, because it is numerically
        # stable. Here, we create the final output layer.
        self.y, _ = self.nn_layer(self.dropped, 500, 10, 'layer2', act=tf.identity)

        # Calculate the loss.
        with tf.name_scope('cross_entropy'):
            self.diff = tf.nn.softmax_cross_entropy_with_logits(labels=self.y_,
                                                                logits=self.y)
            with tf.name_scope('total'):
                self.cross_entropy = tf.reduce_mean(self.diff)
        tf.summary.scalar('cross_entropy', self.cross_entropy)

        # Set up the optimization method; here we use Adam.
        with tf.name_scope('train'):
            self.train_step = tf.train.AdamOptimizer(
                FLAGS.learning_rate).minimize(self.cross_entropy)

        # Calculate the accuracy.
        with tf.name_scope('accuracy'):
            with tf.name_scope('correct_prediction'):
                self.correct_prediction = tf.equal(tf.argmax(self.y, 1),
                                                   tf.argmax(self.y_, 1))
            with tf.name_scope('accuracy'):
                self.accuracy = tf.reduce_mean(
                    tf.cast(self.correct_prediction, tf.float32))
        tf.summary.scalar('accuracy', self.accuracy)

        # Merge all the summaries and write them out under FLAGS.log_dir.
        # Only the train writer is given the graph.
        self.merged = tf.summary.merge_all()
        self.train_writer = tf.summary.FileWriter(FLAGS.log_dir + '/train',
                                                  self.sess.graph)
        self.test_writer = tf.summary.FileWriter(FLAGS.log_dir + '/test')
        tf.global_variables_initializer().run()

    def nn_layer(self, input_tensor, input_dim, output_dim, layer_name, act=tf.nn.relu):
        """Create a fully connected layer with summaries attached.

        Computes ``act(input_tensor @ weights + biases)`` and records
        summaries for the weights, biases, pre-activations and activations.

        Args:
          input_tensor: Tensor fed into the layer.
          input_dim: Size of the input's last dimension.
          output_dim: Number of units in the layer.
          layer_name: Name scope used to group the layer's ops in the graph.
          act: Activation callable, invoked as ``act(tensor, name=...)``.

        Returns:
          A tuple ``(activations, preactivate)``.
        """
        # Adding a name scope ensures logical grouping of the layers in the graph.
        with tf.name_scope(layer_name):
            # This Variable will hold the state of the weights for the layer.
            with tf.name_scope('weights'):
                weights = weight_variable([input_dim, output_dim])
                variable_summaries(weights)
            with tf.name_scope('biases'):
                biases = bias_variable([output_dim])
                variable_summaries(biases)
            with tf.name_scope('Wx_plus_b'):
                preactivate = tf.matmul(input_tensor, weights) + biases
                tf.summary.histogram('pre_activations', preactivate)
            activations = act(preactivate, name='activation')
            tf.summary.histogram('activations', activations)
        return activations, preactivate

    def feed_dict(self, train):
        """Make a TensorFlow feed_dict: maps data onto Tensor placeholders.

        Args:
          train: When true (or when ``FLAGS.fake_data`` is set) a batch of 100
            training examples is used with keep probability ``FLAGS.dropout``;
            otherwise the whole test set is used with dropout disabled.

        Returns:
          Dict mapping the ``x``, ``y_`` and ``keep_prob`` placeholders to
          their values.
        """
        if train or FLAGS.fake_data:
            xs, ys = self.mnist.train.next_batch(100, fake_data=FLAGS.fake_data)
            k = FLAGS.dropout
        else:
            xs, ys = self.mnist.test.images, self.mnist.test.labels
            k = 1.0
        return {self.x: xs, self.y_: ys, self.keep_prob: k}

    def train(self):
        """Run ``FLAGS.max_steps`` steps, logging summaries along the way.

        Every 10th step evaluates on the test feed (no training) and prints
        the accuracy. Every other step runs one training step and logs its
        summary; steps with ``i % 100 == 99`` additionally record a full
        execution trace. Both summary writers are closed at the end.
        """
        for i in range(FLAGS.max_steps):
            if i % 10 == 0:  # Record summaries and test-set accuracy
                summary, acc = self.sess.run([self.merged, self.accuracy],
                                             feed_dict=self.feed_dict(False))
                self.test_writer.add_summary(summary, i)
                print('Accuracy at step %s: %s' % (i, acc))
            elif i % 100 == 99:  # Train and record execution stats
                run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
                summary, _ = self.sess.run([self.merged, self.train_step],
                                           feed_dict=self.feed_dict(True),
                                           options=run_options,
                                           run_metadata=run_metadata)
                self.train_writer.add_run_metadata(run_metadata, 'step%03d' % i)
                self.train_writer.add_summary(summary, i)
                print('Adding run metadata for', i)
            else:  # Train and record a summary
                summary, _ = self.sess.run([self.merged, self.train_step],
                                           feed_dict=self.feed_dict(True))
                self.train_writer.add_summary(summary, i)
        self.train_writer.close()
        self.test_writer.close()

    def tsne(self):
        """Export hidden-layer features for the TensorBoard embedding projector.

        Evaluates the first hidden layer's pre-activations on the test feed,
        stores them in a checkpointed variable, and writes the projector
        config, a ``metadata.tsv`` with one label per row and a ``sprite.png``
        thumbnail sheet into ``FLAGS.log_dir``.
        """
        # We extract the feature of first hidden layer for later embedding use.
        emb = self.sess.run(self.hidden1_na, feed_dict=self.feed_dict(False))
        N = len(emb)

        # The projector reads embeddings from a checkpointed variable.
        emb_var = tf.Variable(emb, name='Embedding_of_h1')
        self.sess.run(emb_var.initializer)

        metadata_path = os.path.join(FLAGS.log_dir, 'metadata.tsv')
        sprite_path = os.path.join(FLAGS.log_dir, 'sprite.png')

        summary_writer = tf.summary.FileWriter(FLAGS.log_dir)
        config = projector.ProjectorConfig()
        embedding = config.embeddings.add()
        embedding.tensor_name = emb_var.name

        # Comment out if you don't have metadata
        embedding.metadata_path = metadata_path

        # Comment out if you don't want sprites
        embedding.sprite.image_path = sprite_path
        embedding.sprite.single_image_dim.extend([28, 28])

        projector.visualize_embeddings(summary_writer, config)
        # The writer is only needed to write the projector config; close it
        # so its event file is not left open.
        summary_writer.close()

        saver = tf.train.Saver([emb_var])
        saver.save(self.sess, os.path.join(FLAGS.log_dir, 'model.ckpt'), 1)

        # Use this instance's data set rather than a global named `mnist`,
        # so the method works whatever the instance is called.
        test_set = self.mnist.test
        with open(metadata_path, 'w') as metadata_file:
            metadata_file.write('Name\tClass\n')
            for i in range(N):
                # Labels are one-hot, so argmax recovers the digit class.
                metadata_file.write('%06d\t%s\n' % (i, np.argmax(test_set.labels[i])))

        sprite = images_to_sprite(np.reshape(test_set.images, (-1, 28, 28)))
        # NOTE(review): scipy.misc.imsave is no longer available in recent
        # SciPy releases -- confirm the pinned SciPy version before upgrading.
        scipy.misc.imsave(sprite_path, sprite)