In [1]:
import numpy as np
import pandas as pd

import tensorflow as tf

In [2]:
header = ["label", "cap-shape","cap-surface", "cap-color", "bruises", "odor", "gill-attachment", "gill-spacing", "gill-size", "gill-color",
"stalk-shape", "stalk-root", "stalk-surface-above-ring", "stalk-surface-below-ring", "stalk-color-above-ring",
"stalk-color-below-ring", "veil-type", "veil-color", "ring-number", "ring-type", "spore-print-color", "population",
"habitat"]

def makeOneHot(col):
    vals,idx = pd.factorize(col)
    res = np.zeros((len(vals),len(idx)))
    res[range(len(vals)),vals] = 1
    return res

def makeOrdinal(col):
    vals,idx = pd.factorize(col)
    return vals.reshape(len(vals),1)

def loadData():
    df = pd.read_csv("./agaricus-lepiota.data", header=0, names=header)
    X = np.hstack([makeOneHot(df[c]) for c in header[1:]])
    y = makeOneHot(df[header[0]])
    
    return X.astype(np.float32), y.astype(np.float32)

def loadData2():
    df = pd.read_csv("./agaricus-lepiota.data", header=0, names=header)
    X = np.hstack([makeOrdinal(df[c]) for c in header[1:]])
    y = makeOneHot(df[header[0]])
    
    return X.astype(np.float32), y.astype(np.float32)

def randomizeData(X,y):
    num_instances = X.shape[0]
    indices = np.random.permutation(num_instances)
    return X[indices,:], y[indices]

X, y = loadData()
X, y = randomizeData(X,y)

#print len(header)
#print X.shape, y.shape
#print X, y

In [3]:
split = int(X.shape[0]*0.8)

X_train = X[:split,:]
y_train = y[:split]
X_test  = X[split:,:]
y_test  = y[split:]

print X_train.shape, y_train.shape


(6498, 117) (6498, 2)

In [4]:
num_features = X.shape[1]
num_labels   = y.shape[1]

graph = tf.Graph()
with graph.as_default():
    # Input data.
    # Load the training, validation and test data into constants that are
    # attached to the graph.
    Xtr = tf.constant(X_train)
    ytr = tf.constant(y_train)
    Xts = tf.constant(X_test)
    
    # Variables.
    # These are the parameters that we are going to be training. The weight
    # matrix will be initialized using random valued following a (truncated)
    # normal distribution. The biases get initialized to zero.
    weights = tf.Variable(tf.truncated_normal([num_features, num_labels]))
    biases  = tf.Variable(tf.zeros([num_labels]))

    # Training computation.
    # We multiply the inputs with the weight matrix, and add biases. We compute
    # the softmax and cross-entropy (it's one operation in TensorFlow, because
    # it's very common, and it can be optimized). We take the average of this
    # cross-entropy across all training examples: that's our loss.
    logits = tf.matmul(Xtr, weights) + biases
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, ytr))

    # Optimizer.
    # We are going to find the minimum of this loss using gradient descent.
    optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)

    # Predictions for the training, validation, and test data.
    # These are not part of training, but merely here so that we can report
    # accuracy figures as we train.
    train_prediction = tf.nn.softmax(logits)
    test_prediction = tf.nn.softmax(tf.matmul(Xts, weights) + biases)

In [5]:
num_steps = 801

def accuracy(predictions, labels):
  return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / predictions.shape[0])

with tf.Session(graph=graph) as session:
  # This is a one-time operation which ensures the parameters get initialized as
  # we described in the graph: random weights for the matrix, zeros for the
  # biases. 
  tf.initialize_all_variables().run()
  print 'Initialized'
  for step in xrange(num_steps):
    # Run the computations. We tell .run() that we want to run the optimizer,
    # and get the loss value and the training predictions returned as numpy
    # arrays.
    _, l, predictions = session.run([optimizer, loss, train_prediction])
    if (step % 100 == 0):
      print 'Loss at step', step, ':', l
      print 'Training accuracy: %.1f%%' % accuracy(predictions, y_train)
  print 'Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), y_test)


Initialized
Loss at step 0 : 4.43674
Training accuracy: 22.3%
Loss at step 100 : 0.538355
Training accuracy: 79.0%
Loss at step 200 : 0.309362
Training accuracy: 88.8%
Loss at step 300 : 0.224843
Training accuracy: 91.8%
Loss at step 400 : 0.181071
Training accuracy: 93.6%
Loss at step 500 : 0.154121
Training accuracy: 94.6%
Loss at step 600 : 0.135362
Training accuracy: 95.3%
Loss at step 700 : 0.121155
Training accuracy: 95.8%
Loss at step 800 : 0.109772
Training accuracy: 96.2%
Test accuracy: 95.9%

In [6]:
batch_size = 128

graph = tf.Graph()
with graph.as_default():
    # Input data. For the training data, we use a placeholder that will be fed
    # at run time with a training minibatch.
    Xtr = tf.placeholder(tf.float32, shape=(batch_size, 117))
    ytr = tf.placeholder(tf.float32, shape=(batch_size, 2))
    Xts = tf.constant(X_test)
    
    weights = tf.Variable(tf.truncated_normal([117, 2]))
    biases  = tf.Variable(tf.zeros([2]))

    logits = tf.matmul(Xtr, weights) + biases
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, ytr))

    optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)

    train_prediction = tf.nn.softmax(logits)
    test_prediction = tf.nn.softmax(tf.matmul(Xts, weights) + biases)

In [7]:
num_steps = 3001

with tf.Session(graph=graph) as session:
  tf.initialize_all_variables().run()
  print "Initialized"
  for step in xrange(num_steps):
    # Pick an offset within the training data, which has been randomized.
    # Note: we could use better randomization across epochs.
    offset = (step * batch_size) % (y_train.shape[0] - batch_size)
    # Generate a minibatch.
    batch_data = X_train[offset:(offset + batch_size), :]
    batch_labels = y_train[offset:(offset + batch_size)]
    # Prepare a dictionary telling the session where to feed the minibatch.
    # The key of the dictionary is the placeholder node of the graph to be fed,
    # and the value is the numpy array to feed to it.
    feed_dict = {Xtr : batch_data, ytr : batch_labels}
    _, l, predictions = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)
    if (step % 500 == 0):
      print "Minibatch loss at step", step, ":", l
      print "Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels)
  print "Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), y_test)


Initialized
Minibatch loss at step 0 : 1.79163
Minibatch accuracy: 46.9%
Minibatch loss at step 500 : 0.0949247
Minibatch accuracy: 96.9%
Minibatch loss at step 1000 : 0.0524768
Minibatch accuracy: 97.7%
Minibatch loss at step 1500 : 0.0319975
Minibatch accuracy: 100.0%
Minibatch loss at step 2000 : 0.0406476
Minibatch accuracy: 99.2%
Minibatch loss at step 2500 : 0.0424642
Minibatch accuracy: 98.4%
Minibatch loss at step 3000 : 0.0240518
Minibatch accuracy: 99.2%
Test accuracy: 99.2%

In [8]:
batch_size = 100

graph = tf.Graph()
with graph.as_default():
    Xtr = tf.placeholder(tf.float32, shape=(batch_size, num_features))
    ytr = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    Xts = tf.constant(X_test)
    
    w1 = tf.Variable(tf.truncated_normal([num_features, 10]))
    b1 = tf.Variable(tf.zeros([10]))
    w2 = tf.Variable(tf.truncated_normal([10, num_labels]))
    b2 = tf.Variable(tf.zeros([num_labels]))
    
    a1 = tf.nn.relu(tf.matmul(Xtr, w1) + b1)
    logits = tf.matmul(a1, w2) + b2
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, ytr))
    
    regularizer = (tf.nn.l2_loss(w1) + tf.nn.l2_loss(b1) + 
                   tf.nn.l2_loss(w2) + tf.nn.l2_loss(b2))
  
    loss += 0.005 * regularizer

    optimizer = tf.train.AdamOptimizer().minimize(loss)

    train_prediction = tf.nn.softmax(logits)
    test_prediction = tf.nn.softmax(tf.matmul(tf.nn.relu(tf.matmul(Xts, w1) + b1), w2) + b2)

In [9]:
num_steps = 3001

with tf.Session(graph=graph) as session:
  tf.initialize_all_variables().run()
  print "Initialized"
  for step in xrange(num_steps):
    # Pick an offset within the training data, which has been randomized.
    # Note: we could use better randomization across epochs.
    offset = (step * batch_size) % (y_train.shape[0] - batch_size)
    # Generate a minibatch.
    batch_data = X_train[offset:(offset + batch_size), :]
    batch_labels = y_train[offset:(offset + batch_size)]
    # Prepare a dictionary telling the session where to feed the minibatch.
    # The key of the dictionary is the placeholder node of the graph to be fed,
    # and the value is the numpy array to feed to it.
    feed_dict = {Xtr : batch_data, ytr : batch_labels}
    _, l, predictions = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)
    if (step % 500 == 0):
      print "Minibatch loss at step", step, ":", l
      print "Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels)
  print "Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), y_test)


Initialized
Minibatch loss at step 0 : 3.13414
Minibatch accuracy: 72.0%
Minibatch loss at step 500 : 1.29038
Minibatch accuracy: 98.0%
Minibatch loss at step 1000 : 0.663984
Minibatch accuracy: 100.0%
Minibatch loss at step 1500 : 0.34532
Minibatch accuracy: 100.0%
Minibatch loss at step 2000 : 0.183898
Minibatch accuracy: 100.0%
Minibatch loss at step 2500 : 0.10729
Minibatch accuracy: 100.0%
Minibatch loss at step 3000 : 0.0718768
Minibatch accuracy: 100.0%
Test accuracy: 99.9%

In [ ]: