Download the Iris dataset


In [1]:
import os
import urllib.request

SOURCE_URL = 'http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
WORK_DIRECTORY = './iris-data'
FILENAME = 'iris.data'

if not os.path.exists(WORK_DIRECTORY):
    os.mkdir(WORK_DIRECTORY)
filepath = os.path.join(WORK_DIRECTORY, FILENAME)
if not os.path.exists(filepath):
    filepath, _ = urllib.request.urlretrieve(SOURCE_URL, filepath)
    statinfo = os.stat(filepath)
    print('Successfully downloaded', FILENAME, statinfo.st_size, 'bytes.')
else:
    print('Already downloaded', FILENAME)


Already downloaded iris.data

Load the dataset


In [2]:
import pandas as pd

# the file has no header row, so name the columns explicitly
df = pd.read_csv(filepath, sep=',', header=None,
                 names=['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class'])

Clean the dataset

The UCI file is already tidy, but it is worth a quick sanity check with pandas before moving on; a minimal sketch fills the cell below.


In [ ]:
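# A minimal sanity-check sketch: the UCI iris.data file ships with no
# missing values, so these checks should all come back clean.
print(df.shape)                    # expect (150, 5)
print(df.isnull().sum())           # missing values per column, expect all zeros
print(df['class'].value_counts())  # expect 50 samples per class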

Prepare X and y


In [3]:
X = df[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']].values
y = pd.get_dummies(df['class']).values
print(X.shape)
print(y.shape)


(150, 4)
(150, 3)
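
A note on column order: pd.get_dummies sorts the dummy columns alphabetically, so the mapping from one-hot column to species can be checked directly (a quick aside; the printed names assume the standard UCI labels):

print(pd.get_dummies(df['class']).columns.tolist())
# expected: ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']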

Split the data


In [4]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)


(120, 4)
(30, 4)
(120, 3)
(30, 3)
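
One caveat: with only 150 samples, a purely random split can leave the three classes slightly unbalanced across train and test. A stratified variant (a sketch, not what was run above) preserves the 50/50/50 proportions:

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=df['class'])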

Define the model in TensorFlow


In [5]:
import tensorflow as tf

# a single softmax layer mapping the 4 features to the 3 classes
x = tf.placeholder(tf.float32, [None, 4])
W = tf.Variable(tf.zeros([4, 3]))
b = tf.Variable(tf.zeros([3]))

# note: this shadows the numpy y defined earlier; the labels were
# already split into y_train / y_test, so nothing is lost
y = tf.nn.softmax(tf.matmul(x, W) + b)

# placeholder for the correct one-hot labels
y_ = tf.placeholder(tf.float32, [None, 3])

# cross-entropy loss, averaged over the batch
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), axis=[1]))

# define how to train the network
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

# last step is to initialize the model variables
init = tf.global_variables_initializer()
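
As an aside, tf.log(y) applied to a softmax output can yield NaN once a predicted probability underflows to zero. A numerically safer formulation (not the one used here) computes the loss directly from the logits:

logits = tf.matmul(x, W) + b
cross_entropy_stable = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))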

In [6]:
# create a session
sess = tf.Session()
sess.run(init)

Training


In [7]:
# batch gradient descent: every step uses the full training set
for i in range(1000):
    sess.run(train_step, feed_dict={x: X_train, y_: y_train})
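
The loop above never reports progress. A variant that also fetches the loss every 100 steps (a sketch, not part of the recorded run) makes it easy to confirm convergence:

for i in range(1000):
    _, loss = sess.run([train_step, cross_entropy],
                       feed_dict={x: X_train, y_: y_train})
    if i % 100 == 0:
        print('step %d, cross-entropy %.4f' % (i, loss))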

Model evaluation


In [8]:
# compare the predicted class (argmax of the softmax) with the true class
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
# accuracy = fraction of correct predictions
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(accuracy, feed_dict={x: X_test, y_: y_test}))


1.0
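
With the session still open, the trained model can also classify individual samples; a small usage sketch (class indices follow the one-hot column order shown earlier):

pred = sess.run(tf.argmax(y, 1), feed_dict={x: X_test[:5]})
print('predicted classes:', pred)
print('actual classes:   ', y_test[:5].argmax(axis=1))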