In [1]:
import os
import urllib

try:
    # Python 3 moved urlretrieve into urllib.request.
    from urllib.request import urlretrieve
except ImportError:
    # Python 2 exposes it directly on the urllib module.
    from urllib import urlretrieve

# Where the raw iris CSV comes from and where the local copy is kept.
SOURCE_URL = 'http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
WORK_DIRECTORY = "./iris-data"
FILENAME = 'iris.data'

# Create the working directory on first run only.
if not os.path.exists(WORK_DIRECTORY):
    os.mkdir(WORK_DIRECTORY)

# Download once; later runs reuse the file already on disk.
filepath = os.path.join(WORK_DIRECTORY, FILENAME)
if not os.path.exists(filepath):
    filepath, _ = urlretrieve(SOURCE_URL, filepath)
    statinfo = os.stat(filepath)
    # Single formatted string so the output is the same under Python 2 and 3.
    print('Successfully downloaded %s %d bytes.' % (FILENAME, statinfo.st_size))
else:
    print('Already downloaded %s' % FILENAME)
In [2]:
import pandas as pd


def label2class(label):
    """Return the entry stored under `label` in the global `labels_onehot`.

    NOTE(review): `labels_onehot` is not defined in any visible cell, so
    calling this raises NameError unless it is defined elsewhere. The only
    reference to this function is the commented-out `converters` argument
    below.
    """
    return labels_onehot[label]


# The file has no header row, so the column names are supplied here.
# NOTE(review): 'sepal_lenght' is misspelled, but the feature selection later
# in the notebook indexes by this exact name, so it is kept as-is.
column_names = ['sepal_lenght', 'sepal_width', 'petal_length', 'petal_width', 'class']

df = pd.read_csv(
    filepath,
    sep=',',
    header=None,
    names=column_names,
    # converters={'class': label2class},  # disabled in the original
)
In [ ]:
In [3]:
# Columns used as model inputs; everything except 'class'.
feature_columns = ['sepal_lenght', 'sepal_width', 'petal_length', 'petal_width']

# Feature matrix as a plain NumPy array.
X = df[feature_columns].values

# One indicator column per distinct value of 'class' (one-hot labels).
class_indicators = pd.get_dummies(df['class'])
y = class_indicators.values

# Sanity check: both arrays should have the same number of rows.
print(X.shape)
print(y.shape)
In [4]:
# train_test_split lives in sklearn.model_selection since scikit-learn 0.18;
# sklearn.cross_validation was removed in 0.20. Try the current location first
# and fall back so older installs keep working.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Show the resulting shapes of each split.
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)
In [5]:
import tensorflow as tf

# Single linear layer from 4 inputs to 3 outputs, followed by softmax.
x = tf.placeholder(tf.float32, [None, 4])
W = tf.Variable(tf.zeros([4, 3]))
b = tf.Variable(tf.zeros([3]))

# Keep the pre-softmax scores so the loss can be computed from them directly.
logits = tf.matmul(x, W) + b

# Predicted class probabilities.
# NOTE: this rebinds `y`, which an earlier cell used for the one-hot label
# array; the accuracy cell relies on `y` being this tensor.
y = tf.nn.softmax(logits)

# Placeholder for the correct (one-hot) values.
y_ = tf.placeholder(tf.float32, [None, 3])

# Mean cross-entropy over the batch. The fused op works on the logits, which
# avoids the NaN that -sum(y_ * log(softmax)) produces when a predicted
# probability underflows to 0. Keyword arguments are used because the
# positional order of labels/logits differs between TensorFlow releases.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))

# Define how to train the network: plain gradient descent, learning rate 0.5.
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

# Last step is to create the op that initializes the model variables.
# tf.initialize_all_variables is deprecated in favour of
# tf.global_variables_initializer; use the newer one when it exists.
if hasattr(tf, 'global_variables_initializer'):
    init = tf.global_variables_initializer()
else:
    init = tf.initialize_all_variables()
In [6]:
# Create a session and run the `init` op so W and b hold their initial values.
# The session is kept in the global `sess` because the training and evaluation
# cells below reuse it; it is never closed in the visible cells.
sess = tf.Session()
sess.run(init)
In [7]:
# Full-batch training: every step feeds the whole training split.
train_feed = {x: X_train, y_: y_train}

# Run 1000 optimizer steps.
for step in range(1000):
    sess.run(train_step, feed_dict=train_feed)
In [8]:
# Index of the largest entry along axis 1, for the model output and the target.
# `y` here must be the softmax tensor from the graph cell, not the label array.
predicted_class = tf.argmax(y, 1)
expected_class = tf.argmax(y_, 1)

# Element-wise match, then the mean of the 0/1 values gives the hit rate.
correct_prediction = tf.equal(predicted_class, expected_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Evaluate on the held-out split.
test_feed = {x: X_test, y_: y_test}
print(sess.run(accuracy, feed_dict=test_feed))