NOTE: this has nothing to do with neural networks.
This notebook demonstrates the use of TensorFlow's dataflow graph and gradient descent optimizer for linear regression.
Courtesy: Aerial Intelligence.
The dataset used is from this data science challenge: https://github.com/aerialintel/data-science-challenge
The task here is to predict the yield of wheat.
In [1]:
%%bash
# Download the dataset
[ ! -d dataset ] && mkdir dataset
[ ! -f dataset/wheat-2013-supervised.csv ] && wget https://aerialintel.blob.core.windows.net/recruiting/datasets/wheat-2013-supervised.csv -O dataset/wheat-2013-supervised.csv
# [ ! -f dataset/wheat-2014-supervised.csv ] && wget https://aerialintel.blob.core.windows.net/recruiting/datasets/wheat-2014-supervised.csv -O dataset/wheat-2014-supervised.csv
In [2]:
%matplotlib inline
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display, HTML
import sys
import os
import time
current_milli_time = lambda: int(round(time.time() * 1000))  # wall-clock time in ms, used to throttle progress logging
In [11]:
path = "dataset/wheat-2013-supervised.csv"
df = pd.read_csv(path, sep=',')
df = df[:10000]  # keep a smaller subset for quick experimentation
display(df[0:5])
train_X = df.iloc[:, range(5, 15)]  # basic features
train_Y = df.iloc[:, [25]]  # labels
# Standardize each feature to zero mean and unit variance (z-score), so that
# gradient descent steps are well scaled across features.
means = train_X.mean()
stds = train_X.std()
train_X = (train_X - means) / stds
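A quick sanity check (a minimal sketch): after z-scoring, each feature should have mean close to 0 and standard deviation close to 1.
In [ ]:
# Verify the standardization: means ~0, stds ~1
print(train_X.mean().round(3).values)
print(train_X.std().round(3).values)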
In [12]:
#train_X[0:3]
#train_Y[0:3]
# Convert from pandas DataFrame to NumPy array (as_matrix() is deprecated)
train_X = train_X.values
train_Y = train_Y.values
In [13]:
train_X.shape, train_Y.shape
Out[13]:
((10000, 10), (10000, 1))
In [16]:
n_recs, n_attrs = train_X.shape
## TensorFlow graph for linear regression
X = tf.placeholder("float", [None, n_attrs])  # input features
Y = tf.placeholder("float", [None, 1])        # target yield
W = tf.Variable(tf.zeros([n_attrs, 1]))       # weight vector
b = tf.Variable(tf.zeros([1]))                # bias
Y_ = tf.matmul(X, W) + b                      # linear model: Y_ = XW + b
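# Note: nothing has been computed yet. X, Y, W, b and Y_ are nodes in
# TensorFlow's dataflow graph; values flow through it only when sess.run()
# is called with a feed_dict below.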
learning_rate = 0.001
# Root mean squared error
cost = tf.sqrt(tf.reduce_mean(tf.square(Y_ - Y)))
# Gradient descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
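# minimize() adds the gradient computation and parameter-update ops to the
# graph; each sess.run(optimizer, ...) call then performs one SGD step.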
num_epochs = 20
progress_delay = 10 * 1000  # report progress at most every 10 seconds
last_time = current_milli_time()
conv_tol = 0.1  # stop when the cost changes by less than this between reports
training_cost = float('inf')
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    converged = False
    for e in range(num_epochs):
        # Stochastic gradient descent: one update per record
        for x, y in zip(train_X, train_Y):
            sess.run(optimizer, feed_dict={X: np.array([x]), Y: np.array([y])})
            if current_milli_time() - last_time > progress_delay:
                last_time = current_milli_time()
                new_cost = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
                print("Epoch =", e, " Training cost =", new_cost,
                      " W =", sess.run(W).flatten(), " b =", sess.run(b))
                if abs(new_cost - training_cost) < conv_tol:
                    print("Converged...")
                    converged = True
                    break
                training_cost = new_cost
        if converged:
            break
    print("Optimization finished.")
    training_cost = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
    print("Training cost =", training_cost, " W =", sess.run(W).flatten(), " b =", sess.run(b))
    # Predictions on the training set
    preds = sess.run(Y_, feed_dict={X: train_X})
# Graphic display: with 10 input features, Y cannot be plotted against X
# directly, so plot actual vs. predicted yield by record index instead.
idx = np.arange(1000)
plt.plot(idx, train_Y[0:1000], 'ro', label='Original data')
plt.plot(idx, preds[0:1000], 'b-', label='Fitted values')
plt.xlabel('Record index')
plt.ylabel('Yield')
plt.legend()
plt.show()
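As a cross-check (a sketch, assuming NumPy >= 1.14 for the rcond argument), the final RMSE can be recomputed directly from the fetched predictions, and np.linalg.lstsq gives a closed-form least-squares baseline to compare against the gradient-descent solution.
In [ ]:
# Recompute the training RMSE in NumPy; it should closely match the
# TensorFlow training cost printed above.
print("NumPy RMSE =", np.sqrt(np.mean((preds - train_Y) ** 2)))
# Closed-form least squares: append a column of ones so the last
# coefficient plays the role of the bias b.
Xb = np.hstack([train_X, np.ones((n_recs, 1))])
theta, _, _, _ = np.linalg.lstsq(Xb, train_Y, rcond=None)
print("Closed-form W =", theta[:-1].flatten(), " b =", theta[-1])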