In [33]:
from __future__ import division
import tarfile
import tensorflow as tf
import os
import numpy as np
import matplotlib.pyplot as plt
import random
# Display plots inline
%matplotlib inline
In [34]:
def gentxt(filepath,delimiter):
    return np.genfromtxt(filepath,delimiter=delimiter,dtype=None)

def extract():
    print("Training Data Loading")
    trainX=gentxt("data/trainX.csv",delimiter='\t')
    trainY=gentxt("data/trainY.csv",delimiter='\t')
    print("Testing Data Loading")
    testX=gentxt("data/testX.csv",delimiter='\t')
    testY=gentxt("data/testY.csv",delimiter='\t')
    return trainX,trainY,testX,testY

trainX,trainY,testX,testY=extract()
In [35]:
#Shapes of X and Y
#X has 953 rows and 2955 features
#Y has 2 labels indicating whether each message is spam or not
print(trainX.shape)
print(trainY.shape)
In [36]:
#Try several iteration counts to find the optimal number of iterations
iter1=10000
iter2=15000
iter3=26000
learningRate = tf.train.exponential_decay(learning_rate=0.0008,
                                          global_step=1,
                                          decay_steps=trainX.shape[0],
                                          decay_rate=0.95,
                                          staircase=True)
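#Note: global_step is fixed at 1 and never passed to the optimizer, so with staircase=True the decayed rate stays at 0.0008 for the whole run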
In [ ]:
In [37]:
#Define the placeholder variables
numfeatures=trainX.shape[1]
numlabels=trainY.shape[1]
X=tf.placeholder(tf.float32,shape=[None,numfeatures])
Y=tf.placeholder(tf.float32,shape=[None,numlabels])
#Define the weights and biases
#Weights and biases are tf.Variables since they change over the iterations
#The stddev follows a Xavier/Glorot-style scale, sqrt(6/(fan_in+fan_out+1))
w=tf.Variable(tf.random_normal([numfeatures,numlabels],mean=0,
                               stddev=np.sqrt(6/(numfeatures+numlabels+1))))
b=tf.Variable(tf.random_normal([1,numlabels],mean=0,
                               stddev=np.sqrt(6/(numfeatures+numlabels+1))))
In [38]:
#Find out the predicted Y value
init=tf.initialize_all_variables()
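#Each row of Y_predicted holds two independent sigmoid scores; tf.argmax over them is used later as the predicted class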
Y_predicted=tf.nn.sigmoid(tf.add(tf.matmul(X,w),b))
In [39]:
#Define the loss function and optimizer
#We use a squared-error loss
#tf.nn.l2_loss computes half the sum of squared differences (no mean, no square root)
loss=tf.nn.l2_loss(Y_predicted-Y)
optimizer=tf.train.GradientDescentOptimizer(learningRate).minimize(loss)
In [40]:
#Define the session to compute the graph
errors=[]
with tf.Session() as sess:
    sess.run(init)
    prediction=tf.equal(tf.argmax(Y,1),tf.argmax(Y_predicted,1))
    accuracy=tf.reduce_mean(tf.cast(prediction,"float"))
    for i in range(iter1):
        sess.run(optimizer,feed_dict={X:trainX,Y:trainY})
        accuracy_value=accuracy.eval(feed_dict={X:trainX,Y:trainY})
        errors.append(1-accuracy_value)
    print("The error has been reduced to",errors[-1])
    print(sess.run(accuracy,feed_dict={X:trainX,Y:trainY}))
In [32]:
plt.plot([np.mean(errors[max(0,i-50):i+1]) for i in range(len(errors))])
plt.show()
In [72]:
errors=[]
with tf.Session() as sess:
    sess.run(init)
    prediction=tf.equal(tf.argmax(Y,1),tf.argmax(Y_predicted,1))
    accuracy=tf.reduce_mean(tf.cast(prediction,"float"))
    for i in range(iter2):
        sess.run(optimizer,feed_dict={X:trainX,Y:trainY})
        accuracy_value=sess.run(accuracy,feed_dict={X:trainX,Y:trainY})
        errors.append(1-accuracy_value)
    print("The error has been reduced to",errors[-1])
    print(sess.run(accuracy,feed_dict={X:trainX,Y:trainY}))
In [73]:
plt.plot([np.mean(errors[max(0,i-50):i+1]) for i in range(len(errors))])
plt.show()
In [16]:
errors=[]
with tf.Session() as sess:
    sess.run(init)
    prediction=tf.equal(tf.argmax(Y,1),tf.argmax(Y_predicted,1))
    accuracy=tf.reduce_mean(tf.cast(prediction,"float"))
    for i in range(iter3):
        sess.run(optimizer,feed_dict={X:trainX,Y:trainY})
        accuracy_value=sess.run(accuracy,feed_dict={X:trainX,Y:trainY})
        errors.append(1-accuracy_value)
    print("The error has been reduced to",errors[-1])
    print(sess.run(accuracy,feed_dict={X:trainX,Y:trainY}))
plt.plot([np.mean(errors[max(0,i-50):i+1]) for i in range(len(errors))])
plt.show()
In [22]:
iter4=26000
learningRate = tf.train.exponential_decay(learning_rate=0.0008,
                                          global_step=1,
                                          decay_steps=trainX.shape[0],
                                          decay_rate=0.95,
                                          staircase=True)
In [25]:
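#Parameters for the hidden layer used in the next cell; a minimal sketch assuming a single
#hidden layer, with numhidden=100 and the init scale chosen by assumption to mirror the
#earlier Xavier-style initialization
numhidden=100
w1=tf.Variable(tf.random_normal([numfeatures,numhidden],mean=0,
                                stddev=np.sqrt(6/(numfeatures+numhidden+1))))
b1=tf.Variable(tf.random_normal([1,numhidden],mean=0,
                                stddev=np.sqrt(6/(numfeatures+numhidden+1))))
w2=tf.Variable(tf.random_normal([numhidden,numlabels],mean=0,
                                stddev=np.sqrt(6/(numhidden+numlabels+1))))
b2=tf.Variable(tf.random_normal([1,numlabels],mean=0,
                                stddev=np.sqrt(6/(numhidden+numlabels+1))))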
In [26]:
init=tf.initialize_all_variables()
h1=tf.nn.sigmoid(tf.add(tf.matmul(X,w1),b1))
Y_predicted = tf.nn.sigmoid(tf.add(tf.matmul(h1, w2), b2))
#Define the loss function and optimizer
#We use a squared-error loss
#tf.nn.l2_loss computes half the sum of squared differences (no mean, no square root)
loss=tf.nn.l2_loss(Y_predicted-Y)
optimizer=tf.train.GradientDescentOptimizer(learningRate).minimize(loss)
In [29]:
errors=[]
with tf.Session() as sess:
    sess.run(init)
    prediction=tf.equal(tf.argmax(Y,1),tf.argmax(Y_predicted,1))
    accuracy=tf.reduce_mean(tf.cast(prediction,"float"))
    for i in range(iter4):
        sess.run(optimizer,feed_dict={X:trainX,Y:trainY})
        accuracy_value=accuracy.eval(feed_dict={X:trainX,Y:trainY})
        errors.append(1-accuracy_value)
    print("The error has been reduced to",errors[-1])
    print(sess.run(accuracy,feed_dict={X:trainX,Y:trainY}))
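    #Optionally evaluate the same accuracy op on the held-out test split loaded by extract()
    #(a small addition for completeness; the runs above only report training accuracy)
    print("Test accuracy:",sess.run(accuracy,feed_dict={X:testX,Y:testY}))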
In [30]:
plt.plot([np.mean(errors[max(0,i-50):i+1]) for i in range(len(errors))])
plt.show()
In [ ]: