In [2]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
In [3]:
## Input pipeline: queue up the CSV file name(s) and read one line at a time
data_files = ["heart.csv"]
## string_input_producer builds a FIFO queue of file names under the hood
## (cycles through the list indefinitely by default)
filename_queue = tf.train.string_input_producer(data_files)
## TextLineReader emits one line of text per read; drop the CSV header row
reader = tf.TextLineReader(skip_header_lines=1)
## key is a record identifier (file name + line number); value is the raw CSV line
key, value = reader.read(filename_queue)
In [18]:
N_FEATURES = 9
## decode_csv needs one default per column; the defaults also fix each
## column's dtype: columns 0-3 and 5-8 are floats, the column at index 4
## (the 5th column) holds a string, and the trailing label column is an int.
record_defaults = [[''] if col == 4 else [1.0] for col in range(N_FEATURES)]
record_defaults.append([1])
## Decode the raw CSV line into a list of per-column scalar tensors
content = tf.decode_csv(value, record_defaults=record_defaults)
In [19]:
## Turn the string column (index 4) into a numeric flag:
## 'Present' -> 1.0, anything else -> 0.0
is_present = tf.equal(content[4], tf.constant('Present'))
content[4] = tf.where(is_present, tf.constant(1.0), tf.constant(0.0))
## Stack the 9 per-column scalars into a single feature vector;
## the final column is kept separate as the label
features = tf.stack(content[:N_FEATURES])
label = content[-1]
In [20]:
## Batching parameters: the model consumes batches, not single lines
BATCH_SIZE = 20
## Minimum number of elements left in the shuffle queue after a dequeue --
## a larger buffer mixes records more thoroughly, at the cost of memory
min_after_dequeue = 10 * BATCH_SIZE
## Queue capacity, written in the recommended form:
## min_after_dequeue plus headroom for the enqueueing threads
capacity = min_after_dequeue + 10 * BATCH_SIZE
In [25]:
with tf.Session() as sess:
## Coordinator to handle multiple threads
coord = tf.train.Coordinator()
## Starting the threads using coordinator object
threads = tf.train.start_queue_runners(coord=coord)
## Printing the key, value pairs of data generated
print '='*3,"Printing key/value pairs",'='*3
print sess.run(key)
print sess.run(value)
## Printing the processed values from the above
print '='*3,"Printing processed feautures/label pairs",'='*3
print sess.run(features)
print sess.run(label)
## Generating the BATCH_SIZE amount of data
data_batch, label_batch = tf.train.shuffle_batch([features, label], batch_size=BATCH_SIZE,
capacity=capacity, min_after_dequeue=min_after_dequeue)
print '='*3,"Printing batches of features and their corresponding labels","="*3
print data_batch.shape
print label_batch.shape
coord.request_stop()
coord.join(threads)
In [ ]: