In [2]:
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import cv2
import glob
from math import ceil
sys.path.append('src')
from ocr.helpers import implt
from ocr.mlhelpers import TrainingPlot, DataSet
from ocr.datahelpers import load_gap_data
%matplotlib notebook
plt.rcParams['figure.figsize'] = (9.0, 5.0)
In [3]:
slider = (60, 60)     # Sliding window size: height is fixed to 60 by the data,
                      # width should be less than 120
learning_rate = 5e-5
dropout = 0.5         # Fraction of units dropped out during training
train_set = 0.8       # Fraction of the data used for training
TRAIN_STEPS = 500000
TEST_ITER = 150
COST_ITER = 50
SAVE_ITER = 2000
BATCH_SIZE = 64
save_loc = 'models/gap-clas/large/CNN-CG'
In [4]:
images, labels = load_gap_data('data/gapdet/large/', slider=slider)
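The exact return format of load_gap_data is defined in ocr.datahelpers; as an optional sanity check, the shapes below are an assumption based on how the data is fed to the network later (flattened 60x60 windows with binary gap/no-gap labels), not something guaranteed by the loader itself.
# Optional sanity check; expected shapes are an assumption, not guaranteed by the loader
print("images:", np.asarray(images).shape)   # roughly (N, 3600) for flattened 60x60 windows
print("labels:", np.asarray(labels).shape)   # (N,) with values 0/1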
In [5]:
print("Number of images: " + str(len(images)))
# Split into training and evaluation data
div = int(train_set * len(images))
trainData = images[0:div]
trainLabels = labels[0:div]
evalData = images[div:]
evalLabels = labels[div:]
print("Training images: %g" % div)
In [6]:
# Prepare training dataset
trainSet = DataSet(trainData, trainLabels)
evalSet = DataSet(evalData, evalLabels)
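DataSet comes from ocr.mlhelpers and only its next_batch method is used below. Purely as an illustration of what such a batcher does, a minimal shuffling sketch might look like the following (a hypothetical stand-in, not the actual class):
# Hypothetical sketch of a shuffling batcher; the real DataSet lives in ocr.mlhelpers
class SimpleDataSet:
    def __init__(self, data, labels):
        self.data = np.asarray(data)
        self.labels = np.asarray(labels)
        self._order = np.random.permutation(len(self.data))
        self._pos = 0

    def next_batch(self, size):
        # Reshuffle once the current pass over the data is exhausted
        if self._pos + size > len(self.data):
            self._order = np.random.permutation(len(self.data))
            self._pos = 0
        idx = self._order[self._pos:self._pos + size]
        self._pos += size
        return self.data[idx], self.labels[idx]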
In [7]:
sess = tf.InteractiveSession()

# Helper functions for weight/bias variables and standard layers
def weights(name, shape):
    return tf.get_variable(name, shape=shape,
                           initializer=tf.contrib.layers.xavier_initializer(),
                           regularizer=tf.contrib.layers.l2_regularizer(scale=SCALE))

def bias(const, shape, name=None):
    return tf.Variable(tf.constant(const, shape=shape), name=name)

def conv2d(x, W, name=None):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME', name=name)

def max_pool_2x2(x, name=None):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                          padding='SAME', name=name)

# Regularization scale - FOR TWEAKING
SCALE = 0.001
# Positive-class weight for the cross entropy (ratio of negative to positive samples)
POS_WEIGHT = (len(labels) - sum(labels)) / sum(labels)

# Placeholders for input data (x) and labels (targets)
x = tf.placeholder(tf.float32, [None, slider[0]*slider[1]], name='x')
targets = tf.placeholder(tf.int64, [None])

# Reshape input data into 60x60 single-channel images
reshape_images = tf.reshape(x, [-1, slider[0], slider[1], 1])

# Per-image standardization
x_images = tf.map_fn(
    lambda img: tf.image.per_image_standardization(img), reshape_images)
# 1. Layer - Convolution
W_conv1 = weights('W_conv1', shape=[8, 8, 1, 10])
b_conv1 = bias(0.1, shape=[10], name='b_conv1')
h_conv1 = tf.nn.relu(conv2d(x_images, W_conv1) + b_conv1, name='h_conv1')
# 2. Layer - Max Pool
h_pool1 = max_pool_2x2(h_conv1, name='h_pool1')
# 3. Layer - Convolution
W_conv2 = weights('W_conv2', shape=[5, 5, 10, 20])
b_conv2 = bias(0.1, shape=[20], name='b_conv2')
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2, name='h_conv2')
# 4. Layer - Max Pool
h_pool2 = max_pool_2x2(h_conv2, name='h_pool2')
# 5. Fully connected layer
W_fc1 = weights('W_fc1', shape=[ceil(slider[0]/4)*ceil(slider[1]/4)*20, 1000])
b_fc1 = bias(0.1, shape=[1000], name='b_fc1')
h_conv2_flat = tf.reshape(
    h_pool2, [-1, ceil(slider[0]/4)*ceil(slider[1]/4)*20], name='h_conv2_flat')
h_fc1 = tf.nn.relu(tf.matmul(h_conv2_flat, W_fc1) + b_fc1, name='h_fc1')
# 6. Dropout
keep_prob = tf.placeholder(tf.float32, name='keep_prob')
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob, name='h_fc1_drop')
# 7. Output layer
W_fc2 = weights('W_fc2', shape=[1000, 2])
b_fc2 = bias(0.1, shape=[2], name='b_fc2')
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
# Inference op for use in the application (argmax over logits, dropout skipped)
activation = tf.argmax(tf.matmul(h_fc1, W_fc2) + b_fc2, 1, name='activation')
# Cost: class-weighted cross entropy + L2 regularization
# Per-example weights: positive samples get POS_WEIGHT + 1, negatives get 1
# (kept under a separate name so the weights() helper above is not shadowed)
loss_weights = tf.cast(targets, tf.float32) * POS_WEIGHT + 1
cross_entropy = tf.losses.sparse_softmax_cross_entropy(
    logits=y_conv,
    labels=targets,
    weights=loss_weights)
# L2 regularization terms collected from the get_variable regularizers
regularization = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
cost = tf.reduce_mean(cross_entropy) + sum(regularization)
# Optimizer
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cost, name='train_step')
# Evaluating
correct_prediction = tf.equal(tf.argmax(y_conv,1), targets)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')
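The input dimension of W_fc1 follows from the two 2x2 max-pools with SAME padding: each halves the 60x60 maps (rounding up), leaving 15x15 feature maps over 20 channels, i.e. 4500 values per window. A quick check of that arithmetic:
# Check of the flattened feature size feeding the fully connected layer
pooled_h = ceil(ceil(slider[0] / 2) / 2)   # 60 -> 30 -> 15
pooled_w = ceil(ceil(slider[1] / 2) / 2)   # 60 -> 30 -> 15
print(pooled_h * pooled_w * 20)            # 4500 == ceil(60/4) * ceil(60/4) * 20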
In [8]:
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
# Graph for live plotting
trainPlot = TrainingPlot(TRAIN_STEPS, TEST_ITER, COST_ITER)
try:
    for i in range(TRAIN_STEPS):
        trainBatch, labelBatch = trainSet.next_batch(BATCH_SIZE)

        if i % COST_ITER == 0:
            # Plotting cost
            tmpCost = cost.eval(feed_dict={x: trainBatch,
                                           targets: labelBatch,
                                           keep_prob: 1.0})
            trainPlot.updateCost(tmpCost, i // COST_ITER)

        if i % TEST_ITER == 0:
            # Plotting accuracy
            evalD, evalL = evalSet.next_batch(500)
            accEval = accuracy.eval(feed_dict={x: evalD,
                                               targets: evalL,
                                               keep_prob: 1.0})
            accTrain = accuracy.eval(feed_dict={x: trainBatch,
                                                targets: labelBatch,
                                                keep_prob: 1.0})
            trainPlot.updateAcc(accEval, accTrain, i // TEST_ITER)

        if i % SAVE_ITER == 0:
            # Saving model
            saver.save(sess, save_loc)

        train_step.run(feed_dict={x: trainBatch,
                                  targets: labelBatch,
                                  keep_prob: 1 - dropout})
except KeyboardInterrupt:
    pass
saver.save(sess, save_loc)
evalD, evalL = evalSet.next_batch(500)
print("Accuracy %g" % accuracy.eval(feed_dict={x: evalD,
targets: evalL,
keep_prob: 1.0}))
sess.close()
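For use in the application, the saved checkpoint can be restored by name and the 'activation' op run directly on new windows. A minimal sketch, assuming a fresh graph and a hypothetical flattened 60x60 input new_image (the 'activation' op bypasses dropout, so keep_prob need not be fed):
# Minimal restore/inference sketch; `new_image` is a hypothetical flattened 60x60 window
tf.reset_default_graph()
new_image = np.zeros(slider[0] * slider[1], dtype=np.float32)
with tf.Session() as s:
    restorer = tf.train.import_meta_graph(save_loc + '.meta')
    restorer.restore(s, save_loc)
    g = tf.get_default_graph()
    is_gap = s.run(g.get_tensor_by_name('activation:0'),
                   feed_dict={g.get_tensor_by_name('x:0'): [new_image]})
    print(is_gap)   # 0 or 1 per input window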