In [2]:
# import modules
%matplotlib inline

import random
import pylab
import pandas as pd
import numpy as np
import cPickle as pkl
from lasagne import layers, updates
from scipy.misc import imread, imresize
from theano.tensor.nnet import softmax
from sklearn.feature_extraction import image
from nolearn.lasagne import NeuralNet, BatchIterator
from sklearn.cross_validation import train_test_split
from sklearn.metrics import classification_report, accuracy_score

script_root = '/home/faizy/workspace/project/project/scripts/'
data_root = '/home/faizy/workspace/project/project/datasets/'
model_root = '/home/faizy/workspace/project/project/models/'

chars74k_root = 'English/'


Using gpu device 0: GeForce GT 740M (CNMeM is disabled)

In [ ]:
def shiftup(dataset):
    # shift each image up by half the frame (16 px) and fill the
    # exposed bottom rows by replicating the original bottom row
    shifted_dataset = np.zeros(dataset.shape)
    shifted_dataset[:, :, :16, :] = dataset[:, :, 16:, :]
    shifted_dataset[:, :, 16:, :] = dataset[:, :, 31:32, :]
    return shifted_dataset

In [ ]:
def shiftdown(dataset):
    # shift each image down by half the frame (16 px) and fill the
    # exposed top rows by replicating the original top row
    shifted_dataset = np.zeros(dataset.shape)
    shifted_dataset[:, :, 16:, :] = dataset[:, :, :16, :]
    shifted_dataset[:, :, :16, :] = dataset[:, :, 0:1, :]
    return shifted_dataset

In [ ]:
def shiftleft(dataset):
    # shift each image left by half the frame (16 px) and fill the
    # exposed right columns by replicating the original right column
    shifted_dataset = np.zeros(dataset.shape)
    shifted_dataset[:, :, :, :16] = dataset[:, :, :, 16:]
    shifted_dataset[:, :, :, 16:] = dataset[:, :, :, 31:32]
    return shifted_dataset

In [ ]:
def shiftright(dataset):
    # shift each image right by half the frame (16 px) and fill the
    # exposed left columns by replicating the original left column
    shifted_dataset = np.zeros(dataset.shape)
    shifted_dataset[:, :, :, 16:] = dataset[:, :, :, :16]
    shifted_dataset[:, :, :, :16] = dataset[:, :, :, 0:1]
    return shifted_dataset
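
In [ ]:
# optional, self-contained check of the shift helpers above: a bright bar
# near the top should land in the bottom half after shiftdown, with the
# exposed top rows filled by edge replication
demo = np.zeros((1, 1, 32, 32))
demo[0, 0, 4:8, :] = 1.0
pylab.subplot(1, 2, 1)
pylab.imshow(demo[0, 0])
pylab.subplot(1, 2, 2)
pylab.imshow(shiftdown(demo)[0, 0])
pylab.gray()
pylab.show()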

In [ ]:
# load the Chars74K training images (paths listed in LISTFILE.txt)
data = pd.read_csv(script_root + 'LISTFILE.txt', sep = ' ', header = None)

data_x = np.zeros((data.shape[0], 1, 32, 32))
# every Chars74K crop is a centred character: label 1 (detector positive class)
data_y = np.ones((data.shape[0], )).astype('int32')

for idx, path in enumerate(data[0]):
    img = imread(data_root + chars74k_root + path)
    img = imresize(img, (32, 32))
    if len(img.shape) == 3:
        # RGB to grayscale with the standard luma weights
        data_x[idx, ...] = img.dot([0.299, 0.587, 0.114])
    else:
        data_x[idx, ...] = img
        
data_x = data_x.astype('float32')

# aliased copy so the naming lines up with the augmented sets below
train1_x = data_x
train1_y = data_y
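
In [ ]:
# optional sanity check: shapes, value range, and one sample image
print train1_x.shape, train1_y.shape
print train1_x.min(), train1_x.max()
pylab.imshow(train1_x[0, 0])
pylab.gray()
pylab.show()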

In [ ]:
# Augmented data: the shifted copies are off-centre, so they form the
# negative class (label 0) for the centred-character detector
train3_x = shiftup(train1_x)
train4_x = shiftdown(train1_x)
train5_x = shiftleft(train1_x)
train6_x = shiftright(train1_x)

train3_y = np.zeros((train3_x.shape[0], )).astype('int')
train4_y = np.zeros((train4_x.shape[0], )).astype('int')
train5_y = np.zeros((train5_x.shape[0], )).astype('int')
train6_y = np.zeros((train6_x.shape[0], )).astype('int')

In [ ]:
print train1_x.shape, train3_x.shape, train4_x.shape, train5_x.shape, train6_x.shape

In [ ]:
# preprocess: standardise each set to unit variance and zero mean
# (in-place, so the train*_x arrays themselves are updated)
for t in (train1_x, train3_x, train4_x, train5_x, train6_x):
    t /= t.std(axis = None)
    t -= t.mean()

In [ ]:
# finally, stack the original and augmented sets
train_x = np.vstack((train1_x, train3_x, train4_x, train5_x, train6_x))

train_y = np.concatenate([train1_y, train3_y, train4_y, train5_y, train6_y])

In [ ]:
# cast to the dtypes theano expects
train_x = train_x.astype('float32')
train_y = train_y.astype('int32')
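
In [ ]:
# the stacked data is ordered by class (all positives, then all negatives);
# a minimal shuffle sketch in case nolearn's internal train/validation
# split is not shuffled or stratified in this version
perm = np.random.permutation(train_x.shape[0])
train_x = train_x[perm]
train_y = train_y[perm]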

In [ ]:
# define the detector CNN
net = NeuralNet(
    layers = [
        ('input', layers.InputLayer),
        ('conv1', layers.Conv2DLayer),
        ('pool1', layers.MaxPool2DLayer),
        ('dropout1', layers.DropoutLayer),
        ('conv2', layers.Conv2DLayer),
        ('pool2', layers.MaxPool2DLayer),
        ('dropout2', layers.DropoutLayer),
        ('conv3', layers.Conv2DLayer),
        ('hidden4', layers.DenseLayer),
        ('output', layers.DenseLayer),
    ],

    input_shape = (None, 1, 32, 32),
    conv1_num_filters = 32, conv1_filter_size = (5, 5),
    pool1_pool_size = (2, 2),
    dropout1_p = 0.2,
    conv2_num_filters = 64, conv2_filter_size = (5, 5),
    pool2_pool_size = (2, 2),
    dropout2_p = 0.2,
    conv3_num_filters = 128, conv3_filter_size = (5, 5),
    hidden4_num_units = 128,
    output_num_units = 2, output_nonlinearity = softmax,

    batch_iterator_train = BatchIterator(batch_size = 1000),
    batch_iterator_test = BatchIterator(batch_size = 1000),

    update = updates.adam,

    use_label_encoder = True,
    regression = False,
    max_epochs = 50,
    verbose = 1,
)
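
In [ ]:
# rough check of the spatial sizes through the stack above
# (valid 5x5 convolutions, 2x2 max pooling):
# 32 -> 28 -> 14 -> 10 -> 5 -> 1, so conv3 yields 128 maps of size 1x1
size = 32
for name, k in [('conv1', 5), ('pool1', 2), ('conv2', 5), ('pool2', 2), ('conv3', 5)]:
    size = size - k + 1 if name.startswith('conv') else size / k
    print name, '->', size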

In [ ]:
# train the network (nolearn reports train/validation loss per epoch)
net.fit(train_x, train_y);
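
In [ ]:
# evaluation sketch using the metrics imported at the top; note that for a
# fair score the split must happen before net.fit (otherwise the held-out
# fold has already been seen in training) -- shown here just to wire up the API
tr_x, te_x, tr_y, te_y = train_test_split(train_x, train_y, test_size = 0.2)
pred_y = net.predict(te_x)
print accuracy_score(te_y, pred_y)
print classification_report(te_y, pred_y)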

In [ ]:
f = open(model_root + 'detector_2.pkl', 'wb')
pkl.dump(net, f)
f.close()

In [168]:
f = open(model_root + 'detection_tester.pkl', 'rb')
net = pkl.load(f)
f.close()

In [122]:
# crop a word region from an SVT image: img[y:y+h, x:x+w, :]
img = imread('/home/faizy/workspace/project/project/datasets/svt/svt1/img/03_12.jpg')[402:480, 640:1140, :]

In [139]:
patches = image.extract_patches(img, (78, 70, 3), extraction_step = 5)
print patches.shape


(1, 87, 1, 78, 70, 3)
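
In [ ]:
# map patch grid indices back to pixel offsets: with extraction_step = 5,
# the patch in grid column j covers columns j*5 .. j*5 + 70 of the crop
# (this is the mapping the i*5 display cells below rely on)
offsets = np.arange(patches.shape[1]) * 5
print offsets[:5], '...', offsets[-1]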

In [140]:
new_lst = []
for i in range(patches.shape[0]):
    for j in range(patches.shape[1]):
        new_lst.append(imresize(patches[i, j, 0, :, :, :], (32, 32)))
        
print len(new_lst)


87

In [141]:
new_list = np.stack(new_lst)
# same grayscale conversion as used for training
new_list = new_list.dot([0.299, 0.587, 0.114])
tester = new_list.reshape(patches.shape[0]*patches.shape[1], 1, 32, 32)

In [142]:
# standardise the patches the same way as the training data
tester /= tester.std(axis = None)
tester -= tester.mean()
tester = tester.astype('float32')

In [143]:
print tester.shape


(87, 1, 32, 32)

In [169]:
preder = net.predict_proba(tester)

In [170]:
heatmap = preder[:, 1].reshape((patches.shape[0], patches.shape[1]))
print heatmap.shape


(1, 87)

In [171]:
pylab.pcolor(heatmap[::-1])
pylab.axis('off')
pylab.show()
pylab.imshow(img)
pylab.show()



In [172]:
a = np.reshape(heatmap, patches.shape[1]*patches.shape[0])
print a.shape


(87,)

In [173]:
pylab.plot(a)


Out[173]:
[<matplotlib.lines.Line2D at 0x7f8382c15110>]

In [149]:
# index of the strongest detector response
num = np.argmax(a)

In [150]:
# show the patch-width window at the strongest response
pylab.imshow(img[:, num*5:num*5+70, :])
pylab.show()



In [151]:
from scipy.ndimage.filters import maximum_filter
# simple non-maximum suppression: keep positions that are maximal within
# a window sized to roughly the expected character spacing
size = int((patches.shape[1] / 5) * 0.75)
peakind = np.nonzero(maximum_filter(a, size = size) == a)[0]

In [152]:
peakind


Out[152]:
array([ 5, 18, 30, 43, 57, 71, 84])

In [153]:
for i in peakind:
    pylab.imshow(img[:, i*5:i*5+90, :])
    pylab.show()



In [154]:
for i in peakind:
    pylab.imshow(tester[i, 0, :, :])
    pylab.gray()
    pylab.show()



In [161]:
# gather the peak patches into one array for the recognizer
word = np.zeros((len(peakind), 1, 32, 32))
for idx, item in enumerate(peakind):
    word[idx, ...] = tester[item, 0, :, :]
    
word = word.astype('float32')

In [162]:
# Load model
f = open(model_root + 'recognizer.pkl', 'rb')
netter = pkl.load(f)
f.close()

In [163]:
predict = netter.predict(word)

In [164]:
# Define word recognition functions (a Norvig-style spelling corrector)
import re, collections

def words(text): return re.findall('[a-z]+', text.lower()) 

def train(features):
    model = collections.defaultdict(lambda: 1)
    for f in features:
        model[f] += 1
    return model

NWORDS = train(words(file(data_root + 'big.txt').read()))

alphabet = 'abcdefghijklmnopqrstuvwxyz'

def edits1(word):
    splits     = [(word[:i], word[i:]) for i in range(len(word) + 1)]
    deletes    = [a + b[1:] for a, b in splits if b]
    transposes = [a + b[1] + b[0] + b[2:] for a, b in splits if len(b) > 1]
    replaces   = [a + c + b[1:] for a, b in splits for c in alphabet if b]
    inserts    = [a + c + b     for a, b in splits for c in alphabet]
    return set(deletes + transposes + replaces + inserts)

def known_edits2(word):
    return set(e2 for e1 in edits1(word) for e2 in edits1(e1) if e2 in NWORDS)

def known(words): return set(w for w in words if w in NWORDS)

def correct(word):
    candidates = known([word]) or known(edits1(word)) or known_edits2(word) or [word]
    return sorted(candidates,  key=NWORDS.get, reverse = True)

def classer(arrayer):
    # map 62-way class indices to characters:
    # 0-9 digits, 10-35 upper-case letters, 36-61 lower-case letters
    classer_array = []
    for i in range(len(arrayer)):
        if (0 <= arrayer[i] < 10):
            classer_array.append(arrayer[i])
        elif (10 <= arrayer[i] < 36):
            classer_array.append(alphabet[arrayer[i] - 10].upper())
        elif (36 <= arrayer[i] < 62):
            classer_array.append(alphabet[arrayer[i] - 36])
        else:
            print 'unexpected class index:', arrayer[i]
    return classer_array
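
In [ ]:
# quick checks of the helpers above: classer maps 62-way indices to
# characters, and correct proposes dictionary words from big.txt
print classer([5, 12, 40])    # digit 5, upper-case 'C', lower-case 'e'
print correct('speling')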

In [165]:
real_pred = classer(predict)
real_pred = map(str, real_pred)
letter_stream = ''.join(real_pred).lower()

In [166]:
print letter_stream


trident

In [167]:
print 'Probable words are: ', ', '.join(correct(letter_stream))


Probable words are:  trident

In [112]:
pylab.plot(predict[2])


Out[112]:
[<matplotlib.lines.Line2D at 0x7f837ccabb50>]

In [ ]: