TEXT DETECTION


In [2]:
# import modules
%matplotlib inline
import random
import pylab
import pandas as pd
import numpy as np
import cPickle as pkl
from PIL import Image
import matplotlib.pyplot as plt
from lasagne import layers
from lasagne.updates import nesterov_momentum
from theano.tensor.nnet import softmax
from nolearn.lasagne import NeuralNet
from nolearn.lasagne import BatchIterator


Using gpu device 0: GeForce GT 740M (CNMeM is disabled)

reading SVT


In [5]:
from sklearn.feature_extraction import image
patches = image.extract_patches(img, (20, 20, 3), extraction_step = (20, 20, 3))
patches.shape


Out[5]:
(40, 64, 1, 20, 20, 3)

In [ ]:
#i = random.randrange(0, 751)
for i in range(patches.shape[0]):
    for j in range(patches.shape[1]):
        pylab.imshow(patches[i, j, 0, :, :, :])
        pylab.show()

In [7]:
from scipy.misc import imresize
new_lst = []
for i in range(patches.shape[0]):
    for j in range(patches.shape[1]):
        new_lst.append(imresize(patches[i, j, 0, :, :, :], (32, 32)))
        
print len(new_lst)


2560

In [8]:
new_list = np.stack(new_lst)
new_list = new_list.dot([0.299, 0.587, 0.144])
print new_list.shape
tester = new_list.reshape(patches.shape[0]*patches.shape[1], 1, 32, 32)


(2560, 32, 32)

In [9]:
tester /= tester.std(axis = None)
tester -= tester.mean()
tester = tester.astype('float32')

In [4]:
from scipy.misc import imread as ims
img = ims('/home/faizy/workspace/project/project/datasets/svt/svt1/img/01_15.jpg')
img.shape


Out[4]:
(800, 1280, 3)

In [15]:
pylab.imshow(img)
pylab.show()



In [43]:
# visualize text
# S -> 316:375, 447:500
# U -> 326:385, 490:550
pylab.imshow(img[400:450, 490:550, :])
pylab.show()



In [44]:
tester = imresize(img[400:450, 490:550, :], (32, 32))
tester = tester.dot([0.299, 0.587, 0.144])
tester = tester.reshape(1, 1, 32, 32)

In [45]:
tester /= tester.std(axis = None)
tester -= tester.mean()
tester = tester.astype('float32')

In [12]:
#TODO implement a chars74k trained network
preder = netter.predict_proba(tester)

reading chars74k


In [29]:
root = '/home/faizy/workspace/project/project/scripts'
data = pd.read_csv(root + '/LISTFILE.txt', sep = ' ', header = None)

In [70]:
print data.head(); print data.shape[0]


                                            0   1
0  Img/GoodImg/Bmp/Sample057/img057-00013.png  56
1  Img/GoodImg/Bmp/Sample057/img057-00028.png  56
2  Img/GoodImg/Bmp/Sample057/img057-00021.png  56
3  Img/GoodImg/Bmp/Sample057/img057-00022.png  56
4  Img/GoodImg/Bmp/Sample057/img057-00030.png  56
7705

In [35]:
from sklearn.cross_validation import train_test_split
train_x, test_x, train_y, test_y = train_test_split(data[0], data[1])

In [44]:
print train_x.head();print test_y.head(); print train_x.count()


7463    Img/GoodImg/Bmp/Sample041/img041-00142.png
864     Img/GoodImg/Bmp/Sample051/img051-00078.png
4922    Img/GoodImg/Bmp/Sample005/img005-00017.png
4426    Img/GoodImg/Bmp/Sample056/img056-00052.png
6970    Img/GoodImg/Bmp/Sample029/img029-00267.png
Name: 0, dtype: object
2020    10
3124    32
1989    10
3796    38
3284    29
Name: 1, dtype: int64
5778

In [69]:
root = '/home/faizy/workspace/project/project/datasets/English/'
from scipy.misc import imread as ims
i = random.randrange(0, train_x.count())
img = ims(root + train_x.iloc[i])
pylab.imshow(img)
pylab.show()


Actual code


In [2]:
# extra functions
def unpickle(filer):
    f = open(filer, 'rb')
    d_dict = pkl.load(f)
    f.close()
    return d_dict

from sklearn.cross_validation import train_test_split

In [3]:
# load train_test set
# cifar
train_dict = unpickle('/home/faizy/workspace/cifar/cifar-10-batches-py/data_batch_1')
train2_images = train_dict['data'].astype('float32')
train2_y = np.zeros((10000, )).astype('int')
test_dict = unpickle('/home/faizy/workspace/cifar/cifar-10-batches-py/test_batch')
test2_images = test_dict['data'].astype('float32')

# chars74k
data = pd.read_csv('/home/faizy/workspace/project/project/scripts/LISTFILE.txt', sep = ' ', header = None)
root = '/home/faizy/workspace/project/project/datasets/English/'
data_x = np.zeros((data.shape[0], 1, 32, 32))
data_y = np.ones((data.shape[0], )).astype('int32')
from scipy.misc import imread, imresize
for idx, path in enumerate(data[0]):
    img = imread(root + path)
    img = imresize(img, (32, 32))
    if len(img.shape) == 3:
        data_x[idx, ...] = img.dot([0.299, 0.587, 0.144])
    else:
        data_x[idx, ...] = img
        
data_x = data_x.astype('float32')
train1_x, test1_x, train1_y, test1_y = train_test_split(data_x, data_y, test_size = 0.2)

In [73]:
train1_x.shape, test1_x.shape


Out[73]:
((6164, 1, 32, 32), (1541, 1, 32, 32))

In [4]:
# preprocess
# cifar
train2_images /= train2_images.std(axis = None)
train2_images -= train2_images.mean()

test2_images /= test2_images.std(axis = None)
test2_images -= test2_images.mean()

# chars74k
train1_x /= train1_x.std(axis = None)
train1_x -= train1_x.mean()

test1_x /= test1_x.std(axis = None)
test1_x -= test1_x.mean()

In [5]:
# reshape dataset
# cifar
# grayscaling and cropping to size
train2_x_rgb = train2_images.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)

train2_x = np.zeros((10000, 1, 32, 32))
for i in range(10000):
    train2_x[i, :, :, :] = np.dot(train2_x_rgb[i, :, :, :], [0.299, 0.587, 0.144])

test2_x_rgb = test2_images.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)

test2_x = np.zeros((10000, 1, 32, 32))
for i in range(10000):
    test2_x[i, :, :, :] = np.dot(test2_x_rgb[i, :, :, :], [0.299, 0.587, 0.144])

In [6]:
# finally
train_x = np.vstack((train1_x, train2_x))

train_y = np.concatenate([train1_y, train2_y])

test_x = np.vstack((test1_x, test2_x))

test_y = np.concatenate([test1_y, np.zeros((10000,))])

#TODO shuffle train-test

In [7]:
# check again
train_x = train_x.astype('float32')
train_y = train_y.astype('int32')
test_x = test_x.astype('float32')
test_y = test_y.astype('int32')

In [8]:
# setting nn 
net = NeuralNet(
    layers = [
        ('input', layers.InputLayer),
        ('conv1', layers.Conv2DLayer),
        ('pool1', layers.MaxPool2DLayer),
        ('dropout1', layers.DropoutLayer),
        ('conv2', layers.Conv2DLayer),
        ('pool2', layers.MaxPool2DLayer),
        ('dropout2', layers.DropoutLayer),
        ('conv3', layers.Conv2DLayer),
        ('hidden4', layers.DenseLayer),
        ('output', layers.DenseLayer),
    ],

    input_shape = (None, 1, 32, 32),
    conv1_num_filters = 32, conv1_filter_size = (5, 5),
    pool1_pool_size = (2, 2),
    dropout1_p = 0.2,
    conv2_num_filters = 64, conv2_filter_size = (5, 5),
    pool2_pool_size = (2, 2),
    dropout2_p = 0.2,
    conv3_num_filters = 128, conv3_filter_size = (5, 5),
    hidden4_num_units = 128,
    output_num_units = 2, output_nonlinearity = softmax,

    batch_iterator_train = BatchIterator(batch_size = 1000),
    batch_iterator_test = BatchIterator(batch_size = 1000),

    update=nesterov_momentum,
    update_learning_rate = 0.003,
    update_momentum = 0.9,

    use_label_encoder = True,
    regression = False,
    max_epochs = 50,
    verbose = 1,
)

In [9]:
# train and test nn
net.fit(train_x, train_y)
pred = net.predict(test_x)


# Neural Network with 273794 learnable parameters

## Layer information

  #  name      size
---  --------  --------
  0  input     1x32x32
  1  conv1     32x28x28
  2  pool1     32x14x14
  3  dropout1  32x14x14
  4  conv2     64x10x10
  5  pool2     64x5x5
  6  dropout2  64x5x5
  7  conv3     128x1x1
  8  hidden4   128
  9  output    2

  epoch    train loss    valid loss    train/val    valid acc  dur
-------  ------------  ------------  -----------  -----------  ------
      1       0.79128       0.66980      1.18136      0.69150  13.84s
      2       0.66188       0.63928      1.03535      0.69175  12.59s
      3       0.66947       0.62711      1.06755      0.69175  12.38s
      4       0.67260       0.62087      1.08332      0.69175  12.35s
      5       0.67158       0.61619      1.08990      0.69175  12.34s
      6       0.66845       0.61134      1.09342      0.69175  12.35s
      7       0.66461       0.60546      1.09769      0.69175  12.34s
      8       0.66058       0.59852      1.10370      0.69175  12.35s
      9       0.65861       0.59185      1.11279      0.69175  12.34s
     10       0.65436       0.58405      1.12038      0.69175  12.34s
     11       0.64908       0.57484      1.12915      0.69175  12.35s
     12       0.64378       0.56449      1.14047      0.69175  12.35s
     13       0.63739       0.55351      1.15154      0.69175  12.35s
     14       0.62967       0.54147      1.16287      0.69250  12.35s
     15       0.62278       0.53120      1.17241      0.69443  12.54s
     16       0.61650       0.52018      1.18516      0.71210  12.44s
     17       0.60835       0.51410      1.18334      0.70718  12.40s
     18       0.60390       0.50190      1.20322      0.73231  12.59s
     19       0.58709       0.49470      1.18676      0.72378  12.75s
     20       0.58557       0.48702      1.20237      0.74659  12.40s
     21       0.57475       0.48619      1.18215      0.74353  12.40s
     22       0.57890       0.47500      1.21875      0.76598  12.39s
     23       0.55838       0.46825      1.19248      0.78196  12.43s
     24       0.55638       0.45457      1.22395      0.79180  12.39s
     25       0.53860       0.44729      1.20414      0.80742  12.40s
     26       0.53925       0.43928      1.22756      0.80648  12.40s
     27       0.52760       0.43491      1.21313      0.82277  12.40s
     28       0.52686       0.43028      1.22445      0.81108  12.40s
     29       0.51702       0.41758      1.23812      0.83809  12.39s
     30       0.51035       0.40384      1.26374      0.82966  12.40s
     31       0.49078       0.38726      1.26730      0.85734  12.34s
     32       0.48180       0.38236      1.26006      0.83741  12.34s
     33       0.47182       0.36594      1.28935      0.86349  12.34s
     34       0.46649       0.36890      1.26455      0.84191  12.34s
     35       0.45170       0.35065      1.28818      0.86906  12.35s
     36       0.45360       0.35600      1.27416      0.84791  12.34s
     37       0.43483       0.33339      1.30428      0.87359  12.34s
     38       0.42848       0.34254      1.25090      0.85341  12.34s
     39       0.42453       0.31706      1.33896      0.87984  12.34s
     40       0.41166       0.32271      1.27564      0.86616  13.01s
     41       0.40397       0.29805      1.35536      0.88395  12.54s
     42       0.39076       0.30283      1.29036      0.87405  12.54s
     43       0.38199       0.28208      1.35420      0.88945  12.54s
     44       0.36812       0.28942      1.27192      0.87755  12.54s
     45       0.36692       0.26782      1.37001      0.89509  12.54s
     46       0.35441       0.28127      1.26006      0.87880  12.54s
     47       0.35285       0.26173      1.34815      0.89742  12.55s
     48       0.35122       0.27441      1.27993      0.88312  12.54s
     49       0.33953       0.24961      1.36020      0.90581  12.53s
     50       0.33398       0.26489      1.26083      0.88645  13.13s

In [12]:
# Saving model
f = open('/home/faizy/workspace/project/project/models/detector.pkl', 'wb')
model = pkl.dump(net, f)
f.close()

In [10]:
# Load model
f = open('/home/faizy/workspace/project/project/models/detector.pkl', 'rb')
netter = pkl.load(f)
f.close()

In [85]:
# visualize output
%matplotlib inline
i = random.randrange(0, test1_x.shape[0])
img = test_x[i, :]
img = img.reshape(32, 32)
pylab.imshow(img)
pylab.gray()
pylab.axis('off')
print '--------------'
if pred[i] == 1:
    print "It is a character"
else:
    print "It is NOT a character"


--------------
It is a character

In [93]:
# visualize output
%matplotlib inline
i = random.randrange(test1_x.shape[0], test1_x.shape[0] + test2_x.shape[0])
img = test_x[i, :]
img = img.reshape(32, 32)
pylab.imshow(img)
pylab.gray()
pylab.axis('off')
print '--------------'
if pred[i] == 1:
    print "It is a character"
else:
    print "It is NOT a character"


--------------
It is NOT a character

In [10]:
from sklearn.metrics import accuracy_score, classification_report

In [11]:
print classification_report(test_y, pred)


             precision    recall  f1-score   support

          0       0.96      0.97      0.96     10000
          1       0.79      0.72      0.75      1541

avg / total       0.93      0.94      0.94     11541


In [ ]:
test_x.shape

In [ ]:
test1_x.shape

In [158]:
preder.shape


Out[158]:
(36995,)

In [ ]:
for i, j in enumerate(preder):
    #if j == 1:
        pylab.imshow(tester[i, :, :].reshape(32, 32))
        pylab.show()

In [137]:
tester[i, :, :].reshape(32, 32).shape


Out[137]:
(32, 32)

In [19]:
heatmap = preder[:, 0].reshape((patches.shape[0], patches.shape[1]))
print heatmap.shape


(40, 64)

In [20]:
pylab.pcolor(heatmap[::-1])
pylab.axis('off')
pylab.show()
pylab.imshow(img)
pylab.axis('off')
pylab.show()



In [69]:
import pylab as pl
data = pl.random((25,25)) # 25x25 matrix of values
pl.pcolor(data)
pl.show()