In [1]:
%matplotlib inline
import os
import sys
import pylab
import random
from random import randint, uniform
from skimage.util import crop
from skimage import transform
import numpy as np
import pandas as pd
import cPickle as pkl
from lasagne import layers
from bs4 import BeautifulSoup as bs
from lasagne import updates
import lasagne as nn
from theano.tensor.nnet import softmax
from scipy.misc import imread, imresize
from nolearn.lasagne import NeuralNet, BatchIterator, visualize
from sklearn.cross_validation import train_test_split
from sklearn.metrics import classification_report, accuracy_score

repo_location = '/workspace/.project/project/'
data_root = os.path.join(os.path.expanduser('~') + repo_location + 'datasets/')
script_root = os.path.join(os.path.expanduser('~') + repo_location + 'scripts/')
model_root = os.path.join(os.path.expanduser('~') + repo_location + 'models/')

In [2]:
# Load dataset
train_soup = bs(open(data_root + 'icdar03/train/char/char.xml').read(), 'lxml-xml')
test_soup = bs(open(data_root + 'icdar03/test/char/char.xml').read(), 'lxml-xml')

X_train = []
y_train = []
X_test = []
y_test = []

for image in train_soup('image'):
    try:
        img = imread(data_root + 'icdar03/train/char/' + image['file'])
        X_train.append(img)
        y_train.append(image['tag'])
    except:
        pass
    
for image in test_soup('image'):
    try:
        img = imread(data_root + 'icdar03/test/char/' + image['file'])
        X_test.append(img)
        y_test.append(image['tag'])
    except:
        pass

    
data_train = pd.DataFrame({'image' : X_train, 'label' : y_train})
data_test = pd.DataFrame({'image' : X_test, 'label' : y_test})

# drop extra labels
data_train = data_train.loc[~data_train['label'].isin([':', '-', '.', '\'', '!', '(', '"', ')', '&', '?', u'\xa3', u'\xc9', u'\xd1', u'\xe9', ','])]
data_test = data_test.loc[~data_test['label'].isin([':', '-', '.', '\'', '!', '(', '"', ')', '&', '?', u'\xa3', u'\xc9', u'\xd1', u'\xe9', ','])]

print 'Loaded icdar03'


Loaded icdar03

In [3]:
# Reshape images to 32x32 and convert to grayscale
data_train_x = np.zeros((data_train['image'].count(), 1, 32, 32))
data_train_y = data_train['label'].values
data_test_x = np.zeros((data_test['image'].count(), 1, 32, 32))
data_test_y = data_test['label'].values

for idx, img in enumerate(data_train['image']):
    img = imresize(img, (32, 32))
    if len(img.shape) == 3:
        data_train_x[idx, ...] = img.dot([0.299, 0.587, 0.144])
    else:
        data_train_x[idx, ...] = img
        
for idx, img in enumerate(data_test['image']):
    img = imresize(img, (32, 32))
    if len(img.shape) == 3:
        data_test_x[idx, ...] = img.dot([0.299, 0.587, 0.144])
    else:
        data_test_x[idx, ...] = img
        
data_train_x = data_train_x.astype('float32')
data_test_x = data_test_x.astype('float32')
print 'icdar03 reshaped and grayscaled'


icdar03 reshaped and grayscaled

In [4]:
# Normalize by MuSigma
data_train_x /= data_train_x.std(axis = None)
data_train_x -= data_train_x.mean()

data_test_x /= data_test_x.std(axis = None)
data_test_x -= data_test_x.mean()

In [5]:
print data_train_x.shape, data_train_y.shape, data_test_x.shape, data_test_y.shape


(6113, 1, 32, 32) (6113,) (5379, 1, 32, 32) (5379,)

In [6]:
class TransIterator(BatchIterator):
    def fast_warp(self, img, tf, output_shape, mode='nearest'):
        return transform._warps_cy._warp_fast(img, tf.params, output_shape=output_shape, mode=mode)
    
    def transform(self, Xb, yb):
        Xb, yb = super(TransIterator, self).transform(Xb, yb)
        
        Xb_aug = np.empty(shape = (Xb.shape[0], 1, 32, 32), dtype = 'float32')
        yb_aug = yb

        # random rotations betweein -5 and 5 degrees
        dorotate = randint(-5,5)

        # random translations
        trans_1 = randint(-3,3)
        trans_2 = randint(-3,3)

        # random zooms
        zoom = uniform(0.8, 1.2)

        # shearing
        shear_deg = uniform(-10, 10)

        # set the transform parameters for skimage.transform.warp
        # have to shift to center and then shift back after transformation otherwise
        # rotations will make image go out of frame
        center_shift   = np.array((32, 32)) / 2. - 0.5
        tform_center   = transform.SimilarityTransform(translation=-center_shift)
        tform_uncenter = transform.SimilarityTransform(translation=center_shift)

        tform_aug = transform.AffineTransform(rotation = np.deg2rad(dorotate),
                                              scale =(1/zoom, 1/zoom),
                                              shear = np.deg2rad(shear_deg),
                                              translation = (trans_1, trans_2))

        tform = tform_center + tform_aug + tform_uncenter
        
        for j in range(Xb.shape[0]):
            Xb_aug[j][0] = self.fast_warp(Xb[j][0], tform,
                                          output_shape = (32, 32))

        return Xb_aug, yb_aug

In [7]:
# setting nn 
net = NeuralNet(
    layers = [
        ('input', layers.InputLayer),
        ('conv1', layers.Conv2DLayer),
        ('conv2', layers.Conv2DLayer),
        ('pool3', layers.MaxPool2DLayer),
        ('dropout4', layers.DropoutLayer),
        ('conv5', layers.Conv2DLayer),
        ('conv6', layers.Conv2DLayer),
        ('pool7', layers.MaxPool2DLayer),
        ('dropout8', layers.DropoutLayer),
        ('conv9', layers.Conv2DLayer),
        ('conv10', layers.Conv2DLayer),
        ('dropout12', layers.DropoutLayer),
        ('hidden13', layers.DenseLayer),
        ('dropout14', layers.DropoutLayer),
        ('hidden15', layers.DenseLayer),
        ('dropout16', layers.DropoutLayer),
        ('output', layers.DenseLayer),
    ],

    input_shape = (None, 1, 32, 32),
    conv1_num_filters = 128, conv1_filter_size = (3, 3),
    conv2_num_filters = 128, conv2_filter_size = (3, 3),
    pool3_pool_size = (2, 2),
    dropout4_p = 0,
    conv5_num_filters = 256, conv5_filter_size = (3, 3),
    conv6_num_filters = 256, conv6_filter_size = (3, 3),
    pool7_pool_size = (2, 2),
    dropout8_p = 0.2,
    conv9_num_filters = 512, conv9_filter_size = (3, 3),
    conv10_num_filters = 512, conv10_filter_size = (3, 3),
    dropout12_p = 0.2,
    hidden13_num_units = 1024,
    dropout14_p = 0.5,
    hidden15_num_units = 1024,
    dropout16_p = 0.5,
    output_num_units = 62, output_nonlinearity = softmax,

    batch_iterator_train = TransIterator(batch_size = 2500),
    batch_iterator_test = BatchIterator(batch_size = 2500),

    update = updates.adam,

    use_label_encoder = True,
    regression = False,
    max_epochs = 300,
    verbose = 1,
)

In [9]:
# train nn
net.load_params_from(os.path.join(model_root, 'recog_for_icdar.pkl')); # or load a pretrained model!
net.fit(data_train_x, data_train_y);


Loaded parameters to layer 'conv1' (shape 128x1x3x3).
Loaded parameters to layer 'conv1' (shape 128).
Loaded parameters to layer 'conv2' (shape 128x128x3x3).
Loaded parameters to layer 'conv2' (shape 128).
Loaded parameters to layer 'conv5' (shape 256x128x3x3).
Loaded parameters to layer 'conv5' (shape 256).
Loaded parameters to layer 'conv6' (shape 256x256x3x3).
Loaded parameters to layer 'conv6' (shape 256).
Loaded parameters to layer 'conv9' (shape 512x256x3x3).
Loaded parameters to layer 'conv9' (shape 512).
Loaded parameters to layer 'conv10' (shape 512x512x3x3).
Loaded parameters to layer 'conv10' (shape 512).
Loaded parameters to layer 'hidden13' (shape 512x1024).
Loaded parameters to layer 'hidden13' (shape 1024).
Loaded parameters to layer 'hidden15' (shape 1024x1024).
Loaded parameters to layer 'hidden15' (shape 1024).
Loaded parameters to layer 'output' (shape 1024x62).
Loaded parameters to layer 'output' (shape 62).
# Neural Network with 6212542 learnable parameters

## Layer information

  #  name       size
---  ---------  ---------
  0  input      1x32x32
  1  conv1      128x30x30
  2  conv2      128x28x28
  3  pool3      128x14x14
  4  dropout4   128x14x14
  5  conv5      256x12x12
  6  conv6      256x10x10
  7  pool7      256x5x5
  8  dropout8   256x5x5
  9  conv9      512x3x3
 10  conv10     512x1x1
 11  dropout12  512x1x1
 12  hidden13   1024
 13  dropout14  1024
 14  hidden15   1024
 15  dropout16  1024
 16  output     62

  epoch    train loss    valid loss    train/val    valid acc  dur
-------  ------------  ------------  -----------  -----------  ------
      1       0.44351       1.03218      0.42968      0.76726  11.34s
      2       0.63719       1.11852      0.56967      0.74639  11.26s
      3       0.53203       1.02110      0.52103      0.77849  11.32s
      4       0.39569       1.06443      0.37174      0.78732  12.08s
      5       0.48686       1.00733      0.48331      0.79213  11.99s
      6       0.40215       0.95121      0.42278      0.80658  11.76s
      7       0.28871       0.96496      0.29920      0.79374  11.82s
      8       0.36396       0.99789      0.36473      0.78090  11.76s
      9       0.28748       0.97475      0.29493      0.78732  11.73s
     10       0.23234       0.96681      0.24032      0.79695  11.72s
     11       0.26309       0.97020      0.27117      0.79695  11.76s
     12       0.56876       1.03779      0.54804      0.77929  11.71s
     13       0.43985       1.00619      0.43714      0.78973  11.67s
     14       0.46904       0.99340      0.47215      0.77929  11.67s
     15       0.56902       0.97598      0.58303      0.78170  11.70s
     16       0.36982       1.00153      0.36925      0.78812  11.67s
     17       0.32597       1.01795      0.32022      0.78973  11.67s
     18       0.27689       0.97637      0.28359      0.79454  11.66s
     19       0.21021       0.98973      0.21239      0.79374  11.65s
/home/cuda/anaconda2/lib/python2.7/site-packages/sklearn/cross_validation.py:417: Warning: The least populated class in y has only 1 members, which is too few. The minimum number of labels for any class cannot be less than n_folds=5.
  % (min_labels, self.n_folds)), Warning)

In [12]:
pred = net.predict(data_test_x)
print accuracy_score(data_test_y, pred)


0.78936605317

In [13]:
print classification_report(data_test_y, pred)


             precision    recall  f1-score   support

          0       0.42      0.11      0.17        46
          1       0.75      0.65      0.70        46
          2       0.92      0.96      0.94        49
          3       0.86      0.71      0.77        17
          4       1.00      0.29      0.45        24
          5       0.56      0.48      0.52        29
          6       1.00      0.60      0.75        15
          7       0.60      0.30      0.40        10
          8       0.43      0.50      0.46         6
          9       1.00      0.27      0.42        15
          A       0.93      0.87      0.90       223
          B       0.61      0.83      0.70        47
          C       0.86      0.82      0.84       153
          D       0.85      0.76      0.80        74
          E       0.82      0.93      0.87       322
          F       0.83      0.89      0.86        76
          G       0.87      0.92      0.89        63
          H       0.95      0.85      0.90        97
          I       0.59      0.39      0.47       163
          J       0.45      0.38      0.42        13
          K       0.97      0.61      0.75        46
          L       0.87      0.82      0.85       131
          M       0.69      0.87      0.77        89
          N       0.98      0.85      0.91       153
          O       0.65      0.59      0.62       187
          P       0.88      0.86      0.87        91
          Q       0.00      0.00      0.00         4
          R       0.88      0.86      0.87       205
          S       0.82      0.85      0.83       229
          T       0.83      0.84      0.84       205
          U       0.84      0.75      0.79        92
          V       0.94      0.65      0.77        26
          W       0.86      0.79      0.83        39
          X       0.93      0.68      0.79        19
          Y       0.79      0.90      0.84        42
          Z       0.00      0.00      0.00         7
          a       0.83      0.85      0.84       171
          b       0.53      0.83      0.65        24
          c       0.78      0.69      0.73       100
          d       0.84      0.80      0.82        54
          e       0.90      0.89      0.90       331
          f       0.92      0.77      0.84        47
          g       0.63      0.76      0.69        38
          h       0.90      0.77      0.83        86
          i       0.76      0.84      0.80       182
          j       0.00      0.00      0.00         4
          k       0.58      0.85      0.69        33
          l       0.31      0.42      0.36       105
          m       0.81      0.82      0.82        51
          n       0.86      0.90      0.88       162
          o       0.59      0.76      0.66       194
          p       0.74      0.75      0.74        56
          q       0.00      0.00      0.00         3
          r       0.89      0.82      0.86       177
          s       0.79      0.86      0.82       154
          t       0.78      0.89      0.83       173
          u       0.64      0.81      0.71        67
          v       0.53      0.71      0.61        24
          w       0.68      0.79      0.73        19
          x       0.48      0.92      0.63        12
          y       0.86      0.75      0.80        57
          z       0.00      0.00      0.00         2

avg / total       0.80      0.79      0.79      5379


In [ ]:
net.save_params_to(os.path.join(model_root, 'recog_for_icdar.pkl'))

In [ ]:


In [ ]: