In [1]:
%matplotlib inline
import os
import sys
import pylab
import random
from random import randint, uniform
from skimage.util import crop
from skimage import transform
import numpy as np
import pandas as pd
import cPickle as pkl
from lasagne import layers
from bs4 import BeautifulSoup as bs
from lasagne import updates
import lasagne as nn
from theano.tensor.nnet import softmax
from scipy.misc import imread, imresize
from nolearn.lasagne import NeuralNet, BatchIterator
from sklearn.cross_validation import train_test_split
from sklearn.metrics import classification_report, accuracy_score

repo_location = '/workspace/.project/project/'
data_root = os.path.join(os.path.expanduser('~') + repo_location + 'datasets/')
script_root = os.path.join(os.path.expanduser('~') + repo_location + 'scripts/')
model_root = os.path.join(os.path.expanduser('~') + repo_location + 'models/')


Using gpu device 0: GeForce GTX TITAN X (CNMeM is disabled)

In [2]:
# Load dataset
train_soup = bs(open(data_root + 'icdar03/train/char/char.xml').read(), 'lxml-xml')
test_soup = bs(open(data_root + 'icdar03/test/char/char.xml').read(), 'lxml-xml')

X_train = []
y_train = []
X_test = []
y_test = []

for image in train_soup('image'):
    try:
        img = imread(data_root + 'icdar03/train/char/' + image['file'])
        X_train.append(img)
        y_train.append(image['tag'])
    except:
        pass
    
for image in test_soup('image'):
    try:
        img = imread(data_root + 'icdar03/test/char/' + image['file'])
        X_test.append(img)
        y_test.append(image['tag'])
    except:
        pass

    
data_train = pd.DataFrame({'image' : X_train, 'label' : y_train})
data_test = pd.DataFrame({'image' : X_test, 'label' : y_test})

print 'Loaded icdar03'


Loaded icdar03

In [3]:
# Reshape images to 64x64 and convert to grayscale
data_train_x = np.zeros((data_train['image'].count(), 1, 64, 64))
data_train_y = data_train['label'].values
data_test_x = np.zeros((data_test['image'].count(), 1, 64, 64))
data_test_y = data_test['label'].values

for idx, img in enumerate(data_train['image']):
    img = imresize(img, (64, 64))
    if len(img.shape) == 3:
        data_train_x[idx, ...] = img.dot([0.299, 0.587, 0.144])
    else:
        data_train_x[idx, ...] = img
        
for idx, img in enumerate(data_test['image']):
    img = imresize(img, (64, 64))
    if len(img.shape) == 3:
        data_test_x[idx, ...] = img.dot([0.299, 0.587, 0.144])
    else:
        data_test_x[idx, ...] = img
        
data_train_x = data_train_x.astype('float32')
data_test_x = data_test_x.astype('float32')
print 'icdar03 reshaped and grayscaled'


icdar03 reshaped and grayscaled

In [4]:
# Normalize by MuSigma
data_train_x /= data_train_x.std(axis = None)
data_train_x -= data_train_x.mean()

data_test_x /= data_test_x.std(axis = None)
data_test_x -= data_test_x.mean()

In [5]:
print data_train_x.shape, data_train_y.shape, data_test_x.shape, data_test_y.shape


(6185, 1, 64, 64) (6185,) (5430, 1, 64, 64) (5430,)

In [6]:
class TransIterator(BatchIterator):
    def fast_warp(self, img, tf, output_shape, mode='nearest'):
        return transform._warps_cy._warp_fast(img, tf.params, output_shape=output_shape, mode=mode)
    
    def transform(self, Xb, yb):
        Xb, yb = super(TransIterator, self).transform(Xb, yb)
        
        Xb_aug = np.empty(shape = (Xb.shape[0], 1, 64, 64), dtype = 'float32')
        yb_aug = yb

        # random rotations betweein -5 and 5 degrees
        dorotate = randint(-5,5)

        # random translations
        trans_1 = randint(-10,10)
        trans_2 = randint(-10,10)

        # random zooms
        zoom = uniform(0.8, 1.2)

        # shearing
        shear_deg = uniform(-10, 10)

        # set the transform parameters for skimage.transform.warp
        # have to shift to center and then shift back after transformation otherwise
        # rotations will make image go out of frame
        center_shift   = np.array((64, 64)) / 2. - 0.5
        tform_center   = transform.SimilarityTransform(translation=-center_shift)
        tform_uncenter = transform.SimilarityTransform(translation=center_shift)

        tform_aug = transform.AffineTransform(rotation = np.deg2rad(dorotate),
                                              scale =(1/zoom, 1/zoom),
                                              shear = np.deg2rad(shear_deg),
                                              translation = (trans_1, trans_2))

        tform = tform_center + tform_aug + tform_uncenter
        
        for j in range(Xb.shape[0]):
            Xb_aug[j][0] = self.fast_warp(Xb[j][0], tform,
                                          output_shape = (64, 64))

        return Xb_aug, yb_aug

In [11]:
# setting nn 
net = NeuralNet(
    layers = [
        ('input', layers.InputLayer),
        ('conv1', layers.Conv2DLayer),
        ('conv2', layers.Conv2DLayer),
        ('pool3', layers.MaxPool2DLayer),
        ('dropout4', layers.DropoutLayer),
        ('conv5', layers.Conv2DLayer),
        ('conv6', layers.Conv2DLayer),
        ('pool7', layers.MaxPool2DLayer),
        ('dropout8', layers.DropoutLayer),
        ('hidden13', layers.DenseLayer),
        ('dropout14', layers.DropoutLayer),
        ('hidden15', layers.DenseLayer),
        ('dropout16', layers.DropoutLayer),
        ('output', layers.DenseLayer),
    ],

    input_shape = (None, 1, 64, 64),
    conv1_num_filters = 128, conv1_filter_size = (3, 3),
    conv2_num_filters = 128, conv2_filter_size = (3, 3),
    pool3_pool_size = (2, 2),
    dropout4_p = 0.2,
    conv5_num_filters = 256, conv5_filter_size = (3, 3),
    conv6_num_filters = 256, conv6_filter_size = (3, 3),
    pool7_pool_size = (2, 2),
    dropout8_p = 0.2,
    hidden13_num_units = 1024,
    dropout14_p = 0.5,
    hidden15_num_units = 1024,
    dropout16_p = 0.5,
    output_num_units = 75, output_nonlinearity = softmax,

    batch_iterator_train = TransIterator(batch_size = 256),
    batch_iterator_test = BatchIterator(batch_size = 256),

    update = updates.adam,

    use_label_encoder = True,
    regression = False,
    max_epochs = 300,
    verbose = 1,
)

In [12]:
# train nn
#net.load_params_from(os.path.join(model_root, 'recog_for_icdar.pkl')); # or load a pretrained model!
net.fit(data_train_x, data_train_y);


# Neural Network with 46463947 learnable parameters

## Layer information

  #  name       size
---  ---------  ---------
  0  input      1x64x64
  1  conv1      128x62x62
  2  conv2      128x60x60
  3  pool3      128x30x30
  4  dropout4   128x30x30
  5  conv5      256x28x28
  6  conv6      256x26x26
  7  pool7      256x13x13
  8  dropout8   256x13x13
  9  hidden13   1024
 10  dropout14  1024
 11  hidden15   1024
 12  dropout16  1024
 13  output     75

  epoch    train loss    valid loss    train/val    valid acc  dur
-------  ------------  ------------  -----------  -----------  ------
      1       4.07641       3.84068      1.06138      0.05156  51.14s
      2       3.80502       3.77123      1.00896      0.06221  52.71s
      3       3.72778       3.76573      0.98992      0.06651  52.60s
      4       3.60068       3.55215      1.01366      0.10490  52.67s
      5       3.37870       3.20961      1.05268      0.25244  52.75s
      6       3.31460       3.11665      1.06351      0.29743  52.54s
      7       3.13720       3.45049      0.90920      0.26071  52.64s
      8       3.12747       2.93692      1.06488      0.34820  52.23s
      9       2.89936       2.76368      1.04909      0.37906  52.45s
     10       2.80602       2.76480      1.01491      0.39823  52.71s
     11       2.73201       2.55306      1.07009      0.43058  52.39s
     12       2.49198       2.20826      1.12848      0.52925  52.34s
     13       2.45399       2.17939      1.12600      0.53540  52.31s
     14       2.29532       2.18743      1.04932      0.50444  51.97s
     15       2.16068       1.90916      1.13175      0.58969  52.27s
     16       2.05779       1.81029      1.13672      0.60912  52.37s
     17       2.00595       1.76699      1.13524      0.58976  52.28s
     18       1.89767       1.65010      1.15003      0.64004  52.21s
     19       1.76604       1.74187      1.01387      0.58767  52.13s
     20       1.82329       1.55659      1.17134      0.64707  52.13s
     21       1.53584       1.38197      1.11134      0.69298  52.32s
     22       1.69129       1.51963      1.11297      0.65890  52.50s
     23       1.51504       1.39901      1.08293      0.68024  52.30s
     24       1.47129       1.30035      1.13145      0.68836  52.31s
     25       1.57948       1.46135      1.08083      0.66046  52.58s
     26       1.37634       1.19765      1.14920      0.71425  52.44s
     27       1.30675       1.25757      1.03910      0.71191  52.60s
     28       1.30113       1.21337      1.07232      0.71535  52.56s
     29       1.27469       1.19678      1.06510      0.70658  52.38s
     30       1.36741       1.16384      1.17491      0.73030  52.31s
     31       1.39381       1.27761      1.09095      0.70647  52.48s
     32       1.25788       1.18331      1.06301      0.71950  52.81s
     33       1.13445       1.20560      0.94098      0.72976  52.25s
     34       1.27243       1.12350      1.13256      0.74084  52.18s
     35       1.31965       1.08498      1.21629      0.75490  52.18s
     36       1.22849       1.08676      1.13041      0.74681  52.26s
     37       1.17733       1.06411      1.10640      0.76452  52.16s
     38       1.18346       1.12353      1.05334      0.72050  52.54s
     39       1.03584       1.04976      0.98674      0.75735  52.36s
     40       1.05863       1.09693      0.96508      0.74862  52.08s
     41       1.03970       0.99575      1.04414      0.76452  52.24s
     42       1.03475       1.00705      1.02750      0.76527  52.16s
     43       1.16632       1.02110      1.14222      0.74290  52.05s
     44       1.04161       1.08446      0.96049      0.74276  52.24s
     45       1.15210       1.09951      1.04783      0.73125  52.36s
     46       1.16604       1.07109      1.08865      0.74560  52.26s
     47       0.98135       0.97855      1.00286      0.75827  51.96s
     48       0.96185       1.00219      0.95975      0.75739  52.61s
     49       1.00955       1.05180      0.95984      0.74464  52.58s
     50       0.95429       0.96262      0.99135      0.75834  52.25s
     51       0.97467       0.99312      0.98142      0.75810  52.12s
     52       0.97067       0.95575      1.01561      0.76470  52.25s
     53       0.87790       1.04621      0.83912      0.74872  52.20s
     54       0.83223       0.95058      0.87550      0.76537  52.81s
     55       0.93417       0.96337      0.96969      0.77734  52.37s
     56       0.89805       0.97126      0.92463      0.77027  52.24s
     57       0.96655       0.98855      0.97775      0.76810  52.82s
     58       0.86073       0.92240      0.93314      0.78341  52.67s
     59       0.88325       0.94583      0.93383      0.77638  52.64s
     60       0.83234       0.90616      0.91854      0.78518  52.45s
     61       0.88828       0.91719      0.96848      0.78526  52.40s
     62       0.83693       0.93152      0.89846      0.76594  52.59s
     63       0.88071       0.95990      0.91750      0.77758  52.49s
     64       0.81496       0.98427      0.82799      0.75451  52.58s
     65       0.90214       0.99817      0.90380      0.75121  52.01s
     66       0.80883       0.90705      0.89171      0.78423  52.01s
     67       0.85216       0.99418      0.85715      0.76004  52.00s
     68       0.79842       0.96609      0.82644      0.76701  51.99s
     69       0.84410       0.93095      0.90671      0.77506  51.95s
     70       0.76225       0.98362      0.77494      0.77261  51.98s
     71       0.86340       0.96376      0.89587      0.76477  52.35s
     72       0.73604       0.95531      0.77047      0.77560  52.50s
     73       0.86951       0.99263      0.87597      0.75973  52.15s
     74       0.76676       0.95051      0.80668      0.78067  52.81s
     75       0.78998       0.91465      0.86369      0.78831  52.62s
     76       0.78508       0.93686      0.83800      0.78234  52.43s
     77       0.76143       0.95747      0.79526      0.77520  52.82s
     78       0.72015       0.88653      0.81232      0.80116  52.11s
     79       0.81248       0.90736      0.89544      0.78430  52.41s
     80       0.80833       0.86469      0.93482      0.78678  52.37s
     81       0.77630       0.89811      0.86437      0.79012  52.49s
     82       0.65027       0.87044      0.74706      0.80020  52.36s
     83       0.70354       0.90840      0.77448      0.78937  53.39s
     84       0.73540       0.88729      0.82881      0.79331  52.83s
     85       0.65050       0.92933      0.69997      0.78156  52.44s
     86       0.71132       0.89947      0.79083      0.78824  52.29s
     87       0.71386       0.86086      0.82924      0.79797  52.42s
     88       0.72244       0.90869      0.79504      0.78927  52.46s
     89       0.79945       0.89412      0.89413      0.77958  52.26s
     90       0.70559       0.88018      0.80164      0.79215  52.28s
     91       0.65074       0.86625      0.75121      0.78842  52.39s
     92       0.73738       0.88455      0.83363      0.79026  52.25s
     93       0.70396       0.91857      0.76637      0.77588  52.39s
     94       0.69895       0.93093      0.75081      0.78852  52.21s
     95       0.63274       0.90213      0.70138      0.78536  52.59s
     96       0.63569       0.90054      0.70590      0.79250  52.06s
     97       0.56175       0.92017      0.61049      0.79019  52.02s
     98       0.65019       0.86290      0.75349      0.79956  52.00s
     99       0.63228       0.89000      0.71042      0.78852  52.07s
    100       0.52136       0.87447      0.59620      0.80237  52.02s

In [13]:
pred = net.predict(data_test_x)
print accuracy_score(data_test_y, pred)


0.79576427256

In [14]:
print classification_report(data_test_y, pred)


             precision    recall  f1-score   support

          !       0.31      0.50      0.38         8
          "       0.00      0.00      0.00         1
          &       1.00      0.57      0.73         7
          '       0.40      0.25      0.31         8
          (       0.00      0.00      0.00         1
          )       0.50      1.00      0.67         1
          ,       0.00      0.00      0.00         6
          -       0.50      0.75      0.60         4
          .       0.38      0.55      0.44        11
          0       1.00      0.04      0.08        46
          1       0.76      0.48      0.59        46
          2       0.82      0.94      0.88        49
          3       0.71      0.59      0.65        17
          4       0.78      0.58      0.67        24
          5       0.77      0.34      0.48        29
          6       0.90      0.60      0.72        15
          7       0.50      0.30      0.37        10
          8       1.00      0.67      0.80         6
          9       1.00      0.20      0.33        15
          ?       0.00      0.00      0.00         1
          A       0.96      0.92      0.94       223
          B       0.69      0.85      0.76        47
          C       0.80      0.80      0.80       153
          D       0.86      0.82      0.84        74
          E       0.78      0.91      0.84       322
          F       0.92      0.86      0.88        76
          G       0.90      0.83      0.86        63
          H       0.94      0.91      0.92        97
          I       0.49      0.53      0.51       163
          J       0.50      0.54      0.52        13
          K       0.97      0.72      0.82        46
          L       0.67      0.83      0.74       131
          M       0.82      0.90      0.86        89
          N       0.97      0.87      0.92       153
          O       0.65      0.83      0.73       187
          P       0.88      0.87      0.87        91
          Q       0.00      0.00      0.00         4
          R       0.91      0.85      0.88       205
          S       0.81      0.81      0.81       229
          T       0.91      0.81      0.86       205
          U       0.80      0.84      0.82        92
          V       0.79      0.73      0.76        26
          W       0.85      0.87      0.86        39
          X       0.76      0.84      0.80        19
          Y       0.84      0.88      0.86        42
          Z       0.80      0.57      0.67         7
          a       0.87      0.82      0.84       171
          b       0.69      0.83      0.75        24
          c       0.86      0.63      0.73       100
          d       0.80      0.87      0.83        54
          e       0.91      0.88      0.89       331
          f       0.90      0.74      0.81        47
          g       0.55      0.84      0.67        38
          h       0.97      0.80      0.88        86
          i       0.77      0.86      0.81       182
          j       0.00      0.00      0.00         4
          k       0.69      0.94      0.79        33
          l       0.33      0.36      0.34       105
          m       0.91      0.76      0.83        51
          n       0.91      0.91      0.91       162
          o       0.78      0.71      0.74       194
          p       0.83      0.70      0.76        56
          q       0.00      0.00      0.00         3
          r       0.87      0.79      0.82       177
          s       0.69      0.90      0.78       154
          t       0.84      0.90      0.87       173
          u       0.71      0.79      0.75        67
          v       0.71      0.62      0.67        24
          w       0.54      0.79      0.64        19
          x       0.73      0.92      0.81        12
          y       0.77      0.81      0.79        57
          z       0.00      0.00      0.00         2
          £       0.00      0.00      0.00         3

avg / total       0.81      0.80      0.79      5430

/home/cuda/anaconda2/envs/ff_env/lib/python2.7/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)

In [15]:
net.save_params_to(os.path.join(model_root, 'recog_for_icdar_1.pkl'))

In [ ]: