In [47]:
%matplotlib inline

import os
import sys
import pylab
import random
from random import randint, uniform
from skimage.util import crop
from skimage import transform
import numpy as np
import pandas as pd
import cPickle as pkl
from lasagne import layers
from bs4 import BeautifulSoup as bs
from lasagne import updates
import lasagne as nn
from theano.tensor.nnet import softmax
from scipy.misc import imread, imresize
from nolearn.lasagne import NeuralNet, BatchIterator, visualize
from sklearn.cross_validation import train_test_split
from sklearn.metrics import classification_report, accuracy_score

repo_location = '/workspace/.project/project/'
data_root = os.path.join(os.path.expanduser('~') + repo_location + 'datasets/')
script_root = os.path.join(os.path.expanduser('~') + repo_location + 'scripts/')
model_root = os.path.join(os.path.expanduser('~') + repo_location + 'models/')

In [2]:
# Load dataset
train_soup = bs(open(data_root + 'icdar03/train/char/char.xml').read(), 'lxml-xml')
test_soup = bs(open(data_root + 'icdar03/test/char/char.xml').read(), 'lxml-xml')

X_train = []
y_train = []
X_test = []
y_test = []

for image in train_soup('image'):
    try:
        img = imread(data_root + 'icdar03/train/char/' + image['file'])
        X_train.append(img)
        y_train.append(image['tag'])
    except:
        pass
    
for image in test_soup('image'):
    try:
        img = imread(data_root + 'icdar03/test/char/' + image['file'])
        X_test.append(img)
        y_test.append(image['tag'])
    except:
        pass

    
data_train = pd.DataFrame({'image' : X_train, 'label' : y_train})
data_test = pd.DataFrame({'image' : X_test, 'label' : y_test})

# drop extra labels
data_train = data_train.loc[~data_train['label'].isin([':', '-', '.', '\'', '!', '(', '"', ')', '&', '?', u'\xa3', u'\xc9', u'\xd1', u'\xe9', ','])]
data_test = data_test.loc[~data_test['label'].isin([':', '-', '.', '\'', '!', '(', '"', ')', '&', '?', u'\xa3', u'\xc9', u'\xd1', u'\xe9', ','])]

print 'Loaded icdar03'


Loaded icdar03

In [3]:
# Reshape images to 32x32 and convert to grayscale
data_train_x = np.zeros((data_train['image'].count(), 1, 32, 32))
data_train_y = data_train['label'].values
data_test_x = np.zeros((data_test['image'].count(), 1, 32, 32))
data_test_y = data_test['label'].values

for idx, img in enumerate(data_train['image']):
    img = imresize(img, (32, 32))
    if len(img.shape) == 3:
        data_train_x[idx, ...] = img.dot([0.299, 0.587, 0.144])
    else:
        data_train_x[idx, ...] = img
        
for idx, img in enumerate(data_test['image']):
    img = imresize(img, (32, 32))
    if len(img.shape) == 3:
        data_test_x[idx, ...] = img.dot([0.299, 0.587, 0.144])
    else:
        data_test_x[idx, ...] = img
        
data_train_x = data_train_x.astype('float32')
data_test_x = data_test_x.astype('float32')
print 'icdar03 reshaped and grayscaled'


icdar03 reshaped and grayscaled

In [4]:
# Normalize by MuSigma
data_train_x /= data_train_x.std(axis = None)
data_train_x -= data_train_x.mean()

data_test_x /= data_test_x.std(axis = None)
data_test_x -= data_test_x.mean()

In [5]:
print data_train_x.shape, data_train_y.shape, data_test_x.shape, data_test_y.shape


(6113, 1, 32, 32) (6113,) (5379, 1, 32, 32) (5379,)

In [40]:
class TransIterator(BatchIterator):
    def fast_warp(self, img, tf, output_shape, mode='nearest'):
        return transform._warps_cy._warp_fast(img, tf.params, output_shape=output_shape, mode=mode)
    
    def transform(self, Xb, yb):
        Xb, yb = super(TransIterator, self).transform(Xb, yb)
        
        Xb_aug = np.empty(shape = (Xb.shape[0], 1, 32, 32), dtype = 'float32')
        yb_aug = yb

        dorotate = randint(-5,5)

        trans_1 = randint(-5,5)
        trans_2 = randint(-5,5)

        zoom = uniform(0.8, 1.2)
        
        shear_deg = uniform(-10, 10)

        center_shift   = np.array((32, 32)) / 2. - 0.5
        tform_center   = transform.SimilarityTransform(translation=-center_shift)
        tform_uncenter = transform.SimilarityTransform(translation=center_shift)

        tform_aug = transform.AffineTransform(rotation = np.deg2rad(dorotate),
                                              scale =(1/zoom, 1/zoom),
                                              shear = np.deg2rad(shear_deg),
                                              translation = (trans_1, trans_2))

        tform = tform_center + tform_aug + tform_uncenter
        
        for j in range(Xb.shape[0]):
            Xb_aug[j][0] = self.fast_warp(Xb[j][0], tform,
                                          output_shape = (32, 32))

        return Xb_aug, yb_aug

In [65]:
# setting nn 
net = NeuralNet(
    layers = [
        ('input', layers.InputLayer),
        ('conv1', layers.Conv2DLayer),
        ('conv2', layers.Conv2DLayer),
        ('pool3', layers.MaxPool2DLayer),
        ('conv4', layers.Conv2DLayer),
        ('conv5', layers.Conv2DLayer),
        ('pool6', layers.MaxPool2DLayer),
        ('dropout7', layers.DropoutLayer),
        ('conv8', layers.Conv2DLayer),
        ('conv9', layers.Conv2DLayer),
        ('pool10', layers.MaxPool2DLayer),
        ('dropout11', layers.DropoutLayer),
        ('conv12', layers.Conv2DLayer),
        ('conv13', layers.Conv2DLayer),
        ('pool14', layers.MaxPool2DLayer),
        ('dropout15', layers.DropoutLayer),
        ('conv16', layers.Conv2DLayer),
        ('conv17', layers.Conv2DLayer),
        ('pool18', layers.MaxPool2DLayer),
        ('dropout19', layers.DropoutLayer),
        ('hidden20', layers.DenseLayer),
        ('dropout21', layers.DropoutLayer),
        ('output', layers.DenseLayer),
    ],

    input_shape = (None, 1, 32, 32),
    conv1_num_filters = 128, conv1_filter_size = (3, 3), conv1_pad = 1,
    conv2_num_filters = 128, conv2_filter_size = (3, 3), conv2_pad = 1,
    pool3_pool_size = (2, 2),
    conv4_num_filters = 256, conv4_filter_size = (3, 3), conv4_pad = 1,
    conv5_num_filters = 256, conv5_filter_size = (3, 3), conv5_pad = 1,
    pool6_pool_size = (2, 2),
    dropout7_p = 0.2,
    conv8_num_filters = 256, conv8_filter_size = (3, 3), conv8_pad = 1,
    conv9_num_filters = 256, conv9_filter_size = (3, 3), conv9_pad = 1,
    pool10_pool_size = (2, 2),
    dropout11_p = 0.2,
    conv12_num_filters = 256, conv12_filter_size = (3, 3), conv12_pad = 1,
    conv13_num_filters = 256, conv13_filter_size = (3, 3), conv13_pad = 1,
    pool14_pool_size = (2, 2),
    dropout15_p = 0.2,
    conv16_num_filters = 512, conv16_filter_size = (3, 3), conv16_pad = 1,
    conv17_num_filters = 512, conv17_filter_size = (3, 3), conv17_pad = 1,
    pool18_pool_size = (2, 2),
    dropout19_p = 0.2,
    hidden20_num_units = 1024,
    dropout21_p = 0.5,
    output_num_units = 62, output_nonlinearity = softmax,

    batch_iterator_train = TransIterator(batch_size = 500),
    batch_iterator_test = BatchIterator(batch_size = 500),

    update = updates.adam,

    use_label_encoder = True,
    regression = False,
    max_epochs = 300,
    verbose = 1,
)

In [111]:
pylab.legend?

In [67]:
# train nn
#net.load_params_from(os.path.join(model_root, 'recog_trial_icdar.pkl')); # or load a pretrained model!
net.fit(data_train_x, data_train_y);


# Neural Network with 7523262 learnable parameters

## Layer information

  #  name       size
---  ---------  ---------
  0  input      1x32x32
  1  conv1      128x32x32
  2  conv2      128x32x32
  3  pool3      128x16x16
  4  conv4      256x16x16
  5  conv5      256x16x16
  6  pool6      256x8x8
  7  dropout7   256x8x8
  8  conv8      256x8x8
  9  conv9      256x8x8
 10  pool10     256x4x4
 11  dropout11  256x4x4
 12  conv12     256x4x4
 13  conv13     256x4x4
 14  pool14     256x2x2
 15  dropout15  256x2x2
 16  conv16     512x2x2
 17  conv17     512x2x2
 18  pool18     512x1x1
 19  dropout19  512x1x1
 20  hidden20   1024
 21  dropout21  1024
 22  output     62

  epoch    train loss    valid loss    train/val    valid acc  dur
-------  ------------  ------------  -----------  -----------  ------
      1       4.07758       3.95875      1.03002      0.05041  21.97s
      2       3.80873       3.76531      1.01153      0.06164  22.19s
      3       3.72656       3.76362      0.99015      0.06164  23.01s
      4       3.71182       3.75835      0.98762      0.06164  22.69s
      5       3.70867       3.75674      0.98721      0.06164  22.69s
      6       3.70278       3.75344      0.98650      0.06164  22.71s
      7       3.69897       3.74354      0.98809      0.06164  22.58s
      8       3.67668       3.72218      0.98778      0.06164  22.67s
      9       3.70237       3.73959      0.99005      0.06164  22.64s
     10       3.69875       3.72754      0.99228      0.06164  22.71s
     11       3.64074       3.64879      0.99780      0.07424  22.55s
     12       3.64625       3.69824      0.98594      0.04209  22.67s
     13       3.58524       3.61724      0.99115      0.09041  22.56s
     14       3.56009       3.61233      0.98554      0.07209  22.64s
     15       3.41893       3.43287      0.99594      0.12981  22.51s
     16       3.29297       3.36283      0.97923      0.14243  22.51s
     17       3.12075       3.35859      0.92919      0.12441  22.53s
     18       3.22327       3.18254      1.01280      0.15792  22.56s
     19       3.11614       3.18756      0.97760      0.18614  22.50s
     20       3.07253       3.30796      0.92883      0.14994  22.55s
     21       3.18524       3.08477      1.03257      0.19194  22.54s
     22       2.97445       3.02637      0.98284      0.19117  22.51s
     23       3.01827       3.10243      0.97287      0.17540  22.57s
     24       3.00162       2.99617      1.00182      0.22089  22.50s
     25       3.07370       2.92284      1.05161      0.23500  22.51s
     26       2.91797       2.87714      1.01419      0.23627  22.55s
     27       2.86024       2.66920      1.07157      0.30870  22.49s
     28       2.63567       2.67158      0.98656      0.29195  22.61s
     29       2.66711       2.59122      1.02929      0.33059  22.51s
     30       2.60661       2.54504      1.02419      0.30334  22.51s
     31       2.64424       2.56572      1.03060      0.34408  22.51s
     32       2.55410       2.46039      1.03809      0.35872  22.51s
     33       2.41672       2.52630      0.95662      0.34883  22.63s
     34       2.46273       2.29558      1.07281      0.40251  22.51s
     35       2.27856       2.27370      1.00214      0.40971  22.56s
     36       2.14798       2.11447      1.01585      0.46550  22.52s
     37       2.01223       2.13246      0.94362      0.45948  22.61s
     38       2.01737       2.09777      0.96168      0.47341  22.56s
     39       1.88294       1.91831      0.98156      0.52103  22.53s
     40       1.92046       1.97819      0.97082      0.50970  22.65s
     41       1.82337       1.89582      0.96178      0.51161  22.51s
     42       1.76837       1.98094      0.89269      0.48075  22.62s
     43       1.94860       1.80674      1.07851      0.51518  22.58s
     44       1.62480       1.63548      0.99347      0.57682  22.62s
     45       1.38771       1.64493      0.84363      0.57738  22.56s
     46       1.35303       1.61987      0.83527      0.59946  22.67s
     47       1.38548       1.51164      0.91654      0.63114  22.54s
     48       1.30669       1.40787      0.92813      0.63861  22.68s
     49       1.23588       1.32571      0.93224      0.66459  22.55s
     50       1.18205       1.31741      0.89725      0.66265  22.68s
     51       1.09163       1.23458      0.88421      0.69343  22.54s
     52       1.00206       1.24515      0.80477      0.69024  22.63s
     53       1.00888       1.17990      0.85505      0.71711  22.64s
     54       1.01476       1.16400      0.87179      0.71153  22.69s
     55       0.89947       1.14835      0.78327      0.72700  22.52s
     56       0.89093       1.14639      0.77717      0.71831  22.70s
     57       0.84392       1.14303      0.73832      0.71906  22.64s
     58       0.86013       1.14677      0.75005      0.73382  22.59s
     59       0.87409       1.08513      0.80552      0.72373  22.70s
     60       0.75988       1.08344      0.70136      0.74782  22.66s
     61       0.94120       1.07070      0.87906      0.73246  22.65s
     62       0.70003       1.01910      0.68691      0.74506  22.67s
     63       0.76705       0.98383      0.77965      0.76005  22.69s
     64       0.73264       0.94445      0.77573      0.77332  22.58s
     65       0.79410       0.91757      0.86544      0.77943  22.68s
     66       0.72552       1.01416      0.71539      0.75995  22.63s
     67       0.62614       0.87822      0.71297      0.77814  22.69s
     68       0.65784       0.92467      0.71143      0.77883  22.58s
     69       0.66632       0.92762      0.71831      0.75734  22.65s
     70       0.66536       0.95111      0.69956      0.76861  22.63s
     71       0.67640       0.90717      0.74561      0.77752  22.68s
     72       0.57525       0.91777      0.62680      0.78085  22.68s
     73       0.58397       0.93079      0.62739      0.76921  22.59s
     74       0.64705       0.87181      0.74220      0.79227  22.70s
     75       0.54634       0.96742      0.56474      0.78079  22.62s
     76       0.60751       0.85921      0.70705      0.79283  22.66s
     77       0.58193       0.94107      0.61837      0.78685  22.71s
     78       0.46683       0.91153      0.51214      0.79487  22.62s
     79       0.51365       0.83183      0.61750      0.80502  22.63s
     80       0.49193       0.87054      0.56508      0.78947  22.66s
     81       0.52241       0.88754      0.58861      0.79485  22.62s
     82       0.44776       0.80447      0.55659      0.81171  22.66s
     83       0.48389       0.86000      0.56266      0.81302  22.60s
     84       0.55434       0.99035      0.55974      0.77416  22.67s
     85       0.43897       0.87744      0.50029      0.79476  22.72s
     86       0.51114       0.95351      0.53607      0.78274  22.68s
     87       0.42000       0.89857      0.46741      0.78958  22.56s
     88       0.51010       0.98780      0.51640      0.76984  22.71s
     89       0.51139       0.94151      0.54316      0.78408  22.64s
     90       0.48961       0.92647      0.52846      0.78156  22.65s
     91       0.52003       0.91964      0.56547      0.78208  22.74s
     92       0.47650       0.91263      0.52212      0.79621  22.63s
     93       0.39075       0.86996      0.44916      0.80567  22.69s
     94       0.43638       0.82201      0.53087      0.81094  22.58s
     95       0.34792       0.80787      0.43067      0.81909  22.66s
     96       0.43707       0.85552      0.51088      0.79947  22.72s
     97       0.33348       0.90130      0.37000      0.78679  22.61s
     98       0.34769       0.83027      0.41877      0.81307  22.65s
     99       0.37139       0.86993      0.42692      0.79356  22.68s
    100       0.40490       0.90867      0.44560      0.79087  22.73s
    101       0.39752       0.96263      0.41295      0.78341  22.57s
    102       0.40623       0.90434      0.44920      0.80844  22.64s
    103       0.46840       0.86770      0.53982      0.80214  22.62s
    104       0.43120       0.90463      0.47666      0.79502  22.69s
    105       0.41047       0.87514      0.46904      0.79700  22.58s
    106       0.26696       0.99108      0.26937      0.78874  22.71s
    107       0.33312       0.87851      0.37918      0.80505  22.67s
    108       0.33677       0.83798      0.40189      0.80431  22.69s
    109       0.39834       0.86115      0.46257      0.81047  22.66s
    110       0.33812       0.90757      0.37256      0.79760  22.70s
    111       0.29641       0.87694      0.33801      0.79958  22.64s
    112       0.36906       0.93130      0.39629      0.80296  22.64s
    113       0.35633       0.85516      0.41668      0.80689  22.67s
    114       0.27786       0.88692      0.31329      0.79627  22.64s
    115       0.39711       0.99197      0.40032      0.79296  22.68s
    116       0.36275       0.93620      0.38747      0.78829  22.67s
    117       0.30116       0.98077      0.30706      0.77545  22.61s
    118       0.29187       1.06545      0.27394      0.77212  22.69s
    119       0.37304       0.95195      0.39187      0.78760  22.64s
    120       0.31273       0.95676      0.32686      0.78072  22.74s
    121       0.35368       0.93561      0.37802      0.79547  22.97s

In [68]:
pred = net.predict(data_test_x)
print accuracy_score(data_test_y, pred)


0.817438185536

In [69]:
print classification_report(data_test_y, pred)


             precision    recall  f1-score   support

          0       0.60      0.07      0.12        46
          1       0.94      0.67      0.78        46
          2       0.90      0.92      0.91        49
          3       0.89      0.47      0.62        17
          4       0.80      0.50      0.62        24
          5       0.79      0.66      0.72        29
          6       0.81      0.87      0.84        15
          7       0.67      0.20      0.31        10
          8       0.43      1.00      0.60         6
          9       0.33      0.07      0.11        15
          A       0.94      0.90      0.92       223
          B       0.79      0.89      0.84        47
          C       0.83      0.76      0.79       153
          D       0.85      0.81      0.83        74
          E       0.90      0.91      0.90       322
          F       0.91      0.91      0.91        76
          G       0.81      0.87      0.84        63
          H       0.90      0.94      0.92        97
          I       0.55      0.69      0.61       163
          J       0.60      0.46      0.52        13
          K       0.91      0.85      0.88        46
          L       0.89      0.77      0.82       131
          M       0.85      0.84      0.85        89
          N       0.93      0.92      0.92       153
          O       0.62      0.68      0.65       187
          P       0.87      0.86      0.86        91
          Q       0.00      0.00      0.00         4
          R       0.90      0.89      0.89       205
          S       0.82      0.87      0.85       229
          T       0.81      0.86      0.83       205
          U       0.90      0.80      0.85        92
          V       0.78      0.69      0.73        26
          W       0.72      0.85      0.78        39
          X       0.94      0.84      0.89        19
          Y       0.92      0.86      0.89        42
          Z       0.50      0.14      0.22         7
          a       0.85      0.89      0.87       171
          b       0.80      0.83      0.82        24
          c       0.73      0.78      0.75       100
          d       0.82      0.91      0.86        54
          e       0.90      0.89      0.89       331
          f       0.84      0.87      0.85        47
          g       0.64      0.84      0.73        38
          h       0.97      0.85      0.91        86
          i       0.78      0.87      0.82       182
          j       0.44      1.00      0.62         4
          k       0.84      0.79      0.81        33
          l       0.56      0.26      0.35       105
          m       0.90      0.84      0.87        51
          n       0.89      0.93      0.91       162
          o       0.66      0.78      0.72       194
          p       0.88      0.80      0.84        56
          q       0.00      0.00      0.00         3
          r       0.83      0.88      0.86       177
          s       0.78      0.81      0.79       154
          t       0.88      0.93      0.90       173
          u       0.56      0.82      0.67        67
          v       0.81      0.54      0.65        24
          w       0.50      0.53      0.51        19
          x       0.82      0.75      0.78        12
          y       0.91      0.74      0.82        57
          z       0.00      0.00      0.00         2

avg / total       0.82      0.82      0.81      5379


In [70]:
net.save_params_to(os.path.join(model_root, 'recog_trial_icdar.pkl'))

In [ ]:


In [91]:
visualize.plot_conv_weights(net.layers_['conv1'], figsize=(5, 5))
pylab.savefig('fig_weights')



In [93]:
visualize.plot_conv_activity(net.layers_['conv1'], data_train_x[0, ...].reshape((1, 1, 32, 32)), figsize=(7, 7))
pylab.savefig('fig_activity')



In [119]:
visualize.plot_loss(net)
pylab.savefig('fig_loss')



In [114]:
acc_plt = []
for i in range(len(net.train_history_)):
    acc_plt.append(net.train_history_[i]['valid_accuracy']*100)

In [120]:
pylab.plot(acc_plt, 'r')
pylab.xlabel('epoch')
pylab.ylabel('accuracy (in %)')
pylab.ylim(0, 100)
pylab.legend(['validation accuracy'])
pylab.savefig('fig_accuracy')