Pipelines Test Library Notebook

Imports


In [1]:
# Import pipelines

from binary_nets import binary_nets_wrapper # this is actually pipeline1
from pipeline2 import pipeline2
from pipeline3 import pipeline3
from pipeline4 import pipeline4

from ensemble_builder import ensemble_builder

import image_utils as iu

# Import datasets
import dataset_generator as dataset

from time import time
import os
import numpy as np


Using TensorFlow backend.

Data Load


In [2]:
(X_train_ocr, y_train_ocr, X_test_ocr, y_test_ocr, _) = dataset.generate_all_chars_with_class(verbose=0, plot=False)
print("Loaded dataset for all characters")

(X_train_cut, y_train_cut, X_test_cut, y_test_cut) = dataset.generate_dataset_for_segmentator(verbose=0, plot=False)
print("Loaded dataset for segmentator")

X_train_char = {}
y_train_char = {}
X_test_char = {}
y_test_char = {}

for char in dataset.ALPHABET_ALL:
    (X_train_char[char], y_train_char[char], X_test_char[char], y_test_char[char]) = \
                                                    dataset.generate_positive_and_negative_labeled(char, verbose=0)
    print("Loaded dataset for character " + char)


Loaded dataset for all characters
Loaded dataset for segmentator
Loaded dataset for character a
Loaded dataset for character c
Loaded dataset for character d_mediana
Loaded dataset for character e
Loaded dataset for character i
Loaded dataset for character m
Loaded dataset for character n
Loaded dataset for character o
Loaded dataset for character r
Loaded dataset for character s_mediana
Loaded dataset for character t
Loaded dataset for character u
Loaded dataset for character d_alta
Loaded dataset for character s_alta
Loaded dataset for character b
Loaded dataset for character f
Loaded dataset for character h
Loaded dataset for character l
Loaded dataset for character g
Loaded dataset for character p
Loaded dataset for character q
Loaded dataset for character s_bassa
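
A quick sanity check of the loaded splits can be useful at this point. The sketch below is not part of the original run and assumes the dataset generators return NumPy arrays (or array-likes) with one sample per row.

print("OCR train/test:", X_train_ocr.shape, X_test_ocr.shape)
print("Segmentator train/test:", X_train_cut.shape, X_test_cut.shape)

# Per-character binary datasets: number of samples in each split
for char in dataset.ALPHABET_ALL:
    print("{}: {} train / {} test".format(char, len(X_train_char[char]), len(X_test_char[char])))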

Setup and training of the nets

Cut classifier


In [3]:
path_cut_classifier = "checkpoints/letter_not_letter"

cut_class = ensemble_builder(2, 800, number_of_nets=2,
                             path=path_cut_classifier, nb_filters1=50, nb_filters2=100,
                             dense_layer_size1=250)

# cut_class.fit(X_train_cut, y_train_cut, X_test_cut, y_test_cut, forceRetrain=True)

OCR classifier


In [4]:
path_ocr_class = "checkpoints/09_22-classes"

ocr_classifier = ensemble_builder(22, 800, number_of_nets=2, path=path_ocr_class,
                                  nb_filters1=50, nb_filters2=100, dense_layer_size1=250)

# ocr_classifier.fit(X_train_ocr, y_train_ocr, X_test_ocr, y_test_ocr, forceRetrain=False)
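
The fit calls above are left commented out, presumably because trained weights already exist at the two checkpoint paths. For a quick check of the restored ensembles, something along these lines could be used; it is only a sketch, assuming predict returns one row of per-class probabilities per sample (as in the OCR output below) and that the test labels are one-hot encoded.

# Hypothetical accuracy check of the restored ensembles (assumes one-hot labels).
ocr_probs = ocr_classifier.predict(X_test_ocr)
print("OCR ensemble accuracy: {:.2%}".format(
    np.mean(np.argmax(ocr_probs, axis=1) == np.argmax(y_test_ocr, axis=1))))

cut_probs = cut_class.predict(X_test_cut)
print("Cut classifier accuracy: {:.2%}".format(
    np.mean(np.argmax(cut_probs, axis=1) == np.argmax(y_test_cut, axis=1))))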

Classification

Pipeline 2


In [5]:
pip2 = pipeline2(cut_class, ocr_classifier)

asseras_good_cuts = iu.open_many_samples(
                    ["not_code/words/good_cuts/asseras/a1.png",
                     "not_code/words/good_cuts/asseras/f1.png",
                     "not_code/words/good_cuts/asseras/f2.png",
                     "not_code/words/good_cuts/asseras/e.png",
                     "not_code/words/good_cuts/asseras/r.png",
                     "not_code/words/good_cuts/asseras/a2.png",
                     "not_code/words/good_cuts/asseras/s.png"])

prediction_pip2 = pip2.predict(asseras_good_cuts)

In [6]:
prediction_pip2


Out[6]:
[(True, array([('a',   9.99758005e+01), ('o',   1.13711780e-02),
         ('i',   7.22903933e-03)], 
        dtype=[('letters', '<U16'), ('grades', '<f8')])),
 (True,
  array([('s_alta',  96.79138064), ('l',   3.00127901), ('f',   0.20640711)], 
        dtype=[('letters', '<U16'), ('grades', '<f8')])),
 (False, []),
 (True, array([('e',  98.46054316), ('c',   1.17276963), ('o',   0.17953472)], 
        dtype=[('letters', '<U16'), ('grades', '<f8')])),
 (True, array([('r',  95.26131153), ('e',   3.89830396), ('i',   0.29342179)], 
        dtype=[('letters', '<U16'), ('grades', '<f8')])),
 (True, array([('a',   9.99343634e+01), ('o',   2.17619818e-02),
         ('i',   2.11800376e-02)], 
        dtype=[('letters', '<U16'), ('grades', '<f8')])),
 (True, array([('s_alta',   9.99579728e+01), ('f',   4.13175847e-02),
         ('l',   7.13807640e-04)], 
        dtype=[('letters', '<U16'), ('grades', '<f8')]))]
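
Each element of prediction_pip2 pairs a boolean with a top-3 ranking: the boolean appears to be the cut classifier's letter/not-letter verdict, and the ranking holds the OCR grades as percentages. The third cut (f2.png) is rejected and gets an empty ranking. A small, purely illustrative way to print this (cut_names is a hypothetical list mirroring the input files):

cut_names = ["a1", "f1", "f2", "e", "r", "a2", "s"]

for name, (is_letter, ranking) in zip(cut_names, prediction_pip2):
    if not is_letter:
        print("{}: rejected by the cut classifier".format(name))
    else:
        print("{}: {} ({:.2f}%)".format(name, ranking[0]['letters'], ranking[0]['grades']))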

OCR only


In [7]:
prediction_ocr = ocr_classifier.predict(asseras_good_cuts)

In [10]:
prediction_ocr


Out[10]:
array([[  9.99758005e-01,   8.25010318e-07,   7.22997555e-08,
          1.27235296e-07,   7.22903933e-05,   1.33472123e-14,
          6.79485890e-09,   1.13711780e-04,   8.05368472e-07,
          1.28696684e-07,   2.46105574e-05,   2.37562775e-10,
          8.82225066e-08,   6.82115653e-09,   9.23949894e-09,
          1.31897477e-12,   3.61448788e-11,   3.64067958e-08,
          4.31447278e-09,   1.28284536e-10,   2.94126148e-05,
          1.29770239e-09],
       [  2.80618909e-12,   2.19876980e-14,   2.45565625e-19,
          2.26161407e-12,   4.87990383e-06,   4.85573276e-21,
          9.10305729e-18,   5.44996245e-15,   2.00463431e-11,
          1.18415737e-12,   3.26492375e-11,   5.52152742e-15,
          5.33688653e-13,   9.67913806e-01,   3.48455160e-06,
          2.06407113e-03,   8.03345586e-07,   3.00127901e-02,
          5.42448438e-14,   3.09895581e-10,   9.27629154e-12,
          1.02551965e-07],
       [  1.24780253e-12,   8.26853499e-16,   2.19702440e-16,
          4.40592929e-14,   1.49518387e-09,   2.37754877e-22,
          2.93496000e-12,   1.15925234e-14,   2.74912537e-09,
          3.71334612e-16,   5.67781499e-13,   6.46906167e-16,
          2.17692592e-15,   4.66622472e-01,   3.09813186e-05,
          5.16422806e-05,   2.79413634e-05,   5.33203423e-01,
          1.80271502e-15,   5.41954432e-06,   3.30844182e-06,
          5.47717646e-05],
       [  1.97351328e-04,   1.17276963e-02,   4.43819488e-07,
          9.84605432e-01,   5.90862706e-04,   1.86609412e-08,
          4.01596225e-07,   1.79534720e-03,   1.84416655e-04,
          4.46717240e-06,   8.82566150e-04,   1.03798379e-06,
          1.15785133e-06,   2.10760891e-06,   1.21922639e-07,
          2.69693086e-08,   1.79202893e-08,   1.82679486e-07,
          2.94458630e-07,   1.21745160e-07,   3.19225677e-08,
          5.97949338e-06],
       [  2.40467815e-03,   3.87310924e-04,   1.41676992e-07,
          3.89830396e-02,   2.93421792e-03,   1.82639270e-09,
          2.67756914e-05,   1.09157227e-04,   9.52613115e-01,
          1.99650259e-07,   1.72440356e-04,   2.35180929e-03,
          3.08441798e-07,   8.88067575e-08,   5.79647804e-06,
          1.98462180e-09,   2.68912203e-07,   5.95310121e-06,
          6.30195771e-08,   9.97321536e-07,   5.64769778e-08,
          3.69100144e-06],
       [  9.99343634e-01,   1.66848821e-07,   1.30660374e-06,
          1.56745937e-05,   2.11800376e-04,   1.10034104e-09,
          5.28247183e-05,   2.17619818e-04,   1.10810970e-06,
          2.22251265e-06,   1.09689550e-04,   2.88280535e-05,
          4.19408991e-07,   2.22359901e-07,   4.44418674e-06,
          6.64126754e-09,   7.61007044e-08,   4.61437793e-07,
          1.60214270e-07,   2.96649816e-08,   8.40677512e-06,
          8.55761868e-07],
       [  4.47273928e-18,   1.03764524e-18,   2.32555670e-21,
          1.70375189e-14,   1.42360134e-11,   4.61558837e-28,
          1.11592986e-21,   6.70414118e-19,   3.85800807e-15,
          2.60021009e-15,   1.40887153e-13,   7.56701949e-23,
          1.60847952e-20,   9.99579728e-01,   1.60868964e-12,
          4.13175847e-04,   4.49734271e-13,   7.13807640e-06,
          2.20958057e-17,   6.33302369e-11,   1.10816606e-16,
          1.07725417e-09]], dtype=float32)

Obtaining the ranking


In [8]:
prediction = []

for i, _ in enumerate(asseras_good_cuts):
    # Indexes of the three highest OCR probabilities for the i-th cut
    sorted_indexes = (-prediction_ocr[i]).argsort()[:3]
    # Pair each class label with its grade expressed as a percentage
    ranking = [(dataset.ALPHABET_ALL[j], prediction_ocr[i][j]*100) for j in sorted_indexes]
    dt = np.dtype([('letters', np.str_, 16), ('grades', np.float64)])
    ranking = np.array(ranking, dtype=dt)
    prediction.append((True, ranking))

In [9]:
prediction


Out[9]:
[(True, array([('a',   9.99758005e+01), ('o',   1.13711780e-02),
         ('i',   7.22903933e-03)], 
        dtype=[('letters', '<U16'), ('grades', '<f8')])),
 (True,
  array([('s_alta',  96.79138064), ('l',   3.00127901), ('f',   0.20640711)], 
        dtype=[('letters', '<U16'), ('grades', '<f8')])),
 (True, array([('l',   5.33203423e+01), ('s_alta',   4.66622472e+01),
         ('s_bassa',   5.47717646e-03)], 
        dtype=[('letters', '<U16'), ('grades', '<f8')])),
 (True, array([('e',  98.46054316), ('c',   1.17276963), ('o',   0.17953472)], 
        dtype=[('letters', '<U16'), ('grades', '<f8')])),
 (True, array([('r',  95.26131153), ('e',   3.89830396), ('i',   0.29342179)], 
        dtype=[('letters', '<U16'), ('grades', '<f8')])),
 (True, array([('a',   9.99343634e+01), ('o',   2.17619818e-02),
         ('i',   2.11800376e-02)], 
        dtype=[('letters', '<U16'), ('grades', '<f8')])),
 (True, array([('s_alta',   9.99579728e+01), ('f',   4.13175847e-02),
         ('l',   7.13807640e-04)], 
        dtype=[('letters', '<U16'), ('grades', '<f8')]))]
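
The ranking built directly from the OCR grades matches pipeline2's output on every cut except the one pipeline2 rejected, where the OCR-only ranking still proposes 'l' at roughly 53%. A hedged sketch (not part of the original run) for comparing the top-1 results side by side:

for idx, ((accepted, rank_pip2), (_, rank_ocr)) in enumerate(zip(prediction_pip2, prediction)):
    pip2_top = rank_pip2[0]['letters'] if accepted else "rejected"
    print("cut {}: pipeline2 -> {:10s} OCR only -> {}".format(idx, pip2_top, rank_ocr[0]['letters']))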