In [ ]:
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import cv2
import time
import math
from collections import Counter
import unidecode
from abc import ABC, abstractmethod
# Import Widgets
from ipywidgets import Button, Text, HBox, VBox
from IPython.display import display, clear_output
sys.path.append('../src')
from ocr import characters
from ocr.normalization import word_normalization, letter_normalization
# Helpers
from ocr.helpers import implt, resize, img_extend
from ocr.datahelpers import load_words_data, idx2char
from ocr.tfhelpers import Model
from ocr.viz import print_progress_bar
In [5]:
# Language code inserted into the model and dictionary paths used by this notebook.
# ONLY 'en' is supported right now
LANG = 'en'
In [ ]:
# Load the trained classifiers for the selected language.
# The optional second argument is passed to Model unchanged; presumably it
# names the output operation to evaluate — TODO confirm against ocr.tfhelpers.
charClass_1 = Model('../models/char-clas/' + LANG + '/CharClassifier')
wordClass = Model('../models/word-clas/' + LANG + '/WordClassifier2', 'prediction_infer')
wordClass2 = Model('../models/word-clas/' + LANG + '/SeqRNN/Classifier', 'word_prediction') # None
wordClass3 = Model('../models/word-clas/' + LANG + '/CTC/Classifier2', 'word_prediction')
In [4]:
# Read the labelled test words, then normalise every image in place
# (target height 60, no border, tilt and hysteresis normalisation enabled).
images, labels = load_words_data('../data/sets/test.csv', is_csv=True)
for i, image in enumerate(images):
    print_progress_bar(i, len(images))
    rgb_image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    images[i] = word_normalization(rgb_image,
                                   60,
                                   border=False,
                                   tilt=True,
                                   hystNorm=True)

# English labels are transliterated to plain ASCII.
if LANG == 'en':
    for i, label in enumerate(labels):
        labels[i] = unidecode.unidecode(label)

print()
print('Number of chars:', sum(map(len, labels)))
In [5]:
# Load Words
# Word-frequency dictionary for the spell corrector. Each line of the file is
# expected to hold "<word> <count>" separated by a single space.
WORDS = {}
with open('../data/dictionaries/' + LANG + '_50k.txt') as f:
    for line in f:
        # Tolerate blank lines (e.g. a trailing newline at the end of the file).
        if not line.strip():
            continue
        entry, count = line.split(" ")[:2]
        if LANG == 'en':
            # English entries are transliterated to ASCII, like the labels.
            entry = unidecode.unidecode(entry)
        # Plain assignment: if two entries collapse to the same key,
        # the later line wins.
        WORDS[entry] = int(count)
# A Counter returns 0 for unknown words instead of raising KeyError.
WORDS = Counter(WORDS)
def P(word, N=sum(WORDS.values())):
    """Relative frequency of `word` in the WORDS dictionary.

    `N` defaults to the total of all counts, computed once when this
    function is defined. WORDS is a Counter, so unknown words yield 0.
    """
    occurrences = WORDS[word]
    return occurrences / N
def correction(word):
    """Return `word` unchanged if it is in WORDS, else its most probable candidate."""
    if word not in WORDS:
        # Rank every candidate spelling by dictionary frequency.
        return max(candidates(word), key=P)
    return word
def candidates(word):
    """Return the closest non-empty group of known spellings for `word`.

    Groups are tried in order: the word itself, known words one edit away,
    known words two edits away. If none match, `[word]` is returned.
    Later (more expensive) groups are only computed when earlier ones are empty.
    """
    exact = known([word])
    if exact:
        return exact

    one_edit = known(edits1(word))
    if one_edit:
        return one_edit

    two_edits = known(edits2(word))
    if two_edits:
        return two_edits

    return [word]
def known(words):
    """Return the set of items from `words` that are keys of WORDS."""
    return {candidate for candidate in words if candidate in WORDS}
def edits1(word):
    """Return the set of strings one edit away from `word`.

    An edit is a single-character deletion, a swap of two adjacent
    characters, a single-character replacement, or a single-character
    insertion. The alphabet depends on the global LANG.
    """
    alphabet = ('aábcčdďeéěfghiíjklmnňoópqrřsštťuúůvwxyýzž'
                if LANG == 'cz'
                else 'abcdefghijklmnopqrstuvwxyz')

    variants = set()
    for cut in range(len(word) + 1):
        head, tail = word[:cut], word[cut:]

        # Insertions are possible at every cut, including the very end.
        for letter in alphabet:
            variants.add(head + letter + tail)

        if not tail:
            continue

        # Deletion and replacement of the first character after the cut.
        variants.add(head + tail[1:])
        for letter in alphabet:
            variants.add(head + letter + tail[1:])

        # Transposition needs two characters after the cut.
        if len(tail) > 1:
            variants.add(head + tail[1] + tail[0] + tail[2:])
    return variants
def edits2(word):
    """Return a generator over all strings two edits away from `word`.

    The first round of edits is computed immediately; the second round is
    produced lazily as the generator is consumed. Duplicates are possible.
    """
    first_round = edits1(word)
    return (twice for once in first_round for twice in edits1(once))
In [6]:
def cer(r, h):
    """
    Compute the character-level edit distance between two strings.

    The returned value is the minimum number of insertions, deletions and
    substitutions needed to turn `r` into `h`. It is a raw count, not a
    rate; callers divide by the reference length themselves.

    Args:
        r: Reference string.
        h: Hypothesis string.

    Returns:
        The edit distance as a plain Python int, so it can be summed over
        many words without fixed-width integer overflow.
    """
    r = list(r)
    h = list(h)

    # Only the previous row of the distance table is needed at any time.
    # previous[j] is the distance between the first i-1 characters of r
    # and the first j characters of h.
    previous = list(range(len(h) + 1))
    for i, r_char in enumerate(r, start=1):
        # Turning i reference characters into an empty string costs i deletions.
        current = [i]
        for j, h_char in enumerate(h, start=1):
            if r_char == h_char:
                current.append(previous[j - 1])
            else:
                substitution = previous[j - 1]
                insertion = current[j - 1]
                deletion = previous[j]
                current.append(1 + min(substitution, insertion, deletion))
        previous = current
    return previous[-1]
In [7]:
class Cycler(ABC):
    """Abstract evaluator of a word recogniser on a labelled image set.

    Subclasses implement `recogniseWord`. Creating an instance stores the
    configuration and immediately runs `evaluate`, which prints the results.
    """

    def __init__(self,
                 images,
                 labels,
                 charClass,
                 stats="No Stats Provided",
                 slider=(60, 15),
                 ctc=False,
                 seq2seq=False,
                 charRNN=False):
        """Store the evaluation set and options, then run the evaluation.

        Args:
            images: Sequence of word images.
            labels: Sequence of ground-truth strings, parallel to `images`.
            charClass: Model object used by `recogniseWord`.
            stats: Title printed at the top of the report.
            slider: Pair of window dimensions; subclasses decide how to use it.
            ctc: Flag read by subclasses to select the recognition path.
            seq2seq: Flag read by subclasses to select the recognition path.
            charRNN: Flag read by subclasses to select the recognition path.
        """
        self.images = images
        self.labels = labels
        self.charClass = charClass
        self.slider = slider
        self.totalChars = sum(len(label) for label in labels)
        self.ctc = ctc
        self.seq2seq = seq2seq
        self.charRNN = charRNN
        self.stats = stats

        self.evaluate()

    @abstractmethod
    def recogniseWord(self, img):
        """Return the recognised text for a single word image."""

    @staticmethod
    def _percent(part, whole):
        """Return `part / whole` as a percentage, or NaN when `whole` is zero."""
        if not whole:
            return float('nan')
        return part / whole * 100

    def countCorrect(self, pred, label, lower=False):
        """Count positions at which `pred` and `label` hold the same character.

        Only the overlapping prefix is compared. With `lower=True` the label
        is lower-cased first (the prediction is compared as given).
        """
        target = label.lower() if lower else label
        return sum(1 for p, t in zip(pred, target) if p == t)

    def evaluate(self):
        """Recognise every image once and print timing and accuracy figures.

        Prints the raw letter/word accuracy, the same figures after
        dictionary correction, and an accuracy derived from the summed edit
        distance. Percentages are reported as NaN when there is no data.
        """
        print()
        print("STATS:", self.stats)
        total_words = len(self.images)
        if total_words:
            # Show one sample; index 1 is used whenever it exists.
            sample = min(1, total_words - 1)
            print(self.labels[sample], ':',
                  self.recogniseWord(self.images[sample]))

        # Time the recognition pass and keep its output, so the scoring
        # below does not have to run the model a second time.
        start_time = time.time()
        words = [self.recogniseWord(img) for img in self.images]
        print("--- %s seconds ---" % round(time.time() - start_time, 2))

        ccer = 0
        correctLetters = 0
        correctWords = 0
        correctWordsCorrection = 0
        correctLettersCorrection = 0
        for i, word in enumerate(words):
            label = self.labels[i]
            # Correction works only for lower letters; it is costly, so it is
            # computed once per word and reused for both metrics.
            corrected = correction(word.lower())

            correctLetters += self.countCorrect(word, label)
            correctLettersCorrection += self.countCorrect(corrected,
                                                          label,
                                                          lower=True)
            # Force a Python int so the running total cannot wrap around if
            # cer returns a fixed-width integer type.
            ccer += int(cer(word, label))

            # Words accuracy
            if word == label:
                correctWords += 1
            if corrected == label.lower():
                correctWordsCorrection += 1

        total_chars = self.totalChars
        print("Correct/Total: %s / %s" % (correctLetters, total_chars))
        print("CERacc: %s %%" % round(100 - self._percent(ccer, total_chars), 4))
        print("Letter Accuracy: %s %%" % round(self._percent(correctLetters, total_chars), 4))
        print("Letter Accuracy with Correction: %s %%" % round(self._percent(correctLettersCorrection, total_chars), 4))
        print("Word Accuracy: %s %%" % round(self._percent(correctWords, total_words), 4))
        print("Word Accuracy with Correction: %s %%" % round(self._percent(correctWordsCorrection, total_words), 4))
In [8]:
class WordCycler(Cycler):
    """ Cycle through the words and recognise them """

    def _sliceWindows(self, img, length, stride):
        """Cut `length` windows out of `img`, `stride` columns apart.

        Each window is `self.slider[1]` columns wide and is flattened; the
        flattened size must equal `slider[0] * slider[1]`. The result has
        shape (length, 1, slider[0] * slider[1]) and dtype float32.
        """
        height, width = self.slider[0], self.slider[1]
        input_seq = np.zeros((1, length, height * width), dtype=np.float32)
        input_seq[0][:] = [img[:, loc * stride: loc * stride + width].flatten()
                           for loc in range(length)]
        return input_seq.swapaxes(0, 1)

    def recogniseWord(self, img):
        """Recognise one word image with a sliding-window sequence model.

        Depending on `self.ctc` / `self.seq2seq` a different feed dictionary
        is passed to `self.charClass.eval_feed`; the first element of its
        result is decoded index by index with `idx2char`.

        Returns:
            The recognised word as a string.
        """
        slider = self.slider

        if self.ctc:
            step = 10    # 10 for (60, 60) slider
            pad = slider[1] // 2
            img = cv2.copyMakeBorder(
                img,
                0, 0, pad, pad,
                cv2.BORDER_CONSTANT,
                value=[0, 0, 0])
            # Round the width up to a multiple of `step`, and keep it wide
            # enough for at least one window.
            img = img_extend(
                img,
                (img.shape[0],
                 max(-(-img.shape[1] // step) * step, slider[1] + step)))
            length = (img.shape[1] - slider[1]) // step
            input_seq = self._sliceWindows(img, length, step)

            pred = self.charClass.eval_feed({'inputs:0': input_seq,
                                             'inputs_length:0': [length],
                                             'keep_prob:0': 1})[0]
            # NOTE(review): an earlier `if word == 0 and i != 0: break` test
            # compared the accumulated string with an int and so never fired;
            # it is dropped as unreachable. If decoding should stop at a
            # special index, compare `i` instead — confirm the index meaning.
            return ''.join(idx2char(i) for i in pred)

        # Non-overlapping windows: the stride equals the window width.
        length = img.shape[1] // slider[1]
        input_seq = self._sliceWindows(img, length, slider[1])

        if self.seq2seq:
            targets = np.zeros((1, 1), dtype=np.int32)
            pred = self.charClass.eval_feed({'encoder_inputs:0': input_seq,
                                             'encoder_inputs_length:0': [length],
                                             'decoder_targets:0': targets,
                                             'keep_prob:0': 1})[0]
        else:
            targets = np.zeros((1, 1, 4096), dtype=np.int32)
            pred = self.charClass.eval_feed({'encoder_inputs:0': input_seq,
                                             'encoder_inputs_length:0': [length],
                                             'letter_targets:0': targets,
                                             'is_training:0': False,
                                             'keep_prob:0': 1})[0]
        # NOTE(review): an earlier `if word == 1: break` test compared the
        # accumulated string with an int and so never fired; it is dropped as
        # unreachable. If index 1 marks the end of the sequence, the check
        # should be on `i` — confirm against idx2char before enabling it.
        return ''.join(idx2char(i, True) for i in pred)
In [9]:
class CharCycler(Cycler):
    """ Recognise words by splitting them into characters and classifying each one """

    def recogniseWord(self, img):
        """Segment `img` into characters, classify them and join the result.

        The image is padded by 30 columns on both sides before segmentation.
        Segments whose reported dimensions are not larger than 4 in both
        directions are skipped. Returns '' when no segment is kept.
        """
        padded = cv2.copyMakeBorder(img,
                                    0, 0, 30, 30,
                                    cv2.BORDER_CONSTANT,
                                    value=[0, 0, 0])
        gaps = characters.segment(padded, RNN=True)

        kept = []
        # Consecutive gap positions delimit one character each.
        for left, right in zip(gaps[:-1], gaps[1:]):
            segment = padded[:, left:right]
            # TODO None type error after threshold
            glyph, dim = letter_normalization(segment, is_thresh=True, dim=True)
            # TODO Test different values
            if dim[0] > 4 and dim[1] > 4:
                kept.append(glyph.flatten())
        kept = np.array(kept)

        if len(kept) == 0:
            return ''

        if self.charRNN:
            pred = self.charClass.eval_feed({'inputs:0': [kept],
                                             'length:0': [len(kept)],
                                             'keep_prob:0': 1})[0]
        else:
            pred = self.charClass.run(kept)
        return ''.join(idx2char(c) for c in pred)
In [10]:
# Class cycling through words
# Each constructor call below runs a full evaluation and prints its report.
# Sequence-to-sequence word classifier, window 60 x 2.
WordCycler(images,
labels,
wordClass,
stats='Seq2Seq',
slider=(60, 2),
seq2seq=True)
# Second sequence model; neither ctc nor seq2seq is set, so the default
# recognition path of WordCycler is used.
WordCycler(images,
labels,
wordClass2,
stats='Seq2SeqX',
slider=(60, 2))
# CTC word classifier, window 60 x 60.
WordCycler(images,
labels,
wordClass3,
stats='CTC',
slider=(60, 60),
ctc=True)
# Character-level pipeline: segmentation followed by per-character classification.
CharCycler(images,
labels,
charClass_1,
stats='Bi-RNN and CNN',
charRNN=False)
# Disabled runs: charClass_2 / charClass_3 are not loaded in this notebook.
# Cycler(images,
# labels,
# charClass_2,
# charRNN=True)
# Cycler(images,
# labels,
# charClass_3,
# charRNN=True)
Out[10]: