In [2]:
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import cv2
import ipywidgets as widgets
from IPython.display import display, clear_output
from operator import itemgetter
import unidecode
sys.path.append('src')
from ocr import page, words
from ocr.normalization import word_normalization, letter_normalization
from ocr.tfhelpers import Model
from ocr.datahelpers import idx2char
from ocr.helpers import implt, resize
plt.rcParams['figure.figsize'] = (12, 9)
In [3]:
# Language code: selects both the character-classifier model directory
# and the dictionary file that are loaded further down.
LANG = "en"
# Name of the page photo (without extension) read from data/pages/.
IMG = 'text1'
# Number of character classes; every class index is mapped to its
# character with idx2char when the per-letter score table is built.
CLASS = 53
In [4]:
# Load the page photo selected by IMG.
image_path = "data/pages/%s.jpg" % IMG
image_bgr = cv2.imread(image_path)
# cv2.imread signals a missing/unreadable file by returning None instead of
# raising, so fail here with a clear message rather than inside cvtColor.
if image_bgr is None:
    raise FileNotFoundError("Could not read image: %s" % image_path)
# OpenCV decodes images as BGR; convert to RGB for processing and display.
image = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
implt(image)
In [5]:
# Run page detection on the loaded photo and display the result.
# NOTE(review): page.detection presumably returns the image cropped to the
# page area — confirm in ocr.page.
crop = page.detection(image)
implt(crop)
# Word bounding boxes found in the cropped page; Cycler later unpacks each
# box as (x1, y1, x2, y2).
bBoxes = words.detection(crop)
In [6]:
# Character classifier for the configured language; 'y_conv' is the name of
# the operation passed to the Model wrapper.
char_classifier = Model(
    'models/char-clas/%s/CharClassifier' % LANG,
    operation='y_conv')
print("Successfully loaded.")
In [7]:
# Build the dictionary used to validate recognised words. The first
# whitespace-separated token of every line is taken as the word.
WORDS = []
# Explicit encoding: without it open() falls back to the platform default,
# which breaks non-ASCII dictionaries on systems where that is not UTF-8.
# NOTE(review): assumes the dictionary files are UTF-8 encoded — confirm.
with open('data/dictionaries/' + LANG + '_50k.txt', encoding='utf-8') as f:
    for line in f:
        tokens = line.split()
        if not tokens:
            # Blank line: nothing to add (the old code stored "\n" here).
            continue
        word = tokens[0]
        # English entries are transliterated to plain ASCII with unidecode;
        # other languages keep their original characters.
        WORDS.append(unidecode.unidecode(word) if LANG == 'en' else word)
In [8]:
# Sanity check: show the first ten entries of the loaded dictionary.
print(WORDS[:10])
In [9]:
class Cycler:
    """Cycle through word bounding boxes and split each word into letters.

    For every box the word image is cut out of the page, normalized and
    searched recursively: starting at the left edge, the character
    classifier is run on crops of growing width, and the most confident
    letters become branching candidates for the rest of the word. Every
    complete candidate that is found in ``WORDS`` is printed.

    Relies on names defined elsewhere in the notebook: ``WORDS``, ``CLASS``,
    ``char_classifier``, ``idx2char``, ``letter_normalization``,
    ``word_normalization``, ``resize``, ``implt``, ``widgets``, ``display``
    and ``clear_output``.
    """
    height = 60         # Height (px) every word image is resized to
    step = 2            # Growth (px) of the crop width between classifier runs
    offset = 6          # Narrowest crop width (px) tried for one letter
    max_width = 120     # Widest crop width (px) tried for one letter
    min_prob = 0.7      # A letter must score above this to be followed
    candidates = 5      # Number of best letters returned by findMax
    min_rest = 10       # Remaining width (px) below which the word is complete

    def __init__(self, image, boxes, idx):
        """Store the page and its boxes, then show the word at ``idx``.

        Args:
            image: Whole page image the boxes refer to.
            boxes: Sequence of bounding boxes, each ``(x1, y1, x2, y2)``.
            idx: Index of the first bounding box to process.
        """
        self.boxes = boxes      # Array of bounding boxes
        self.image = image      # Whole image
        self.index = idx        # Index of current bounding box
        self.nextImg()

    def separateWord(self, img):
        """Display a normalized word image and print its dictionary matches.

        Args:
            img: Normalized word image; its second axis is scanned from
                left to right.
        """
        implt(img, 'gray')
        self.img = img
        self.run = True         # Clearing this flag stops the recursion
        self.recWordFind('', 0)

    def recWordFind(self, word, x):
        """Recursively extend ``word`` with letters recognised from ``x`` on.

        While more than ``min_rest`` pixels remain to the right of ``x``,
        every candidate letter scoring above ``min_prob`` is appended and
        the search continues after that letter. Upper-case letters are only
        followed at the very start of the word. Once the end of the image
        is reached, the word is printed if its lower-cased form is in
        ``WORDS``.

        Args:
            word: Letters recognised so far.
            x: Horizontal position (px) where the next letter starts.
        """
        if not self.run:
            return

        if x + self.min_rest < self.img.shape[1]:
            for letter, (prob, width) in self.findMax(x):
                if prob > self.min_prob and (letter.islower() or x == 0):
                    self.recWordFind(word + letter, x + width)
        elif word.lower() in WORDS:
            print(word)

    def findMax(self, x):
        """Find the most probable letters starting at position ``x``.

        Crops of increasing width (from ``offset`` up to ``max_width``, in
        increments of ``step``) are classified; for each letter the highest
        score and the crop width that produced it are remembered.

        Args:
            x: Horizontal position (px) of the left edge of the crops.

        Returns:
            List of up to ``candidates`` items ``(letter, [score, width])``
            sorted from the best score down. Letters that were never
            predicted keep the entry ``[0, 0]``.
        """
        best = {idx2char(i): [0, 0] for i in range(CLASS)}
        img_width = self.img.shape[1]
        width = self.offset

        while x + width <= img_width and width <= self.max_width:
            crop = self.img[:, x:x + width]
            char, dim = letter_normalization(crop, is_thresh=True, dim=True)

            # Skip crops whose normalized content is too small to classify.
            if dim[0] > 4 and dim[1] > 4:
                values = char_classifier.run([char.flatten()])
                probs = self.softmax(values)[0]
                letter = idx2char(np.argmax(probs))
                prob = float(np.max(probs))
                # On a tie the wider crop wins, as it did originally.
                if prob >= best[letter][0]:
                    best[letter] = [prob, width]

            width += self.step

        ranked = sorted(best.items(), key=itemgetter(1), reverse=True)
        return ranked[:self.candidates]

    def softmax(self, x):
        """Compute the softmax of every row of ``x``.

        Args:
            x: 2-D array-like of scores, one row per sample.

        Returns:
            Array of the same shape whose rows each sum to 1.
        """
        x = np.asarray(x)
        # Subtracting the row maximum leaves the result unchanged but keeps
        # np.exp from overflowing on large scores.
        exps = np.exp(x - np.max(x, axis=1, keepdims=True))
        # keepdims keeps the row sums as a column, so the division
        # broadcasts per row for any number of rows.
        return exps / np.sum(exps, axis=1, keepdims=True)

    def nextImg(self, btn=None):
        """Show and process the next word image.

        Args:
            btn: Button widget passed by the click callback; unused.

        Returns:
            0 when a word was processed, -1 when no boxes are left.
        """
        clear_output()
        if self.index >= len(self.boxes):
            print("END")
            return -1

        x1, y1, x2, y2 = self.boxes[self.index]
        # Cutting out the word image and resizing to standard height
        img = resize(self.image[y1:y2, x1:x2], self.height, True)
        implt(img, t='Original')

        self.separateWord(
            word_normalization(
                img,
                self.height,
                border=False,
                tilt=True,
                hyst_norm=True))

        # Printing index for recovery
        print("Index: " + str(self.index))

        # Create button for cycling through images
        bNexi = widgets.Button(description="Next Image")
        bNexi.on_click(self.nextImg)
        display(bNexi)

        self.index += 1
        return 0
In [10]:
# Index of the bounding box to start from. Cycler prints "Index: N" for every
# word it shows, so an interrupted session can be resumed by putting N here.
LAST_INDEX = 10
# Constructing the Cycler immediately displays and processes that word.
cycler = Cycler(crop, bBoxes, LAST_INDEX)