# Word Detection

Finding bounding boxes of words

### TODO

``````- Percentage of intersection, fully intesecting rectangles...

``````

## Import Packages

``````

In [2]:

import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2

sys.path.append('../src')
from ocr.helpers import implt, resize, ratio
from ocr import page

%matplotlib inline
plt.rcParams['figure.figsize'] = (15.0, 10.0)

``````

### Global Variables

``````

In [3]:

IMG = "page09"    # Image name/number

``````

# Finding the text areas and words

``````

In [4]:

image = cv2.cvtColor(cv2.imread("../data/pages/%s.jpg" % IMG), cv2.COLOR_BGR2RGB)
image = page.detection(image)
img = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
implt(img, 'gray')

``````
``````

``````
``````

In [5]:

def sobel(channel):
""" The Sobel Operator"""
sobelX = cv2.Sobel(channel, cv2.CV_16S, 1, 0)
sobelY = cv2.Sobel(channel, cv2.CV_16S, 0, 1)
# Combine x, y gradient magnitudes sqrt(x^2 + y^2)
sobel = np.hypot(sobelX, sobelY)
sobel[sobel > 255] = 255
return np.uint8(sobel)

def edge_detect(im):
"""
Edge detection
The Sobel operator is applied for each image layer (RGB)
"""
return np.max(np.array([sobel(im[:,:, 0]), sobel(im[:,:, 1]), sobel(im[:,:, 2]) ]), axis=0)

# Image pre-processing - blur, edges, threshold, closing
blurred = cv2.GaussianBlur(image, (5, 5), 18)
edges = edge_detect(blurred)
ret, edges = cv2.threshold(edges, 50, 255, cv2.THRESH_BINARY)
bw_image = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, np.ones((20,20), np.uint8))

implt(edges, 'gray', 'Sobel operator')
implt(bw_image, 'gray', 'Final closing')

``````
``````

``````
``````

In [6]:

## NOT IN USE, NEED MORE WORK ##
def del_lines(gray):
""" Delete page lines """
linek = np.ones((1,11),np.uint8)
x = cv2.morphologyEx(gray, cv2.MORPH_OPEN, linek ,iterations=1)
i = gray-x
closing = cv2.morphologyEx(dil, cv2.MORPH_CLOSE, np.ones((17,17), np.uint8))
implt(closing, 'gray', 'Del Lines')
return closing

def del_big_areas(img):
""" Find and remove contours too big for a word """
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
# ret, gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
implt(gray, 'gray')

gray2 = gray.copy()

im2, contours, hierarchy = cv2.findContours(gray, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

for cnt in contours:
if (200 < cv2.contourArea(cnt) < 5000):
cv2.drawContours(img,[cnt],0,(0,255,0),2)

implt(img)

``````

## Word Detection

``````

In [7]:

def union(a,b):
x = min(a[0], b[0])
y = min(a[1], b[1])
w = max(a[0]+a[2], b[0]+b[2]) - x
h = max(a[1]+a[3], b[1]+b[3]) - y
return [x, y, w, h]

def intersect(a,b):
x = max(a[0], b[0])
y = max(a[1], b[1])
w = min(a[0]+a[2], b[0]+b[2]) - x
h = min(a[1]+a[3], b[1]+b[3]) - y
if w<0 or h<0:
return False
return True

def group_rectangles(rec):
"""
Uion intersecting rectangles
Args:
rec - list of rectangles in form [x, y, w, h]
Return:
list of grouped ractangles
"""
tested = [False for i in range(len(rec))]
final = []
i = 0
while i < len(rec):
if not tested[i]:
j = i+1
while j < len(rec):
if not tested[j] and intersect(rec[i], rec[j]):
rec[i] = union(rec[i], rec[j])
tested[j] = True
j = i
j += 1
final += [rec[i]]
i += 1

return final

``````
``````

In [8]:

def text_detect(img, original):
""" Text detection using contours """
# Resize image
small = resize(img, 2000)
image = resize(original, 2000)

# Finding contours
im2, cnt, hierarchy = cv2.findContours(np.copy(small), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)

implt(img, 'gray')

# Variables for contour index and words' bounding boxes
index = 0
boxes = []
# CCOMP hierarchy: [Next, Previous, First Child, Parent]
# cv2.RETR_CCOMP - contours into 2 levels
# Go through all contours in first level
while (index >= 0):
x,y,w,h = cv2.boundingRect(cnt[index])
# Get only the contour
cv2.drawContours(mask, cnt, index, (255, 255, 255), cv2.FILLED)
# Ratio of white pixels to area of bounding rectangle
r = cv2.countNonZero(maskROI) / (w * h)

# Limits for text (white pixel ratio, width, height)
# TODO Test h/w and w/h ratios
if r > 0.1 and 2000 > w > 10 and 1600 > h > 10 and h/w < 3 and w/h < 10:
boxes += [[x, y, w, h]]

# Index of next contour
index = hierarchy[0][index][0]

# Group intersecting rectangles
boxes = group_rectangles(boxes)
bounding_boxes = np.array([0,0,0,0])
for (x, y, w, h) in boxes:
cv2.rectangle(image, (x, y),(x+w,y+h), (0, 255, 0), 8)
bounding_boxes = np.vstack((bounding_boxes, np.array([x, y, x+w, y+h])))

implt(image, t='Bounding rectangles')

# Recalculate coordinates to original scale
boxes = bounding_boxes.dot(ratio(image, small.shape[0])).astype(np.int64)
return boxes[1:]

``````
``````

In [9]:

# Get text bounding boxes
boxes = text_detect(bw_image, image)
print("Number of boxes:", len(boxes))

``````
``````

Number of boxes: 4

``````

## Watershed Algorithm

``````

In [10]:

def textDetectWatershed(thresh, original):
""" Text detection using watershed algorithm """
# According to: http://docs.opencv.org/trunk/d3/db4/tutorial_py_watershed.html
img = resize(original, 3000)
thresh = resize(thresh, 3000)
# noise removal
kernel = np.ones((3,3),np.uint8)
opening = cv2.morphologyEx(thresh,cv2.MORPH_OPEN,kernel, iterations = 3)

# sure background area
sure_bg = cv2.dilate(opening,kernel,iterations=3)

# Finding sure foreground area
dist_transform = cv2.distanceTransform(opening,cv2.DIST_L2,5)
ret, sure_fg = cv2.threshold(dist_transform,0.01*dist_transform.max(),255,0)

# Finding unknown region
sure_fg = np.uint8(sure_fg)
unknown = cv2.subtract(sure_bg,sure_fg)

# Marker labelling
ret, markers = cv2.connectedComponents(sure_fg)

# Add one to all labels so that sure background is not 0, but 1
markers += 1

# Now, mark the region of unknown with zero
markers[unknown == 255] = 0

markers = cv2.watershed(img, markers)
implt(markers, t='Markers')
image = img.copy()
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Creating result array
boxes = []
for mark in np.unique(markers):
# mark == 0 --> background
if mark == 0:
continue

# Draw it on mask and detect biggest contour

c = max(cnts, key=cv2.contourArea)

# Draw a bounding rectangle if it contains text
x,y,w,h = cv2.boundingRect(c)
cv2.drawContours(mask, c, 0, (255, 255, 255), cv2.FILLED)
# Ratio of white pixels to area of bounding rectangle
r = cv2.countNonZero(maskROI) / (w * h)

# Limits for text
if r > 0.1 and 2000 > w > 15 and 1500 > h > 15:
boxes += [[x, y, w, h]]

# Group intersecting rectangles
boxes = group_rectangles(boxes)
bounding_boxes = np.array([0,0,0,0])
for (x, y, w, h) in boxes:
cv2.rectangle(image, (x, y),(x+w,y+h), (0, 255, 0), 8)
bounding_boxes = np.vstack((bounding_boxes, np.array([x, y, x+w, y+h])))

implt(image)

# Recalculate coordinates to original size
boxes = bounding_boxes.dot(ratio(original, img.shape[0])).astype(np.int64)
return boxes[1:]

``````
``````

In [11]:

# Testing different alg - Watershed
boxes = textDetectWatershed(bw_image, image)
print("Number of boxes:", len(boxes))

``````
``````

``````