In [ ]:
%load_ext autoreload
%autoreload 1
%aimport TableRecognition
import cv2
from matplotlib import pyplot as plt
import numpy as np
import time as t
import cv_algorithms
import collections
import operator
import scipy.signal
import scipy.spatial.distance
import TableRecognition
from UliEngineering.SignalProcessing.Selection import *
# Report which OpenCV build this kernel is using.
print("OpenCV Version : %s " % (cv2.__version__,))

Configuration


In [ ]:
# Input image for the whole notebook; the path is relative to the kernel's
# working directory and is read with cv2.imread in the preprocessing cell.
# Run './download-oldweather.py Northwind 1947' to get the files
filename = "Northwind/Northwind-WAG-282-1947-0063-0.jpg"

Image preprocessing


In [ ]:
%matplotlib inline
img = cv2.imread(filename, flags=cv2.IMREAD_COLOR)
if img is None:
    raise ValueError("File {0} does not exist".format(filename))
imgGrey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
imgThresh = cv2.threshold(imgGrey, 150, 255, cv2.THRESH_BINARY_INV)[1]
imgThreshInv = cv2.threshold(imgGrey, 150, 255, cv2.THRESH_BINARY)[1]

imgDil = cv2.dilate(imgThresh, np.ones((5, 5), np.uint8))
imgEro = cv2.erode(imgDil, np.ones((4, 4), np.uint8))

plt.gcf().set_size_inches(20,18)
plt.imshow(imgDil, cmap="Greys_r")

In [ ]:
# Run the contour analysis on the dilated binary image and draw the result on
# a copy of the page, so that `img` itself stays unmodified.
# NOTE(review): what each step does is only inferable from the TableRecognition
# method names; the calls mutate contour_analyzer, so their order is presumably
# significant - confirm against that module before reordering.
ix = img.copy()
contour_analyzer = TableRecognition.ContourAnalyzer(imgDil)
contour_analyzer.filter_contours(min_area=400)
contour_analyzer.build_graph()
contour_analyzer.remove_non_table_nodes()
contour_analyzer.compute_contour_bounding_boxes()
contour_analyzer.separate_supernode()
# Prints whatever does_page_have_valid_table() returns for this page.
print(contour_analyzer.does_page_have_valid_table())
# Uses the non-inverted threshold image (bright pixels = 255).
contour_analyzer.find_empty_cells(imgThreshInv)
contour_analyzer.visualize_contours(ix)

plt.gcf().set_size_inches(20,18)
# NOTE(review): ix derives from a cv2.imread BGR image, while plt.imshow
# interprets 3-channel arrays as RGB - confirm the displayed colours are intended.
plt.imshow(ix)

#cv2.imwrite("/ram/Northwind-1.png", ix)

Find bounding box corner clusters


In [ ]:
# Cluster the corners of the contour bounding boxes (per the section heading).
# Operates on the state built up in contour_analyzer by the cell above; the
# result is kept inside the analyzer (nothing is returned into the notebook).
contour_analyzer.find_corner_clusters()

In [ ]:
%matplotlib inline
# Draw the corner clusters on a fresh copy of the page so `img` stays untouched.
imgCopy = img.copy()
plt.gcf().set_size_inches(20,18)
contour_analyzer.visualize_corner_clusters(imgCopy)
# NOTE(review): imgCopy is in cv2's BGR channel order while plt.imshow assumes
# RGB - confirm that the colour coding mentioned in the title is shown as intended.
plt.imshow(imgCopy)
plt.title("Corner color shows number of merged nodes")

Compute cells from contours and clusters


In [ ]:
%matplotlib inline
# Compute the cell hulls from the contours and corner clusters found above
# (per the section heading); the result is stored inside contour_analyzer.
contour_analyzer.compute_cell_hulls()

Recompute cell boundaries based on nodes & compute table angle


In [ ]:
# The magic below is commented out; inline plotting was already enabled by
# earlier cells.
#### %matplotlib inline
ix = img.copy()
plt.gcf().set_size_inches(20,18)

# Draw every computed cell hull on the copy.
# NOTE(review): xscale/yscale=0.8 presumably shrink each hull so neighbouring
# cells stay visually separated - confirm in TableRecognition.
contour_analyzer.draw_all_cell_hulls(ix, xscale=0.8, yscale=0.8)
plt.imshow(ix)
#cv2.imwrite("/ram/ATR.png", ix)

Find missing cells by masking


In [ ]:
# Update the analyzer's fine table corners, then fetch the contours of the
# cells it reports as missing (semantics live in TableRecognition).
contour_analyzer.find_fine_table_corners()
missing_contours = contour_analyzer.compute_filtered_missing_cell_contours()

# Outline all missing-cell contours (index -1 = every contour) in (0, 255, 0),
# 3 px thick, on a fresh copy of the page. drawContours paints into ix.
ix = img.copy()
icm = cv2.drawContours(ix, missing_contours, -1, (0, 255, 0), 3)

plt.gcf().set_size_inches(20, 18)
# ix has three channels, and imshow ignores a colormap for such data,
# so none is passed.
plt.imshow(ix)

Insert missing-cell contours into main contours & perform second run


In [ ]:
# Append the contours found by masking to the analyzer's own contours.
# NOTE(review): `+=` adds missing_contours on every execution, so re-running
# this cell without re-running the cells above inserts the same contours again.
contour_analyzer.contours += missing_contours

# Repeat the bounding-box and empty-cell steps so they include the new contours.
contour_analyzer.compute_contour_bounding_boxes()
contour_analyzer.find_empty_cells(imgThreshInv)

# Repeat corner clustering, hull computation and fine-corner detection on the
# enlarged contour set (same calls as in the first run).
contour_analyzer.find_corner_clusters()
contour_analyzer.compute_cell_hulls()
contour_analyzer.find_fine_table_corners()

Find cluster centers and group into hgroups and vgroups


In [ ]:
# Assign table coordinates to the cells (per the section heading, by grouping
# cluster centers into hgroups and vgroups).
# NOTE(review): the meaning and unit of the 5. argument are not visible here -
# presumably a grouping tolerance; confirm in TableRecognition.
contour_analyzer.compute_table_coordinates(5.)

In [ ]:
# Draw the cell hulls that received table coordinates on a fresh copy of the page.
ix = img.copy()

# NOTE(review): xscale/yscale=.9 presumably shrink each drawn hull slightly -
# confirm in TableRecognition.
contour_analyzer.draw_table_coord_cell_hulls(ix, xscale=.9, yscale=.9)

plt.gcf().set_size_inches(20,18)
plt.imshow(ix)
#cv2.imwrite("/ram/ATR2.png", ix)

Extract table cell image


In [ ]:
# Use a large figure for the preview.
plt.gcf().set_size_inches(20, 18)

# Hand the analyzer a fresh copy of the page (so `img` stays untouched) and
# keep whatever it returns for the cell selected by the tuple (14, 6) -
# presumably the cell's table coordinates; confirm in TableRecognition.
ix = contour_analyzer.extract_cell_from_image(img.copy(), (14, 6))

plt.imshow(ix, cmap="Greys_r")
# To keep the result on disk: cv2.imwrite("/ram/cell.png", ix)

In [ ]:
plt.gcf().set_size_inches(20,18)

# Extract the cell selected by (14, 6) again, this time with xscale=1,
# yscale=1 and mark_color=None (presumably full-size and without a drawn
# marker - confirm in TableRecognition). A copy is passed so `img` is untouched.
ix = img.copy()
ix = contour_analyzer.extract_cell_from_image(ix, (14,6), xscale=1, yscale=1, mark_color=None)
plt.imshow(ix, cmap="Greys_r")

# cv2.imwrite reports an unsuccessful save through its boolean return value.
# The target is a hard-coded absolute path, so report a failed write
# explicitly (without aborting the notebook) rather than leaving it to a bare
# True/False cell output.
if not cv2.imwrite("/ram/airwetbulb.png", ix):
    print("WARNING: cv2.imwrite could not write /ram/airwetbulb.png (is /ram mounted and writable?)")

In [ ]: