In [1]:
import os
import math
import itertools as it
import numpy as np
import cv2
import pyocr
import matplotlib.cm as cm
from matplotlib import pyplot as plt
from PIL import Image
%matplotlib inline
In [2]:
## Set dir
#os.chdir('/home/vtm/Desktop/Captchas')
In [164]:
## Load the captcha, convert it to BGRA and pad it with a white border
IMG_PATH = 'TestData/captcha (02).jpg'
imgInput = cv2.imread(IMG_PATH)
## cv2.imread reports a missing/unreadable file by returning None, so fail
## here with a clear message instead of inside cvtColor
if imgInput is None:
    raise IOError('Could not read image: ' + IMG_PATH)
imgInput = cv2.cvtColor(imgInput, cv2.COLOR_BGR2BGRA)
## The image has 4 channels at this point, so give the border colour an
## explicit alpha (same form as the border added to the rotated crops)
imgInput = cv2.copyMakeBorder(imgInput, 10, 10, 10, 10, cv2.BORDER_CONSTANT, value = [255, 255, 255, 255])
## Draw
plt.imshow(imgInput)
Out[164]:
In [165]:
## Blank canvas: all zeros, same shape and dtype as the input image
imgBlank = np.zeros(imgInput.shape, dtype = imgInput.dtype)
## Draw
plt.imshow(imgBlank)
Out[165]:
In [166]:
## Convert to Grayscale
## imgInput was converted to BGRA when it was loaded, so use the 4-channel
## conversion code (the same one the later crop cells use)
imgGray = cv2.cvtColor(imgInput, cv2.COLOR_BGRA2GRAY)
## Draw
plt.imshow(imgGray, cmap = cm.gray)
Out[166]:
In [167]:
## Binarize the grayscale image: levels above the cutoff become white,
## everything else becomes black
whiteCutoff, whiteLevel = 230, 255
ret, imgBW = cv2.threshold(imgGray, whiteCutoff, whiteLevel, cv2.THRESH_BINARY)
## Draw
plt.imshow(imgBW, cmap = cm.gray)
Out[167]:
In [168]:
## HSV / gray-level statistics of the de-noised image
## NOTE: this cell reads imgNoNoise, which is only created by the
## "Clean image" cell further down; run that cell first, otherwise this one
## fails with a NameError on a fresh kernel
imgInputHSV = cv2.cvtColor(imgNoNoise, cv2.COLOR_BGR2HSV)
imgHue = imgInputHSV[:, :, 0]
imgNoNoiseGray = cv2.cvtColor(imgNoNoise, cv2.COLOR_BGR2GRAY)
ret3, th3 = cv2.threshold(imgHue, 0, 180, cv2.THRESH_BINARY)
## Occurrences of every gray level, and the levels that actually occur
colorsCount = np.bincount(imgNoNoiseGray.ravel())
colorsUniq = np.nonzero(colorsCount)[0]
## Index the counts by the present levels so each level is paired with its
## own count (zipping against the full count array misaligns them)
colorsDict = dict(zip(colorsUniq, colorsCount[colorsUniq]))
## (gray level, count) rows, most frequent first
colorsByCount = sorted(np.vstack((colorsUniq, colorsCount[colorsUniq])).T, key = lambda x: x[1], reverse = True)
#plt.hist(imgNoNoiseGray.ravel(), 100, [5, 254])
plt.imshow(imgNoNoiseGray)
In [169]:
## Remove noise: average the B&W image over a 3x3 window, then threshold again
meanWindow = (3, 3)
kernel = np.ones(meanWindow, np.float32) / 9
dst = cv2.filter2D(imgBW, -1, kernel)
## Pixels whose window average is above 30 are turned white
ret, imgNoise = cv2.threshold(dst, 30, 255, cv2.THRESH_BINARY)
## The mask is the inverse: set wherever imgNoise is black
imgNoiseMask = cv2.bitwise_not(imgNoise)
## Draw
plt.imshow(imgNoiseMask, cmap = cm.gray)
Out[169]:
In [170]:
## Clean image: keep the input pixels selected by the noise mask ...
imgNoNoise = cv2.bitwise_and(imgInput, imgInput, mask = imgNoiseMask)
## ... and add imgNoise (white wherever the mask is not set) on top.
## cv2.add saturates at 255, whereas an in-place numpy `+=` on uint8 data
## silently wraps around when a channel overflows
imgNoNoise = cv2.add(imgNoNoise, cv2.cvtColor(imgNoise, cv2.COLOR_GRAY2BGRA))
## Draw
plt.imshow(imgNoNoise)
Out[170]:
In [181]:
## K-Means colour quantization of the de-noised image
## NOTE: despite its name, imgNoNoiseRGB holds the channels in BGR order
imgNoNoiseRGB = cv2.cvtColor(imgNoNoise, cv2.COLOR_BGRA2BGR)
## One row per pixel, one column per channel, converted to np.float32
Z = np.float32(imgNoNoiseRGB.reshape((-1, 3)))
## Termination criteria (max 10 iterations / epsilon 1.0) and number of clusters
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
K = 6
## NOTE(review): centers are initialised randomly and no seed is set, so the
## label numbering can differ between runs
## NOTE(review): this argument order looks like the OpenCV 2.4 binding; newer
## releases may expect an extra bestLabels argument - confirm against the
## installed version
ret, label, center = cv2.kmeans(Z, K, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)
## Back to uint8, then rebuild an image where every pixel has its cluster's colour
center = np.uint8(center)
res = center[label.flatten()]
res2 = res.reshape(imgNoNoiseRGB.shape)
## Pixels per cluster label, printed as (label, count) rows
colorsCount = np.bincount(label.ravel())
colorsUniq = np.nonzero(colorsCount)[0]
## Single-argument print(...) behaves the same on Python 2 and Python 3
print(np.vstack((colorsUniq, colorsCount[colorsUniq])).T)
cv2.imwrite('out.png', res2)
plt.imshow(res2)
Out[181]:
In [189]:
## K-Means 2: for every colour cluster, outline its contours on the input image
## The label layout is identical for every cluster, so reshape it only once
labelReshaped = label.reshape(imgBW.shape)
nRows = int(math.ceil(K / 2.0))
for i in np.unique(label):
    ## Start from the B&W image and turn everything outside cluster i white
    KMeansMask = imgBW.copy()
    KMeansMask[labelReshaped != i] = 255
    ## Explicit (rows, cols, index) form with a 1-based index; the three-digit
    ## shorthand produced index 0 for the first cluster
    plt.subplot(nRows, 2, int(i) + 1)
    ## Contours
    ## [-2:] selects (contours, hierarchy) whether findContours returns 2 or 3 values
    cnts, hierarchy = cv2.findContours(KMeansMask.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2:]
    if hierarchy is None:
        ## Nothing found for this cluster
        cnts = []
    else:
        ## Keep only the contours whose parent is contour 0
        cnts = list(it.compress(cnts, (hierarchy[:, :, 3] == 0)[0]))
    ## Remove the small ones (a real list on both Python 2 and 3)
    cnts = [c for c in cnts if cv2.contourArea(c) > 100]
    ## Draw Contours
    cntsImg = imgInput.copy()
    cv2.drawContours(cntsImg, cnts, -1, (255, 0, 128, 255), 2)
    plt.imshow(cntsImg)
plt.show()
In [96]:
## Find contours in the de-noised B&W image
## [-2:] selects (contours, hierarchy) whether findContours returns 2 or 3 values
cnts, hierarchy = cv2.findContours(imgNoise.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2:]
if hierarchy is None:
    ## No contours at all
    cnts = []
else:
    ## Keep only the contours whose parent is contour 0
    cnts = list(it.compress(cnts, (hierarchy[:, :, 3] == 0)[0]))
## Remove the small ones (a real list on both Python 2 and 3)
cnts = [c for c in cnts if cv2.contourArea(c) > 100]
## Sort left to right by the smallest X coordinate of each contour
cnts = sorted(cnts, key = lambda c: min(p[0][0] for p in c))
## Draw
cntsImg = imgInput.copy()
cv2.drawContours(cntsImg, cnts, -1, (255, 0, 128, 255), 2)
plt.imshow(cntsImg)
Out[96]:
In [97]:
## Extract one image per contour
imgCnts = []
for i in range(len(cnts)):
    ## Create contour mask: single channel, contour i filled with 255
    mask = np.zeros(imgBlank.shape[:2], dtype = np.uint8)
    cv2.drawContours(mask, cnts, i, 255, -1)
    inside = mask == 255
    ## Copy the de-noised pixels inside the contour onto a blank canvas
    out = imgBlank.copy()
    out[inside] = imgNoNoise[inside]
    imgCnts.append(out)
## Draw
imgCntsLen = len(imgCnts)
nRows = int(math.ceil(imgCntsLen / 2.0))
for i, img in enumerate(imgCnts):
    ## Subplot indices are 1-based; the three-digit shorthand yielded index 0 first
    plt.subplot(nRows, 2, i + 1)
    plt.imshow(img)
plt.show()
In [98]:
## Find the minimal-area rectangle around each letter
## List comprehensions instead of map(): the results are indexed and passed
## to len(), which needs real lists on Python 3
rects = [cv2.minAreaRect(c) for c in cnts]
## minAreaRect results are unpacked as 3-tuples; the last item is the angle
angles = [rect[2] for rect in rects]
bRects = [cv2.boundingRect(c) for c in cnts]
## cv2.cv.BoxPoints only exists in the OpenCV 2.4 bindings; prefer
## cv2.boxPoints when the installed version provides it
boxPoints = cv2.boxPoints if hasattr(cv2, 'boxPoints') else cv2.cv.BoxPoints
boxs = [boxPoints(rect) for rect in rects]
## Round the corners to integers (np.intp is the type np.int0 aliased)
boxsInt = [np.around(box).astype(np.intp) for box in boxs]
## Draw
rectsLen = len(rects)
nRows = int(math.ceil(rectsLen / 2.0))
for i in range(rectsLen):
    cntImg = imgInput.copy()
    ## Subplot indices are 1-based; the three-digit shorthand yielded index 0 first
    plt.subplot(nRows, 2, i + 1)
    cv2.drawContours(cntImg, boxsInt, i, (255, 0, 0, 255), 2)
    plt.imshow(cntImg)
plt.show()
In [99]:
## Crop every contour image to its bounding rectangle (x, y, width, height)
imgContoursCrops = []
for (x, y, w, h), img in zip(bRects, imgCnts):
    imgContoursCrops.append(img[y:y + h, x:x + w])
## Draw
cropsLen = len(imgContoursCrops)
nRows = int(math.ceil(cropsLen / 2.0))
for i, crop in enumerate(imgContoursCrops):
    ## Subplot indices are 1-based; the three-digit shorthand yielded index 0 first
    plt.subplot(nRows, 2, i + 1)
    plt.imshow(crop)
plt.show()
In [100]:
## Rotate the crops to the right angle
imgCropsNormAngle = []
for crop, angle in zip(imgContoursCrops, angles):
    ## Pad with opaque white so the rotation has room
    padded = cv2.copyMakeBorder(crop, 10, 10, 10, 10, cv2.BORDER_CONSTANT, value = [255, 255, 255, 255])
    rows, cols = padded.shape[:2]
    ## Rotate by the smaller of (90 + angle) and |angle|; the sign is positive
    ## when |angle| > 45 and negative otherwise
    ## NOTE(review): presumably relies on the angle range minAreaRect reports
    ## in the installed OpenCV version - confirm
    a = min(90 + angle, abs(angle)) * (1 if abs(angle) > 45 else -1)
    ## Floor division keeps the centre integral on both Python 2 and 3
    M = cv2.getRotationMatrix2D((cols // 2, rows // 2), a, 1)
    ## Local name so the filter output `dst` of the noise-removal cell is not overwritten
    rotated = cv2.warpAffine(padded, M, (cols, rows))
    imgCropsNormAngle.append(rotated)
## Draw
cropsLen = len(imgCropsNormAngle)
nRows = int(math.ceil(cropsLen / 2.0))
for i, crop in enumerate(imgCropsNormAngle):
    ## Subplot indices are 1-based; the three-digit shorthand yielded index 0 first
    plt.subplot(nRows, 2, i + 1)
    plt.imshow(crop)
plt.show()
In [101]:
## Threshold the rotated crops to B&W
imgsOut = []
for img in imgCropsNormAngle:
    alpha = img[:, :, 3]
    gray = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
    ## Pixels that are not (almost) fully opaque become white
    gray[alpha < 250] = 255
    ## Use a crop-local name: writing to imgBW here would overwrite the
    ## full-image B&W array that the noise-filter and K-Means 2 cells read
    ret, cropBW = cv2.threshold(gray, 230, 255, cv2.THRESH_BINARY)
    imgsOut.append(cropBW)
## Draw
outLen = len(imgsOut)
## Size the grid from this cell's own count instead of cropsLen from another cell
nRows = int(math.ceil(outLen / 3.0))
for i, img in enumerate(imgsOut):
    ## Subplot indices are 1-based; the three-digit shorthand yielded index 0 first
    plt.subplot(nRows, 3, i + 1)
    plt.imshow(img, cmap = cm.gray)
plt.show()
In [102]:
## Run OCR on every B&W crop
ocr = pyocr.tesseract
ocrBuilder = pyocr.tesseract.builders.TextBuilder()
ocrLang = 'eng'
## NOTE(review): presumably the second config entry is the page-segmentation
## value and 'letters' is a tesseract config file - confirm against the
## installed pyocr/tesseract
ocrBuilder.tesseract_configs[1] = '10'
ocrBuilder.tesseract_configs.append('letters')
imgsOutPIL = [Image.fromarray(arr) for arr in imgsOut]
lettersOut = [ocr.image_to_string(pilImg, lang = ocrLang, builder = ocrBuilder) for pilImg in imgsOutPIL]
## Single-argument print(...) behaves the same on Python 2 and Python 3
print(lettersOut)
## Draw
outLen = len(imgsOut)
## Size the grid from this cell's own count instead of cropsLen from another cell
nRows = int(math.ceil(outLen / 3.0))
for i, (img, l) in enumerate(zip(imgsOut, lettersOut)):
    ## Subplot indices are 1-based; the three-digit shorthand yielded index 0 first
    plt.subplot(nRows, 3, i + 1)
    plt.imshow(img, cmap = cm.gray)
    plt.title(l)
plt.show()
In [589]:
## Print the recognized text and show the original captcha for comparison
## Single-argument print(...) behaves the same on Python 2 and Python 3
print(''.join(lettersOut))
plt.imshow(imgInput)
Out[589]: