In [12]:
def corrode(img, o):
    """Erode the black (0) regions of a binary image.

    ``o`` is read as four offsets: ``o[0]``/``o[1]`` are how far the window
    reaches before/after the centre along the first pixel coordinate, and
    ``o[2]``/``o[3]`` do the same along the second coordinate.

    An output pixel becomes 0 only when no pixel of ``img`` inside its window
    equals 255; otherwise it becomes 255.  Positions whose window would not
    fit inside the image are never visited and keep the 255 fill.

    Returns a new mode-'1' image with the same size; ``img`` is only read.
    """
    dims = img.size
    result = Image.new('1', dims, 255)
    src = img.load()
    dst = result.load()
    for x in range(o[0], dims[0] - o[1]):
        for y in range(o[2], dims[1] - o[3]):
            # A single white pixel anywhere in the window wipes the centre out.
            window_has_white = any(
                src[u, v] == 255
                for u in range(x - o[0], x + o[1] + 1)
                for v in range(y - o[2], y + o[3] + 1)
            )
            dst[x, y] = 255 if window_has_white else 0
    return result
def expand(image, o):
    """Dilate the black (0) regions of a binary image.

    ``o`` is read as four offsets: ``o[0]``/``o[1]`` are how far the window
    reaches before/after the centre along the first pixel coordinate, and
    ``o[2]``/``o[3]`` do the same along the second coordinate.

    Every visited black pixel of ``image`` paints its whole window black in
    the output.  Only positions whose window fits inside the image are
    visited, so black pixels in the remaining border strip are not copied;
    everything never painted stays 255.

    Returns a new mode-'1' image with the same size; ``image`` is only read.
    """
    # Read from the argument, not from whatever global happens to be named
    # ``img`` when the function is called.
    size = image.size
    nimg = Image.new('1', size, 255)
    mat = image.load()
    nmat = nimg.load()
    for i in range(o[0], size[0] - o[1]):
        for j in range(o[2], size[1] - o[3]):
            if mat[i, j] == 0:
                for k in range(i - o[0], i + o[1] + 1):
                    for m in range(j - o[2], j + o[3] + 1):
                        # Paint the window cell (k, m); writing (i, j) here
                        # would merely copy the input without growing it.
                        nmat[k, m] = 0
    return nimg
def gray(img, k):
    """Collapse a multi-channel image into a single-channel 'L' image.

    Each output pixel is the weighted sum
    ``pix[0]*k[0] + pix[1]*k[1] + pix[2]*k[2]`` of the first three channels
    of the corresponding source pixel, rounded to the nearest integer and
    clamped to 0..255.  Any further channels (e.g. alpha) are ignored.

    ``k`` may hold ints or floats, and weights may be negative.

    Returns a new 'L' image with the same size; ``img`` is only read.
    """
    size = img.size
    nimg = Image.new('L', size, 255)
    mat = img.load()
    nmat = nimg.load()
    for i in range(size[0]):
        for j in range(size[1]):
            pix = mat[i, j]
            value = pix[0] * k[0] + pix[1] * k[1] + pix[2] * k[2]
            # Store a plain int inside the 8-bit range: float weights give a
            # float sum and negative weights can leave 0..255 entirely.
            nmat[i, j] = max(0, min(255, int(round(value))))
    return nimg
In [2]:
from PIL import Image
# Load the sample CAPTCHA and wrap the pixel array as a PIL image.
# NOTE(review): imread/imshow are not imported in any visible cell; presumably
# they come from a pylab-style star import -- confirm.
im = imread('CheckCode.gif')
img = Image.fromarray(im)
#img = img.convert('1')
#nimg = gray(img,(0.3,0.59,0.11))
# Channel weights (1, 1, -1): first channel + second channel - third channel.
nimg = gray(img,(1,1,-1))
# Convert to mode '1'; from here on img names the converted image, not the
# one built from the file.
img = nimg.convert('1')
# Window offsets handed to corrode/expand: o[0], o[1] along the first pixel
# coordinate and o[2], o[3] along the second.
o = (0,1,0,1)
n1 = corrode(img,o)
n2 = expand(img,o)
# Feed each result into the other operation.
n3 = expand(n1,o)
n4 = corrode(n2,o)
imshow(nimg)
Out[2]:
In [28]:
# Display n1, which was assigned from corrode(img, o).
imshow(n1)
Out[28]:
In [29]:
# Display n2, which was assigned from expand(img, o).
imshow(n2)
Out[29]:
In [30]:
# Display n3, which was assigned from expand(n1, o).
imshow(n3)
Out[30]:
In [31]:
# Display n4, which was assigned from corrode(n2, o).
imshow(n4)
Out[31]:
In [19]:
from sklearn import cluster, datasets
from PIL import Image
# Split the CAPTCHA pixels into two clusters with k-means and show the result
# as a black/white image.
im = imread('CheckCode.gif')
size = (len(im[0]), len(im))  # (columns, rows) of the pixel array

# One feature vector per pixel, in row-major order: the pixel's channel values
# followed by its row and column index.
l = [
    list(im[i][j]) + [i] + [j]
    for i in range(size[1])
    for j in range(size[0])
]

# NOTE(review): iris is never used in this cell; it looks like a leftover from
# an sklearn example. Kept in case another cell relies on it -- confirm and
# delete.
iris = datasets.load_iris()

# Fix the seed: without it the clustering (and therefore which cluster is
# drawn white) can differ from one run to the next.
k_means = cluster.KMeans(n_clusters=2, random_state=0)
k_means.fit(l)

img = Image.new('1', size)
mat = img.load()
for i in range(size[1]):
    for j in range(size[0]):
        # labels_ is in the same row-major order as l. Cast the numpy label to
        # a plain int before handing it to the pixel accessor.
        mat[j, i] = int(k_means.labels_[i * size[0] + j]) * 255
imshow(img)
Out[19]:
In [25]:
from PIL import Image
import urllib.request
from io import BytesIO
# Download a fresh CAPTCHA. The with-block closes the HTTP response as soon as
# its body has been copied into memory.
with urllib.request.urlopen('http://xk.suda.edu.cn/CheckCode.aspx') as f:
    s = BytesIO(f.read())
im = imread(s, format='gif')

# Top panel: the image as downloaded.
subplot(2, 1, 1)
imshow(Image.fromarray(im))

# Keep only pixels of exactly (0, 0, 153, 255): they become opaque black and
# every other pixel becomes opaque white. im is modified in place.
size = (len(im[0]), len(im))
for i in range(size[1]):
    for j in range(size[0]):
        if tuple(im[i, j]) == (0, 0, 153, 255):
            im[i][j] = [0, 0, 0, 255]
        else:
            im[i][j] = [255, 255, 255, 255]
img = Image.fromarray(im)

# Bottom panel: the cleaned-up image.
subplot(2, 1, 2)
imshow(img)
Out[25]:
In [26]:
def shadow(mat, shape):
    """Count, for each column, the pixels whose first channel is 0.

    ``mat`` is indexed as ``mat[row, col]`` and each entry is itself indexable
    (only element 0 is inspected).  ``shape[0]`` is the number of rows and
    ``shape[1]`` the number of columns; any further entries are ignored.

    Returns a list with ``shape[1]`` integers, one per column.

    NOTE: this notebook defines ``shadow`` a second time further down with a
    different indexing convention; whichever ran last is the one in effect.
    """
    n_rows, n_cols = shape[0], shape[1]
    return [
        sum(1 for row in range(n_rows) if mat[row, col][0] == 0)
        for col in range(n_cols)
    ]
# Per-column count of black pixels in the cleaned CAPTCHA.
# NOTE: this needs the shadow() defined just above (array + array shape); the
# notebook redefines shadow() in a later cell with a different convention.
l = shadow(im, im.shape)

# Cut the image at empty columns. `last` is the most recent empty column (or 0
# at the start); a slice is emitted whenever at least one non-empty column
# lies between it and the current empty column.
sub = []
last = 0
for i in range(len(l)):
    if l[i] == 0:
        if i - last > 1:
            sub.append(Image.fromarray(im[:, last:i+1]).convert('1'))
        last = i
# A glyph that runs into the right edge is never followed by an empty column,
# so the loop above would drop it; flush it here.
if l and l[-1] != 0 and len(l) - last > 1:
    sub.append(Image.fromarray(im[:, last:]).convert('1'))

# One panel per segment. Keep the 4-wide layout for the usual case but grow it
# if more pieces were found, instead of asking for a panel that does not exist.
n_panels = max(len(sub), 4)
for c, piece in enumerate(sub, 1):
    subplot(1, n_panels, c)
    imshow(piece)
In [27]:
def shadow(mat, shape):
    """Count, for each first-coordinate value, the entries equal to 0.

    ``mat`` is indexed as ``mat[a, b]`` with ``a`` in ``range(shape[0])`` and
    ``b`` in ``range(shape[1])``; entries are compared to 0 directly.

    Returns a list with ``shape[0]`` integers.

    NOTE: this replaces the ``shadow`` defined in an earlier cell, which walks
    the axes in the opposite order and inspects element 0 of each entry.
    Cells written for the earlier version must not be re-run after this one.
    """
    profile = []
    for a in range(shape[0]):
        zeros = sum(1 for b in range(shape[1]) if mat[a, b] == 0)
        profile.append(zeros)
    return profile
# De-skew every segment in `sub`: try rotations of -10..9 degrees and keep the
# one whose inked columns span the smallest width. Results go into `font`.
# Uses the shadow() defined in this cell (pixel accessor + image size).
font = []
n_panels = max(len(sub), 4)  # 4-wide as before, wider if there are more glyphs
for c, img in enumerate(sub, 1):
    # Top row: the segment as cut.
    subplot(2, n_panels, c)
    imshow(img)

    # Invert before rotating and invert back afterwards.
    # NOTE(review): presumably this makes the padding that rotate(expand=1)
    # adds come out white rather than black -- confirm against Pillow's fill
    # behaviour.
    inverted = img.point(lambda v: 255 - v)

    # Start from "nothing chosen yet" so the result always belongs to this
    # glyph; a fixed starting width could leave the previous glyph's choice
    # (or no value at all) in minp.
    minp = None
    minl = float('inf')
    for angle in range(-10, 10):
        t = inverted.rotate(angle, expand=1)
        t = t.point(lambda v: 255 - v)
        size = t.size
        l = shadow(t.load(), size)
        inked = [x for x in range(size[0]) if l[x] > 0]
        if not inked:
            # Nothing drawn at this angle; there is no width to compare.
            continue
        width = inked[-1] - inked[0]
        # Strict '<' keeps the earliest angle on ties.
        if width < minl:
            minp = t
            minl = width
    if minp is None:
        # Every candidate was blank; fall back to the unrotated segment.
        minp = img

    font.append(minp)
    # Bottom row: the chosen rotation, directly below its original.
    subplot(2, n_panels, c + n_panels)
    imshow(minp)
In [62]:
import pyocr
# Recognise each de-skewed glyph with the first OCR backend pyocr reports.
tools = pyocr.get_available_tools()
if not tools:
    # Indexing an empty list would only say "list index out of range".
    raise RuntimeError('pyocr found no available OCR tool')
tool = tools[0]
for i in font:
    print(tool.image_to_string(i))
In [28]:
import os
# Recognise each glyph by shelling out to the tesseract command line tool.
# Every glyph is written to tmp.gif, tesseract writes its text to tmp.txt, and
# the first non-blank character of that text is collected in `res`.
# NOTE(review): newer Tesseract releases spell the page-segmentation option
# '--psm' -- confirm against the installed version.
res = []
for i in font:
    i.convert('RGB').save('tmp.gif')
    status = os.system('tesseract tmp.gif tmp -psm 10 tesseract_config')
    if status != 0:
        # Otherwise a tmp.txt left over from the previous glyph would be read
        # and reported as this glyph's result.
        raise RuntimeError('tesseract exited with status %r' % status)
    with open('tmp.txt', 'r') as f:
        text = f.read().strip()
    # Empty output means nothing was recognised; keep res aligned with font.
    res.append(text[0] if text else '')
for i in res:
    print(i)