验证码识别 简单版本


In [1]:
import time
import os
from multiprocessing import Pool

from captcha.image import ImageCaptcha
import numpy as np
import skimage.io as io
import tensorflow as tf

import matplotlib.pylab as plt
%matplotlib inline

生成验证码


In [2]:
# Size of every captcha image after resizing (height x width, in pixels).
IMG_H = 64
IMG_W = 160
# Channel count of the model input placeholder (images are converted to grayscale).
IMG_CHANNALS = 1
# Number of characters in one captcha.
CAPTCHA_SIZE = 4
# Number of classes per character position (digits 0-9 plus letters A-Z).
CAPTCHA_NUM =  36
# Length of a flattened label: one one-hot group of CAPTCHA_NUM entries per character.
N_CLASSES = CAPTCHA_SIZE * CAPTCHA_NUM

In [15]:
def gen_baptcha(text):
    """Render `text` as a grayscale captcha image of size IMG_H x IMG_W.

    A new ``ImageCaptcha`` generator draws the text; the picture is converted
    to 8-bit grayscale (mode "L") and resized to IMG_W x IMG_H.

    Returns:
        np.uint8 array of shape [IMG_H, IMG_W, 1].
    """
    generator = ImageCaptcha()
    rendered = generator.generate_image(text)
    gray = rendered.convert("L")
    resized = gray.resize([IMG_W, IMG_H])
    pixels = np.array(resized, dtype=np.uint8)
    return pixels.reshape([IMG_H, IMG_W, 1])


def text_2_label(text):
    """Encode a captcha string as a flattened one-hot label vector.

    Each character is upper-cased and mapped to its position in
    '0'-'9', 'A'-'Z'; the per-character one-hot rows (CAPTCHA_NUM entries
    each) are concatenated in order.

    Args:
        text: captcha string; lower-case letters are treated as upper-case.

    Returns:
        1-D np.uint8 array of length len(text) * CAPTCHA_NUM.

    Raises:
        KeyError: if `text` contains a character outside the alphabet.
    """
    alphabet = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    # zip() stops at the shorter input, so at most CAPTCHA_NUM characters are mapped.
    row_of = dict(zip(alphabet, range(CAPTCHA_NUM)))
    # A list (not a lazy map object) so np.array gets a real sequence on
    # both Python 2 and Python 3.
    rows = np.array([row_of[ch] for ch in text.upper()], dtype=np.intp)
    # Row i of the identity matrix is the one-hot vector for class i.
    one_hot = np.eye(CAPTCHA_NUM, dtype=np.uint8)
    return one_hot[rows].flatten()


def label_2_text(label):
    """Decode a flattened one-hot label vector back into its captcha string.

    `label` is reshaped to [CAPTCHA_SIZE, CAPTCHA_NUM]; the arg-max of each
    row picks one character from '0'-'9', 'A'-'Z'.
    """
    alphabet = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    per_char = label.reshape([CAPTCHA_SIZE, CAPTCHA_NUM])
    chars = []
    for class_idx in per_char.argmax(axis=1):
        chars.append(alphabet[class_idx])
    return ''.join(chars)

def get_data(batch_size):
    """Generate one random batch of captcha images with their labels.

    Characters are drawn uniformly (with replacement) from digits plus lower-
    and upper-case letters and grouped into `batch_size` strings of
    CAPTCHA_SIZE characters each.

    Returns:
        (images, labels): images is a float32 array stacked from
        gen_baptcha() outputs, labels is an int32 array stacked from
        text_2_label() outputs.
    """
    alphabet = list('1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
    n_chars = batch_size * CAPTCHA_SIZE
    drawn = np.random.choice(alphabet, n_chars, replace=True)

    texts = []
    for group in np.split(drawn, batch_size):
        texts.append(''.join(group))

    # Render every image first, then encode every label.
    rendered = [gen_baptcha(captcha_text) for captcha_text in texts]
    encoded = [text_2_label(captcha_text) for captcha_text in texts]

    images = np.asarray(rendered, dtype=np.float32)
    labels = np.asarray(encoded, dtype=np.int32)
    return images, labels

In [18]:
def test_plot():
    """Generate one batch of captchas and display it as a 10 x 5 image grid.

    Returns:
        (images, labels) exactly as returned by get_data() for the batch shown.
    """
    nr, nc = 10, 5
    batch = nr * nc
    images, labels = get_data(batch)
    plt.figure(figsize=(12, 5))
    for i in range(batch):
        plt.subplot(nr, nc, i + 1)
        plt.axis("off")
        plt.imshow(images[i, :, :, 0])
    # The layout setting is the same for every panel, so apply it once
    # instead of once per subplot.
    plt.subplots_adjust(top=1.5)
    plt.show()
    return images, labels

images, labels = test_plot()



In [19]:
# Decode the first label of the plotted batch back to text as a quick sanity check.
label_2_text(labels[0])


Out[19]:
'WDZT'

模型


In [20]:
def _batch_norm_relu(x):
    """Normalize `x` with the current batch's statistics, then apply ReLU.

    No learned offset/scale is used and no moving averages are kept.
    NOTE(review): moments are taken over axis 0 only, also for 4-D conv
    outputs; kept unchanged to preserve the model's behavior.
    """
    m, v = tf.nn.moments(x, [0])
    x = tf.nn.batch_normalization(x, mean=m, variance=v, offset=None, scale=None, variance_epsilon=1e-6)
    return tf.nn.relu(x)


def _conv_bn_relu(x, scope, in_channels, out_channels):
    """3x3 stride-1 SAME convolution with bias, followed by _batch_norm_relu."""
    with tf.name_scope(scope):
        w = tf.Variable(tf.random_normal([3, 3, in_channels, out_channels], stddev=0.01))
        b = tf.Variable(tf.constant(0., shape=[out_channels]))
        x = tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')
        x = tf.nn.bias_add(x, b)
        return _batch_norm_relu(x)


def _max_pool_2x2(x, scope):
    """2x2 max pooling with stride 2 and SAME padding (halves height and width)."""
    with tf.name_scope(scope):
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')


def interface(x):
    """Build the captcha CNN and return its logits.

    Architecture: three (conv 3x3 -> norm -> ReLU -> max-pool 2x2) stages with
    32, 64 and 64 filters, a 1024-unit fully connected layer with norm + ReLU,
    and a final linear layer with N_CLASSES outputs.

    Args:
        x: 4-D image tensor with one channel and statically known height and
           width (the flatten step reads the static shape).

    Returns:
        2-D tensor of unnormalized logits with N_CLASSES columns.
    """
    x = _conv_bn_relu(x, "conv-1", 1, 32)
    x = _max_pool_2x2(x, "pool-1")

    x = _conv_bn_relu(x, "conv-2", 32, 64)
    x = _max_pool_2x2(x, "pool-2")

    x = _conv_bn_relu(x, "conv-3", 64, 64)
    x = _max_pool_2x2(x, "pool-3")  # N * 8 * 20 * 64

    with tf.name_scope("fc-4"):
        # Flatten all non-batch dimensions using the static shape.
        shape = x.get_shape()
        size = shape[1].value * shape[2].value * shape[3].value
        x = tf.reshape(x, [-1, size])
        w = tf.Variable(tf.random_normal([size, 1024], stddev=0.01))
        b = tf.Variable(tf.constant(0., shape=[1024]))
        x = tf.matmul(x, w) + b
        x = _batch_norm_relu(x)

    with tf.name_scope("fc-5"):
        w = tf.Variable(tf.random_normal([1024, N_CLASSES], stddev=0.01))
        b = tf.Variable(tf.constant(0., shape=[N_CLASSES]))
        x = tf.matmul(x, w) + b

    return x

模型一共约有 1069 万个参数(10,690,128),其中绝大部分来自全连接层 fc-4(8×20×64×1024 + 1024 ≈ 1049 万)。


In [29]:
MAX_STEP = 500  # short run for the notebook; 100000 is the commented-out alternative
BATCH_SIZE = 64

def train():
    """Train the captcha model on freshly generated batches and print accuracy.

    Every step trains on one new random batch from get_data(). Every 100 steps
    and on the final step, a separate new batch is used to report per-character
    accuracy and whole-captcha accuracy (all characters correct).
    """
    # Build into a private graph so that re-running this cell does not keep
    # adding duplicate ops and variables to the global default graph.
    with tf.Graph().as_default():
        x = tf.placeholder(tf.float32, [BATCH_SIZE, IMG_H, IMG_W, IMG_CHANNALS])
        y = tf.placeholder(tf.int32, [BATCH_SIZE, N_CLASSES])

        x_ = x / 255.0 - 0.5   # scale pixel values from [0, 255] to [-0.5, 0.5]

        logits = interface(x_)
        # One softmax group of CAPTCHA_NUM classes per character position.
        logits_ = tf.reshape(logits, [-1, CAPTCHA_SIZE, CAPTCHA_NUM])
        labels_ = tf.reshape(y, [-1, CAPTCHA_SIZE, CAPTCHA_NUM])

        with tf.name_scope('loss'):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits_, labels=labels_, dim=-1)
            loss = tf.reduce_mean(cross_entropy, name="loss")

        with tf.name_scope("accuracy"):
            correct = tf.equal(tf.argmax(logits_, -1), tf.argmax(labels_, -1))
            accuracy_one = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy_one")
            # A captcha counts as correct only if every character matches.
            correct_all = tf.reduce_all(correct, axis=-1)
            accuracy_all = tf.reduce_mean(tf.cast(correct_all, tf.float32), name="accuracy_all")

        optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

        init = tf.global_variables_initializer()
        # The context manager closes the session even if a step raises.
        with tf.Session() as sess:
            sess.run(init)

            for i in range(MAX_STEP):
                x_bt, y_bt = get_data(BATCH_SIZE)
                _ = sess.run(optimizer, feed_dict={x: x_bt, y: y_bt})
                if i % 100 == 0 or (i + 1) == MAX_STEP:
                    # Evaluate on a new batch, not the one just trained on.
                    x_bt, y_bt = get_data(BATCH_SIZE)
                    acc_one, acc_all = sess.run([accuracy_one, accuracy_all], feed_dict={x: x_bt, y: y_bt})
                    # Parenthesized single-argument form prints identically on Python 2 and 3.
                    print("step: %d, accuracy: %.4f | %.4f" % (i, acc_one, acc_all))

In [30]:
# Run training; accuracy is printed every 100 steps and on the last step.
train()


step: 0, accuracy: 0.0391 | 0.0000
step: 100, accuracy: 0.4609 | 0.0469
step: 200, accuracy: 0.6914 | 0.2500
step: 300, accuracy: 0.7109 | 0.2656
step: 400, accuracy: 0.8008 | 0.4844
step: 499, accuracy: 0.8438 | 0.5312

step vs accuracy

不同环境下模型精度

| System | MaxStep | Time | AccuracyOne | AccuracyAll |
| --- | --- | --- | --- | --- |
| cpu i7 | 10K | 6.5 h | 95.8% | 81.3% |
| 1 GTX1080 | 100K | 2 h 15 m | 99.2% | 96.5% |
| 2 GTX1080 | 100K | 1 h 9 m | 99.2% | 96.5% |
| 4 GTX1080 | 100K | 31 m | 99.0% | 96.1% |

In [ ]: