In [1]:
import time
import os
from multiprocessing import Pool
from captcha.image import ImageCaptcha
import numpy as np
import skimage.io as io
import tensorflow as tf
import matplotlib.pylab as plt
%matplotlib inline
In [2]:
# Size (in pixels) that every captcha image is resized to, and its channel
# count (the images are converted to single-channel grayscale).
IMG_H = 64
IMG_W = 160
IMG_CHANNALS = 1
# Number of characters per captcha and size of the label alphabet
# (the 10 digits plus the 26 upper-case letters used by text_2_label).
CAPTCHA_SIZE = 4
CAPTCHA_NUM = 36
# Length of a flattened label: one one-hot vector of CAPTCHA_NUM entries
# for each of the CAPTCHA_SIZE characters.
N_CLASSES = CAPTCHA_SIZE * CAPTCHA_NUM
In [15]:
# Generate a captcha image, 64*160, grayscale.
def gen_baptcha(text):
    """Render `text` as a grayscale captcha image.

    Args:
        text: The string to draw on the captcha.

    Returns:
        A uint8 numpy array of shape (IMG_H, IMG_W, 1).
    """
    generator = ImageCaptcha()
    # "L" is PIL's 8-bit single-channel (grayscale) mode.
    gray = generator.generate_image(text).convert("L")
    resized = gray.resize([IMG_W, IMG_H])
    pixels = np.array(resized, dtype=np.uint8)
    # Add an explicit trailing channel axis so the image is H x W x 1.
    return pixels.reshape([IMG_H, IMG_W, 1])
def text_2_label(text):
    """Encode a captcha string as concatenated one-hot vectors.

    The text is upper-cased first, so lower- and upper-case letters map to
    the same class.

    Args:
        text: String made of digits and ASCII letters.

    Returns:
        A flat uint8 numpy array of length len(text) * CAPTCHA_NUM holding
        one one-hot vector of CAPTCHA_NUM entries per character, in order.

    Raises:
        KeyError: If `text` contains a character outside 0-9 / A-Z
            (after upper-casing).
    """
    key_list = list('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    value_list = np.eye(CAPTCHA_NUM, dtype=np.int32).tolist()
    label_dict = dict(zip(key_list, value_list))
    # Build a real list rather than using map(): on Python 3 map() returns a
    # lazy iterator, which np.array(..., dtype=np.uint8) cannot convert.
    one_hot_rows = [label_dict[ch] for ch in text.upper()]
    return np.array(one_hot_rows, dtype=np.uint8).flatten()
def label_2_text(label):
    """Decode one flattened label (or logits) vector back into captcha text.

    Args:
        label: numpy array with CAPTCHA_SIZE * CAPTCHA_NUM elements; it is
            viewed as CAPTCHA_SIZE rows of CAPTCHA_NUM scores each.

    Returns:
        A string of CAPTCHA_SIZE characters, each being the alphabet entry
        at the arg-max position of the corresponding row.
    """
    alphabet = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    per_char_scores = label.reshape([CAPTCHA_SIZE, CAPTCHA_NUM])
    best_indices = per_char_scores.argmax(axis=1)
    return ''.join(alphabet[idx] for idx in best_indices)
def get_data(batch_size):
    """Generate a random batch of captcha images with their labels.

    Args:
        batch_size: Number of captchas to generate.

    Returns:
        A tuple (images, labels): `images` is a float32 array stacking the
        output of gen_baptcha for each random text, and `labels` is an int32
        array stacking the matching text_2_label encodings.
    """
    alphabet = list('1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
    total_chars = batch_size * CAPTCHA_SIZE
    # Draw every character of the batch at once, then cut the flat array
    # into batch_size equal pieces, one per captcha.
    flat_chars = np.random.choice(alphabet, total_chars, replace=True)
    captcha_texts = []
    for chunk in np.split(flat_chars, batch_size):
        captcha_texts.append(''.join(chunk))
    images = np.asarray([gen_baptcha(txt) for txt in captcha_texts], dtype=np.float32)
    labels = np.asarray([text_2_label(txt) for txt in captcha_texts], dtype=np.int32)
    return images, labels
In [18]:
def test_plot():
    """Generate a batch of captchas and show them in a 10 x 5 grid.

    Returns:
        The (images, labels) tuple produced by get_data for the plotted
        batch, so the caller can inspect the matching labels.
    """
    nr, nc = 10, 5
    batch = nr * nc
    images, labels = get_data(batch)
    plt.figure(figsize=(12, 5))
    for i in range(batch):
        plt.subplot(nr, nc, i + 1)
        plt.axis("off")
        # Drop the channel axis: imshow expects a 2-D array here.
        plt.imshow(images[i, :, :, 0])
    # The layout adjustment does not depend on the loop variable, so apply it
    # once for the whole figure instead of once per subplot.
    plt.subplots_adjust(top=1.5)
    plt.show()
    return images, labels


images, labels = test_plot()
In [19]:
# Decode the label of the first plotted captcha back to text. The result is
# upper-case because text_2_label upper-cases its input before encoding.
label_2_text(labels[0])
Out[19]:
In [20]:
def _batch_norm_relu(x):
    """Normalize `x` with moments taken over axis 0 (the batch), then ReLU."""
    m, v = tf.nn.moments(x, [0])
    x = tf.nn.batch_normalization(x, mean=m, variance=v, offset=None, scale=None, variance_epsilon=1e-6)
    return tf.nn.relu(x)


def _conv_bn_relu(x, in_channels, out_channels):
    """3x3 stride-1 SAME convolution + bias, followed by _batch_norm_relu."""
    w = tf.Variable(tf.random_normal([3, 3, in_channels, out_channels], stddev=0.01))
    b = tf.Variable(tf.constant(0., shape=[out_channels]))
    x = tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')
    # NOTE(review): the normalization below subtracts the batch mean and has
    # no learned offset, so this per-channel bias cancels out. It is kept so
    # the set of variables stays the same as before.
    x = tf.nn.bias_add(x, b)
    return _batch_norm_relu(x)


def _max_pool_2x2(x):
    """2x2 max pooling with stride 2 and SAME padding."""
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')


def interface(x):
    """Build the captcha CNN and return its logits.

    The network is three conv/pool stages (32, 64 and 64 filters) followed
    by a 1024-unit fully connected layer and a linear output layer.

    Args:
        x: Float image tensor of shape [N, H, W, IMG_CHANNALS]. H and W must
            be statically known because the flattened size is read from the
            static shape.

    Returns:
        A [N, N_CLASSES] tensor of un-normalized logits.
    """
    with tf.name_scope("conv-1"):
        x = _conv_bn_relu(x, IMG_CHANNALS, 32)
    with tf.name_scope("pool-1"):
        x = _max_pool_2x2(x)
    with tf.name_scope("conv-2"):
        x = _conv_bn_relu(x, 32, 64)
    with tf.name_scope("pool-2"):
        x = _max_pool_2x2(x)
    with tf.name_scope("conv-3"):
        x = _conv_bn_relu(x, 64, 64)
    with tf.name_scope("pool-3"):
        x = _max_pool_2x2(x)  # N * 8 * 20 * 64 for a 64 x 160 input
    with tf.name_scope("fc-4"):
        # Flatten everything except the batch axis.
        shape = x.get_shape()
        size = shape[1].value * shape[2].value * shape[3].value
        x = tf.reshape(x, [-1, size])
        w = tf.Variable(tf.random_normal([size, 1024], stddev=0.01))
        b = tf.Variable(tf.constant(0., shape=[1024]))
        x = tf.matmul(x, w) + b
        x = _batch_norm_relu(x)
    with tf.name_scope("fc-5"):
        w = tf.Variable(tf.random_normal([1024, N_CLASSES], stddev=0.01))
        b = tf.Variable(tf.constant(0., shape=[N_CLASSES]))
        x = tf.matmul(x, w) + b
    return x
模型一共约有 1069 万个参数(约 10.69M),其中绝大部分(约 1049 万)来自全连接层 fc-4。
In [29]:
# Training hyper-parameters read by train().
MAX_STEP = 500 # number of training steps; 100000 is the alternative value the author left here
BATCH_SIZE = 64
def train():
    """Build the graph, train for MAX_STEP steps and print accuracy.

    Every step trains on a fresh batch from get_data. Every 100 steps, and
    on the last step, a second fresh batch is generated and two accuracies
    are printed: per-character accuracy and whole-captcha accuracy (all
    CAPTCHA_SIZE characters correct).
    """
    x = tf.placeholder(tf.float32, [BATCH_SIZE, IMG_H, IMG_W, IMG_CHANNALS])
    y = tf.placeholder(tf.int32, [BATCH_SIZE, N_CLASSES])

    x_ = x / 255.0 - 0.5  # normalize pixel values to [-0.5, 0.5]
    logits = interface(x_)
    # View logits and labels as one CAPTCHA_NUM-way distribution per character.
    logits_ = tf.reshape(logits, [-1, CAPTCHA_SIZE, CAPTCHA_NUM])
    labels_ = tf.reshape(y, [-1, CAPTCHA_SIZE, CAPTCHA_NUM])

    with tf.name_scope('loss'):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits_, labels=labels_, dim=-1)
        loss = tf.reduce_mean(cross_entropy, name="loss")
    with tf.name_scope("accuracy"):
        correct = tf.equal(tf.argmax(logits_, -1), tf.argmax(labels_, -1))
        accuracy_one = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy_one")
        # A captcha counts as correct only if every character is correct.
        correct_all = tf.reduce_all(correct, axis=-1)
        accuracy_all = tf.reduce_mean(tf.cast(correct_all, tf.float32), name="accuracy_all")

    optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
    init = tf.global_variables_initializer()

    # The context manager closes the session even if a step raises.
    with tf.Session() as sess:
        sess.run(init)
        for i in range(MAX_STEP):
            x_bt, y_bt = get_data(BATCH_SIZE)
            _ = sess.run(optimizer, feed_dict={x: x_bt, y: y_bt})
            if i % 100 == 0 or (i + 1) == MAX_STEP:
                x_bt, y_bt = get_data(BATCH_SIZE)
                acc_one, acc_all = sess.run([accuracy_one, accuracy_all], feed_dict={x: x_bt, y: y_bt})
                # Parenthesized single-argument form prints identically on
                # Python 2 and is valid syntax on Python 3.
                print("step: %d, accuracy: %.4f | %.4f" % (i, acc_one, acc_all))
In [30]:
# Run training for MAX_STEP steps; accuracy is printed every 100 steps and on the last step.
train()
step vs accuracy
不同环境下模型精度
System | MaxStep | Time | AccuracyOne | AccuracyAll |
---|---|---|---|---|
cpu i7 | 10K | 6.5 h | 95.8% | 81.3% |
1 GTX1080 | 100K | 2 h 15 m | 99.2% | 96.5% |
2 GTX1080 | 100K | 1 h 9 m | 99.2% | 96.5% |
4 GTX1080 | 100K | 31 m | 99.0% | 96.1% |
In [ ]: