In [1]:
import os
import uuid
import random
import cv2
import numpy as np
from captcha.image import ImageCaptcha
import matplotlib.pyplot as plt
from helpers import corpus, corpus_len, show_img, prepare_data, one_hot_to_label, showcase
%matplotlib inline
In [2]:
# Load the training split. Use a context manager: np.load on an .npz archive
# returns an NpzFile that keeps the underlying file handle open; indexing
# pulls each array fully into memory, after which the handle can be closed.
with np.load('./data/data.npz') as train_data:
    train_X = train_data['X']
    train_y = train_data['y']
print('shape X {0}, y {1}'.format(train_X.shape, train_y.shape))
In [3]:
# Load the validation split (same context-manager pattern: close the
# NpzFile's handle once the arrays are materialized in memory).
with np.load('./data/val.npz') as validate_data:
    validate_X = validate_data['X']
    validate_y = validate_data['y']
print('shape X {0}, y {1}'.format(validate_X.shape, validate_y.shape))
In [4]:
# Load the held-out test split (same context-manager pattern: close the
# NpzFile's handle once the arrays are materialized in memory).
with np.load('./data/test.npz') as test_data:
    test_X = test_data['X']
    test_y = test_data['y']
print('shape X {0}, y {1}'.format(test_X.shape, test_y.shape))
In [5]:
# Run the shared preprocessing helper over all three splits.
# Presumably prepare_data normalizes pixels and one-hot encodes the
# labels over the 26-letter corpus — TODO confirm against helpers.py.
# NOTE(review): the raw arrays are rebound to the same names, so these
# lines are not idempotent — re-running the cell applies prepare_data twice.
train_X, train_y = prepare_data(train_X, train_y)
validate_X, validate_y = prepare_data(validate_X, validate_y)
test_X, test_y = prepare_data(test_X, test_y)
In [6]:
# Visual sanity check: show one randomly chosen training image together
# with its decoded label.
idx = random.randrange(train_X.shape[0])
show_img(train_X[idx])
print('label is "{0}"'.format(one_hot_to_label(train_y[idx])))
In [7]:
import tensorflow as tf
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense, Flatten, Activation
from keras.layers import Convolution2D
from keras.layers import MaxPooling2D
from keras.layers import BatchNormalization, Dropout
from keras.optimizers import SGD, Adam
In [8]:
# Baseline model: multinomial logistic regression — flatten the 30x30
# image and map it straight to a 26-way softmax.
m1 = Sequential([
    Flatten(input_shape=(30, 30)),
    Dense(26),
    Activation('softmax'),
])
m1.compile(loss='categorical_crossentropy',
           optimizer='adam',
           metrics=['accuracy'])
m1.summary()
In [9]:
# Train the linear baseline for 10 epochs, reporting per-epoch loss and
# accuracy on the validation split (verbose=1). The returned History
# object is the cell's displayed output.
m1.fit(train_X, train_y,
validation_data=(validate_X, validate_y),
epochs=10,
batch_size=64,
verbose=1)
Out[9]:
In [10]:
# Baseline test-set probabilities; showcase (from helpers) renders 10
# sample images with predicted vs. true labels.
m1_h = m1.predict(test_X)
showcase(test_X, test_y, m1_h, case_num=10)
In [11]:
# Deeper MLP: two hidden layers (900 -> 200), each followed by batch
# normalization and 50% dropout before its ReLU, ending in the same
# 26-way softmax classifier as the baseline.
mk = Sequential([
    Flatten(input_shape=(30, 30)),
    Dense(900),
    BatchNormalization(),
    Dropout(0.5),
    Activation('relu'),
    Dense(200),
    BatchNormalization(),
    Dropout(0.5),
    Activation('relu'),
    Dense(26),
    Activation('softmax'),
])
mk.compile(loss='categorical_crossentropy',
           optimizer='adam',
           metrics=['accuracy'])
mk.summary()
In [12]:
# Train the MLP with the same schedule as the baseline (10 epochs,
# batch size 64, validation on the held-out split).
mk.fit(train_X, train_y,
validation_data=(validate_X, validate_y),
epochs=10,
batch_size=64,
verbose=1)
Out[12]:
In [21]:
# MLP class probabilities on the test set.
mk_h = mk.predict(test_X)
In [22]:
# Visual comparison of MLP predictions vs. ground truth on 10 samples.
showcase(test_X, test_y, mk_h, case_num=10)
In [23]:
# Small CNN: one 3x3 convolution (64 filters, stride 1, no padding) with
# batch-norm / dropout / ReLU, a 2x2 max-pool, then the same 200-unit
# dense head as the MLP. Inputs carry an explicit channel axis: (30, 30, 1).
mc = Sequential([
    Convolution2D(64, kernel_size=3, strides=(1, 1), padding='valid', input_shape=(30, 30, 1)),
    BatchNormalization(),
    Dropout(0.5),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2), padding='valid'),
    Flatten(),
    Dense(200),
    BatchNormalization(),
    Dropout(0.5),
    Activation('relu'),
    Dense(26),
    Activation('softmax'),
])
mc.compile(loss='categorical_crossentropy',
           optimizer='adam',
           metrics=['accuracy'])
mc.summary()
In [27]:
# The conv net expects a channel axis, so reshape (n, 30, 30) to
# (n, 30, 30, 1) for both the training and validation inputs.
mc.fit(train_X.reshape((train_X.shape[0], 30, 30, 1)), train_y,
validation_data=(validate_X.reshape((validate_X.shape[0], 30, 30, 1)), validate_y),
epochs=10,
batch_size=64,
verbose=1)
Out[27]:
In [28]:
# CNN test-set probabilities (same channel-axis reshape as training).
mc_h = mc.predict(test_X.reshape((test_X.shape[0], 30, 30, 1)))
In [29]:
# Visual comparison of CNN predictions vs. ground truth on 10 samples.
showcase(test_X, test_y, mc_h, case_num=10)
In [30]:
# Per-model softmax outputs on the TRAINING set, intended as ensemble inputs.
# NOTE(review): feature_1/2/3 are never referenced again — combine_feature()
# below recomputes all three predictions. This cell is dead code.
feature_1 = m1.predict(train_X)
feature_2 = mk.predict(train_X)
feature_3 = mc.predict(train_X.reshape(train_X.shape[0], 30, 30, 1))
In [31]:
# Preallocate the stacked feature matrix (corpus_len probabilities per model).
# NOTE(review): np.ndarray leaves the buffer uninitialized, and feature_X is
# rebound by combine_feature() below anyway — this cell only displays the
# intended shape and could be dropped.
feature_X = np.ndarray((train_X.shape[0], corpus_len*3), dtype=np.float32)
feature_X.shape
Out[31]:
In [32]:
def combine_feature(X):
    """Concatenate the three base models' class-probability outputs.

    Parameters
    ----------
    X : array of shape (n, 30, 30) — preprocessed images
        (assumed layout, matching the splits above — TODO confirm).

    Returns
    -------
    float32 array of shape (n, corpus_len * 3), laid out as
    [m1 probs | mk probs | mc probs].
    """
    f1 = m1.predict(X)
    f2 = mk.predict(X)
    # The conv model requires an explicit trailing channel axis.
    f3 = mc.predict(X.reshape(X.shape[0], X.shape[1], X.shape[2], 1))
    # hstack replaces the original uninitialized np.ndarray buffer and
    # manual column slicing; astype keeps the original float32 guarantee
    # (copy=False makes it a no-op when predictions are already float32).
    return np.hstack((f1, f2, f3)).astype(np.float32, copy=False)

feature_X = combine_feature(train_X)
print(feature_X[0])
In [33]:
# Ensemble head: a single softmax layer over the concatenated outputs of
# m1, mk and mc. Use corpus_len (26, as established by the Dense(26)
# outputs above) instead of the magic numbers 26 and 78, keeping this cell
# consistent with the corpus_len*3 feature width used by combine_feature.
me = Sequential()
me.add(Dense(corpus_len, input_shape=(corpus_len * 3,)))
me.add(Activation('softmax'))
me.compile(loss='categorical_crossentropy',
           optimizer='adam',
           metrics=['accuracy'])
me.summary()
In [34]:
# Train the ensemble head on the stacked training features; validation
# features are rebuilt on the fly from the validation images.
me.fit(feature_X, train_y,
validation_data=(combine_feature(validate_X), validate_y),
epochs=20,
batch_size=50,
verbose=1)
Out[34]:
In [35]:
# Ensemble predictions on the test set (features built from test images).
me_h = me.predict(combine_feature(test_X))
In [36]:
# Visual comparison of ensemble predictions vs. ground truth on 10 samples.
showcase(test_X, test_y, me_h, case_num=10)
In [88]:
# Persist all four trained models (architecture + weights) as HDF5 files
# next to the notebook, so they can be reloaded without retraining.
for model, path in ((m1, './m1.h5'), (mk, './mk.h5'),
                    (mc, './mc.h5'), (me, './me.h5')):
    model.save(path)
In [ ]: