In [1]:
import warnings
warnings.filterwarnings('ignore')
In [2]:
%matplotlib inline
%pylab inline
In [3]:
import matplotlib.pylab as plt
# https://docs.scipy.org/doc/numpy/reference/routines.math.html
import numpy as np
In [4]:
from datetime import tzinfo, timedelta, datetime
In [5]:
from distutils.version import StrictVersion
In [6]:
import sklearn
assert StrictVersion(sklearn.__version__) >= StrictVersion('0.18.1')
sklearn.__version__
Out[6]:
In [7]:
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
assert StrictVersion(tf.__version__) >= StrictVersion('1.1.0')
tf.__version__
Out[7]:
In [8]:
import keras
assert StrictVersion(keras.__version__) >= StrictVersion('2.0.0')
keras.__version__
Out[8]:
In [9]:
# !curl -O https://raw.githubusercontent.com/DJCordhose/speed-limit-signs/master/data/speed-limit-signs.zip
# !curl -O https://raw.githubusercontent.com/DJCordhose/speed-limit-signs/master/data/augmented-signs.zip
In [10]:
# https://docs.python.org/3/library/zipfile.html
# from zipfile import ZipFile
# zip = ZipFile(r'speed-limit-signs.zip')
# zip.extractall('.')
# zip = ZipFile(r'augmented-signs.zip')
# zip.extractall('.')
In [11]:
# !ls -l speed-limit-signs
In [12]:
# !ls -l augmented-signs
In [13]:
import os
import skimage.data
import skimage.transform
from keras.utils.np_utils import to_categorical
import numpy as np
def load_data(data_dir, type=".ppm"):
    num_categories = 6
    # Get all subdirectories of data_dir. Each represents a label.
    directories = [d for d in os.listdir(data_dir)
                   if os.path.isdir(os.path.join(data_dir, d))]
    # Loop through the label directories and collect the data in
    # two lists, labels and images.
    labels = []
    images = []
    for d in directories:
        label_dir = os.path.join(data_dir, d)
        file_names = [os.path.join(label_dir, f) for f in os.listdir(label_dir) if f.endswith(type)]
        # For each label, load its images and add them to the images list.
        # And add the label number (i.e. directory name) to the labels list.
        for f in file_names:
            images.append(skimage.data.imread(f))
            labels.append(int(d))
    images64 = [skimage.transform.resize(image, (64, 64)) for image in images]
    y = np.array(labels)
    y = to_categorical(y, num_categories)
    X = np.array(images64)
    return X, y
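# Illustrative sketch (not part of the original notebook): to_categorical turns
# the integer directory labels into one-hot vectors, which is what
# categorical_crossentropy expects - e.g. label 2 out of 6 categories becomes
# [0, 0, 1, 0, 0, 0].
# print(to_categorical(np.array([0, 2, 5]), 6))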
In [14]:
# Load datasets.
ROOT_PATH = "./"
In [15]:
original_dir = os.path.join(ROOT_PATH, "speed-limit-signs")
original_images, original_labels = load_data(original_dir, type=".ppm")
In [16]:
data_dir = os.path.join(ROOT_PATH, "augmented-signs")
X, y = load_data(data_dir, type=".png")
In [17]:
from sklearn.model_selection import train_test_split
In [18]:
checkpoint_callback = keras.callbacks.ModelCheckpoint('../tmp/model-checkpoints/weights.epoch-{epoch:02d}-val_loss-{val_loss:.2f}.hdf5')
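# Note: checkpoint_callback only takes effect if it is also passed to model.fit
# via the callbacks argument; the fit call below currently passes only the
# TensorBoard and early-stopping callbacks.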
In [25]:
early_stopping_callback = keras.callbacks.EarlyStopping(monitor='val_loss', patience=300, verbose=1)
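# patience=300 means training only stops once val_loss has not improved for 300
# consecutive epochs, so with up to 2000 epochs this mainly cuts off runs whose
# validation loss has clearly stalled.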
In [21]:
# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tensorboard/README.md
# https://keras.io/callbacks/#tensorboard
# http://stackoverflow.com/questions/42112260/how-do-i-use-the-tensorboard-callback-of-keras
tb_callback = keras.callbacks.TensorBoard(log_dir='../tmp/tf_log')
# histogram_freq=1, write_graph=True, write_images=True)
# tbCallBack = keras.callbacks.TensorBoard(log_dir='./logs', histogram_freq=0, batch_size=32, write_graph=True, write_grads=False, write_images=False, embeddings_freq=0, embeddings_layer_names=None, embeddings_metadata=None)
# To start tensorboard
# tensorboard --logdir=/mnt/c/Users/olive/Development/ml/tf_log
# open http://localhost:6006
In [22]:
# we want to distribute our different classes equally over test and train; this works using stratify
# https://github.com/amueller/scipy-2017-sklearn/blob/master/notebooks/04.Training_and_Testing_Data.ipynb
# http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=3, stratify=y)
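# Optional sanity check (sketch, not from the original notebook): stratify=y
# should give roughly the same class proportions in train and test; counting
# the one-hot labels per class makes that visible.
# print(np.bincount(y_train.argmax(axis=1)))
# print(np.bincount(y_test.argmax(axis=1)))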
In [23]:
X_train.shape, y_train.shape
Out[23]:
In [24]:
from keras.models import Model
from keras.layers import Dense, Dropout, Activation, Flatten, Input
from keras.layers import Conv2D, MaxPooling2D
# drop_out = 0.9
# drop_out = 0.75
drop_out = 0.5
# drop_out = 0.25
# drop_out = 0.0
# input tensor for a 3-channel 64x64 image
inputs = Input(shape=(64, 64, 3))
# one block of convolutional layers
x = Conv2D(64, (3, 3), activation='relu')(inputs)
# x = Dropout(drop_out)(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
# x = Dropout(drop_out)(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Dropout(drop_out)(x)
# one more block
x = Conv2D(128, (3, 3), activation='relu')(x)
# x = Dropout(drop_out)(x)
x = Conv2D(128, (3, 3), activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Dropout(drop_out)(x)
# one more block
x = Conv2D(256, (3, 3), activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Dropout(drop_out)(x)
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(drop_out)(x)
# softmax activation, 6 categories
predictions = Dense(6, activation='softmax')(x)
model = Model(inputs=inputs, outputs=predictions)
model.summary()
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
In [26]:
!rm -rf ../tmp/tf_log
!rm -rf ../tmp/model-checkpoints
!mkdir -p ../tmp/model-checkpoints
!mkdir -p ../tmp/tf_log
In [ ]:
# Running on a GPU, the batch size might be critical depending on the GPU memory available
# more is desirable, but we might end up using 50 only
print(datetime.utcnow().isoformat())
# BE CAREFUL, validation data is always the last part of the data and is not shuffled
# https://keras.io/getting-started/faq/#how-is-the-validation-split-computed
model.fit(X_train, y_train, epochs=2000, batch_size=500, validation_split=0.3,
          callbacks=[tb_callback, early_stopping_callback])
# callbacks=[tb_callback])
# model.fit(X_train, y_train, epochs=50, batch_size=200, validation_split=0.3)
print(datetime.utcnow().isoformat())
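# Sketch (assumption, not part of the original run): train_test_split already
# shuffles X_train, but if the training data were ordered by class, the
# unshuffled validation_split above would only ever see a few classes. A manual
# shuffle before fit avoids that:
# perm = np.random.permutation(len(X_train))
# X_train, y_train = X_train[perm], y_train[perm]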
In [27]:
train_loss, train_accuracy = model.evaluate(X_train, y_train, batch_size=500)
train_loss, train_accuracy
Out[27]:
In [28]:
test_loss, test_accuracy = model.evaluate(X_test, y_test, batch_size=500)
test_loss, test_accuracy
Out[28]:
In [29]:
original_loss, original_accuracy = model.evaluate(original_images, original_labels, batch_size=500)
original_loss, original_accuracy
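# Illustrative sketch (not part of the original notebook): predict returns one
# softmax vector per image; argmax picks the most likely of the 6 categories.
# probabilities = model.predict(original_images[:1])
# print(probabilities[0].argmax(), original_labels[0].argmax())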
Out[29]:
In [30]:
!mkdir -p models
In [31]:
model.save('models/conv-vgg-augmented.hdf5')
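# Sketch (assumption): the saved HDF5 file can be reloaded with load_model,
# which restores the architecture, weights and compile configuration.
# from keras.models import load_model
# restored_model = load_model('models/conv-vgg-augmented.hdf5')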
In [32]:
!ls -lh models
In [33]:
!curl --upload-file ./models/conv-vgg-augmented.hdf5 https://transfer.sh/conv-vgg-augmented.hdf5
In [ ]: