In [1]:
# import modules
%matplotlib inline
import random
import pylab
import pandas as pd
import numpy as np
import cPickle as pkl
from PIL import Image
from lasagne import layers
from lasagne.updates import nesterov_momentum
from theano.tensor.nnet import softmax
from nolearn.lasagne import NeuralNet, BatchIterator
from sklearn.cross_validation import train_test_split


Using gpu device 0: GeForce GT 740M (CNMeM is disabled)

In [63]:
# test image
from scipy.misc import imread as ims
img = ims('/home/faizy/workspace/project/project/datasets/svt/svt1/img/00_13.jpg')
# optional crop (e.g. for 00_12): img = img[292:450, 176:850, :]
print img.shape
# image[y:y + image_height, x:x + image_width, :]
pylab.imshow(img)
pylab.show()


(768, 1024, 3)

In [3]:
# extra functions
def unpickle(filer):
    with open(filer, 'rb') as f:
        return pkl.load(f)

In [4]:
def shiftup(dataset):
    shifted_dataset = np.zeros(dataset.shape)
    # loop over images
    for i in range(dataset.shape[0]):
        # rows 0-15 take the lower half of the original image
        for j in range(16):
            shifted_dataset[i, 0, j, :] = dataset[i, 0, 16 + j, :]
        # rows 16-31 replicate the last valid row
        for j in range(16, 32):
            shifted_dataset[i, 0, j, :] = shifted_dataset[i, 0, 15, :]
    return shifted_dataset

In [5]:
def shiftdown(dataset):
    shifted_dataset = np.zeros(dataset.shape)
    # loop over images
    for i in range(dataset.shape[0]):
        # rows 16-31 take the upper half of the original image
        for j in range(16, 32):
            shifted_dataset[i, 0, j, :] = dataset[i, 0, j - 16, :]
        # rows 0-15 replicate the first valid row
        for j in range(16):
            shifted_dataset[i, 0, j, :] = shifted_dataset[i, 0, 16, :]
    return shifted_dataset

In [6]:
def shiftleft(dataset):
    shifted_dataset = np.zeros(dataset.shape)
    # loop over images
    for i in range(dataset.shape[0]):
        # columns 0-15 take the right half of the original image
        for j in range(16):
            shifted_dataset[i, 0, :, j] = dataset[i, 0, :, 16 + j]
        # columns 16-31 replicate the last valid column
        for j in range(16, 32):
            shifted_dataset[i, 0, :, j] = shifted_dataset[i, 0, :, 15]
    return shifted_dataset

In [7]:
def shiftright(dataset):
    shifted_dataset = np.zeros(dataset.shape)
    # loop over images
    for i in range(dataset.shape[0]):
        # columns 16-31 take the left half of the original image
        for j in range(16, 32):
            shifted_dataset[i, 0, :, j] = dataset[i, 0, :, j - 16]
        # columns 0-15 replicate the first valid column
        for j in range(16):
            shifted_dataset[i, 0, :, j] = shifted_dataset[i, 0, :, 16]
    return shifted_dataset
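All four helpers translate by 16 px and pad by edge replication, so they could be collapsed into one loop-free sketch with np.roll (shift_edge is not a name from the original):

In [ ]:
def shift_edge(dataset, dy = 0, dx = 0):
    # translate every image by (dy, dx) pixels, then fill the vacated
    # strip by replicating the nearest valid row/column (edge padding)
    out = np.roll(np.roll(dataset, dy, axis = 2), dx, axis = 3)
    if dy > 0:
        out[:, :, :dy, :] = out[:, :, dy:dy + 1, :]
    elif dy < 0:
        out[:, :, dy:, :] = out[:, :, dy - 1:dy, :]
    if dx > 0:
        out[:, :, :, :dx] = out[:, :, :, dx:dx + 1]
    elif dx < 0:
        out[:, :, :, dx:] = out[:, :, :, dx - 1:dx]
    return out

# e.g. shiftup(x) == shift_edge(x, dy = -16), shiftright(x) == shift_edge(x, dx = 16)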

In [8]:
# load train_test set
# cifar
train_dict = unpickle('/home/faizy/workspace/cifar/cifar-10-batches-py/data_batch_1')
train2_images = train_dict['data'].astype('float32')
train2_y = np.zeros((10000, )).astype('int')   # CIFAR images = non-text, label 0
test_dict = unpickle('/home/faizy/workspace/cifar/cifar-10-batches-py/test_batch')
test2_images = test_dict['data'].astype('float32')

# chars74k
data = pd.read_csv('/home/faizy/workspace/project/project/scripts/LISTFILE.txt', sep = ' ', header = None)
root = '/home/faizy/workspace/project/project/datasets/English/'
data_x = np.zeros((data.shape[0], 1, 32, 32))
data_y = np.ones((data.shape[0], )).astype('int32')   # Chars74k images = text, label 1
from scipy.misc import imread, imresize
for idx, path in enumerate(data[0]):
    img = imread(root + path)
    img = imresize(img, (32, 32))
    if len(img.shape) == 3:
        data_x[idx, ...] = img.dot([0.299, 0.587, 0.114])   # Rec. 601 luma weights
    else:
        data_x[idx, ...] = img
        
data_x = data_x.astype('float32')
train1_x, test1_x, train1_y, test1_y = train_test_split(data_x, data_y, test_size = 0.2)

In [9]:
# Augmented data
train3_x = shiftup(train1_x)
train4_x = shiftdown(train1_x)
train5_x = shiftleft(train1_x)
train6_x = shiftright(train1_x)

# shifted (off-centre) copies are labelled as negatives for the detector
train3_y = np.zeros((train3_x.shape[0], )).astype('int')
train4_y = np.zeros((train4_x.shape[0], )).astype('int')
train5_y = np.zeros((train5_x.shape[0], )).astype('int')
train6_y = np.zeros((train6_x.shape[0], )).astype('int')
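A quick visual sanity check of one original/shifted pair (a sketch using the arrays above):

In [ ]:
# original sample next to its shifted-up copy
pylab.subplot(1, 2, 1); pylab.imshow(train1_x[0, 0], cmap = 'gray'); pylab.axis('off')
pylab.subplot(1, 2, 2); pylab.imshow(train3_x[0, 0], cmap = 'gray'); pylab.axis('off')
pylab.show()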

In [10]:
print train1_x.shape, train2_images.shape, train3_x.shape, train4_x.shape, train5_x.shape, train6_x.shape


(6164, 1, 32, 32) (10000, 3072) (6164, 1, 32, 32) (6164, 1, 32, 32) (6164, 1, 32, 32) (6164, 1, 32, 32)

In [11]:
# preprocess
# cifar
train2_images /= train2_images.std(axis = None)
train2_images -= train2_images.mean()

test2_images /= test2_images.std(axis = None)
test2_images -= test2_images.mean()

# chars74k
train1_x /= train1_x.std(axis = None)
train1_x -= train1_x.mean()

test1_x /= test1_x.std(axis = None)
test1_x -= test1_x.mean()

# augmented data

train3_x /= train3_x.std(axis = None)
train3_x -= train3_x.mean()

train4_x /= train4_x.std(axis = None)
train4_x -= train4_x.mean()

train5_x /= train5_x.std(axis = None)
train5_x -= train5_x.mean()

train6_x /= train6_x.std(axis = None)
train6_x -= train6_x.mean()
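The repeated scale-then-centre pairs could be factored into one helper (a sketch; standardize is not a name from the original):

In [ ]:
def standardize(x):
    # scale to unit std, then centre to zero mean (in place)
    x /= x.std(axis = None)
    x -= x.mean()

for arr in (train2_images, test2_images, train1_x, test1_x,
            train3_x, train4_x, train5_x, train6_x):
    standardize(arr)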

In [12]:
# reshape dataset
# cifar
# grayscale and reshape to NCHW
train2_x_rgb = train2_images.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)

train2_x = np.zeros((10000, 1, 32, 32))
for i in range(10000):
    train2_x[i, :, :, :] = np.dot(train2_x_rgb[i, :, :, :], [0.299, 0.587, 0.114])

test2_x_rgb = test2_images.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)

test2_x = np.zeros((10000, 1, 32, 32))
for i in range(10000):
    test2_x[i, :, :, :] = np.dot(test2_x_rgb[i, :, :, :], [0.299, 0.587, 0.114])
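The per-image loops are equivalent to a single dot product per set (a loop-free sketch):

In [ ]:
# (N, 32, 32, 3) . (3,) -> (N, 32, 32), then add the channel axis back
train2_x = train2_x_rgb.dot([0.299, 0.587, 0.114])[:, np.newaxis, :, :]
test2_x = test2_x_rgb.dot([0.299, 0.587, 0.114])[:, np.newaxis, :, :]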

In [13]:
# finally
#train_x = np.vstack((train1_x, train2_x))
train_x = np.vstack((train1_x, train3_x))
train_x = np.vstack((train_x, train4_x))
train_x = np.vstack((train_x, train5_x))
train_x = np.vstack((train_x, train6_x))

train_y = np.concatenate([train1_y, train3_y, train4_y, train5_y, train6_y])#train2_y, 

test_x = test1_x  # np.vstack((test1_x, test2_x))

# test_y must line up with test_x: append the 10000 CIFAR zeros only if test2_x is stacked in above
test_y = test1_y  # np.concatenate([test1_y, np.zeros((10000, ))])

In [14]:
# check again
train_x = train_x.astype('float32')
train_y = train_y.astype('int32')
test_x = test_x.astype('float32')
test_y = test_y.astype('int32')

In [15]:
# setting nn 
net = NeuralNet(
    layers = [
        ('input', layers.InputLayer),
        ('conv1', layers.Conv2DLayer),
        ('pool1', layers.MaxPool2DLayer),
        ('dropout1', layers.DropoutLayer),
        ('conv2', layers.Conv2DLayer),
        ('pool2', layers.MaxPool2DLayer),
        ('dropout2', layers.DropoutLayer),
        ('conv3', layers.Conv2DLayer),
        ('hidden4', layers.DenseLayer),
        ('output', layers.DenseLayer),
    ],

    input_shape = (None, 1, 32, 32),
    conv1_num_filters = 32, conv1_filter_size = (5, 5),
    pool1_pool_size = (2, 2),
    dropout1_p = 0.2,
    conv2_num_filters = 64, conv2_filter_size = (5, 5),
    pool2_pool_size = (2, 2),
    dropout2_p = 0.2,
    conv3_num_filters = 128, conv3_filter_size = (5, 5),
    hidden4_num_units = 128,
    output_num_units = 2, output_nonlinearity = softmax,  # 2 classes: text vs. non-text

    batch_iterator_train = BatchIterator(batch_size = 1000),
    batch_iterator_test = BatchIterator(batch_size = 1000),

    update=nesterov_momentum,
    update_learning_rate = 0.01,
    update_momentum = 0.9,

    use_label_encoder = True,
    regression = False,
    max_epochs = 50,
    verbose = 1,
)
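A quick arithmetic check of the "273794 learnable parameters" reported below, counting weights plus biases per layer:

In [ ]:
conv1 = 32 * (1 * 5 * 5) + 32         # 832
conv2 = 64 * (32 * 5 * 5) + 64        # 51264
conv3 = 128 * (64 * 5 * 5) + 128      # 204928
hidden4 = 128 * (128 * 1 * 1) + 128   # 16512 (conv3 output is 128x1x1)
output = 2 * 128 + 2                  # 258
print conv1 + conv2 + conv3 + hidden4 + output   # 273794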

In [16]:
# train and test nn
net.fit(train_x, train_y)
pred = net.predict(test_x)


# Neural Network with 273794 learnable parameters

## Layer information

  #  name      size
---  --------  --------
  0  input     1x32x32
  1  conv1     32x28x28
  2  pool1     32x14x14
  3  dropout1  32x14x14
  4  conv2     64x10x10
  5  pool2     64x5x5
  6  dropout2  64x5x5
  7  conv3     128x1x1
  8  hidden4   128
  9  output    2

  epoch    train loss    valid loss    train/val    valid acc  dur
-------  ------------  ------------  -----------  -----------  ------
      1       0.48714       1.25730      0.38745      0.82386  23.93s
      2       0.85560       0.47422      1.80423      0.82386  24.01s
      3       0.50895       0.46565      1.09299      0.82386  23.63s
      4       0.51924       0.46565      1.11509      0.82386  23.92s
      5       0.52448       0.46551      1.12668      0.82386  23.69s
      6       0.52448       0.46543      1.12686      0.82386  23.68s
      7       0.52389       0.46541      1.12566      0.82386  23.67s
      8       0.52318       0.46531      1.12435      0.82386  23.66s
      9       0.52288       0.46525      1.12388      0.82386  23.67s
     10       0.52232       0.46509      1.12304      0.82386  23.67s
     11       0.52226       0.46501      1.12312      0.82386  23.66s
     12       0.52135       0.46467      1.12198      0.82386  23.67s
     13       0.52187       0.46482      1.12273      0.82386  23.67s
     14       0.51935       0.46418      1.11886      0.82386  23.67s
     15       0.52379       0.46488      1.12672      0.82386  23.67s
     16       0.52032       0.46389      1.12165      0.82386  23.67s
     17       0.51807       0.46324      1.11835      0.82386  23.86s
     18       0.51825       0.46200      1.12176      0.82386  23.68s
     19       0.51306       0.46135      1.11208      0.82386  23.67s
     20       0.51398       0.45919      1.11932      0.82386  23.67s
     21       0.48856       0.48775      1.00166      0.82386  23.69s
     22       0.53357       0.45678      1.16812      0.82386  24.49s
     23       0.48554       0.46244      1.04996      0.82386  24.07s
     24       0.49502       0.45487      1.08827      0.82443  24.60s
     25       0.43591       0.50116      0.86981      0.82371  23.67s
     26       0.48804       0.47436      1.02883      0.82313  23.67s
     27       0.44505       0.46915      0.94863      0.82700  23.67s
     28       0.42090       0.43202      0.97425      0.83613  23.67s
     29       0.36677       0.44852      0.81773      0.84485  24.22s
     30       0.36789       0.41730      0.88159      0.85212  23.67s
     31       0.34491       0.41055      0.84012      0.85741  23.67s
     32       0.31317       0.37950      0.82522      0.86670  23.80s
     33       0.31107       0.37326      0.83338      0.87654  23.76s
     34       0.27264       0.33703      0.80895      0.87841  23.66s
     35       0.27498       0.36946      0.74426      0.88124  23.67s
     36       0.26171       0.31473      0.83152      0.89569  23.67s
     37       0.23129       0.27095      0.85360      0.91665  23.67s
     38       0.18283       0.20291      0.90105      0.92765  23.67s
     39       0.16225       0.21693      0.74796      0.93323  23.67s
     40       0.14313       0.16836      0.85010      0.94209  23.67s
     41       0.13754       0.17432      0.78899      0.94037  23.67s
     42       0.12682       0.15179      0.83547      0.94409  23.67s
     43       0.11447       0.15145      0.75583      0.94709  23.67s
     44       0.11192       0.13869      0.80697      0.94866  23.67s
     45       0.09772       0.12761      0.76577      0.95381  23.84s
     46       0.09468       0.12757      0.74218      0.95453  23.59s
     47       0.08887       0.10956      0.81112      0.95897  23.56s
     48       0.09936       0.13223      0.75142      0.95195  23.56s
     49       0.09732       0.11675      0.83360      0.95609  23.56s
     50       0.07969       0.10730      0.74266      0.96167  23.57s

In [17]:
with open('/home/faizy/workspace/project/project/models/detector_2.pkl', 'wb') as f:
    pkl.dump(net, f)
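Reloading the pickled detector later would mirror the dump (a sketch; assumes the same lasagne/nolearn modules are importable):

In [ ]:
with open('/home/faizy/workspace/project/project/models/detector_2.pkl', 'rb') as f:
    net = pkl.load(f)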

In [18]:
from sklearn.metrics import accuracy_score, classification_report
print classification_report(test_y, pred)



In [72]:
from sklearn.feature_extraction import image
patches = image.extract_patches(img, (100, 60, 3), extraction_step = 5)
print patches.shape


(134, 193, 1, 100, 60, 3)
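The patch grid follows floor((dim - patch) / step) + 1 per axis, which matches the shape above:

In [ ]:
print (768 - 100) // 5 + 1, (1024 - 60) // 5 + 1   # 134 193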

In [73]:
from scipy.misc import imresize
new_lst = []
# resize every patch to the detector's 32x32 input size
for i in range(patches.shape[0]):
    for j in range(patches.shape[1]):
        new_lst.append(imresize(patches[i, j, 0, :, :, :], (32, 32)))
        
print len(new_lst)


25862

In [74]:
new_list = np.stack(new_lst)
new_list = new_list.dot([0.299, 0.587, 0.114])   # grayscale, same weights as training
tester = new_list.reshape(patches.shape[0] * patches.shape[1], 1, 32, 32)

In [75]:
tester /= tester.std(axis = None)
tester -= tester.mean()
tester = tester.astype('float32')

In [76]:
print tester.shape


(25862, 1, 32, 32)

In [77]:
preder = net.predict_proba(tester)

In [78]:
heatmap = preder[:, 1].reshape((patches.shape[0], patches.shape[1]))   # P(text) per patch
print heatmap.shape


(134, 193)

In [79]:
pylab.pcolor(heatmap[::-1])   # flip vertically: pcolor's origin is bottom-left
pylab.axis('off')
pylab.show()
pylab.imshow(img)
pylab.axis('off')
pylab.show()
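The strongest response maps back to image coordinates via the 5-px extraction step (a sketch):

In [ ]:
# heatmap cell (r, c) corresponds to the 100x60 patch whose top-left
# corner sits at (r * 5, c * 5) in the original image
r, c = np.unravel_index(heatmap.argmax(), heatmap.shape)
print 'best patch at y=%d, x=%d' % (r * 5, c * 5)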




In [ ]: