In [44]:
import re
from PIL import Image
import os
import numpy as np
from tempfile import mkdtemp
import os.path as path
import matplotlib.pyplot as plt
import numpy as np
import theano
import keras
import traceback
from keras.models import Sequential
from scipy.misc import imread, imresize, imsave
from keras.layers.core import Flatten, Dense, Dropout,Activation
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D,UpSampling2D,UpSampling1D,Cropping2D
from keras.optimizers import SGD
from keras.utils.np_utils import to_categorical
%matplotlib inline
from keras import backend as K
from keras.utils import np_utils
K.set_image_dim_ordering('th')
import traceback
from scipy import ndimage
from sklearn.cross_validation import train_test_split
from keras import callbacks
import glob
from shutil import copyfile

In [45]:
def imageResize(basename, imageName):
    """
    Resize a single image to 128x128 and save it into a sibling folder.

    basename : folder containing the image, e.g. /home/username/XYZFolder
    imageName : file name, e.g. xyz.jpg

    The resized copy is written to basename + '_resized'; that folder is
    created on first use.
    """
    new_width = 128
    new_height = 128
    outDir = basename + '_resized'
    # Create the output folder up front instead of relying on a bare
    # except around img.save() -- the old flow re-ran the whole
    # open/resize/save on ANY failure, which could mask unrelated errors
    # (e.g. a corrupt image) behind a "directory exists" retry.
    if not os.path.isdir(outDir):
        os.mkdir(outDir)
    img = Image.open(os.path.join(basename, imageName))
    img = img.resize((new_width, new_height), Image.ANTIALIAS)
    img.save(os.path.join(outDir, imageName))

In [46]:
def resizer(folderPath):
    """
    Resize every supported image file found under folderPath (recursively).

    resizer('/home/username/XYZFolder')
    """
    # Bug fix: the original `endswith(".jpg" or ".jpeg" or ".png" or ".gif")`
    # only ever tested ".jpg", because `or` returns its first truthy operand.
    # str.endswith accepts a tuple of suffixes.
    extensions = (".jpg", ".jpeg", ".png", ".gif")
    for subdir, dirs, files in os.walk(folderPath):
        for fileName in files:
            try:
                filepath = subdir + os.sep + fileName
                if filepath.endswith(extensions):
                    imageResize(subdir, fileName)
            except Exception:
                # print_exc() writes the traceback itself and returns None;
                # the old `print traceback.print_exc()` also printed "None".
                traceback.print_exc()

In [47]:
# resizer('imdb_crop')

In [48]:
def load_image(infilename):
    """
    Load an image from disk as a channel-first int32 numpy array.

    infilename : path to an image file.
    Returns an array of shape (channels, height, width).
    """
    img = ndimage.imread(infilename)
    data = np.asarray(img, dtype="int32")
    # Bug fix: the original used reshape(C, H, W), which keeps the flat
    # element order and therefore scrambles the pixel layout rather than
    # moving the channel axis.  transpose performs the intended
    # (H, W, C) -> (C, H, W) re-ordering.
    return data.transpose(2, 0, 1)

In [49]:
# some precautionary functions
def loadImageTest(fileName):
    """
    Return 1 if the image at fileName loads cleanly, 0 otherwise.

    The imdb database contains many images that are not in a proper
    format and fail to load with an exception.  Such images are probed
    here first and skipped if they cannot be loaded.
    """
    try:
        load_image(fileName)
        return 1
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are no longer silently swallowed as "bad image".
        return 0
    
def responseTest(diff):
    """
    Validate an age value parsed from an imdb file name.

    Not every image name obeys the expected naming pattern; naming errors
    sometimes produce ages above 100 years, which is outside the class
    range the model supports.  Images failing this criterion are omitted.

    Returns 1 when 0 < diff <= 100, else 0.  (The original returned None
    in the false branch -- still falsy, but inconsistent -- and wrapped
    the comparison in a try/except that could never usefully fire.)
    """
    if 0 < diff <= 100:
        return 1
    return 0

In [50]:
def turnToNumpy(files):
    """
    turn stored images on disk to numpy
    turnToNumpy('/home/username/XYZFolder/
    """
    temp = []
    y = []
    print "Total Count of Files",len(files)
    responseArray = []
    for fileName in files:
                try:
                    if fileName.endswith(".jpg" or ".jpeg" or ".png"):
                        
                        matchObj = re.search( r'nm*\d+_rm\d+_(\d+)-\d+-\d+_(\d+).jpg', fileName, re.M|re.I)
                        # yob - year of birth
                        # dopt - date of photo taken
                        yob =  matchObj.group(1)
                        dopt =  matchObj.group(2)
#                         print yob,dopt, int(dopt)-int(yob)
                        diff  = int(dopt)-int(yob)
                        if int(diff) > 0 and int(diff) <100:
#                             print type(int(dopt)-int(yob)),diff,diff > 0
                            individualResponse = [0]*100
                            individualResponse[diff] =1 
                            responseArray.append(individualResponse)
#                             responseArray.append(diff/100)
                            temp.append(load_image(fileName))
#                             y.append(abs(int(dopt)-int(yob)))
                except:
                    ""
    return np.asarray(temp, dtype='float32'),np.asarray(responseArray, dtype='bool')

In [51]:
# defining convolutional network: a VGG-style stack of 3x3 conv blocks
# (Theano channel-first ordering, input 3x128x128).  Each entry is
# (filters, number of ZeroPadding+Conv layers), followed by a 2x2 max-pool.
convBlocks = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

model = Sequential()
isFirstLayer = True
for nFilters, nConvs in convBlocks:
    for _ in range(nConvs):
        if isFirstLayer:
            # only the very first layer carries the input shape
            model.add(ZeroPadding2D((1, 1), input_shape=(3, 128, 128)))
            isFirstLayer = False
        else:
            model.add(ZeroPadding2D((1, 1)))
        model.add(Convolution2D(nFilters, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2, 2)))

# classifier head: two dense layers, dropout, 100-way softmax (one class
# per year of age)
model.add(Flatten())
model.add(Dense(4090, activation='relu'))
model.add(Dense(4090, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(100, activation='softmax'))
# model.summary()

In [60]:
# this function will load images iteratively in memory 
# CPU and GPU memory friendly iterator
def myGeneratorEX(samples_per_epoch,epoch):
    """
    samples_per_epoch : number of images to be loaded in CPU memory at a time
    epoch : number of epochs for training 
    """
    # defining optimizer function
    sgd = SGD(lr=0.01, momentum=0.1, decay=0.0, nesterov=True)
    # compiling model
    model.compile(optimizer=sgd, loss='categorical_crossentropy',metrics=['accuracy'])
    
    
    folderName = "imdb_crop_resized" # folder name where resized images are placed
    
    fileNames =  glob.glob(folderName+"/*.jpg")#All file names with .jpg extension
    
    # first 100 imageswill be ued for onthe fly visual performace checking at each iteration
    initialFileNames = fileNames[:100]

    k =0 
    while k < epoch: # for each epoch do following
        print "Epoch : ",k," | Total Images : ",len(fileNames)
        for i in range(len(fileNames)/samples_per_epoch): 
            #All files (~438189) are loaded in memory with batch of size  'samples_per_epoch' e.g.1000
            try:
                # loaded images are converted to numpy array
                x_batch,y_batch = turnToNumpy(fileNames[i*samples_per_epoch:(i+1)*samples_per_epoch])

                # such all images are made up of numpy array of range integer 0 - 255(8 bit image)
                # all images are normalised between 0-1 float
                x_batch = x_batch/255.0
            
                # to check wheather or not our algorithm is learning. to cheack wheather our algorith started differentiating between age.
                x_batch_test,y_batch_test = turnToNumpy(initialFileNames)
                x_batch_test = x_batch_test/255.0
                
                # fit the data on model
                model.fit(x_batch,y_batch,batch_size=50,nb_epoch=1, verbose=1,validation_split=0.2)
                # test on initial 100 files at each iteration
                test_output = model.predict_classes(x_batch_test)
                print test_output
            except IndexError:
                print traceback.print_exc()
        k = k+1

In [61]:
myGeneratorEX(10000,100)

In [11]:
# save model
# model.save_weights("modelWith128and100epoch")

In [58]:
# load model
# model.load_weights("modelWith256and25epoch")

In [24]:
def getRealAge(fileName):
    """
    Parse the real age out of an imdb crop file name.

    fileName : e.g. 'nm0000001_rm124825600_1899-5-10_1968.jpg'
    Returns the year the photo was taken minus the year of birth.
    Raises AttributeError when the name does not match the pattern.
    """
    # Bug fix: the original searched the global `eachFilename` (leaked
    # from the testing loop below) instead of the `fileName` argument.
    matchObj = re.search(r'nm*\d+_rm\d+_(\d+)-\d+-\d+_(\d+).jpg', fileName, re.M | re.I)
    yob = matchObj.group(1)   # year of birth
    dopt = matchObj.group(2)  # year the photo was taken
    return int(dopt) - int(yob)

In [62]:
#testing 
# Evaluate the trained global `model` on the first 500 resized images and
# write each image back out as "testImages/<realAge>_<predictedAge>.jpg"
# so the predictions can be eyeballed.
folderName = "imdb_crop_resized128*128" 
fileNames =  glob.glob(folderName+"/*.jpg")
NumberOfFileToBetested = 500

# First 500 file will be taken for testing
x_batch,y_batch = turnToNumpy(fileNames[:NumberOfFileToBetested])
predictedAGE = model.predict_classes(x_batch)
# NOTE(review): realAGE is initialised as a list here but is rebound to a
# single int inside the loop below; the list is never used.
realAGE = []
imagesCount = 0
# NOTE(review): os.mkdir raises if 'testImages' already exists -- this cell
# is not re-runnable without deleting the folder first.
os.mkdir('testImages')

for eachFilename in fileNames[:NumberOfFileToBetested]:
    realAGE = getRealAge(eachFilename)
    #saving Images
#     print eachFilename
    x = load_image(eachFilename)
#     print x.shape #(3, 128, 128)
    # NOTE(review): reshape keeps the flat element order; converting
    # (C, H, W) back to (H, W, C) for saving would need a transpose --
    # confirm the intended pixel layout.
    x = x.reshape(x.shape[1],x.shape[2],x.shape[0])
    renamedImage = "testImages/"+str(realAGE)+"_"+str(predictedAGE[imagesCount])+".jpg"
#     print (renamedImage)                                             
    imsave(renamedImage, x)
    imagesCount = imagesCount + 1