In [44]:
import re
from PIL import Image
import os
import numpy as np
from tempfile import mkdtemp
import os.path as path
import matplotlib.pyplot as plt
import theano
import keras
import traceback
from keras.models import Sequential
from scipy.misc import imread, imresize, imsave
from keras.layers.core import Flatten, Dense, Dropout,Activation
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D,UpSampling2D,UpSampling1D,Cropping2D
from keras.optimizers import SGD
from keras.utils.np_utils import to_categorical
%matplotlib inline
from keras import backend as K
from keras.utils import np_utils
K.set_image_dim_ordering('th')
from scipy import ndimage
from sklearn.cross_validation import train_test_split
from keras import callbacks
import glob
from shutil import copyfile
In [45]:
def imageResize(basename, imageName):
    """
    Resize an image to 128x128.
    basename  : e.g. /home/username/XYZFolder
    imageName : e.g. xyz.jpg
    A new folder with the '_resized' suffix will be created next to the source folder.
    """
    new_width = 128
    new_height = 128
    # create the output folder up front instead of relying on the exception raised by save()
    if not os.path.exists(basename + '_resized/'):
        os.mkdir(basename + '_resized/')
    img = Image.open(basename + "/" + imageName)  # image extension *.png, *.jpg
    img = img.resize((new_width, new_height), Image.ANTIALIAS)
    img.save(basename + '_resized/' + imageName)
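# A minimal usage sketch (the folder and file name below are hypothetical examples):
# imageResize('/home/username/imdb_crop/01', 'nm0000100_rm1001569280_1955-1-6_2009.jpg')
# -> writes the 128x128 copy to /home/username/imdb_crop/01_resized/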
In [46]:
def resizer(folderPath):
    """
    Resize all image files present in a folder, e.g.
    resizer('/home/username/XYZFolder')
    """
    for subdir, dirs, files in os.walk(folderPath):
        for fileName in files:
            try:
                filepath = subdir + os.sep + fileName
                # endswith() needs a tuple of suffixes; the original 'or' chain only checked ".jpg"
                if filepath.endswith((".jpg", ".jpeg", ".png", ".gif")):
                    imageResize(subdir, fileName)
            except:
                traceback.print_exc()
In [47]:
# resizer('imdb_crop')
In [48]:
def load_image(infilename):
    img = ndimage.imread(infilename)
    data = np.asarray(img, dtype="int32")
    # transpose (height, width, channels) -> (channels, height, width);
    # a plain reshape would scramble the pixel values
    resized = data.transpose(2, 0, 1)
    return resized
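# A minimal sketch of the layout change load_image performs: a 128x128 RGB image
# read as (height, width, channels) becomes channels-first (channels, height, width),
# the ordering expected after K.set_image_dim_ordering('th'). The dummy array below
# is only a stand-in for the output of ndimage.imread.
# dummy = np.zeros((128, 128, 3), dtype="int32")
# print dummy.transpose(2, 0, 1).shape  # (3, 128, 128)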
In [49]:
# some precautionary functions
def loadImageTest(fileName):
    """
    The imdb database contains many images which are not in a proper format and
    therefore fail to load, raising an exception. To prevent such incidents, each
    image is checked first: it is used only if it loads properly.
    """
    try:
        load_image(fileName)
        return 1
    except:
        return 0

def responseTest(diff):
    """
    Not all image names obey the regex => r'nm*\d+_rm\d+_(\d+)-\d+-\d+_(\d+).jpg'
    Possible reasons:
    1) the file does not follow the naming format
    2) due to naming errors, the computed age is sometimes above 100 years,
       which is not valid for the class size we have selected
    Images not satisfying the age criterion are omitted.
    """
    if diff > 0 and diff <= 100:
        return 1
    return 0
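# A quick illustration of the filename convention assumed by the regex above, using
# a hypothetical IMDB-crop style name: the first capture group is the year of birth
# and the second is the year the photo was taken.
# sampleName = "nm0000100_rm1001569280_1955-1-6_2009.jpg"  # hypothetical example
# m = re.search(r'nm*\d+_rm\d+_(\d+)-\d+-\d+_(\d+).jpg', sampleName, re.M | re.I)
# print m.group(1), m.group(2), int(m.group(2)) - int(m.group(1))  # 1955 2009 54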
In [50]:
def turnToNumpy(files):
    """
    Turn images stored on disk into numpy arrays.
    files : a list of image paths, e.g. glob.glob('/home/username/XYZFolder/*.jpg')
    Returns (images, responses) where responses are 100-way one-hot age vectors.
    """
    temp = []
    responseArray = []
    print "Total Count of Files", len(files)
    for fileName in files:
        try:
            if fileName.endswith((".jpg", ".jpeg", ".png")):
                matchObj = re.search(r'nm*\d+_rm\d+_(\d+)-\d+-\d+_(\d+).jpg', fileName, re.M | re.I)
                # yob  - year of birth
                # dopt - year the photo was taken
                yob = matchObj.group(1)
                dopt = matchObj.group(2)
                diff = int(dopt) - int(yob)
                if diff > 0 and diff < 100:
                    # one-hot encode the age into a 100-element vector
                    individualResponse = [0] * 100
                    individualResponse[diff] = 1
                    responseArray.append(individualResponse)
                    temp.append(load_image(fileName))
        except:
            # skip files that fail to parse or to load
            pass
    return np.asarray(temp, dtype='float32'), np.asarray(responseArray, dtype='bool')
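# A minimal usage sketch (the folder is the resized one assumed throughout; the
# slice size below is illustrative only):
# sampleFiles = glob.glob("imdb_crop_resized/*.jpg")[:10]
# X, Y = turnToNumpy(sampleFiles)
# print X.shape  # expected (n, 3, 128, 128) with n <= 10 after filtering
# print Y.shape  # expected (n, 100) one-hot age vectors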
In [51]:
# defining convolutional network
model = Sequential()
model.add(ZeroPadding2D((1,1),input_shape=(3,128,128)))
model.add(Convolution2D(64, 3, 3, activation='relu'))
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(64, 3, 3, activation='relu'))
model.add(MaxPooling2D((2,2)))
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(128, 3, 3, activation='relu'))
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(128, 3, 3, activation='relu'))
model.add(MaxPooling2D((2,2)))
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(256, 3, 3, activation='relu'))
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(256, 3, 3, activation='relu'))
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(256, 3, 3, activation='relu'))
model.add(MaxPooling2D((2,2)))
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(512, 3, 3, activation='relu'))
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(512, 3, 3, activation='relu'))
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(512, 3, 3, activation='relu'))
model.add(MaxPooling2D((2,2)))
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(512, 3, 3, activation='relu'))
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(512, 3, 3, activation='relu'))
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(512, 3, 3, activation='relu'))
model.add(MaxPooling2D((2,2)))
model.add(Flatten())
model.add(Dense(4090, activation='relu'))
model.add(Dense(4090, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(100, activation='softmax'))
# model.summary()
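# The stack above follows the VGG-16 layout (13 convolutional layers in five blocks,
# then fully connected layers), adapted here to 3x128x128 inputs and a 100-way
# softmax over ages. An optional sanity check of the layer output shapes:
# for layer in model.layers:
#     print layer.name, layer.output_shape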
In [60]:
# this function loads images into memory iteratively
# (a CPU- and GPU-memory-friendly iteration scheme)
def myGeneratorEX(samples_per_epoch, epoch):
    """
    samples_per_epoch : number of images to be loaded into CPU memory at a time
    epoch : number of epochs for training
    """
    # defining the optimizer
    sgd = SGD(lr=0.01, momentum=0.1, decay=0.0, nesterov=True)
    # compiling the model
    model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
    folderName = "imdb_crop_resized"  # folder where the resized images are placed
    fileNames = glob.glob(folderName + "/*.jpg")  # all file names with .jpg extension
    # the first 100 images are used for on-the-fly visual performance checking at each iteration
    initialFileNames = fileNames[:100]
    k = 0
    while k < epoch:  # for each epoch do the following
        print "Epoch : ", k, " | Total Images : ", len(fileNames)
        for i in range(len(fileNames) / samples_per_epoch):
            # all files (~438189) are loaded into memory in chunks of size 'samples_per_epoch', e.g. 1000
            try:
                # loaded images are converted to a numpy array
                x_batch, y_batch = turnToNumpy(fileNames[i * samples_per_epoch:(i + 1) * samples_per_epoch])
                # the images arrive as 8-bit integers in the range 0-255,
                # so they are normalised to floats between 0 and 1
                x_batch = x_batch / 255.0
                # to check whether the algorithm is learning, i.e. whether it has
                # started to differentiate between ages
                x_batch_test, y_batch_test = turnToNumpy(initialFileNames)
                x_batch_test = x_batch_test / 255.0
                # fit the model on this chunk
                model.fit(x_batch, y_batch, batch_size=50, nb_epoch=1, verbose=1, validation_split=0.2)
                # test on the initial 100 files at each iteration
                test_output = model.predict_classes(x_batch_test)
                print test_output
            except IndexError:
                traceback.print_exc()
        k = k + 1
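# An alternative (not used here) is to wrap the chunked loading in a Python
# generator and let Keras drive the loop via fit_generator. A minimal sketch under
# the same assumptions (Keras 1.x API, model already compiled as above, resized
# images in "imdb_crop_resized"); the chunk size and epoch count are illustrative only.
# fileNames = glob.glob("imdb_crop_resized/*.jpg")
# def batchGenerator(fileNames, chunkSize):
#     while True:  # Keras expects the generator to yield batches indefinitely
#         for i in range(len(fileNames) / chunkSize):
#             x_batch, y_batch = turnToNumpy(fileNames[i * chunkSize:(i + 1) * chunkSize])
#             yield x_batch / 255.0, y_batch
# model.fit_generator(batchGenerator(fileNames, 1000),
#                     samples_per_epoch=len(fileNames), nb_epoch=100, verbose=1)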
In [61]:
myGeneratorEX(10000,100)
In [11]:
# save model
# model.save_weights("modelWith128and100epoch")
In [58]:
# load model
# model.load_weights("modelWith256and25epoch")
In [24]:
def getRealAge(fileName):
    """
    Get the real age from the file name.
    """
    matchObj = re.search(r'nm*\d+_rm\d+_(\d+)-\d+-\d+_(\d+).jpg', fileName, re.M | re.I)
    yob = matchObj.group(1)   # year of birth
    dopt = matchObj.group(2)  # year the photo was taken
    diff = int(dopt) - int(yob)
    return diff
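# Usage sketch with a hypothetical IMDB-crop style name:
# print getRealAge("nm0000100_rm1001569280_1955-1-6_2009.jpg")  # -> 54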
In [62]:
#testing
folderName = "imdb_crop_resized128*128"
fileNames = glob.glob(folderName+"/*.jpg")
NumberOfFileToBetested = 500
# the first 500 files are taken for testing
x_batch, y_batch = turnToNumpy(fileNames[:NumberOfFileToBetested])
predictedAGE = model.predict_classes(x_batch)
imagesCount = 0
os.mkdir('testImages')
for eachFilename in fileNames[:NumberOfFileToBetested]:
    realAGE = getRealAge(eachFilename)
    # save the image under "<realAge>_<predictedAge>.jpg" for visual inspection
    x = load_image(eachFilename)  # shape (3, 128, 128)
    # transpose back to (height, width, channels) for imsave
    x = x.transpose(1, 2, 0)
    renamedImage = "testImages/" + str(realAGE) + "_" + str(predictedAGE[imagesCount]) + ".jpg"
    imsave(renamedImage, x)
    imagesCount = imagesCount + 1
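# A hedged follow-up sketch (not part of the original run): quantify the test
# results with the mean absolute error between real and predicted ages. This
# assumes every one of the 500 file names parses and passes the age filter in
# turnToNumpy, so that predictedAGE lines up one-to-one with the file list;
# otherwise the two arrays would need to be re-aligned first.
# realAges = [getRealAge(f) for f in fileNames[:NumberOfFileToBetested]]
# print "MAE (years):", np.mean(np.abs(np.asarray(realAges) - predictedAGE))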