In [1]:
import matplotlib.pyplot as plt
import urllib
from bs4 import BeautifulSoup
from selenium import webdriver
import re
import os,sys
import time
from datetime import date
try:
    import cPickle as pickle
except:
    import pickle
import pprint
from collections import deque
from shutil import copyfile
import random
import glob
# Import the required modules
import cv2, os
import numpy as np
from PIL import Image

In [2]:
# Load the scraped profile records (indexed below as a sequence of dicts
# with at least 'User_ID' and 'Gender' keys).
# SECURITY: pickle.load can execute arbitrary code embedded in the file;
# only load pickles that were produced by a trusted source.
# NOTE(review): an earlier revision annotated the load with "errors out here";
# the cause is not recorded in this notebook.
# The context manager closes the handle even when pickle.load raises.
with open("linkedin_profiles.pickle", "rb") as pkl_fl:
    my_original_list = pickle.load(pkl_fl)

In [7]:
# Sanity check: display the 'User_ID' field of the first loaded profile.
# NOTE(review): the echoed value looks like a real account identifier;
# consider clearing this output before sharing the notebook.
my_original_list[0]['User_ID']


Out[7]:
'eryn-olson-50328143'

In [46]:
# glob returns matches in arbitrary order.  The position of each path in this
# list is later embedded in the copied file name and used as the training
# label, so sort the list to keep those labels reproducible between runs.
fileList = sorted(glob.glob("./Images/*.*"))

In [47]:
# Copy every scraped image whose base name matches a profile 'User_ID' into
# the training directory, renamed to <User_ID>.<index>.<Gender>.jpg where
# <index> is the image's position in fileList.

# "Male" / "Female" are only referenced by the commented-out per-gender copy
# at the bottom; they are still created so the on-disk layout stays the same.
directory = "Male"
directory1 = "Female"
# Destination of the renamed copies; it must exist before copyfile runs.
labelled_dir = "Lable_Images1"

for needed_dir in (directory, directory1, labelled_dir):
    if not os.path.exists(needed_dir):
        os.makedirs(needed_dir)

# Index the profiles by user id once instead of rescanning the whole list for
# every image.  A list per id preserves the original "copy once per matching
# profile" behaviour should an id ever appear more than once.
profiles_by_uid = {}
for prof in my_original_list:
    profiles_by_uid.setdefault(prof['User_ID'], []).append(prof)

for idx, fp in enumerate(fileList):
    # os.path.basename copes with either path separator, unlike split('/').
    uid = os.path.basename(os.path.splitext(fp)[0])
    for prof in profiles_by_uid.get(uid, ()):
        new_file_extension = prof['Gender'].title()
        target_name = uid + '.' + str(idx) + '.' + new_file_extension + '.jpg'
        # Copy the file that glob actually found rather than re-deriving a
        # "<name>.jpg" path that may not exist for other extensions.
        copyfile(fp, os.path.join(labelled_dir, target_name))
        #copyfile(filename+".jpg", new_file_extension +'/'+uid+"."+new_file_extension)

In [48]:
# Remove every entry of the training directory that is not a '.jpg' file.
# (`os` is already imported in the first cell.)
labelled_dir = "Lable_Images1"
for entry in os.listdir(labelled_dir):
    entry_path = os.path.join(labelled_dir, entry)
    if entry.endswith('.jpg'):
        continue
    # os.unlink cannot remove real directories; leave them alone instead of
    # reporting an error for each one on every run.
    if os.path.isdir(entry_path) and not os.path.islink(entry_path):
        continue
    try:
        os.unlink(entry_path)
    except OSError as e:
        # Best effort: report the failure and carry on with the next entry.
        print(e)

In [49]:
# For face detection we will use the Haar Cascade provided by OpenCV.
cascadePath = "haarcascade_frontalface_default.xml"
faceCascade = cv2.CascadeClassifier(cascadePath)
# CascadeClassifier does not raise when the XML file cannot be read; it yields
# an empty classifier that only fails later inside detectMultiScale.
if faceCascade.empty():
    raise IOError("could not load Haar cascade from %r" % cascadePath)

# For face recognition we will use the LBPH Face Recognizer.
# NOTE(review): createLBPHFaceRecognizer is the OpenCV 2.4 entry point; newer
# OpenCV builds expose the recognizer through the cv2.face module instead.
# Confirm against the installed version.
recognizer = cv2.createLBPHFaceRecognizer()

In [50]:
def get_images_and_labels(path, show=True):
    """Collect cropped face images and their labels from a directory.

    Files in ``path`` are expected to be named
    ``<user id>.<number>.<gender>.<extension>``.  ``<number>`` becomes the
    integer label and ``<gender>`` is kept as the text found in the name
    (e.g. ``'Male'``).  Files whose name does not follow that pattern, or that
    cannot be opened as an image, are reported and skipped.

    Parameters
    ----------
    path : str
        Directory containing the labelled images.
    show : bool, optional
        When true (the default) every detected face is displayed briefly in
        an OpenCV window.  Pass ``False`` to run without a display.

    Returns
    -------
    (images, labels, gender) : tuple of three lists of equal length
        One entry per detected face: the grayscale face crop (numpy array),
        the integer label, and the gender string.  An image in which several
        faces are detected contributes several entries with the same label.
    """
    # images holds the cropped faces, labels the number parsed from each file
    # name, gender the gender text parsed from each file name.
    images = []
    labels = []
    gender = []
    for file_name in os.listdir(path):
        image_path = os.path.join(path, file_name)

        # Split from the right so that user ids containing dots still parse.
        name_parts = file_name.rsplit(".", 3)
        if len(name_parts) != 4 or not name_parts[1].isdigit():
            print("skipping %s: name is not <id>.<number>.<gender>.<ext>" % image_path)
            continue
        nbr = int(name_parts[1])
        gender_current = name_parts[2]
        print(nbr)

        # Read the image as grayscale.  A file that cannot be decoded must be
        # skipped: otherwise `image` would be undefined, or would still hold
        # the previous file's pixels and receive this file's label.
        try:
            image_pil = Image.open(image_path).convert('L')
            image = np.array(image_pil, 'uint8')
        except Exception as exc:
            print("skipping %s: %s" % (image_path, exc))
            continue

        # Detect the faces and record one training sample per detection.
        faces = faceCascade.detectMultiScale(image)
        for (x, y, w, h) in faces:
            face = image[y: y + h, x: x + w]
            images.append(face)
            labels.append(nbr)
            gender.append(gender_current)

            if show:
                cv2.imshow("Adding faces to traning set...", face)
                cv2.waitKey(50)

    print("lables")
    print(labels)
    print("gender_current")
    print(gender)

    return images, labels, gender

In [57]:
def _label_from_file_name(file_name):
    """Return the integer label encoded in ``file_name``, or None if absent.

    Two layouts are recognised: the training layout
    ``<user id>.<number>.<gender>.<ext>`` (the number is the label) and a
    plain ``<number>.<ext>`` name.
    """
    candidates = [file_name.split(".")[0]]
    parts = file_name.rsplit(".", 3)
    if len(parts) == 4:
        candidates.insert(0, parts[1])
    for text in candidates:
        if text.isdigit():
            return int(text)
    return None


def image_prediction(image_path):
    """Predict the training label for the face(s) found in one image.

    The image is read as grayscale, faces are located with the module-level
    ``faceCascade`` and each face is passed to the module-level ``recognizer``.
    When the file name carries a label and a face is predicted as that label,
    the face is shown in a window and the search stops.

    Parameters
    ----------
    image_path : str
        Path of the image to classify.

    Returns
    -------
    int or None
        The label predicted for the matching face, otherwise the label
        predicted for the last face examined; ``None`` when no face was
        detected at all.
    """
    print(image_path)

    predict_image_pil = Image.open(image_path).convert('L')
    predict_image = np.array(predict_image_pil, 'uint8')
    faces = faceCascade.detectMultiScale(predict_image)

    print(faces)

    file_name = os.path.split(image_path)[1]
    # Name shown in the messages: everything before the first dot.
    nbr_actual = file_name.split(".")[0]
    # Compare integers with integers; the raw file-name text never equals the
    # integer returned by the recognizer.
    expected_label = _label_from_file_name(file_name)

    # Stays None when the detector finds no face, so the return is always bound.
    nbr_predicted = None
    found_flag = False
    for (x, y, w, h) in faces:
        face = predict_image[y: y + h, x: x + w]
        # NOTE(review): for LBPH, `conf` is presumably a distance (lower means
        # a closer match). Confirm against the OpenCV documentation.
        nbr_predicted, conf = recognizer.predict(face)

        if expected_label is not None and expected_label == nbr_predicted:
            print("{} is Correctly Recognized with confidence {}".format(nbr_actual, conf))
            cv2.imshow("Recognizing Face", face)
            cv2.waitKey(1000)
            found_flag = True
            break
        else:
            print("{} is Incorrect Recognized as {}".format(nbr_actual, nbr_predicted))

    if not found_flag:
        print('Identified as a new image')

    return nbr_predicted

In [56]:
# Build the training set, train the recognizer, classify one query image and
# report the gender attached to the predicted training label.
images, labels, gender = get_images_and_labels('Lable_Images1')

cv2.destroyAllWindows()

# Fail with a clear message instead of an opaque OpenCV error.
if not images:
    raise ValueError("no faces were found in 'Lable_Images1'; nothing to train on")

recognizer.train(images, np.array(labels))

image_path = 'ankitanarula.jpg'
nbr_predicted = image_prediction(image_path)

if nbr_predicted is None:
    # No prediction was produced for the query image.
    print("no label was predicted for %s" % image_path)
    current_gender = None
else:
    # Apply the format with %; print("...%d", x) printed the raw tuple.
    print("gender number is %d" % nbr_predicted)
    # Gender of the first training sample that carries the predicted label;
    # None when no sample has that label.
    current_gender = next(
        (sample_gender for sample_label, sample_gender in zip(labels, gender)
         if sample_label == nbr_predicted),
        None,
    )

if current_gender == 'Male':
    print('male')
elif current_gender == 'Female':
    print('female')


1076
378
363
750
1100
427
470
578
1083
580
170
205
831
1153
501
184
607
782
919
1119
70
693
1105
906
190
497
1127
938
271
688
116
473
949
30
1045
60
110
433
380
100
1081
913
1116
971
429
12
18
889
230
920
lables
[378, 378, 363, 750, 1100, 1100, 427, 470, 578, 1083, 580, 170, 205, 831, 1153, 1153, 501, 184, 607, 782, 782, 919, 1119, 70, 693, 1105, 906, 190, 190, 497, 497, 497, 1127, 938, 271, 271, 688, 116, 473, 949, 30, 1045, 1045, 60, 110, 433, 433, 380, 380, 100, 1081, 913, 1116, 971, 429, 12, 12, 889, 230, 920]
gender_current
['Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Female', 'Female', 'Male', 'Female', 'Female', 'Male', 'Female', 'Female', 'Male', 'Female', 'Female', 'Male', 'Male', 'Female', 'Female', 'Male', 'Female', 'Female', 'Female', 'Male', 'Male', 'Male', 'Male', 'Male', 'Female', 'Male', 'Female', 'Female', 'Female', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Female', 'Female', 'Female', 'Male', 'Male', 'Male', 'Female', 'Male', 'Male', 'Male', 'Female', 'Male', 'Male']
ankitanarula.jpg
[[129  68 157 157]]
ankitanarula is Incorrect Recognized as 919
new image
('gender number is %d', 919)
female

In [ ]: