In [25]:
import os
import cv2
import dlib
import scipy.misc
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib import cm

detector = dlib.get_frontal_face_detector()
read_dir = '/home/mckc/Downloads/data bases/General'
os.chdir(read_dir)
save_dir = '/home/mckc/Downloads/bigdb'
face_cascade = cv2.CascadeClassifier('/home/mckc/Downloads/opencv_build/opencv/data/haarcascades_GPU/haarcascade_frontalface_default.xml')

In [26]:
folders = os.listdir(read_dir)

In [50]:
os.mkdir(save_dir)
count = 0
length = len(folders)
for folder in folders:
    if count%250==0:
        print(str(round(float(count)/length,2)*100) + '%')
    files = os.listdir(read_dir +'/' +folder)
    os.mkdir(save_dir +'/' +folder)
    os.chdir(save_dir +'/' +folder)
    i = 0
    for file in files:
        i+=1
        image = cv2.imread(read_dir +'/' +folder+'/'+file,0)
        if image is None:
            continue
        if(image.shape[0] > 70 and image.shape[1] > 0):
            faces = detector(image, 1)
            for a,b in enumerate(faces):
                h_off = 0
                v_off = 0
                if(b.top()<0):
                    h_off = b.top()
                if(b.left()<0):
                    v_off = b.left()
                fac = image[b.top()-h_off:b.bottom()-h_off,b.left()-v_off:b.right()-v_off]   
                fac = cv2.resize(fac,(96,96))
                scipy.misc.toimage(fac).save(folder+'_'+str(i)+'.jpg')
    count+=1


0.0%
4.0%
7.0%
11.0%
14.0%
18.0%
22.0%
25.0%
29.0%
32.0%
36.0%
39.0%
43.0%
47.0%
50.0%
54.0%
57.0%
61.0%
65.0%
68.0%
72.0%
75.0%
79.0%
82.0%
86.0%
90.0%
93.0%
97.0%

In [52]:
file_count = []
for folder in folders:
    files = os.listdir(save_dir+'/'+folder)
    file_count.append(len(files))

In [88]:
unique, counts = np.unique(file_count, return_counts=True)
zip(unique,counts)


Out[88]:
[(0, 238),
 (1, 4042),
 (2, 777),
 (3, 313),
 (4, 221),
 (5, 152),
 (6, 106),
 (7, 105),
 (8, 94),
 (9, 100),
 (10, 315),
 (11, 15),
 (12, 14),
 (13, 14),
 (14, 17),
 (15, 15),
 (16, 25),
 (17, 22),
 (18, 21),
 (19, 46),
 (20, 91),
 (21, 11),
 (22, 13),
 (23, 4),
 (24, 8),
 (25, 4),
 (26, 3),
 (27, 1),
 (28, 4),
 (29, 3),
 (30, 5),
 (31, 6),
 (32, 5),
 (33, 3),
 (34, 1),
 (35, 3),
 (36, 4),
 (37, 5),
 (38, 2),
 (39, 5),
 (40, 3),
 (41, 6),
 (42, 2),
 (43, 2),
 (44, 3),
 (45, 2),
 (46, 2),
 (48, 5),
 (49, 3),
 (50, 2),
 (51, 1),
 (52, 4),
 (53, 1),
 (54, 1),
 (55, 2),
 (56, 2),
 (57, 3),
 (58, 2),
 (59, 1),
 (60, 2),
 (61, 1),
 (62, 1),
 (63, 3),
 (64, 2),
 (67, 1),
 (68, 1),
 (69, 1),
 (70, 1),
 (71, 2),
 (73, 1),
 (76, 3),
 (77, 1),
 (78, 1),
 (79, 3),
 (82, 1),
 (83, 2),
 (85, 1),
 (86, 1),
 (87, 1),
 (89, 1),
 (91, 2),
 (92, 1),
 (93, 1),
 (94, 1),
 (100, 1),
 (103, 1),
 (104, 1),
 (105, 2),
 (109, 1),
 (116, 2),
 (120, 1),
 (127, 2),
 (131, 1),
 (132, 1),
 (137, 1),
 (140, 1),
 (144, 1),
 (145, 1),
 (148, 1),
 (149, 1),
 (151, 1),
 (153, 1),
 (154, 1),
 (158, 1),
 (169, 1),
 (170, 1),
 (175, 1),
 (179, 1),
 (186, 1),
 (220, 2),
 (221, 1),
 (223, 1),
 (236, 1),
 (253, 1),
 (281, 1),
 (283, 1),
 (299, 1),
 (303, 1),
 (312, 2),
 (317, 2),
 (319, 1),
 (324, 1),
 (325, 2),
 (328, 1),
 (331, 2),
 (340, 1),
 (349, 1),
 (351, 1),
 (357, 1),
 (371, 1),
 (528, 1)]

In [102]:
len(folders) - ( np.count_nonzero(np.array(file_count)==0) + np.count_nonzero(np.array(file_count)==1))


Out[102]:
2691

In [105]:
import shutil

for folder in folders:
    files = os.listdir(save_dir+'/'+folder)
    if(len(files)<2):
        shutil.rmtree(save_dir+'/'+folder)

In [ ]: