In [1]:
import os,sys,re
import numpy as np
# `sklearn.cross_validation` was removed in scikit-learn 0.20; `model_selection`
# (available since 0.18) provides the same `train_test_split`. Try the current
# location first and fall back so the notebook still runs on old installs.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# Root folder of the image database. Keep the trailing separator: other cells
# may build file names by appending directly to this string.
path = '/home/mckc/Downloads/bigdb///'
# Work from the database root so relative file names resolve against it.
os.chdir(path)
# Displayed as the cell output to confirm where the kernel is now working.
os.getcwd()


Out[1]:
'/home/mckc/Downloads/bigdb'

In [2]:
# Scan one level below `path`: each sub-folder name is used as a subject label
# and every entry inside it is recorded as one image of that subject.
# Result: two parallel string arrays, `all_images` (full paths) and `subject`
# (the label for the path at the same index).
#
# os.listdir returns entries in arbitrary order, so both levels are sorted to
# make the row order (and any seeded split computed from it) repeatable.
directs = sorted(os.listdir(path))

# Collect into plain lists and convert once at the end; calling np.append per
# file re-allocates the whole array on every iteration.
image_paths = []
image_labels = []
for subject_name in directs:
    # `path` ends with a separator, so this equals path + subject_name.
    subject_dir = os.path.join(path, subject_name)
    if not os.path.isdir(subject_dir):
        # Loose files in the root (e.g. train.csv / test.csv saved here by an
        # earlier run) are not subject folders; listing them would raise.
        continue
    for file_name in sorted(os.listdir(subject_dir)):
        image_paths.append(os.path.join(subject_dir, file_name))
        image_labels.append(subject_name)

# dtype=str keeps the arrays string-typed even when nothing was found.
all_images = np.array(image_paths, dtype=str)
subject = np.array(image_labels, dtype=str)

In [3]:
# Pair each image path with its subject label as a two-column string table:
# column 0 = file path, column 1 = subject.
# np.column_stack replaces np.array(zip(...)): on Python 3 `zip` is a lazy
# iterator, which NumPy would wrap as a single 0-d object instead of an
# (n, 2) array. column_stack gives the same table on Python 2 and 3.
data = np.column_stack((all_images, subject)).astype(str)
# Displayed as the cell output: (number of images, 2).
data.shape


Out[3]:
(34898, 2)

In [4]:
# Class balance check: `unique` holds the distinct subject labels and `counts`
# the number of images recorded for each of them, in matching order.
unique, counts = np.unique(subject, return_counts=True)
# Shown as the cell output: {subject label: number of images}.
{label: n_images for label, n_images in zip(unique, counts)}


Out[4]:
{'Abhay ': 25,
 'Abhinav ': 26,
 'Akhilesh Kumar': 36,
 'Anand naidu': 24,
 'Anirban Das': 18,
 'Aparajitha': 4,
 'Aparna Ram': 17,
 'Arun Dhanapal': 31,
 'Ashwin': 14,
 'Bhavya Prakash': 33,
 'Chandra': 35,
 'Deb': 10,
 'Deepa': 30,
 'Devi': 19,
 'Gokul': 21,
 'Gopika': 26,
 'Jayanthi Ramachandran': 29,
 'Kinkar': 62,
 'Linson Jinto': 26,
 'Mahesh PV': 10,
 'Maheshwar Muralidharan': 48,
 'Narasimhan Seshadri': 40,
 'Naresh Raj': 27,
 'Neha': 28,
 'Nivetha': 13,
 'Omar': 54,
 'Pandian': 24,
 'Phani': 17,
 'Ponraj S': 6,
 'Pooja Ann Martin': 16,
 'Praba': 36,
 'Priya darshini': 36,
 'Raashi Chhalani': 28,
 'Raghuram R D': 11,
 'Raghuvar Choppakatla': 34,
 'Rhea': 19,
 'Roshni': 9,
 'Sankkar Narayan S': 23,
 'Security 1': 26,
 'Security 2': 12,
 'Selva Chellaiah': 25,
 'Sharath': 24,
 'Shyam Venkatraman': 16,
 'Siddharth': 15,
 'Sneha Sanjana R': 17,
 'Subiksha Natarajan': 296,
 'Surekha Nyapati': 36,
 'Swathi': 22,
 'VISHNU R': 5,
 'Varun Vijay': 29,
 'Vendor': 6,
 'Vigneshwaran': 31,
 'Vijay raj': 5,
 'admin 1 ': 17,
 'anees': 44,
 'anirudh patil': 22,
 'anju': 11,
 'apporva': 29,
 'approv': 10,
 'ashrav': 20,
 'daisy': 6,
 'deep': 17,
 'dhruva': 22,
 'himanshu': 22,
 'jamal': 8,
 'jayanthi': 29,
 'kani': 9,
 'karan': 14,
 'kartheek': 9,
 'keerthana': 10,
 'kreeshna': 14,
 'naveen': 15,
 'pankaj': 18,
 'pranesh': 23,
 'priya jayanth': 25,
 'rohit': 23,
 'sai prakash': 21,
 'sai shree': 5,
 'sri ram': 16,
 'srikanth s': 35,
 'staff 1': 42,
 'staff 2': 36,
 'staff 4': 35,
 'staff 5': 40,
 'staff 6': 21,
 'staff 7': 11,
 'staff 8': 13,
 'swetha': 39,
 'unknown 1': 36,
 'unknown 10': 8,
 'unknown 11': 6,
 'unknown 2': 39,
 'unknown 5': 84,
 'unknown 8': 14,
 'vanathi': 27,
 'vijay kumar': 34,
 'yogesh': 20}

In [4]:
# Hold out 25% of the rows for testing. `stratify=subject` splits per label so
# both parts keep the label proportions; the fixed `random_state` makes the
# split repeatable for the same input rows.
train, test = train_test_split(
    data,
    test_size=0.25,
    random_state=42,
    stratify=subject
)

# Write both tables into the database root, using relative file names.
os.chdir(path)
# NOTE(review): np.savetxt prefixes the header with its default `comments`
# string, so the first line is expected to read "# file,subject" - confirm the
# code that loads these files copes with that.
for csv_name, rows in (("test.csv", test), ("train.csv", train)):
    np.savetxt(csv_name, rows, delimiter=",", header='file,subject', fmt='%s')

In [ ]: