In [68]:
import os, sys
import numpy as np
import glob as glob
from shutil import copyfile
# Root of the course working tree and of the distracted-driving dataset.
# Both values keep their trailing slash because the cells below build paths
# by plain string concatenation (e.g. DATA_HOME_DIR+"train").
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
LESSON_HOME_DIR = '/home/ubuntu/courses/kevin_files/'
DATA_HOME_DIR = LESSON_HOME_DIR+'data/distracted-driving/'
# Call form of print: same output on Python 2, and valid syntax on Python 3.
print(DATA_HOME_DIR)


/home/ubuntu/courses/kevin_files/data/distracted-driving/

In [4]:
#Create directories
%cd $DATA_HOME_DIR
%mkdir valid
%mkdir results
%mkdir -p sample/train
%mkdir -p sample/test
%mkdir -p sample/valid
%mkdir -p sample/results
%mkdir -p test/unknown


/home/ubuntu/courses/kevin_files/data/distracted-driving

In [64]:
##Create regex for pulling file and subdir out of full filename
import re
# Raw string: "\." reaches the regex engine as an escaped dot without relying
# on Python passing an unrecognised string escape through unchanged.
train_regex = re.compile(r'/train/.*\.jpg')
result = train_regex.findall('/home/ubuntu/courses/kevin_files/data/distracted-driving/train/c7/img_71703.jpg')
# Each match begins with the 7-character prefix '/train/'; slicing it off
# leaves "<class dir>/<file name>".
print(result[0][7:])


c7/img_71703.jpg

In [67]:
##Make valid directory and files
# Moves a random tenth of the .jpg images of every class directory under
# train/ into the directory of the same name under valid/.
# NOTE: not idempotent -- each run moves a further tenth of what is left in
# train/, and the selection is unseeded, so it differs between runs.
train_dir = DATA_HOME_DIR+"train"
valid_dir = DATA_HOME_DIR+"valid"
for subdir, dirs, files in os.walk(train_dir):

    if subdir == train_dir:
        # Top level of the walk: mirror every class directory under valid/.
        # An explicit existence check replaces the former bare "except: pass",
        # so real failures (permissions, full disk, ...) are no longer hidden.
        for dir_name in dirs:
            target_dir = os.path.join(valid_dir, dir_name)
            if not os.path.isdir(target_dir):
                os.makedirs(target_dir)

    else:
        g = glob.glob(subdir+'/*.jpg')
        shuf = np.random.permutation(g)
        # Count is taken from the images actually globbed (not from `files`,
        # which may include non-jpg entries); // keeps it an int on Python 3.
        for i in range(len(shuf)//10):
            src = str(shuf[i])
            # Path relative to train/, e.g. "c7/img_71703.jpg".
            file_and_dir = os.path.relpath(src, train_dir)
            os.rename(src, os.path.join(valid_dir, file_and_dir))

In [69]:
##Make sample/valid directory and files
# Moves up to SAMPLE_VALID_PER_CLASS random .jpg images of every class
# directory under train/ into the same-named directory under sample/valid/.
# NOTE(review): the images are moved (os.rename), so they leave train/;
# `copyfile` is imported at the top of the notebook but never used --
# confirm whether a copy was intended here.
SAMPLE_VALID_PER_CLASS = 20
train_dir = DATA_HOME_DIR+"train"
sample_valid_dir = DATA_HOME_DIR+"sample/valid"
for subdir, dirs, files in os.walk(train_dir):

    if subdir == train_dir:
        # Top level of the walk: mirror every class directory under
        # sample/valid/. The explicit existence check replaces the former
        # bare "except: pass", so unrelated makedirs errors are not hidden.
        for dir_name in dirs:
            target_dir = os.path.join(sample_valid_dir, dir_name)
            if not os.path.isdir(target_dir):
                os.makedirs(target_dir)

    else:
        g = glob.glob(subdir+'/*.jpg')
        shuf = np.random.permutation(g)
        # Cap at the number of images available so a small class does not
        # raise IndexError after part of it has already been moved.
        for i in range(min(SAMPLE_VALID_PER_CLASS, len(shuf))):
            src = str(shuf[i])
            # Path relative to train/, e.g. "c7/img_71703.jpg".
            file_and_dir = os.path.relpath(src, train_dir)
            os.rename(src, os.path.join(sample_valid_dir, file_and_dir))

In [70]:
##Make sample/train directory and files
# Moves up to SAMPLE_TRAIN_PER_CLASS random .jpg images of every class
# directory under train/ into the same-named directory under sample/train/.
# NOTE(review): the images are moved (os.rename), so they leave train/;
# `copyfile` is imported at the top of the notebook but never used --
# confirm whether a copy was intended here.
SAMPLE_TRAIN_PER_CLASS = 50
train_dir = DATA_HOME_DIR+"train"
sample_train_dir = DATA_HOME_DIR+"sample/train"
for subdir, dirs, files in os.walk(train_dir):

    if subdir == train_dir:
        # Top level of the walk: mirror every class directory under
        # sample/train/. The explicit existence check replaces the former
        # bare "except: pass", so unrelated makedirs errors are not hidden.
        for dir_name in dirs:
            target_dir = os.path.join(sample_train_dir, dir_name)
            if not os.path.isdir(target_dir):
                os.makedirs(target_dir)

    else:
        g = glob.glob(subdir+'/*.jpg')
        shuf = np.random.permutation(g)
        # Cap at the number of images available so a small class does not
        # raise IndexError after part of it has already been moved.
        for i in range(min(SAMPLE_TRAIN_PER_CLASS, len(shuf))):
            src = str(shuf[i])
            # Path relative to train/, e.g. "c7/img_71703.jpg".
            file_and_dir = os.path.relpath(src, train_dir)
            os.rename(src, os.path.join(sample_train_dir, file_and_dir))

In [72]:
##Make sample/test files
# Moves up to SAMPLE_TEST_COUNT random .jpg images that sit directly in test/
# into sample/test/.
# NOTE(review): the images are moved (os.rename), so they leave test/ --
# confirm that shrinking the test set is intended.
SAMPLE_TEST_COUNT = 500
sample_test_dir = DATA_HOME_DIR+'sample/test'
g = glob.glob(DATA_HOME_DIR+'/test/*.jpg')
shuf = np.random.permutation(g)
# Cap at the number of images found so a short (or already sampled) test/
# directory does not raise IndexError part-way through.
for i in range(min(SAMPLE_TEST_COUNT, len(shuf))):
    src = str(shuf[i])
    # The glob is not recursive, so the bare file name identifies the image;
    # this avoids needing `re` imported by another cell.
    os.rename(src, os.path.join(sample_test_dir, os.path.basename(src)))

In [ ]: