In [13]:
# GPU index for this session. Not referenced by any cell shown here —
# presumably consumed by later TensorFlow setup; TODO confirm.
GPU_NUMBER = 3
# Base project directory. 'Utils/' under it is appended to sys.path below,
# presumably so that notebook_utils can be imported — verify.
root = '/data/vision/torralba/health-habits/other/enes/'

%matplotlib inline
import matplotlib.pyplot as plt

import os
import sys
import random
import json
import math
import fnmatch
import os
sys.path.append( root + 'Utils/')

import pandas as pd
import numpy as np
import tensorflow as tf

from PIL import Image
from IPython.display import display
from pprint import pprint
from notebook_utils import *
from skimage import color, io

In [14]:
# Recursively collect the path of every *.JPEG file under the ImageNet
# train256 directory.
#
# The walk variable is called `dirpath` (not `root`) on purpose: reusing the
# name `root` here would overwrite the project-level `root` path assigned in
# the setup cell, leaving it pointing at the last directory walked.
matches = []
for dirpath, _dirnames, filenames in os.walk('/data/vision/torralba/yusuf/imagenet/data/images/train256/'):
  # `write` is not defined in this notebook; presumably a progress printer
  # pulled in by `from notebook_utils import *` — verify.
  write(dirpath)
  for filename in fnmatch.filter(filenames, '*.JPEG'):
    matches.append(os.path.join(dirpath, filename))


 /data/vision/torralba/yusuf/imagenet/data/images/train256/n03804744

In [15]:
# Total number of *.JPEG paths collected by the directory walk.
len(matches)


Out[15]:
1281146

In [17]:
# Save every collected image path to all_paths.txt, one path per line.
with open('all_paths.txt', 'w') as paths_file:
  paths_file.writelines(match + '\n' for match in matches)

In [36]:
def check_grayscale(path, threshold=5):
  """Report whether the image at `path` contains visible color.

  NOTE: despite the name, a True result means the image is NOT grayscale.
  The image is converted to CIE Lab and counted as color when at least one
  pixel has an a or b value whose magnitude exceeds `threshold`.

  Args:
    path: image location, handed to `io.imread`.
    threshold: minimum absolute a/b value treated as color. Defaults to 5,
      the value that used to be hard-coded.

  Returns:
    False when the loaded array has fewer than three dimensions or fewer
    than three channels; otherwise a bool that is True if any a/b value
    exceeds `threshold` in magnitude.
  """
  img = io.imread(path)
  # Single-channel data (2-D array, or a trailing axis shorter than 3)
  # cannot carry chroma, so it is never reported as color.
  if img.ndim < 3 or img.shape[-1] < 3:
    return False
  # rgb2lab requires exactly three channels; keep the first three so that an
  # extra trailing channel (e.g. alpha) does not raise.
  # NOTE(review): this assumes the first three channels are RGB — confirm
  # for any non-RGBA 4-channel inputs.
  lab = color.rgb2lab(img[..., :3])
  # Channels 1 and 2 of the Lab array are the a and b (chroma) components.
  return bool(np.any(np.abs(lab[:, :, 1:3]) > threshold))

In [ ]:
# Keep only the paths for which check_grayscale returns a truthy value
# (i.e. the images it reports as containing color).
color_paths = []

for i, path in enumerate(matches):
  write(i)  # presumably a progress counter from notebook_utils — verify
  if not check_grayscale(path):
    continue
  color_paths.append(path)


 1584

In [39]:
# Save the filtered (color) image paths to color_paths.txt, one per line.
with open('color_paths.txt', 'w') as color_file:
  color_file.write(''.join(path + '\n' for path in color_paths))

In [38]:
# Throwaway output check — presumably left over from debugging; safe to
# delete once confirmed unused.
print('asdf')


asdf

In [40]:
# Number of image paths kept by the color filter.
print(len(color_paths))


915345

In [42]:
# Number of image paths found before filtering.
print(len(matches))


1281146

In [43]:
# Number of collected paths that are not in color_paths (total minus kept).
len(matches) - len(color_paths)


Out[43]:
365801

In [ ]: