In [1]:
import os
import itertools
from PIL import Image
import hashlib
import mmh3

In [15]:
def iter_image_bytes(filepath):
    """Yield the colour-channel values of every pixel of an image, one by one.

    The image is opened with PIL and normalised to RGB mode, so every pixel
    contributes exactly three 8-bit integers (red, green, blue), emitted in
    the order in which ``Image.getdata()`` returns the pixels.

    Typical use::

        image_bytes = bytearray(iter_image_bytes(jpeg_filepath))

    Args:
        filepath: Path of an image file that PIL can open.

    Yields:
        int: the r, then g, then b value of each pixel in turn.
    """
    # The context manager releases the file handle as soon as the generator
    # is exhausted or closed instead of leaving it to garbage collection.
    with Image.open(filepath) as image:
        # Grayscale images yield a single int per pixel and RGBA/CMYK yield
        # four values, neither of which unpacks into (r, g, b). Converting
        # first keeps the loop below valid for every mode.
        rgb_image = image if image.mode == 'RGB' else image.convert('RGB')
        for r, g, b in rgb_image.getdata():
            yield r
            yield g
            yield b

In [16]:
def hash_jpeg(jpeg_filepath):
    """Return the MD5 hex digest of an image's decoded pixel data.

    The digest is computed over the raw pixel buffer that PIL produces after
    decoding the file, not over the bytes of the file on disk.

    Args:
        jpeg_filepath: Path of an image file that PIL can open.

    Returns:
        str: 32-character hexadecimal MD5 digest of the pixel buffer.
    """
    # Close the underlying file as soon as the pixel data has been read.
    with Image.open(jpeg_filepath) as jpeg:
        # Image.tostring() was deprecated in Pillow 2.0 and later removed;
        # tobytes() is its replacement and returns the same raw buffer.
        image_bytes = jpeg.tobytes()
    return hashlib.md5(image_bytes).hexdigest()

In [ ]:
def hash_jpeg_2(jpeg_filepath):
    """Return a 128-bit MurmurHash3 hex digest of an image's pixel values.

    Variant of ``hash_jpeg`` that feeds the per-pixel values produced by
    ``iter_image_bytes`` to MurmurHash3 instead of MD5. The digests of the
    two functions are therefore not comparable with each other.

    Args:
        jpeg_filepath: Path of an image file that PIL can open.

    Returns:
        str: 32-character, zero-padded hexadecimal digest.
    """
    image_bytes = bytearray(iter_image_bytes(jpeg_filepath))
    # The original called mmh3.hash() without arguments and then used an
    # undefined `md5sum`. hash128() returns an unsigned 128-bit integer by
    # default; it is given an immutable bytes copy of the buffer.
    digest = mmh3.hash128(bytes(image_bytes))
    # 32 zero-padded hex digits mirror the shape of hashlib's hexdigest().
    return format(digest, '032x')

In [4]:
# Directory that is scanned for JPEG files.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
local_directory = '/Users/chbrown/Desktop/iPhone/100APPLE'
# os.listdir() returns names in arbitrary order, so the list is sorted to
# keep positional lookups such as local_filepaths[8] stable between runs.
# The extension test requires the dot (a bare 'jpg' suffix would also match
# names that merely end in those letters) and accepts '.jpeg' as well.
local_filepaths = sorted(
    os.path.join(local_directory, filename)
    for filename in os.listdir(local_directory)
    if filename.lower().endswith(('.jpg', '.jpeg')))

In [17]:
# Spot check: hash the decoded pixels of a single file given by absolute path.
hash_jpeg('/Users/chbrown/Desktop/2014-03-28 15.50.56.jpg')


Out[17]:
'49e23c859c7a276764d370c0632d07ab'

In [18]:
# Spot check on a second file, again given by absolute path.
hash_jpeg('/Users/chbrown/Desktop/IMG_1278.JPG')


Out[18]:
'5d0e1d753c43ccbb0badce1a3c082dc6'

In [14]:
# Hash the entry at index 8 of the directory listing. Which file that is
# depends on the order in which local_filepaths was built.
hash_jpeg(local_filepaths[8])


Out[14]:
'c17f4d25d1bc00919acd76d87a15c722'

In [6]:
# Flatten one image into a single bytearray holding the per-pixel values
# that iter_image_bytes yields.
jpeg_filepath = local_filepaths[2]  # entry at index 2 of the directory listing
byte_iter = iter_image_bytes(jpeg_filepath)  # generator: the file is not opened until it is iterated
byte_arr = bytearray(byte_iter)  # consumes the generator completely

In [8]:
# No visible cell binds `jpeg` at notebook level (it only exists as a local
# variable inside hash_jpeg), so this cell would raise NameError on a fresh
# kernel. Open the image selected by `jpeg_filepath` explicitly.
jpeg = Image.open(jpeg_filepath)
# load() returns a pixel-access object that is indexed as pixels[x, y].
pixels = jpeg.load()

In [11]:
# Image dimensions; PIL reports size as a (width, height) tuple.
jpeg.size


Out[11]:
(3264, 2448)

In [14]:
# A PixelAccess object supports pixels[x, y] lookups but cannot be iterated
# (iterating it raises TypeError), so pixels are addressed by coordinate.
# Only the start of the first row is shown: the whole image holds millions
# of pixels and would flood the cell output.
[pixels[x, 0] for x in range(min(jpeg.size[0], 10))]

In [22]:
# Display the repr of the pixel-access object returned by jpeg.load().
pixels

In [ ]: