Unpack the TRAIN data into 50k PNG files

  • Then do the same for the smaller holdout set (the VALIDATION directory)

In [41]:
import gzip

import numpy as np
import png

In [117]:
# Pixel dimensions of one raw image record: the unpacking cells read
# width*height bytes per image and reshape them to (height, width).
height, width = 100, 36

In [43]:
# Unpack ../TRAIN/images.raw.gz into one greyscale PNG per record.
# The decompressed stream is read as back-to-back records of width*height
# unsigned bytes; each record is written to ../TRAIN/imgNNNNNN.png.
record_size = width * height
idx = 0
with gzip.open('../TRAIN/images.raw.gz') as f:
    while True:
        # Read the next fixed-size record
        buffer = f.read(record_size)

        if not buffer:
            # Nothing left: the previous record ended exactly at end of stream
            print('EOF')
            break
        if len(buffer) != record_size:
            # Leftover bytes that do not fill a whole image: report it instead
            # of passing it off as a clean EOF
            print('File truncated!')
            break

        # Interpret the bytes as unsigned 8-bit values and make it the right shape
        pixels = np.frombuffer(buffer, dtype='B').reshape(height, width)

        # Image name
        fname = '../TRAIN/img{:06}.png'.format(idx)

        # Create image file ('L' = greyscale mode)
        png.fromarray(pixels, 'L').save(fname)

        idx = idx + 1


File truncated!

In [119]:
# Unpack ../VALIDATION/images.raw.gz into one greyscale PNG per record.
# The decompressed stream is read as back-to-back records of width*height
# unsigned bytes; each record is written to ../VALIDATION/imgNNNNNN.png.
record_size = width * height
idx = 0
with gzip.open('../VALIDATION/images.raw.gz') as f:
    while True:
        # Read the next fixed-size record
        buffer = f.read(record_size)

        if not buffer:
            # Nothing left: the previous record ended exactly at end of stream
            print('EOF')
            break
        if len(buffer) != record_size:
            # Leftover bytes that do not fill a whole image: report it instead
            # of passing it off as a clean EOF
            print('File truncated!')
            break

        # Interpret the bytes as unsigned 8-bit values and make it the right shape
        pixels = np.frombuffer(buffer, dtype='B').reshape(height, width)

        # Image name
        fname = '../VALIDATION/img{:06}.png'.format(idx)

        # Create image file ('L' = greyscale mode)
        png.fromarray(pixels, 'L').save(fname)

        idx = idx + 1


EOF