In [1]:
%pip install six pypng pillow


Requirement already satisfied: six in /usr/lib/python2.7/site-packages (1.11.0)
Requirement already satisfied: pypng in /usr/lib/python2.7/site-packages (0.0.18)
Requirement already satisfied: pillow in /usr/lib64/python2.7/site-packages (5.3.0)

In [2]:
import PIL
from PIL import Image
from PIL import ImageFilter, ImageEnhance
import numpy as np
import matplotlib.pyplot as plt
import six
from six.moves import cPickle as pickle
import png
import os

In [3]:
def getDataDictionary(path, file):
    """Unpickle one CIFAR-10 batch file and return the resulting dict.

    path: directory prefix, expected to end with a separator
        (e.g. "data/CIFAR-10/cifar-10-batches-py/").
    file: batch file name inside that directory.

    Returns the unpickled dictionary (CIFAR batches contain at least
    'data' and 'labels' keys).
    """
    print ("unpickling ....")
    # 'with' guarantees the handle is closed even if load() raises;
    # the original opened the file and never closed it.
    with open(path + file, 'rb') as f:
        batch = pickle.load(f)  # renamed from 'dict' to avoid shadowing the builtin
    print ("unpickling ....Done")
    return batch

def extractImages(dict):
    """Pull the raw image matrix out of a CIFAR-10 batch dictionary.

    Returns the array stored under the 'data' key (one flattened image
    per row), printing its shape as a quick sanity check.
    """
    print ("extracting images ....")
    raw = dict['data']
    print (raw.shape)
    print ("extracting images ....Done")
    return raw

def transform(images, new_dim):
    """Upscale each flattened 32x32 CIFAR image to new_dim x new_dim.

    images: iterable of flat arrays of length 3072 (32*32*3, laid out
        channel-major as in the CIFAR-10 batch files).
    new_dim: target edge length in pixels.

    Returns a list of flattened int32 arrays, one per input image, each
    of length new_dim * new_dim * 3.
    """
    print ("transforming ....")
    bigdata = []
    for imagedata in images:
        # Rebuild the HxWx3 pixel array in memory. The original wrote a
        # tmp.png to disk with pypng and re-read it with PIL, which was
        # slow and left a stray tmp.png behind after every run.
        pixels = imagedata.reshape((32, 32, 3), order='F').swapaxes(0, 1)
        img = Image.fromarray(np.asarray(pixels, dtype=np.uint8), 'RGB')
        # LANCZOS is the filter ANTIALIAS was an alias for; ANTIALIAS is
        # deprecated and removed in Pillow 10.
        img = img.resize((new_dim, new_dim), Image.LANCZOS)
        img = img.filter(ImageFilter.EDGE_ENHANCE_MORE)
        # get the data back and flatten it for pickled storage
        data = np.asarray(img, dtype="int32").flatten()
        bigdata.append(data)
    print ("transforming ....Done")
    return bigdata

In [4]:
# Configuration: target image size and CIFAR-10 batch locations.
new_dim = 100
datapath = "data/CIFAR-10/cifar-10-batches-py/"
trainfiles = ['data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4', 'data_batch_5']
testfiles = ['test_batch']

# Enlarge every training batch and re-pickle it with the upscaled images
# attached under the "bigdata" key.
for trainfile in trainfiles:
    print ("transforming .... "+trainfile)
    datadict = getDataDictionary(datapath, trainfile)
    images = extractImages(datadict)
    transformeddatalist = transform(images, new_dim)
    datadict["bigdata"] = transformeddatalist
    # 'with' closes the output file promptly; the original passed a bare
    # open(...) to pickle.dump and leaked the handle.
    with open(datapath + trainfile + "_" + str(new_dim), "wb") as out:
        pickle.dump(datadict, out)
    print ("transforming .... "+trainfile+" ... Done")


transforming .... data_batch_1
unpickling ....
unpickling ....Done
extracting images ....
(10000, 3072)
extracting images ....Done
transforming ....
transforming ....Done
transforming .... data_batch_1 ... Done
transforming .... data_batch_2
unpickling ....
unpickling ....Done
extracting images ....
(10000, 3072)
extracting images ....Done
transforming ....
transforming ....Done
transforming .... data_batch_2 ... Done
transforming .... data_batch_3
unpickling ....
unpickling ....Done
extracting images ....
(10000, 3072)
extracting images ....Done
transforming ....
transforming ....Done
transforming .... data_batch_3 ... Done
transforming .... data_batch_4
unpickling ....
unpickling ....Done
extracting images ....
(10000, 3072)
extracting images ....Done
transforming ....
transforming ....Done
transforming .... data_batch_4 ... Done
transforming .... data_batch_5
unpickling ....
unpickling ....Done
extracting images ....
(10000, 3072)
extracting images ....Done
transforming ....
transforming ....Done
transforming .... data_batch_5 ... Done

In [6]:
# Enlarge the held-out test batch the same way as the training batches
# and re-pickle it with the upscaled images under "bigdata".
for testfile in testfiles:
    print ("transforming .... "+testfile)
    datadict = getDataDictionary(datapath, testfile)
    images = extractImages(datadict)
    transformeddatalist = transform(images, new_dim)
    datadict["bigdata"] = transformeddatalist
    # 'with' closes the output file promptly; the original passed a bare
    # open(...) to pickle.dump and leaked the handle.
    with open(datapath + testfile + "_" + str(new_dim), "wb") as out:
        pickle.dump(datadict, out)
    print ("transforming .... "+testfile+" ... Done")


transforming .... test_batch
unpickling ....
unpickling ....Done
extracting images ....
(10000, 3072)
extracting images ....Done
transforming ....
transforming ....Done
transforming .... test_batch ... Done

In [5]:
# Final marker cell: confirms the whole notebook ran to completion.
print("done")


done