In [31]:
import glob
import os
import re

import matplotlib.pyplot as plt
import numpy as np    # for labels reading
import pandas as pd
import tensorflow as tf
from scipy import misc

%matplotlib inline

In [4]:
def _natural_key(path):
    """Sort key that orders file names by their embedded numbers (2.png before 10.png)."""
    parts = re.split(r'(\d+)', os.path.basename(path))
    # With a capturing group, re.split alternates text / digit runs, so odd
    # positions are always the digit runs and same-position items share a type.
    parts = [int(tok) if idx % 2 else tok for idx, tok in enumerate(parts)]
    # The full path breaks ties between identical file names in different folders.
    return (parts, path)


def read_img(path, loader=None):
    """Read all images matching a glob pattern into a list.

    Files are loaded in natural (numeric-aware) file-name order so that the
    result is deterministic; plain ``glob.glob`` returns entries in arbitrary
    filesystem order, which does not reliably line up with an id-ordered
    label file.

    Parameters
    ----------
    path : str
        Glob pattern selecting the image files, e.g. ``'train/*.png'``.
    loader : callable, optional
        Function taking a file path and returning the decoded image.
        Defaults to ``misc.imread``. Pass another reader when running on a
        SciPy release that no longer ships ``scipy.misc.imread``.

    Returns
    -------
    list
        One loaded image per matched file; empty if nothing matched.
    """
    paths = sorted(glob.glob(path), key=_natural_key)
    if not paths:
        return []

    # Resolve the default lazily so an explicit loader never touches scipy.misc.
    if loader is None:
        loader = misc.imread

    return [loader(image_path) for image_path in paths]

In [61]:
def read_labels(path, classes=None):
    """Read image labels from a CSV file and one-hot encode them.

    Parameters
    ----------
    path : str
        Path to a comma-separated file with a header row and a ``label``
        column.
    classes : sequence of str, optional
        Class names, in the column order wanted in the output. Defaults to
        the ten class names used by this notebook.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_rows, len(classes))`` with dtype ``uint8``;
        each row has a 1 in the column of its label.
    """
    if classes is None:
        classes = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog',
                   'frog', 'horse', 'ship', 'truck']

    df = pd.read_csv(path, sep=',', header=0)
    one_hot = pd.get_dummies(df['label'])

    # reindex fixes the column order and adds an all-zero column for any class
    # missing from the file, so the output width never depends on the data.
    one_hot = one_hot.reindex(columns=list(classes), fill_value=0)

    # DataFrame.as_matrix() no longer exists in pandas >= 1.0; .values works on
    # every version. The cast keeps 0/1 integers even where get_dummies
    # produces boolean columns.
    return one_hot.astype('uint8').values

In [ ]:
class DataSet(object):
    """Simple container that keeps a collection of images with its labels."""

    def __init__(self, images, labels):
        """Store the given images and labels on the instance.

        Parameters
        ----------
        images
            The image collection; kept as passed in.
        labels
            The labels belonging to ``images``; kept as passed in.
        """
        # The attributes must be assigned: a bare ``self.images`` expression
        # only reads the attribute and raises AttributeError on a new object.
        self.images = images
        self.labels = labels

In [5]:
# Read data from folder: every file matching the glob pattern is loaded by
# read_img, giving a list with one entry per image file.
data = read_img('../../cifrar_kagle/train/*.png')

In [6]:
# Confirm how many images were loaded (one list entry per matched file)
len(data)


Out[6]:
50000

In [56]:
# Plot one picture from the dataset (the entry at index 2) as a visual check
plt.imshow(data[2])


Out[56]:
<matplotlib.image.AxesImage at 0x7fc4696701d0>

In [27]:
# Load the id/label table with NumPy as a structured array.
# names=True consumes the header row as field names and dtype=None lets each
# column keep its own type; with the default float dtype both the header and
# the text labels are parsed as NaN.
# NOTE(review): depending on the NumPy version the label field may come back
# as bytes rather than str unless `encoding=` is passed - confirm.
labels = np.genfromtxt('../../cifrar_kagle/trainLabels.csv', delimiter=',',
                       names=True, dtype=None)

In [35]:
# Load the training labels; row 0 of the CSV holds the column names.
df = pd.read_csv(
    '../../cifrar_kagle/trainLabels.csv',
    header=0,
    sep=',',
)
# Preview the first five rows
df.head(n=5)


Out[35]:
id label
0 1 frog
1 2 truck
2 3 truck
3 4 deer
4 5 automobile

In [40]:
# One-hot encode the label column: one indicator column per distinct label
one_hot = pd.get_dummies(df['label'])

In [44]:
# Preview the first rows of the one-hot encoded labels
one_hot.head()


Out[44]:
airplane automobile bird cat deer dog frog horse ship truck
0 0 0 0 0 0 0 1 0 0 0
1 0 0 0 0 0 0 0 0 0 1
2 0 0 0 0 0 0 0 0 0 1
3 0 0 0 0 1 0 0 0 0 0
4 0 1 0 0 0 0 0 0 0 0

In [50]:
# Convert the one-hot frame to a plain array with the class columns in a fixed
# order. DataFrame.as_matrix() no longer exists in pandas >= 1.0, so the
# columns are selected with reindex (a class absent from the data becomes an
# all-zero column) and the array is taken with .values; the cast keeps 0/1
# integers even where get_dummies produces boolean columns.
lbl = one_hot.reindex(
    columns=['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog',
             'frog', 'horse', 'ship', 'truck'],
    fill_value=0,
).astype('uint8').values

In [51]:
# Check the dimensions of the label matrix (rows, class columns)
lbl.shape


Out[51]:
(50000, 10)

In [60]:
# Length of one innermost entry of image 1 (index [1][1]); presumably the
# number of colour channels of a pixel - confirm against the image layout.
len(data[1][1][1])


Out[60]:
3

In [ ]: