In [94]:
import tensorflow as tf
import numpy as np
import pandas as pd
from IPython.display import display,Image
from scipy import ndimage
import cPickle as pickle
import os
import matplotlib.pyplot as plt
%matplotlib inline 
plt.rcParams['figure.figsize'] = (10.0, 20.0)

In [18]:
#pd.read_csv("../data/trainLabels.csv")
for image in os.listdir("../data/trainResized/"):
    name = image[:-4]
    print type(name),int(name)
    break


<type 'str'> 1

In [40]:
image_size = 20
pixel_depth = 255.0

def load(folder, num_images, attr):
    dataset = np.zeros(
        shape=(num_images, image_size, image_size, 3), dtype=np.float32)
    for image in os.listdir(folder):
        #print image
        image_file = os.path.join(folder,image)
        image_data = (ndimage.imread(image_file).astype(float) - pixel_depth/2)/pixel_depth
        if image_data.shape != (image_size,image_size,3):
            print image,("Unexpected image shape: %s" % str(image_data.shape) )
            continue
        if attr == "train":            
            index = int(image[:-4])-1
        else:
            index = int(image[:-4]) - 6283 - 1
        dataset[index,:,:,:] = image_data
    return dataset

#labels = np.ndarray(shape=(num_images), dtype=np.int32) 
train_dataset = load("../data/trainResized/",6283,"train")
test_dataset = load("../data/testResized/",6220,"test")


2290.Bmp Unexpected image shape: (20, 20)
284.Bmp Unexpected image shape: (20, 20)
3136.Bmp Unexpected image shape: (20, 20)
12150.Bmp Unexpected image shape: (20, 20)
7318.Bmp Unexpected image shape: (20, 20)

In [45]:
id_class = pd.read_csv("../data/trainLabels.csv")
#id_class["id"]

In [116]:
train_labels[train_labels == 20]


Out[116]:
21     20
57     20
100    20
124    20
164    20
181    20
274    20
347    20
352    20
356    20
437    20
519    20
520    20
550    20
568    20
...
5933    20
5951    20
5952    20
5964    20
5981    20
5983    20
5988    20
6020    20
6046    20
6052    20
6060    20
6081    20
6105    20
6148    20
6159    20
Name: Class, Length: 165, dtype: int64

In [126]:
def transform2number(label):
    lb = ord(label) - ord("0")
    if lb > 57 and lb < 97:
        lb -= 7
    elif lb >= 97:
        lb -= 13
    return lb
train_labels = id_class["Class"].apply(transform2number)

In [127]:
train_labels[train_labels > 61]
#train_labels


Out[127]:
0      62
13     69
14     68
23     69
24     71
39     66
50     63
71     63
73     71
84     66
93     66
98     66
109    62
125    63
131    66
...
6221    63
6225    63
6229    62
6230    64
6236    62
6239    74
6241    63
6245    68
6248    67
6251    62
6252    66
6266    65
6268    63
6270    62
6271    68
Name: Class, Length: 810, dtype: int64

In [128]:
train_labels.max()


Out[128]:
74

In [124]:
for label in range(0,10):
    idxs = np.random.choice(np.flatnonzero(train_labels == label),9,replace=False)
    for i,idx in enumerate(idxs):
        pos = i*10+1+label
        plt.subplot(9,10,pos)
        plt.imshow(train_dataset[idx,])
        plt.axis("off")
plt.show()



In [ ]: