In [1]:
import string
import pandas as pd
import numpy as np
from numpy.random import shuffle
#import skimage.io import imread
from scipy.misc import imread
import tensorflow as tf

tf.app.flags.DEFINE_boolean("debug", True, "for debug models")
tf.app.flags.DEFINE_boolean("use_fp16", False, "data type")
FLAGS = tf.app.flags.FLAGS


data_dir = "/home/guo/haplox/Github/first_step_with_julia_kaggle/data/data"
image_size = 400
labels = string.digits + string.lowercase + string.uppercase
label_dict = {l:i for (i,l) in enumerate(labels)}
num_class = len(labels) # 62

def data_type():
    if FLAGS.use_fp16:
        return tf.float16
    else:
        return tf.float32

def read_data(type_data, labels_info, path):
    x = np.zeros((labels_info.shape[0], 20, 20, 3)) # 20 x 20 x 3 image
    for (index, id_image) in enumerate(labels_info["ID"]):
        file_name = "{0}/{1}Resized/{2}.Bmp".format(path, type_data, id_image)
        img = imread(file_name)
        try:
            x[index,:] = img
        except:
            print(file_name)
    return x


# Read train matrix
labels_info_train = pd.read_csv("{0}/trainLabels.csv".format(data_dir))
xTrain = read_data("train", labels_info_train, data_dir)

# Read test matrix
labels_info_test = pd.read_csv("{0}/sampleSubmission.csv".format(data_dir))
xTest  = read_data("test", labels_info_test, data_dir)

yTrain = map(lambda x:label_dict[x], labels_info_train["Class"])
print(yTrain[1:10])
yTrain = np.array(yTrain)
print(map(lambda x: labels[x], yTrain[1:10]))

print(labels_info_test.head())
### shuffle
print(xTrain.shape)
print(yTrain.shape)



#if __name__ == "__main__":
#    tf.app.run()


/home/guo/haplox/Github/first_step_with_julia_kaggle/data/data/trainResized/284.Bmp
/home/guo/haplox/Github/first_step_with_julia_kaggle/data/data/trainResized/2290.Bmp
/home/guo/haplox/Github/first_step_with_julia_kaggle/data/data/trainResized/3136.Bmp
/home/guo/haplox/Github/first_step_with_julia_kaggle/data/data/testResized/7318.Bmp
/home/guo/haplox/Github/first_step_with_julia_kaggle/data/data/testResized/12150.Bmp
[8, 55, 44, 53, 58, 47, 21, 46, 42]
['8', 'T', 'I', 'R', 'W', 'L', 'l', 'K', 'G']
     ID Class
0  6284     A
1  6285     A
2  6286     A
3  6287     A
4  6288     A
(6283, 20, 20, 3)
(6283,)

In [2]:
xTrain.shape


Out[2]:
(6283, 20, 20, 3)

In [5]:
xTrain[0],yTrain[0]


Out[5]:
(array([[[ 148.,   24.,   33.],
         [ 152.,   25.,   31.],
         [ 169.,   52.,   68.],
         ..., 
         [ 152.,   20.,   30.],
         [ 154.,   23.,   28.],
         [ 151.,   26.,   30.]],
 
        [[ 154.,   25.,   32.],
         [ 152.,   27.,   33.],
         [ 153.,   26.,   32.],
         ..., 
         [ 151.,   27.,   33.],
         [ 148.,   28.,   33.],
         [ 153.,   28.,   34.]],
 
        [[ 152.,   27.,   31.],
         [ 155.,   26.,   32.],
         [ 153.,   23.,   35.],
         ..., 
         [ 154.,   30.,   33.],
         [ 157.,   29.,   36.],
         [ 154.,   29.,   33.]],
 
        ..., 
        [[  52.,   28.,   31.],
         [ 105.,   22.,   22.],
         [  88.,   26.,   41.],
         ..., 
         [  54.,   22.,   30.],
         [  55.,   22.,   34.],
         [ 150.,   32.,   29.]],
 
        [[ 156.,   35.,   40.],
         [ 156.,   34.,   32.],
         [ 160.,   35.,   36.],
         ..., 
         [ 153.,   30.,   29.],
         [ 157.,   35.,   33.],
         [ 155.,   30.,   28.]],
 
        [[ 161.,   35.,   38.],
         [ 161.,   31.,   35.],
         [ 164.,   40.,   38.],
         ..., 
         [ 159.,   33.,   34.],
         [ 159.,   29.,   39.],
         [ 158.,   34.,   36.]]]), 23)

In [6]:
xTrain.shape


Out[6]:
(6283, 20, 20, 3)

In [ ]: