Here is my attempt to write a model based on an old Kaggle Cats vs Dog dataset. The dataset can be seen at the links shown and has 25,000 images of cats and dogs.

https://www.kaggle.com/c/dogs-vs-cats/data


In [118]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

from subprocess import check_output
print(check_output(["ls", "../koyel/input"]).decode("utf8"))

# Any results you write to the current directory are saved as output.


test
train


In [120]:
train_path = "../koyel/input/train"
test_path = "../koyel/input/test"

ROWS = 64
COLS = 64
CHANNELS = 3

images      = [img for img in os.listdir(train_path)]
images_dog  = [img for img in os.listdir(train_path) if "dog" in img]
images_cat  = [img for img in os.listdir(train_path) if "cat" in img]

images_test = [img for img in os.listdir(test_path)]

In [121]:
#only taking a subset (less accuracy but faster training)
train_dog = images_dog[:1000]
train_cat = images_cat[:1000]
valid_dog = images_dog[1000:1100]
valid_cat = images_cat[1000:1100]

train_list = train_dog + train_cat
valid_list = valid_dog + valid_cat
test_list  = images_test[0:]

shuffle(train_list)

train = np.ndarray(shape=(len(train_list),ROWS, COLS))
train_color = np.ndarray(shape=(len(train_list), ROWS, COLS, CHANNELS), dtype=np.uint8)
test = np.ndarray(shape=(len(test_list),ROWS, COLS))
test_color = np.ndarray(shape=(len(images_test), ROWS, COLS, CHANNELS), dtype=np.uint8)
valid = np.ndarray(shape=(len(valid_list), ROWS, COLS))
valid_color = np.ndarray(shape=(len(valid_list), ROWS, COLS, CHANNELS), dtype=np.uint8)

In [122]:
labels = np.ndarray(len(train_list))

for i, img_path in enumerate(train_list):
    img_color = cv2.imread(os.path.join(train_path, img_path), 1)
    img_color = cv2.resize(img_color, (ROWS, COLS), interpolation=cv2.INTER_CUBIC)
    img = cv2.cvtColor(img_color, cv2.COLOR_BGR2GRAY)
    
    train[i] = img
    train_color[i] = img_color
   
    if "dog" in img_path:
        labels[i] = 0
    else:
        labels[i] = 1

In [123]:
valid_labels = np.ndarray(len(valid_list))

for i, img_path in enumerate(valid_list):
    img_color = cv2.imread(os.path.join(train_path, img_path), 1)
    img_color = cv2.resize(img_color, (ROWS, COLS), interpolation=cv2.INTER_CUBIC)
    img = cv2.cvtColor(img_color, cv2.COLOR_BGR2GRAY)
    
    valid[i] = img
    valid_color[i] = img_color
    
    if "dog" in img_path:
        valid_labels[i] = 0
    else:
        valid_labels[i] = 1

In [124]:
for i, img_path in enumerate(test_list):
    img_color = cv2.imread(os.path.join(test_path, img_path), 1)
    img_color = cv2.resize(img_color, (ROWS, COLS), interpolation=cv2.INTER_CUBIC)
    img = cv2.cvtColor(img_color, cv2.COLOR_BGR2GRAY)
    
    test[i] = img
    test_color[i] = img_color

In [125]:
for i, img_path in enumerate(test_list):
    img_color = cv2.imread(os.path.join(test_path, img_path), 1)
    img_color = cv2.resize(img_color, (ROWS, COLS), interpolation=cv2.INTER_CUBIC)
    img = cv2.cvtColor(img_color, cv2.COLOR_BGR2GRAY)
    
    test[i] = img
    test_color[i] = img_color

In [126]:
from keras.utils import np_utils

X_train = train_color / 255
X_valid = valid_color / 255
X_test  = test_color  / 255
# one hot encode outputs
y_train = np_utils.to_categorical(labels)
y_valid = np_utils.to_categorical(valid_labels)
num_classes = y_valid.shape[1]

In [127]:
def larger_model():
	# create model
	model = Sequential()
	model.add(Convolution2D(30, 5, 5, border_mode='valid', input_shape=(64, 64, 3), activation='relu'))
	model.add(MaxPooling2D(pool_size=(2, 2)))
	model.add(Convolution2D(15, 3, 3, activation='relu'))
	model.add(MaxPooling2D(pool_size=(2, 2)))
	model.add(Dropout(0.2))
	model.add(Flatten())
	model.add(Dense(128, activation='relu'))
	model.add(Dense(50, activation='relu'))
	model.add(Dense(num_classes, activation='softmax'))
	# Compile model
	model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
	return model

In [131]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Convolution2D
from keras.layers.convolutional import MaxPooling2D
# build the model
model = larger_model()
# Fit the model
model.fit(X_train, y_train, validation_data=(X_valid, y_valid), nb_epoch=50, batch_size=200, verbose=2)
# Final evaluation of the model
scores = model.evaluate(X_valid, y_valid, verbose=0)
print("Classification Error: %.2f%%" % (100-scores[1]*100))


C:\Users\koyel\Anaconda3\envs\tensorflow\lib\site-packages\ipykernel\__main__.py:4: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(30, (5, 5), padding="valid", activation="relu", input_shape=(64, 64, 3...)`
C:\Users\koyel\Anaconda3\envs\tensorflow\lib\site-packages\ipykernel\__main__.py:6: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(15, (3, 3), activation="relu")`
C:\Users\koyel\Anaconda3\envs\tensorflow\lib\site-packages\keras\models.py:834: UserWarning: The `nb_epoch` argument in `fit` has been renamed `epochs`.
  warnings.warn('The `nb_epoch` argument in `fit` '
Train on 2000 samples, validate on 200 samples
Epoch 1/50
17s - loss: 0.6880 - acc: 0.5305 - val_loss: 0.6951 - val_acc: 0.5300
Epoch 2/50
15s - loss: 0.6550 - acc: 0.5965 - val_loss: 0.6753 - val_acc: 0.5950
Epoch 3/50
15s - loss: 0.6291 - acc: 0.6350 - val_loss: 0.6654 - val_acc: 0.6100
Epoch 4/50
15s - loss: 0.5949 - acc: 0.6810 - val_loss: 0.6738 - val_acc: 0.6000
Epoch 5/50
16s - loss: 0.5766 - acc: 0.6895 - val_loss: 0.6506 - val_acc: 0.6600
Epoch 6/50
15s - loss: 0.5469 - acc: 0.7250 - val_loss: 0.6362 - val_acc: 0.6750
Epoch 7/50
15s - loss: 0.5470 - acc: 0.7120 - val_loss: 0.6346 - val_acc: 0.6600
Epoch 8/50
17s - loss: 0.4941 - acc: 0.7510 - val_loss: 0.6497 - val_acc: 0.6650
Epoch 9/50
16s - loss: 0.4818 - acc: 0.7645 - val_loss: 0.6636 - val_acc: 0.6500
Epoch 10/50
16s - loss: 0.4712 - acc: 0.7720 - val_loss: 0.6523 - val_acc: 0.6550
Epoch 11/50
16s - loss: 0.4228 - acc: 0.8075 - val_loss: 0.6619 - val_acc: 0.6600
Epoch 12/50
18s - loss: 0.3935 - acc: 0.8200 - val_loss: 0.7350 - val_acc: 0.6350
Epoch 13/50
17s - loss: 0.4011 - acc: 0.8135 - val_loss: 0.6878 - val_acc: 0.6550
Epoch 14/50
15s - loss: 0.3705 - acc: 0.8305 - val_loss: 0.7621 - val_acc: 0.6600
Epoch 15/50
18s - loss: 0.3594 - acc: 0.8345 - val_loss: 0.7160 - val_acc: 0.6550
Epoch 16/50
18s - loss: 0.3115 - acc: 0.8710 - val_loss: 0.7511 - val_acc: 0.6400
Epoch 17/50
15s - loss: 0.2924 - acc: 0.8735 - val_loss: 0.7979 - val_acc: 0.6450
Epoch 18/50
15s - loss: 0.2703 - acc: 0.8830 - val_loss: 0.8522 - val_acc: 0.6550
Epoch 19/50
14s - loss: 0.2470 - acc: 0.8945 - val_loss: 0.8409 - val_acc: 0.6450
Epoch 20/50
14s - loss: 0.2461 - acc: 0.8935 - val_loss: 0.8261 - val_acc: 0.6200
Epoch 21/50
15s - loss: 0.2173 - acc: 0.9210 - val_loss: 0.8745 - val_acc: 0.6550
Epoch 22/50
15s - loss: 0.1854 - acc: 0.9240 - val_loss: 0.9711 - val_acc: 0.6200
Epoch 23/50
15s - loss: 0.1695 - acc: 0.9375 - val_loss: 0.9895 - val_acc: 0.6000
Epoch 24/50
15s - loss: 0.1562 - acc: 0.9435 - val_loss: 1.0087 - val_acc: 0.6400
Epoch 25/50
15s - loss: 0.1281 - acc: 0.9590 - val_loss: 1.0472 - val_acc: 0.6300
Epoch 26/50
15s - loss: 0.1126 - acc: 0.9595 - val_loss: 1.0362 - val_acc: 0.6450
Epoch 27/50
14s - loss: 0.1023 - acc: 0.9670 - val_loss: 1.1378 - val_acc: 0.6250
Epoch 28/50
15s - loss: 0.0889 - acc: 0.9710 - val_loss: 1.1879 - val_acc: 0.6400
Epoch 29/50
14s - loss: 0.0720 - acc: 0.9795 - val_loss: 1.2377 - val_acc: 0.6450
Epoch 30/50
14s - loss: 0.0778 - acc: 0.9740 - val_loss: 1.3014 - val_acc: 0.6200
Epoch 31/50
14s - loss: 0.0849 - acc: 0.9715 - val_loss: 1.3661 - val_acc: 0.6500
Epoch 32/50
15s - loss: 0.0646 - acc: 0.9820 - val_loss: 1.3406 - val_acc: 0.6100
Epoch 33/50
15s - loss: 0.0593 - acc: 0.9815 - val_loss: 1.3681 - val_acc: 0.6400
Epoch 34/50
15s - loss: 0.0422 - acc: 0.9900 - val_loss: 1.3316 - val_acc: 0.6150
Epoch 35/50
15s - loss: 0.0349 - acc: 0.9920 - val_loss: 1.4584 - val_acc: 0.6150
Epoch 36/50
17s - loss: 0.0427 - acc: 0.9900 - val_loss: 1.3957 - val_acc: 0.6000
Epoch 37/50
15s - loss: 0.0341 - acc: 0.9930 - val_loss: 1.4892 - val_acc: 0.6500
Epoch 38/50
15s - loss: 0.0517 - acc: 0.9815 - val_loss: 1.5743 - val_acc: 0.6800
Epoch 39/50
14s - loss: 0.0533 - acc: 0.9845 - val_loss: 1.3040 - val_acc: 0.6450
Epoch 40/50
14s - loss: 0.0309 - acc: 0.9950 - val_loss: 1.4560 - val_acc: 0.6600
Epoch 41/50
14s - loss: 0.0280 - acc: 0.9915 - val_loss: 1.5658 - val_acc: 0.6500
Epoch 42/50
15s - loss: 0.0224 - acc: 0.9965 - val_loss: 1.5555 - val_acc: 0.6250
Epoch 43/50
14s - loss: 0.0199 - acc: 0.9950 - val_loss: 1.5634 - val_acc: 0.6650
Epoch 44/50
14s - loss: 0.0188 - acc: 0.9940 - val_loss: 1.5320 - val_acc: 0.6350
Epoch 45/50
14s - loss: 0.0217 - acc: 0.9955 - val_loss: 1.6760 - val_acc: 0.6500
Epoch 46/50
15s - loss: 0.0131 - acc: 0.9985 - val_loss: 1.6250 - val_acc: 0.6400
Epoch 47/50
16s - loss: 0.0144 - acc: 0.9975 - val_loss: 1.6693 - val_acc: 0.6550
Epoch 48/50
15s - loss: 0.0189 - acc: 0.9940 - val_loss: 1.9025 - val_acc: 0.6800
Epoch 49/50
15s - loss: 0.0169 - acc: 0.9970 - val_loss: 1.6281 - val_acc: 0.6550
Epoch 50/50
15s - loss: 0.0132 - acc: 0.9975 - val_loss: 1.6650 - val_acc: 0.6650
Classification Error: 33.50%

In [132]:
submission = model.predict_classes(X_test, verbose=2)

In [133]:
pd.DataFrame({"id": list(range(1,len(test_color)+1)), 
              "label": submission}).to_csv('submission.csv', index=False,header=True)

In [ ]:


In [ ]: