In [1]:
import numpy as np
import pandas as pd
import os
import cv2
from PIL import Image
from scipy.misc import imread
import matplotlib.pyplot as plt
import skimage.feature
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
import keras
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D, Lambda, Cropping2D
from keras.utils import np_utils

from collections import Counter

from keras.models import load_model

import datetime

%matplotlib inline


Using TensorFlow backend.

In [9]:
# Root folder of the dataset; it ends with a slash because later cells build
# paths by plain string concatenation.
my_dir = "/Volumes/dax/seals/Kaggle-NOAA-SeaLions/"

# Labels used for the dot annotations (alphabetical order).
class_names = [
    'adult_females',
    'adult_males',
    'juveniles',
    'pups',
    'subadult_males',
]

In [10]:
# Build the list of file names to skip: every id listed in
# MismatchedTrainImages.txt (one per line) with '.jpg' appended, plus the
# 'train.csv' entry.
# The `with` block closes the file handle, which the previous version leaked.
with open(my_dir + 'MismatchedTrainImages.txt') as blacklist_fin:
    blacklist_ws = blacklist_fin.readlines()

# strip() removes the trailing newline (and any surrounding whitespace)
blacklist = [line.strip() + '.jpg' for line in blacklist_ws]

blacklist.append('train.csv')

#print(blacklist[:5])

In [11]:
# List the training folder and order it by the integer in front of the first
# '.'; entries that do not start with a digit get an infinite key so they end
# up after all numbered files (ties are broken by the name itself).
file_names = os.listdir(my_dir + "Train/")
file_names.sort(
    key=lambda name: (
        int(name.partition('.')[0]) if name[0].isdigit() else float('inf'),
        name,
    )
)

# select a subset of files to run on
file_names = file_names[:1]

# dataframe to store results in: one row per file, one column per class
coordinates_df = pd.DataFrame(columns=class_names, index=file_names)

#print(file_names[:])

In [12]:
for filename in file_names:
    if filename in blacklist:
        file_names.remove(filename)
    else:
        # read the Train and Train Dotted images
        image_1 = cv2.imread(my_dir + "/TrainDotted/" + filename)
        image_2 = cv2.imread(my_dir + "/Train/" + filename)

        cut = np.copy(image_2)

        # absolute difference between Train and Train Dotted
        image_3 = cv2.absdiff(image_1,image_2)

    # mask out blackened regions from Train Dotted
        mask_1 = cv2.cvtColor(image_1, cv2.COLOR_BGR2GRAY)
        mask_1[mask_1 < 20] = 0
        mask_1[mask_1 > 0] = 255

        mask_2 = cv2.cvtColor(image_2, cv2.COLOR_BGR2GRAY)
        mask_2[mask_2 < 20] = 0
        mask_2[mask_2 > 0] = 255

        image_3 = cv2.bitwise_or(image_3, image_3, mask=mask_1)
        image_3 = cv2.bitwise_or(image_3, image_3, mask=mask_2) 

        # convert to grayscale to be accepted by skimage.feature.blob_log
        image_3 = cv2.cvtColor(image_3, cv2.COLOR_BGR2GRAY)

        # detect blobs
        blobs = skimage.feature.blob_log(image_3, min_sigma=3, max_sigma=4, num_sigma=1, threshold=0.02)

        adult_males = []
        subadult_males = []
        pups = []
        juveniles = []
        adult_females = [] 

        image_circles = image_1

        for blob in blobs:
            # get the coordinates for each blob
            y, x, s = blob
            # get the color of the pixel from Train Dotted in the center of the blob
            g,b,r = image_1[int(y)][int(x)][:]

            # decision tree to pick the class of the blob by looking at the color in Train Dotted
            if r > 200 and g < 50 and b < 50: # RED
                adult_males.append((int(x),int(y)))
                cv2.circle(image_circles, (int(x),int(y)), 20, (0,0,255), 10) 
            elif r > 200 and g > 200 and b < 50: # MAGENTA
                subadult_males.append((int(x),int(y))) 
                cv2.circle(image_circles, (int(x),int(y)), 20, (250,10,250), 10)
            elif r < 100 and g < 100 and 150 < b < 200: # GREEN
                pups.append((int(x),int(y)))
                cv2.circle(image_circles, (int(x),int(y)), 20, (20,180,35), 10)
            elif r < 100 and  100 < g and b < 100: # BLUE
                juveniles.append((int(x),int(y))) 
                cv2.circle(image_circles, (int(x),int(y)), 20, (180,60,30), 10)
            elif r < 150 and g < 50 and b < 100:  # BROWN
                adult_females.append((int(x),int(y)))
                cv2.circle(image_circles, (int(x),int(y)), 20, (0,42,84), 10)  

            cv2.rectangle(cut, (int(x)-112,int(y)-112),(int(x)+112,int(y)+112), 0,-1)

        coordinates_df["adult_males"][filename] = adult_males
        coordinates_df["subadult_males"][filename] = subadult_males
        coordinates_df["adult_females"][filename] = adult_females
        coordinates_df["juveniles"][filename] = juveniles
        coordinates_df["pups"][filename] = pups
        
%time


CPU times: user 5 µs, sys: 11 µs, total: 16 µs
Wall time: 20 µs

In [13]:
# Cut a 64x64 thumbnail around every stored coordinate.
# x collects the image patches, y the matching class names.
x = []
y = []

for filename in file_names:
    image = cv2.imread(my_dir + "/Train/" + filename)
    if image is None:
        # cv2.imread returns None when the file cannot be read
        continue

    for lion_class in class_names:
        # Explicit guards replace the former bare `except: pass`, which also
        # swallowed KeyboardInterrupt and hid genuine programming errors.
        try:
            points = coordinates_df[lion_class][filename]
        except KeyError:
            # no such column / row in coordinates_df
            continue
        if not isinstance(points, (list, tuple)):
            # cell was never filled in (still NaN)
            continue

        for coordinates in points:
            thumb = image[coordinates[1]-32:coordinates[1]+32,coordinates[0]-32:coordinates[0]+32,:]
            # patches that run over the image border come out smaller than
            # 64x64 and are dropped here
            if np.shape(thumb) == (64, 64, 3):
                x.append(thumb)
                y.append(lion_class)

In [14]:
# Walk over `cut` on a 224-pixel grid and take the 64x64 patch at each grid
# point as a "negative" sample, unless the patch contains a pixel whose
# grayscale value is 0 or is not a full 64x64x3 patch.
cut_rows, cut_cols = np.shape(cut)[0], np.shape(cut)[1]

for top in range(0, cut_rows, 224):
    for left in range(0, cut_cols, 224):
        thumb = cut[top:top + 64, left:left + 64, :]

        darkest = np.amin(cv2.cvtColor(thumb, cv2.COLOR_BGR2GRAY))
        if darkest == 0:
            continue
        if np.shape(thumb) != (64, 64, 3):
            continue

        x.append(thumb)
        y.append("negative")

In [15]:
# Register the extra "negative" label. The membership check keeps the cell
# idempotent: re-running it no longer appends a duplicate entry.
if "negative" not in class_names:
    class_names.append("negative")

In [16]:
# Turn the collected samples and labels into numpy arrays.
x, y = np.array(x), np.array(y)

In [17]:
# Fit a LabelBinarizer on the label array, then replace y with its binarized
# form cast to float. The fitted encoder is kept for decoding predictions.
encoder = LabelBinarizer().fit(y)
y = encoder.transform(y).astype(float)

In [18]:
# File name of the saved Keras model; it is looked up inside my_dir.
my_model = '2017-06-23_model.h5'

model_path = my_dir + my_model
model = load_model(model_path)

In [19]:
def _numeric_stem_key(name):
    """Sort key: (integer before the first '.', name).

    Names that do not start with a digit get an infinite first component,
    which places them after every numbered file.
    """
    if name[0].isdigit():
        return (int(name.partition('.')[0]), name)
    return (float('inf'), name)


test_file_names = sorted(os.listdir(my_dir + "Test/"), key=_numeric_stem_key)

# select a subset of files to run on
#test_file_names = test_file_names[0:7]

# dataframe to store results in: one row per test file, one column per
# class, every cell initialised to 0
test_coordinates_df = pd.DataFrame(0, columns=class_names, index=test_file_names)

#print(test_file_names[:5])
#print(test_coordinates_df)

In [21]:
for filename in test_file_names:
    file_int = int(filename[:-4])
    current_time = datetime.datetime.now().time().isoformat()[:5]
    if file_int%500 == 0:
        print('completed %d images at %s' % (file_int, current_time))
        
    img = cv2.imread(my_dir + "Test/"  + filename)

    x_test = []

    for i in range(0,np.shape(img)[0],64):
        for j in range(0,np.shape(img)[1],64):                
            thumb = img[i:i+64,j:j+64,:]        
            if np.shape(thumb) == (64,64,3):
                x_test.append(thumb)

    x_test = np.array(x_test)

    y_predicted = model.predict(x_test, verbose=0)

    y_predicted = encoder.inverse_transform(y_predicted)

    the_counter = Counter(y_predicted)
    
    #print(the_counter)
    
    for key in the_counter:
        test_coordinates_df.set_value(index = filename, col = key, value = the_counter[key])
        
%time


completed 0 images at 09:53
completed 2 images at 09:57
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-21-377d9138e03d> in <module>()
     17     x_test = np.array(x_test)
     18 
---> 19     y_predicted = model.predict(x_test, verbose=0)
     20 
     21     y_predicted = encoder.inverse_transform(y_predicted)

/anaconda/lib/python3.5/site-packages/keras/models.py in predict(self, x, batch_size, verbose)
    889         if self.model is None:
    890             self.build()
--> 891         return self.model.predict(x, batch_size=batch_size, verbose=verbose)
    892 
    893     def predict_on_batch(self, x):

/anaconda/lib/python3.5/site-packages/keras/engine/training.py in predict(self, x, batch_size, verbose)
   1570         f = self.predict_function
   1571         return self._predict_loop(f, ins,
-> 1572                                   batch_size=batch_size, verbose=verbose)
   1573 
   1574     def train_on_batch(self, x, y,

/anaconda/lib/python3.5/site-packages/keras/engine/training.py in _predict_loop(self, f, ins, batch_size, verbose)
   1200                 ins_batch = _slice_arrays(ins, batch_ids)
   1201 
-> 1202             batch_outs = f(ins_batch)
   1203             if not isinstance(batch_outs, list):
   1204                 batch_outs = [batch_outs]

/anaconda/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py in __call__(self, inputs)
   2071         session = get_session()
   2072         updated = session.run(self.outputs + [self.updates_op],
-> 2073                               feed_dict=feed_dict)
   2074         return updated[:len(self.outputs)]
   2075 

/anaconda/lib/python3.5/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
    765     try:
    766       result = self._run(None, fetches, feed_dict, options_ptr,
--> 767                          run_metadata_ptr)
    768       if run_metadata:
    769         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

/anaconda/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
    963     if final_fetches or final_targets:
    964       results = self._do_run(handle, final_targets, final_fetches,
--> 965                              feed_dict_string, options, run_metadata)
    966     else:
    967       results = []

/anaconda/lib/python3.5/site-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1013     if handle is None:
   1014       return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
-> 1015                            target_list, options, run_metadata)
   1016     else:
   1017       return self._do_call(_prun_fn, self._session, handle, feed_dict,

/anaconda/lib/python3.5/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
   1020   def _do_call(self, fn, *args):
   1021     try:
-> 1022       return fn(*args)
   1023     except errors.OpError as e:
   1024       message = compat.as_text(e.message)

/anaconda/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
   1002         return tf_session.TF_Run(session, options,
   1003                                  feed_dict, fetch_list, target_list,
-> 1004                                  status, run_metadata)
   1005 
   1006     def _prun_fn(session, handle, feed_dict, fetch_list):

KeyboardInterrupt: 

In [14]:
# Keep an independent backup of the raw counts. A plain assignment only
# creates a second name for the same DataFrame, so any in-place change would
# show up in the "backup" as well.
protect_df = test_coordinates_df.copy()

# Selecting the five animal columns both drops 'negative' and fixes the
# column order. Unlike `del`, this does not raise KeyError when the cell is
# run a second time.
test_coordinates_df = test_coordinates_df[['adult_males', 'subadult_males', 'adult_females', 'juveniles', 'pups']]
print(test_coordinates_df)


           adult_males  subadult_males  adult_females  juveniles  pups
0.jpg                6               3             62         28   117
1.jpg               38              10            199         95   534
2.jpg              324              27            821        401   543
3.jpg               69               8            201        157   926
4.jpg              215              36            441        235   711
5.jpg              116              11            250        139  1093
6.jpg               46               5            337        110  1429
7.jpg               15               2            102         85   802
8.jpg               24               4            160         96   538
9.jpg              212              15           1164        303  2154
10.jpg              48               5            190         97   840
11.jpg              13               1            188        227  1780
12.jpg              87              20            498        146   628
13.jpg              57               1             45         29   885
14.jpg             456              23            813        364  1144
15.jpg              69              18            263        109   168
16.jpg             106              10            239        132   517
17.jpg             213              38            997        303  1119
18.jpg              59              11            327         91   863
19.jpg             104               8            198        117   488
20.jpg              50               5            124         79  1258
21.jpg              40               5            150         91   216
22.jpg              53               8            255        113   364
23.jpg              77              11            306        166   580
24.jpg             481              60            509        310   237
25.jpg              51              14            113         91   321
26.jpg              49               5            226        129   859
27.jpg              51               8            295        230  1402
28.jpg              41               1             95        133  1024
29.jpg              75               5            190         96   256
...                ...             ...            ...        ...   ...
18606.jpg            0               0              0          0     0
18607.jpg            0               0              0          0     0
18608.jpg            0               0              0          0     0
18609.jpg            0               0              0          0     0
18610.jpg            0               0              0          0     0
18611.jpg            0               0              0          0     0
18612.jpg            0               0              0          0     0
18613.jpg            0               0              0          0     0
18614.jpg            0               0              0          0     0
18615.jpg            0               0              0          0     0
18616.jpg            0               0              0          0     0
18617.jpg            0               0              0          0     0
18618.jpg            0               0              0          0     0
18619.jpg            0               0              0          0     0
18620.jpg            0               0              0          0     0
18621.jpg            0               0              0          0     0
18622.jpg            0               0              0          0     0
18623.jpg            0               0              0          0     0
18624.jpg            0               0              0          0     0
18625.jpg            0               0              0          0     0
18626.jpg            0               0              0          0     0
18627.jpg            0               0              0          0     0
18628.jpg            0               0              0          0     0
18629.jpg            0               0              0          0     0
18630.jpg            0               0              0          0     0
18631.jpg            0               0              0          0     0
18632.jpg            0               0              0          0     0
18633.jpg            0               0              0          0     0
18634.jpg            0               0              0          0     0
18635.jpg            0               0              0          0     0

[18636 rows x 5 columns]

In [15]:
# Write the per-image counts to "<today's ISO date>_submission.csv" in my_dir.
submission_path = my_dir + datetime.date.today().isoformat() + '_submission.csv'
test_coordinates_df.to_csv(submission_path)

In [ ]: