In [16]:
import numpy as np
import pandas as pd
import os
import cv2
from PIL import Image
from scipy.misc import imread
import matplotlib.pyplot as plt
import skimage.feature
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
import keras
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D, Lambda, Cropping2D
from keras.utils import np_utils

from collections import Counter

import datetime

%matplotlib inline

In [17]:
# The five sea-lion classes; these names are also used as the column names
# of the results DataFrame built further down.
class_names = ['adult_females', 'adult_males', 'juveniles', 'pups', 'subadult_males']

# Root directory of the dataset. It can be overridden with the
# SEALIONS_DATA_DIR environment variable so the notebook is not tied to one
# machine; the original location is kept as the default.
# os.path.join(..., "") guarantees a trailing separator, because other cells
# build paths by plain string concatenation (my_dir + "Train/").
my_dir = os.path.join(
    os.environ.get("SEALIONS_DATA_DIR", "/Volumes/dax/seals/Kaggle-NOAA-SeaLions/"),
    "",
)

In [18]:
# Build the list of file names that must be skipped when scanning the images.
# MismatchedTrainImages.txt holds one entry per line; '.jpg' is appended so the
# entries can be compared directly with the names returned by os.listdir().
# The `with` block closes the file handle, which the previous version leaked.
with open(my_dir + 'MismatchedTrainImages.txt') as blacklist_fin:
    # Skip blank lines so that no bare '.jpg' entry ends up in the list.
    blacklist = [line.strip() + '.jpg' for line in blacklist_fin if line.strip()]

# 'train.csv' is not an image, so it is skipped as well.
blacklist.append('train.csv')

In [19]:
def _numeric_first_key(name):
    """Sort key: names starting with a digit order by their integer stem, all others go last."""
    stem = name.partition('.')[0]
    rank = int(stem) if name[0].isdigit() else float('inf')
    return (rank, name)


# List the training images and put them in numeric order (0.jpg, 1.jpg, 2.jpg, ...)
file_names = os.listdir(my_dir + "Train/")
file_names.sort(key=_numeric_first_key)

# select a subset of files to run on
file_names = file_names[:101]

# dataframe to store results in: one row per file, one column per class
coordinates_df = pd.DataFrame(columns=class_names, index=file_names)

In [20]:
for filename in file_names:
    file_int = int(filename[:-4])
    if file_int%50 == 0:
        print('completed %d images' % file_int)
    if filename in blacklist:
        file_names.remove(filename)
    else:
        # read the Train and Train Dotted images
        image_1 = cv2.imread(my_dir + "/TrainDotted/" + filename)
        image_2 = cv2.imread(my_dir + "/Train/" + filename)

        cut = np.copy(image_2)

        # absolute difference between Train and Train Dotted
        image_3 = cv2.absdiff(image_1,image_2)

    # mask out blackened regions from Train Dotted
        mask_1 = cv2.cvtColor(image_1, cv2.COLOR_BGR2GRAY)
        mask_1[mask_1 < 20] = 0
        mask_1[mask_1 > 0] = 255

        mask_2 = cv2.cvtColor(image_2, cv2.COLOR_BGR2GRAY)
        mask_2[mask_2 < 20] = 0
        mask_2[mask_2 > 0] = 255

        image_3 = cv2.bitwise_or(image_3, image_3, mask=mask_1)
        image_3 = cv2.bitwise_or(image_3, image_3, mask=mask_2) 

        # convert to grayscale to be accepted by skimage.feature.blob_log
        image_3 = cv2.cvtColor(image_3, cv2.COLOR_BGR2GRAY)

        # detect blobs
        blobs = skimage.feature.blob_log(image_3, min_sigma=3, max_sigma=4, num_sigma=1, threshold=0.02)

        adult_males = []
        subadult_males = []
        pups = []
        juveniles = []
        adult_females = [] 

        image_circles = image_1

        for blob in blobs:
            # get the coordinates for each blob
            y, x, s = blob
            # get the color of the pixel from Train Dotted in the center of the blob
            g,b,r = image_1[int(y)][int(x)][:]

            # decision tree to pick the class of the blob by looking at the color in Train Dotted
            if r > 200 and g < 50 and b < 50: # RED
                adult_males.append((int(x),int(y)))
                cv2.circle(image_circles, (int(x),int(y)), 20, (0,0,255), 10) 
            elif r > 200 and g > 200 and b < 50: # MAGENTA
                subadult_males.append((int(x),int(y))) 
                cv2.circle(image_circles, (int(x),int(y)), 20, (250,10,250), 10)
            elif r < 100 and g < 100 and 150 < b < 200: # GREEN
                pups.append((int(x),int(y)))
                cv2.circle(image_circles, (int(x),int(y)), 20, (20,180,35), 10)
            elif r < 100 and  100 < g and b < 100: # BLUE
                juveniles.append((int(x),int(y))) 
                cv2.circle(image_circles, (int(x),int(y)), 20, (180,60,30), 10)
            elif r < 150 and g < 50 and b < 100:  # BROWN
                adult_females.append((int(x),int(y)))
                cv2.circle(image_circles, (int(x),int(y)), 20, (0,42,84), 10)  

            cv2.rectangle(cut, (int(x)-112,int(y)-112),(int(x)+112,int(y)+112), 0,-1)

        coordinates_df["adult_males"][filename] = adult_males
        coordinates_df["subadult_males"][filename] = subadult_males
        coordinates_df["adult_females"][filename] = adult_females
        coordinates_df["juveniles"][filename] = juveniles
        coordinates_df["pups"][filename] = pups
        
%time


completed 0 images
completed 50 images
completed 100 images
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 16 µs

In [21]:
# Show the Train Dotted image from the last processed file with the detected dots circled.
f, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 16))
circles_rgb = cv2.cvtColor(image_circles, cv2.COLOR_BGR2RGB)  # reorder BGR -> RGB for display
ax.imshow(circles_rgb)
plt.show()



In [22]:
# Show the Train image from the last processed file with a black square over every detected blob.
f, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 16))
cut_rgb = cv2.cvtColor(cut, cv2.COLOR_BGR2RGB)  # reorder BGR -> RGB for display
ax.imshow(cut_rgb)
plt.show()