In [1]:
import numpy as np
import pandas as pd
import os
import cv2
from PIL import Image
from scipy.misc import imread
import matplotlib.pyplot as plt
import skimage.feature
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
import keras
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D, Lambda, Cropping2D
from keras.utils import np_utils

from collections import Counter

from keras.models import load_model

from tqdm import tqdm_notebook

import datetime

%matplotlib inline


Using TensorFlow backend.

In [2]:
# The five sea lion classes, used as column labels for the result dataframes.
class_names = [
    "adult_females",
    "adult_males",
    "juveniles",
    "pups",
    "subadult_males",
]

# Root data directory; other cells append "Train/", "TrainDotted/", "Test/",
# the model file name and the submission file name to it.
my_dir = '/seal_the_data/'

In [3]:
# Run number for this execution of the notebook. It selects which slice of
# the sorted test file list is processed (2000 files per run, the 9th run
# takes everything from index 16000 on) and is appended to the submission
# CSV file name. Nine runs are needed to cover the whole test set.

# completed runs: (none recorded yet - list finished run numbers here)

run_num = 1

In [4]:
# Numeric ids of training images that must be skipped.
mismatch_id = [
    3, 7, 9, 21, 30, 34, 71, 81, 89, 97,
    151, 184, 215, 234, 242, 268, 290, 311, 331, 344,
    380, 384, 406, 421, 469, 475, 490, 499, 507, 530,
    531, 605, 607, 614, 621, 638, 644, 687, 712, 721,
    767, 779, 781, 794, 800, 811, 839, 840, 869, 882,
    901, 903, 905, 909, 913, 927, 946,
]

# Turn every id into the matching image file name.
blacklist = [str(image_id) + '.jpg' for image_id in mismatch_id]
print(blacklist[:5])

# The csv file is also excluded from the list of files to process.
blacklist.append('train.csv')
print(blacklist)


['3.jpg', '7.jpg', '9.jpg', '21.jpg', '30.jpg']
['3.jpg', '7.jpg', '9.jpg', '21.jpg', '30.jpg', '34.jpg', '71.jpg', '81.jpg', '89.jpg', '97.jpg', '151.jpg', '184.jpg', '215.jpg', '234.jpg', '242.jpg', '268.jpg', '290.jpg', '311.jpg', '331.jpg', '344.jpg', '380.jpg', '384.jpg', '406.jpg', '421.jpg', '469.jpg', '475.jpg', '490.jpg', '499.jpg', '507.jpg', '530.jpg', '531.jpg', '605.jpg', '607.jpg', '614.jpg', '621.jpg', '638.jpg', '644.jpg', '687.jpg', '712.jpg', '721.jpg', '767.jpg', '779.jpg', '781.jpg', '794.jpg', '800.jpg', '811.jpg', '839.jpg', '840.jpg', '869.jpg', '882.jpg', '901.jpg', '903.jpg', '905.jpg', '909.jpg', '913.jpg', '927.jpg', '946.jpg', 'train.csv']

In [5]:
def _train_name_sort_key(name):
    """Sort key that orders file names by their leading integer.

    Names starting with a digit sort by the integer before the first '.';
    every other name sorts after them (infinite rank), ties broken by name.
    """
    if name[0].isdigit():
        return (int(name.partition('.')[0]), name)
    return (float('inf'), name)


# all entries of the Train directory, in numeric file-name order
file_names = os.listdir(my_dir + "Train/")
file_names.sort(key=_train_name_sort_key)

# select a subset of files to run on (currently only the first file)
file_names = file_names[:1]

# dataframe to store results in: one row per file, one column per class
coordinates_df = pd.DataFrame(index=file_names, columns=class_names)

#print(file_names[:])

In [6]:
# Drop blacklisted files *before* looping. Removing entries from `file_names`
# while iterating over it makes the loop skip the entry that follows every
# removed one. Slice assignment keeps the same list object.
file_names[:] = [name for name in file_names if name not in blacklist]

for filename in tqdm_notebook(file_names):
    # read the Train Dotted and Train images (OpenCV returns them in BGR order)
    image_1 = cv2.imread(my_dir + "/TrainDotted/" + filename)
    image_2 = cv2.imread(my_dir + "/Train/" + filename)

    # copy of the clean image; every detected dot gets blacked out of it below.
    # Only the copy belonging to the last processed file survives the loop.
    cut = np.copy(image_2)

    # absolute difference between Train and Train Dotted
    image_3 = cv2.absdiff(image_1, image_2)

    # mask out blackened regions: gray values below 20 become 0, the rest 255
    mask_1 = cv2.cvtColor(image_1, cv2.COLOR_BGR2GRAY)
    mask_1[mask_1 < 20] = 0
    mask_1[mask_1 > 0] = 255

    mask_2 = cv2.cvtColor(image_2, cv2.COLOR_BGR2GRAY)
    mask_2[mask_2 < 20] = 0
    mask_2[mask_2 > 0] = 255

    image_3 = cv2.bitwise_or(image_3, image_3, mask=mask_1)
    image_3 = cv2.bitwise_or(image_3, image_3, mask=mask_2)

    # convert to grayscale to be accepted by skimage.feature.blob_log
    image_3 = cv2.cvtColor(image_3, cv2.COLOR_BGR2GRAY)

    # detect blobs
    blobs = skimage.feature.blob_log(image_3, min_sigma=3, max_sigma=4, num_sigma=1, threshold=0.02)

    adult_males = []
    subadult_males = []
    pups = []
    juveniles = []
    adult_females = []

    # Draw the debug circles on a separate copy. Drawing on `image_1` itself
    # would overwrite pixels that later blobs still have to sample.
    image_circles = image_1.copy()

    for blob in blobs:
        # each blob unpacks into a row, a column and a third (scale) value
        blob_y, blob_x, blob_sigma = blob
        center = (int(blob_x), int(blob_y))

        # colour of the Train Dotted pixel at the blob centre (BGR order)
        blue, green, red = image_1[int(blob_y)][int(blob_x)][:]

        # decision tree to pick the class of the blob by looking at the dot colour
        if red > 200 and blue < 50 and green < 50:  # RED
            adult_males.append(center)
            cv2.circle(image_circles, center, 20, (0, 0, 255), 10)
        elif red > 200 and blue > 200 and green < 50:  # MAGENTA
            subadult_males.append(center)
            cv2.circle(image_circles, center, 20, (250, 10, 250), 10)
        elif red < 100 and blue < 100 and 150 < green < 200:  # GREEN
            pups.append(center)
            cv2.circle(image_circles, center, 20, (20, 180, 35), 10)
        elif red < 100 and 100 < blue and green < 100:  # BLUE
            juveniles.append(center)
            cv2.circle(image_circles, center, 20, (180, 60, 30), 10)
        elif red < 150 and blue < 50 and green < 100:  # BROWN
            adult_females.append(center)
            cv2.circle(image_circles, center, 20, (0, 42, 84), 10)

        # black out a 224x224 square around every blob in the clean copy
        cv2.rectangle(cut, (int(blob_x) - 112, int(blob_y) - 112),
                      (int(blob_x) + 112, int(blob_y) + 112), 0, -1)

    # Store the coordinate lists with a single-step .at assignment; chained
    # indexing (df[col][row] = ...) may write to a temporary copy instead.
    coordinates_df.at[filename, "adult_males"] = adult_males
    coordinates_df.at[filename, "subadult_males"] = subadult_males
    coordinates_df.at[filename, "adult_females"] = adult_females
    coordinates_df.at[filename, "juveniles"] = juveniles
    coordinates_df.at[filename, "pups"] = pups
        
%time


CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 5.96 µs

In [7]:
# x collects 64x64x3 thumbnails, y the matching class label of each thumbnail
x = []
y = []

for filename in tqdm_notebook(file_names):
    image = cv2.imread(my_dir + "/Train/" + filename)

    # Explicit skip conditions replace the former bare `except: pass`, which
    # silently hid every kind of failure (including real bugs).
    if image is None:
        # cv2.imread returns None when the file cannot be read
        continue
    if filename not in coordinates_df.index:
        continue

    for lion_class in class_names:
        if lion_class not in coordinates_df.columns:
            # e.g. a label that has no coordinate column
            continue

        class_coordinates = coordinates_df.at[filename, lion_class]
        if not isinstance(class_coordinates, (list, tuple)):
            # cell was never filled for this file (still NaN)
            continue

        for center_x, center_y in class_coordinates:
            thumb = image[center_y - 32:center_y + 32, center_x - 32:center_x + 32, :]
            # crops that run over the image border come out smaller and are dropped
            if np.shape(thumb) == (64, 64, 3):
                x.append(thumb)
                y.append(lion_class)




In [8]:
# Sample "negative" thumbnails from `cut`: walk a grid with a 224 pixel step
# and take the 64x64 patch at each grid point. Patches containing any
# pure-black pixel or patches clipped by the image border are discarded.
cut_height = np.shape(cut)[0]
cut_width = np.shape(cut)[1]

for top in range(0, cut_height, 224):
    for left in range(0, cut_width, 224):
        thumb = cut[top:top + 64, left:left + 64, :]

        if np.amin(cv2.cvtColor(thumb, cv2.COLOR_BGR2GRAY)) == 0:
            continue
        if np.shape(thumb) != (64, 64, 3):
            continue

        x.append(thumb)
        y.append("negative")

In [9]:
# Register the extra "negative" label. Guarded so that re-running this cell
# does not add the label (and later a duplicate dataframe column) twice.
if "negative" not in class_names:
    class_names.append("negative")

In [10]:
# turn the collected thumbnails and labels into numpy arrays
x, y = np.array(x), np.array(y)

In [11]:
# Fit the label binarizer on the labels collected above, then replace the
# string labels by their binarized float representation.
# NOTE(review): the encoder only knows labels that occur in `y`; presumably
# these must match the classes the loaded model predicts - confirm.
encoder = LabelBinarizer().fit(y)
y_binarized = encoder.transform(y)
y = y_binarized.astype(float)

In [12]:
# File name of the trained model to load; it is looked up inside `my_dir`.
# (previously used: '2017-06-23_model.h5')
my_model = '2017-06-25_model.h5'

model_path = my_dir + my_model
model = load_model(model_path)

In [16]:
def _test_name_sort_key(name):
    """Order file names by leading integer; non-numeric names go last."""
    if name[0].isdigit():
        return (int(name.partition('.')[0]), name)
    return (float('inf'), name)


test_file_names = sorted(os.listdir(my_dir + "Test/"), key=_test_name_sort_key)

# select a subset of files to run on:
# runs 1-8 take consecutive chunks of 2000 files, run 9 takes everything
# from index 16000 on; any other run number leaves the list unsliced
if run_num in range(1, 10):
    chunk_start = (int(run_num) - 1) * 2000
    chunk_stop = None if run_num == 9 else chunk_start + 2000
    test_file_names = test_file_names[chunk_start:chunk_stop]

# dataframe to store results in (all counts start at 0)
test_coordinates_df = pd.DataFrame(0, index=test_file_names, columns=class_names)

print('run number:', run_num)
print(test_file_names[:5])
#print(test_coordinates_df)


run number: 1
['0.jpg', '1.jpg', '2.jpg', '3.jpg', '4.jpg']

In [ ]:
# For every test image: cut it into non-overlapping 64x64 tiles, classify all
# tiles with the loaded model and store the number of tiles per predicted
# class in `test_coordinates_df`.
for filename in tqdm_notebook(test_file_names):
    file_int = int(filename[:-4])
    if file_int % 500 == 0:
        # the timestamp is only needed for the progress message
        current_time = datetime.datetime.now().time().isoformat()[:5]
        print('completed %d images at %s' % (file_int, current_time))

    img = cv2.imread(my_dir + "Test/" + filename)
    if img is None:
        # cv2.imread returns None for unreadable files; keep the zero counts
        print('could not read %s, skipping' % filename)
        continue

    x_test = []
    for i in range(0, np.shape(img)[0], 64):
        for j in range(0, np.shape(img)[1], 64):
            thumb = img[i:i + 64, j:j + 64, :]
            # tiles clipped by the image border are ignored
            if np.shape(thumb) == (64, 64, 3):
                x_test.append(thumb)

    if not x_test:
        # image smaller than one tile: nothing to predict, counts stay 0
        continue

    x_test = np.array(x_test)

    y_predicted = model.predict(x_test, verbose=0)

    # map the model output back to class label strings
    y_predicted = encoder.inverse_transform(y_predicted)

    the_counter = Counter(y_predicted)

    # DataFrame.set_value was removed in pandas 1.0; .at is the replacement
    for key, count in the_counter.items():
        test_coordinates_df.at[filename, key] = count
        
%time


completed 0 images at 16:10

In [14]:
# Keep an independent snapshot of the full result, 'negative' column included.
# A plain assignment would only alias the dataframe, so later changes to it
# would also change the "protected" one.
protect_df = test_coordinates_df.copy()

# Keep only the five sea lion classes, in this column order. Selecting the
# columns already leaves out 'negative', so no in-place `del` is needed and
# the cell can be re-run safely.
test_coordinates_df = test_coordinates_df[['adult_males', 'subadult_males', 'adult_females', 'juveniles', 'pups']]
print(test_coordinates_df)


           adult_males  subadult_males  adult_females  juveniles  pups
0.jpg                6               3             62         28   117
1.jpg               38              10            199         95   534
2.jpg              324              27            821        401   543
3.jpg               69               8            201        157   926
4.jpg              215              36            441        235   711
5.jpg              116              11            250        139  1093
6.jpg               46               5            337        110  1429
7.jpg               15               2            102         85   802
8.jpg               24               4            160         96   538
9.jpg              212              15           1164        303  2154
10.jpg              48               5            190         97   840
11.jpg              13               1            188        227  1780
12.jpg              87              20            498        146   628
13.jpg              57               1             45         29   885
14.jpg             456              23            813        364  1144
15.jpg              69              18            263        109   168
16.jpg             106              10            239        132   517
17.jpg             213              38            997        303  1119
18.jpg              59              11            327         91   863
19.jpg             104               8            198        117   488
20.jpg              50               5            124         79  1258
21.jpg              40               5            150         91   216
22.jpg              53               8            255        113   364
23.jpg              77              11            306        166   580
24.jpg             481              60            509        310   237
25.jpg              51              14            113         91   321
26.jpg              49               5            226        129   859
27.jpg              51               8            295        230  1402
28.jpg              41               1             95        133  1024
29.jpg              75               5            190         96   256
...                ...             ...            ...        ...   ...
18606.jpg            0               0              0          0     0
18607.jpg            0               0              0          0     0
18608.jpg            0               0              0          0     0
18609.jpg            0               0              0          0     0
18610.jpg            0               0              0          0     0
18611.jpg            0               0              0          0     0
18612.jpg            0               0              0          0     0
18613.jpg            0               0              0          0     0
18614.jpg            0               0              0          0     0
18615.jpg            0               0              0          0     0
18616.jpg            0               0              0          0     0
18617.jpg            0               0              0          0     0
18618.jpg            0               0              0          0     0
18619.jpg            0               0              0          0     0
18620.jpg            0               0              0          0     0
18621.jpg            0               0              0          0     0
18622.jpg            0               0              0          0     0
18623.jpg            0               0              0          0     0
18624.jpg            0               0              0          0     0
18625.jpg            0               0              0          0     0
18626.jpg            0               0              0          0     0
18627.jpg            0               0              0          0     0
18628.jpg            0               0              0          0     0
18629.jpg            0               0              0          0     0
18630.jpg            0               0              0          0     0
18631.jpg            0               0              0          0     0
18632.jpg            0               0              0          0     0
18633.jpg            0               0              0          0     0
18634.jpg            0               0              0          0     0
18635.jpg            0               0              0          0     0

[18636 rows x 5 columns]

In [15]:
# Write the counts to "<today>_submission_<run_num>.csv" inside `my_dir`.
submission_name = datetime.date.today().isoformat() + '_submission_' + str(run_num) + '.csv'
test_coordinates_df.to_csv(my_dir + submission_name)

In [ ]: