In [1]:
%matplotlib inline

import numpy as np
import pandas as pd
import glob
from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.externals import joblib
from skimage.transform import resize
from tqdm import tqdm_notebook as tqdm
import ipywidgets as ipy
import warnings
import sys
if not sys.warnoptions:
    warnings.simplefilter("ignore")
from common_blocks.utils import plot_list, read_images
from common_blocks.metrics import compute_eval_metric

METADATA_FILEPATH = './meta/files/metadata.csv'
OUT_OF_FOLD_TRAIN_RESULTS_FILEPATH = '/home/dex/Desktop/ml/salt/output/openSolution/experiment/out_of_fold_train_predictions.pkl'

In [3]:
def load_img(path):
    img = np.array(Image.open(path))
    return img

def filter_iout(results, iout_range):
    iout_min, iout_max = iout_range
    results_filtered = []
    for tup in results:
        if iout_min<=tup[0]<=iout_max:
            results_filtered.append(tup)
    return results_filtered

def filter_size(results, size_range):
    size_min, size_max = size_range
    results_filtered = []
    for tup in results:
        if size_min<=tup[1]<=size_max:
            results_filtered.append(tup)
    return results_filtered

In [6]:
! ls


 augmentations.ipynb	     main.py
 auxiliary.csv		     meta
 auxiliary_metadata.csv      modules
 common_blocks		     offline_job.log
 configs		    'OpenSolution results.ipynb'
 data_exploration.ipynb      requirements.txt
 depths.csv		     result_exploration-Copy1.ipynb
 Main-folds.ipynb	     result_exploration.ipynb
'Main folds neptune.ipynb'   small_mask_generation.ipynb
 Main.ipynb		    'Untitled 1.ods'

In [4]:
metadata = pd.read_csv(METADATA_FILEPATH)

oof_train = joblib.load(OUT_OF_FOLD_TRAIN_RESULTS_FILEPATH)
ids = oof_train['ids']
predictions = oof_train['images']

In [8]:
THRESHOLD = 0.45

predicted_maps, predicted_masks, masks, images, depths, sizes = [],[],[],[],[],[]
for idx, pred in tqdm(zip(ids, predictions)):
    row = metadata[metadata['id']==idx]
    predicted_map = resize(pred[1,:,:],(101,101),mode='constant')
    predicted_mask = (predicted_map > THRESHOLD).astype(int)
    image = load_img(row.file_path_image.values[0])
    mask = (load_img(row.file_path_mask.values[0]) > 0).astype(int)
    depth = row.z.values[0]
    size = np.sum(mask)
    
    images.append(image)
    masks.append(mask)
    depths.append(depth)
    predicted_maps.append(predicted_map)
    predicted_masks.append(predicted_mask)
    sizes.append(size)




In [9]:
iouts = [compute_eval_metric(gt, pred) for gt, pred in tqdm(zip(masks, predicted_masks))]
results = list(zip(iouts, sizes, depths, images, predicted_masks, predicted_maps, masks))



Score by size


In [10]:
print('IOUT {:.4f}\n'.format(np.mean(list(zip(*results))[0])))
for size_range in [(0,0),(1,300),(300,1000),(1000,3000),(3000,9000), (9000,10201)]:
    results_by_size = filter_size(results, size_range)
    iout = np.mean(list(zip(*results_by_size))[0])
    sample_size = len(results_by_size)
    fraction = len(results_by_size)/len(results)
    print('size {} | IOUT {:.4f} | sample nr {} | fraction {} | max gain {:.4f}'.format(size_range, 
                                                                       iout,
                                                                       sample_size, 
                                                                       fraction,
                                                                       (1.0-iout) * fraction
                                                                       ))


IOUT 0.7074

size (0, 0) | IOUT 0.9533 | sample nr 1562 | fraction 0.2846209912536443 | max gain 0.0133
size (1, 300) | IOUT 0.3637 | sample nr 1647 | fraction 0.30010932944606417 | max gain 0.1910
size (300, 1000) | IOUT 0.4855 | sample nr 415 | fraction 0.07561953352769679 | max gain 0.0389
size (1000, 3000) | IOUT 0.7526 | sample nr 508 | fraction 0.09256559766763849 | max gain 0.0229
size (3000, 9000) | IOUT 0.8974 | sample nr 1090 | fraction 0.19861516034985421 | max gain 0.0204
size (9000, 10201) | IOUT 0.8658 | sample nr 272 | fraction 0.04956268221574344 | max gain 0.0067

In [ ]:
IOUT 0.7071

size (0, 0) | IOUT 0.9539 | sample nr 1562 | fraction 0.2846209912536443 | max gain 0.0131
size (1, 300) | IOUT 0.3632 | sample nr 1647 | fraction 0.30010932944606417 | max gain 0.1911
size (300, 1000) | IOUT 0.4834 | sample nr 415 | fraction 0.07561953352769679 | max gain 0.0391
size (1000, 3000) | IOUT 0.7516 | sample nr 508 | fraction 0.09256559766763849 | max gain 0.0230
size (3000, 9000) | IOUT 0.8972 | sample nr 1090 | fraction 0.19861516034985421 | max gain 0.0204
size (9000, 10201) | IOUT 0.8658 | sample nr 272 | fraction 0.04956268221574344 | max gain 0.0067

In [ ]:
IOUT 0.6857 47

size (0, 0) | IOUT 0.9456 | sample nr 1562 | fraction 0.2846209912536443 | max gain 0.0155
size (1, 300) | IOUT 0.3073 | sample nr 1647 | fraction 0.30010932944606417 | max gain 0.2079
size (300, 1000) | IOUT 0.4369 | sample nr 415 | fraction 0.07561953352769679 | max gain 0.0426
size (1000, 3000) | IOUT 0.7551 | sample nr 508 | fraction 0.09256559766763849 | max gain 0.0227
size (3000, 9000) | IOUT 0.8982 | sample nr 1090 | fraction 0.19861516034985421 | max gain 0.0202
size (9000, 10201) | IOUT 0.8805 | sample nr 272 | fraction 0.04956268221574344 | max gain 0.0059

IOUT 0.6856 50

size (0, 0) | IOUT 0.9469 | sample nr 1562 | fraction 0.2846209912536443 | max gain 0.0151
size (1, 300) | IOUT 0.3073 | sample nr 1647 | fraction 0.30010932944606417 | max gain 0.2079
size (300, 1000) | IOUT 0.4342 | sample nr 415 | fraction 0.07561953352769679 | max gain 0.0428
size (1000, 3000) | IOUT 0.7539 | sample nr 508 | fraction 0.09256559766763849 | max gain 0.0228
size (3000, 9000) | IOUT 0.8979 | sample nr 1090 | fraction 0.19861516034985421 | max gain 0.0203
size (9000, 10201) | IOUT 0.8801 | sample nr 272 | fraction 0.04956268221574344 | max gain 0.0059

IOUT 0.6856 45

size (0, 0) | IOUT 0.9443 | sample nr 1562 | fraction 0.2846209912536443 | max gain 0.0159
size (1, 300) | IOUT 0.3077 | sample nr 1647 | fraction 0.30010932944606417 | max gain 0.2078
size (300, 1000) | IOUT 0.4400 | sample nr 415 | fraction 0.07561953352769679 | max gain 0.0423
size (1000, 3000) | IOUT 0.7547 | sample nr 508 | fraction 0.09256559766763849 | max gain 0.0227
size (3000, 9000) | IOUT 0.8980 | sample nr 1090 | fraction 0.19861516034985421 | max gain 0.0203
size (9000, 10201) | IOUT 0.8809 | sample nr 272 | fraction 0.04956268221574344 | max gain 0.0059


IOUT 0.6850 40

size (0, 0) | IOUT 0.9417 | sample nr 1562 | fraction 0.2846209912536443 | max gain 0.0166
size (1, 300) | IOUT 0.3084 | sample nr 1647 | fraction 0.30010932944606417 | max gain 0.2075
size (300, 1000) | IOUT 0.4407 | sample nr 415 | fraction 0.07561953352769679 | max gain 0.0423
size (1000, 3000) | IOUT 0.7539 | sample nr 508 | fraction 0.09256559766763849 | max gain 0.0228
size (3000, 9000) | IOUT 0.8977 | sample nr 1090 | fraction 0.19861516034985421 | max gain 0.0203
size (9000, 10201) | IOUT 0.8809 | sample nr 272 | fraction 0.04956268221574344 | max gain 0.005

Predicted mask exploration


In [7]:
results_filtered = results.copy()
results_filtered = filter_iout(results_filtered, iout_range=(0.0,0.2))
results_filtered = filter_size(results_filtered, size_range=(1, 300))

print('sample nr {} fraction {} mean IOUT {}'.format(len(results_filtered), 
                                                     len(results_filtered)/len(results),
                                                     np.mean(list(zip(*results_filtered))[0])))

@ipy.interact(idx = ipy.IntSlider(min=0,max=len(results_filtered)-1,value=0,step=1))
def present(idx=idx):
    iout, s, z, img, pred_mask, pred_map, gt = results_filtered[idx]
    print('IOUT {}, size {}, depth {}'.format(iout, s, z))
    plot_list(images=[img, pred_map],labels=[pred_mask, gt])


sample nr 827 fraction 0.1506924198250729 mean IOUT 0.024667472793228534

In [ ]:
def rle_decode(rle_mask):
    '''
    rle_mask: run-length as string formated (start length)
    shape: (height,width) of array to return 
    Returns numpy array, 1 - mask, 0 - background

    '''
    if rle_mask !=str(np.nan):
        s = rle_mask.split()
        starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
        starts -= 1
        ends = starts + lengths
        img = np.zeros(101*101, dtype=np.uint8)
        for lo, hi in zip(starts, ends):
            img[lo:hi] = 1
        return img.reshape(101,101)