In [ ]:
%matplotlib inline
import os

import glob
import matplotlib.pyplot as plt
from PIL import Image
import seaborn as sns
import ipywidgets as ipy
import pandas as pd
import numpy as np

# Root directory of the salt dataset. Every path built below is relative to it.
# Set the SALT_DATA_DIR environment variable to point the notebook at another
# copy of the data; the original location is kept as the default.
DATA_DIR = os.environ.get('SALT_DATA_DIR', '/mnt/ml-team/minerva/open-solutions/salt/')

In [ ]:
# Read <DATA_DIR>/data/train.csv into a DataFrame; later cells use its 'id' column.
train_csv_path = os.path.join(DATA_DIR, 'data', 'train.csv')
train = pd.read_csv(train_csv_path)

In [ ]:
# Preview the first five rows of the training table.
train.head(n=5)

In [ ]:
# Read <DATA_DIR>/data/depths.csv into a DataFrame; later cells use its 'id' and 'z' columns.
depths_csv_path = os.path.join(DATA_DIR, 'data', 'depths.csv')
depths = pd.read_csv(depths_csv_path)

In [ ]:
# Preview the first five rows of the depths table.
depths.head(n=5)

In [ ]:
# Plot the distribution of the depth column 'z'.
# NOTE(review): `distplot` is reported as deprecated in newer seaborn releases;
# confirm the installed version before upgrading the environment.
depth_values = depths['z']
sns.distplot(depth_values)

In [ ]:
def img_cumsum(img):
    """Return the cumulative sum along axis 0 of the mean-centred image.

    The input is cast to float32, the mean of the *original* array
    (``img.mean()``) is subtracted, and the result is accumulated down axis 0.

    Parameters
    ----------
    img : numpy.ndarray
        Array exposing ``.mean()``.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``img`` holding the running sums.
    """
    centred = np.float32(img) - img.mean()
    return np.cumsum(centred, axis=0)

def plot(k, z_range):
    """Show up to ``k`` training samples whose depth lies strictly inside ``z_range``.

    For every selected id one figure with three panels is drawn:
    the image scaled by 1/255, its mean-centred cumulative sum along axis 0
    (see ``img_cumsum``), and the mask scaled by 1/255.

    Parameters
    ----------
    k : int
        Maximum number of samples to display.
    z_range : tuple
        ``(z_min, z_max)``; both bounds are exclusive.

    Notes
    -----
    Ids taken from ``depths`` that have no matching file under
    ``files/train/images`` or ``files/train/masks`` are skipped rather than
    raising ``IndexError``; the scan continues until ``k`` samples have been
    shown or the ids in range are exhausted.
    """
    z_min, z_max = z_range
    in_range = (depths['z'] > z_min) & (depths['z'] < z_max)

    shown = 0
    for idx in depths.loc[in_range, 'id'].tolist():
        if shown >= k:
            break

        img_matches = glob.glob('{}/files/train/images/{}*'.format(DATA_DIR, idx))
        mask_matches = glob.glob('{}/files/train/masks/{}*'.format(DATA_DIR, idx))
        if not img_matches or not mask_matches:
            # No train image/mask for this id (presumably it is not part of the
            # training set; verify against the data) - nothing to draw.
            continue

        # Context managers release the file handles as soon as the pixel data
        # has been copied into numpy arrays.
        with Image.open(img_matches[0]) as img_file:
            img = np.array(img_file) / 255.
        with Image.open(mask_matches[0]) as mask_file:
            mask = np.array(mask_file) / 255.
        img_cs = img_cumsum(img)

        fig, axs = plt.subplots(1, 3, figsize=(12, 6))
        axs[0].imshow(img, cmap='seismic')
        axs[1].imshow(img_cs, cmap='seismic')
        axs[2].imshow(mask)
        plt.show()
        # Many figures are created in this loop; close each one explicitly so
        # they do not accumulate in memory.
        plt.close(fig)
        shown += 1
# Draw up to 100 samples whose depth z satisfies 400 < z < 600.
plot(k=100, z_range=(400, 600))

In [ ]:
# Collect every training image and mask path, each list sorted by path.
# NOTE(review): later cells index both lists with the same position, which
# assumes the two sorted listings line up pair-wise - confirm.
train_img_filepaths = glob.glob('{}/files/train/images/*'.format(DATA_DIR))
train_img_filepaths.sort()
train_mask_filepaths = glob.glob('{}/files/train/masks/*'.format(DATA_DIR))
train_mask_filepaths.sort()

In [ ]:
# NOTE: this rebinds the name `plot`, which an earlier cell defines with a
# different signature (k, z_range); re-run that cell before calling it again.
#
# The slider's upper bound is the last valid list index (len - 1). The initial
# value and step are given to the IntSlider itself; previously they were
# passed to `interact`, so they never configured the slider.
@ipy.interact(idx=ipy.IntSlider(min=0,
                                max=max(len(train_img_filepaths) - 1, 0),
                                value=10,
                                step=1))
def plot(idx):
    """Show the ``idx``-th training image and the ``idx``-th mask side by side.

    Parameters
    ----------
    idx : int
        Position in ``train_img_filepaths`` / ``train_mask_filepaths``
        (both sorted by path).
    """
    # Keep the files open only while matplotlib reads the pixel data.
    with Image.open(train_img_filepaths[idx]) as img, \
            Image.open(train_mask_filepaths[idx]) as mask:
        fig, axs = plt.subplots(1, 2, figsize=(20, 10))
        axs[0].imshow(img)
        axs[1].imshow(mask)
        plt.show()
    # Every slider move creates a new figure; close it once displayed.
    plt.close(fig)

Training set characteristics


In [ ]:
# Load the image for every id in `train`, scaled by 1/255, into the list `imgs`.
# For each id the first path matching the glob pattern is used.
train_img_pattern = '{}/files/train/images/{}*'
imgs = []
for idx in train['id'].tolist():
    matches = glob.glob(train_img_pattern.format(DATA_DIR, idx))
    train_img_filepath = matches[0]
    img = np.array(Image.open(train_img_filepath)) / 255.
    imgs.append(img)

In [ ]:
# Mean and standard deviation over all elements of all loaded training images.
imgs_stack = np.asarray(imgs)
imgs_stack.mean(), imgs_stack.std()

In [ ]: