Generador de recortes del fondo de la imagen


Este notebook tiene como objetivo la obtención de recortes del fondo de las imágenes de fitolitos. La obtención de estos es fundamental para la utilización de técnicas de clasificación y/o reconocimiento de objetos mediante clasificadores junto a descriptores.

Para ello:

  1. Leemos las imágenes junto a sus coordenadas almacenadas en un fichero JSON.
  2. Obtenemos recortes (de distintos tamaños) de la imagen siempre y cuando sea un área sin un fitolito.

Las imágenes generadas se almacenan en "Background2" dentro de "code/rsc/img" para no alterar el conjunto de imágenes del fondo que se aporta inicialmente.


In [10]:
%matplotlib inline
from __future__ import print_function
from ipywidgets import interact_manual, fixed

import matplotlib.pyplot as plt

import os, os.path
import re

import numpy as np
import math
from math import ceil

from sklearn.feature_extraction.image import PatchExtractor
from skimage import io
from skimage.transform import rescale

import copy
import json

import warnings

import random

In [11]:
def extract_patches(img, coords_list, patch_size, N=math.inf, scale=1.0, random_patch_size=True):
    """Cut patches out of an image on a fixed 400-pixel grid.

    The top-left corner of every candidate patch lies on a grid with a
    stride of 400 pixels in both directions. A candidate is kept only if a
    full ``patch_size`` window starting there fits inside the image.

    Args:
        img: Array indexed as ``img[row, column, ...]``. Only the first two
            dimensions are inspected, so 2-D (single channel) arrays are
            accepted as well as 3-D ones.
        coords_list: Not used by this function; kept so existing callers
            keep working.
        patch_size: ``(height, width)`` of a full-size patch.
        N: Maximum number of patches to return. Values ``<= 0`` yield an
            empty list.
        scale: Not used by this function; kept so existing callers keep
            working.
        random_patch_size: When truthy, each patch has a 15% chance of
            having its height halved, a 15% chance of having its width
            halved and a 15% chance of having both halved.

    Returns:
        A list of ``(patch, (x, y, x2, y2))`` tuples, where ``patch`` is
        ``img[y:y2, x:x2]``.
    """
    if N <= 0:
        return []

    patches = []
    y_size, x_size = patch_size
    # Slice the shape instead of unpacking three values so grayscale
    # (2-D) images do not raise.
    h, w = img.shape[:2]
    for y in range(0, h, 400):
        if y + y_size > h:
            break
        for x in range(0, w, 400):
            if x + x_size > w:
                break

            # Start from the full window on every iteration; the random
            # shrinking below must not leak into the next patch.
            y2 = y + y_size
            x2 = x + x_size

            # Randomly shrink the window to get more variety in the
            # patch sizes.
            if random_patch_size:
                rand = random.random()
                if rand > 0.85:
                    y2 = y + round(y_size * 0.5)
                elif rand > 0.7:
                    x2 = x + round(x_size * 0.5)
                elif rand > 0.55:
                    y2 = y + round(y_size * 0.5)
                    x2 = x + round(x_size * 0.5)

            patches.append((img[y:y2, x:x2], (x, y, x2, y2)))
            # Stop as soon as exactly N patches were collected (the
            # previous "count > N" check returned N + 1).
            if len(patches) >= N:
                return patches
    return patches

In [12]:
def is_containing_objects(patch_coords, coords):
    """Tell whether a patch and an object box overlap.

    Both arguments are indexable as ``(x1, y1, x2, y2)``. Borders are
    inclusive, so boxes that merely touch count as overlapping.

    The test compares the horizontal and vertical extents of the two
    boxes instead of looking for corners of one box inside the other.
    A corner test misses boxes that cross each other like a plus sign,
    where no corner of either box lies inside the other one.

    Args:
        patch_coords: Box of the patch.
        coords: Box of the object.

    Returns:
        True when the two boxes share at least one point, False otherwise.
    """
    # Order each pair so the comparison also works when a box is given
    # with its corners swapped.
    p_left, p_right = sorted((patch_coords[0], patch_coords[2]))
    p_top, p_bottom = sorted((patch_coords[1], patch_coords[3]))
    o_left, o_right = sorted((coords[0], coords[2]))
    o_top, o_bottom = sorted((coords[1], coords[3]))

    overlap_x = p_left <= o_right and o_left <= p_right
    overlap_y = p_top <= o_bottom and o_top <= p_bottom
    return overlap_x and overlap_y

In [13]:
def supress_contained_patches(patches, coords_list):
    """Keep only the patches that touch none of the given boxes.

    Args:
        patches: Iterable of ``(patch, patch_coords)`` entries.
        coords_list: Boxes to avoid (the areas where phytoliths are
            located); each one is checked against every patch with
            ``is_containing_objects``.

    Returns:
        A new list with the entries of ``patches``, in their original
        order, for which ``is_containing_objects`` was falsy for every
        box in ``coords_list``.
    """
    background_only = []
    for entry in patches:
        box = entry[1]
        # any() stops at the first box that hits the patch.
        hits_object = any(
            is_containing_objects(box, object_box)
            for object_box in coords_list
        )
        if not hits_object:
            background_only.append(entry)
    return background_only

In [14]:
def save_patches(patches, path, image_name = ''):
    """Write every patch to disk as a JPEG file.

    Each file is named ``path + image_name + "x_y_x2_y2.jpg"``, built
    from the four values stored in the entry's coordinates, and is saved
    through ``io.imsave`` with ``quality=30``.

    Args:
        patches: Iterable of ``(patch, coords)`` entries; ``coords`` must
            hold at least four values.
        path: String prefix of the output files (concatenated as is).
        image_name: Extra prefix placed right before the coordinates.
    """
    for entry in patches:
        box = entry[1]
        coords_tag = "_".join(
            [str(box[0]), str(box[1]), str(box[2]), str(box[3])]
        )
        target = path + image_name + coords_tag + ".jpg"
        io.imsave(target, entry[0], quality=30)

In [15]:
# Default source directory scanned by list_images.
path = "../../rsc/img/Default"
# Prefix (directory with trailing slash) under which the patches are saved.
dest_path = "../../rsc/img/Background2/"

# Case-insensitive match for names ending in ".jpg". A raw string is used
# so that "\." reaches the regex engine without relying on Python keeping
# an invalid string escape.
pattern = re.compile(r"^.*\.jpg$", re.IGNORECASE)

def list_images(path='../../rsc/img/Default'):
    """Return the paths of the images in *path* that have a JSON sibling.

    A directory entry is selected when its name matches the module-level
    ``pattern`` and a file with the same stem and a ``.json`` extension
    exists in the same directory.

    Args:
        path: Directory to scan.

    Returns:
        A list of ``path + "/" + name`` strings, in the order given by
        ``os.listdir``.
    """
    images_list = []
    for name in os.listdir(path):
        if not pattern.match(name):
            continue
        # splitext only strips the final extension, so stems that contain
        # dots ("sample.v2.jpg") map to "sample.v2.json" and not to
        # "sample.json".
        json_name = os.path.splitext(name)[0] + ".json"
        if os.path.exists(path + "/" + json_name):
            images_list.append(path + "/" + name)
    return images_list

In [16]:
def read_coords_conversion(coords_dict):
    """Flatten a dict of box lists into one list with reordered columns.

    Every box ``b`` found in the values of ``coords_dict`` is emitted as
    ``[b[2], b[0], b[3], b[1]]``.
    NOTE(review): presumably this turns ``(y1, y2, x1, x2)`` into the
    ``(x1, y1, x2, y2)`` layout used for patch coordinates; confirm
    against the JSON files.

    Args:
        coords_dict: Mapping whose values are sequences of boxes, each box
            holding at least four values. The keys are ignored. A value
            may be an empty sequence.

    Returns:
        A single list with all the reordered boxes, following the
        iteration order of the dict.
    """
    coords_list = []
    for boxes in coords_dict.values():
        # Plain indexing works for empty lists too, whereas slicing a
        # NumPy array built from [] with [:, [...]] raises IndexError.
        coords_list.extend([box[2], box[0], box[3], box[1]] for box in boxes)
    return coords_list

In [17]:
def background_images_generator(path, number_of_images, dest_path):
    """Generate background patches from the labelled images found in *path*.

    For every image returned by ``list_images(path)`` the function loads
    the boxes stored in the JSON file next to it, reads the image at half
    scale, extracts 250x250 grid patches, drops those that overlap a box
    and saves the rest through ``save_patches``. It stops once at least
    ``number_of_images`` patches have been written.

    Args:
        path: Directory with the images and their JSON files.
        number_of_images: Target number of patches. Each image contributes
            at most ``ceil(number_of_images / number of images found)``
            candidates before filtering.
        dest_path: String prefix for the output files (a directory name
            ending in a separator, or a directory plus file-name prefix).

    Raises:
        ValueError: If ``list_images(path)`` returns no images.
        KeyError: If a JSON file has no entry named ``<image stem>.jpg``.
    """
    images_names_list = list_images(path)

    initial_value = len(images_names_list)

    if initial_value == 0:
        raise ValueError("Number of images must be greater than 0")

    # dest_path is concatenated with the file name as a plain string, so
    # only its directory part has to exist.
    dest_dir = os.path.dirname(dest_path)
    if dest_dir:
        os.makedirs(dest_dir, exist_ok=True)

    count = 0
    images_per_image = ceil(number_of_images / initial_value)

    # Silence warnings only while the images are processed instead of
    # installing a global "ignore" filter again on every iteration.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        for image_path in images_names_list:
            # splitext works for any directory, unlike splitting the
            # whole path on "." and gluing "../.." back in front.
            stem = os.path.splitext(image_path)[0]
            json_path = stem + ".json"

            # Skip images without a coordinates file before paying for
            # decoding and rescaling them.
            if not os.path.exists(json_path):
                continue

            image_name = os.path.basename(stem)
            image_with_format = image_name + ".jpg"

            with open(json_path) as jsonfile:
                coords_dict = json.load(jsonfile)
            coords_list = read_coords_conversion(coords_dict[image_with_format])

            image = rescale(io.imread(image_path), 0.5)

            # NOTE(review): the image is halved but coords_list is used
            # as read from the JSON file; confirm the stored boxes are
            # already expressed at half scale.
            patches = extract_patches(image, coords_list, patch_size=(250,250), N=images_per_image)
            patches = supress_contained_patches(patches, coords_list)
            save_patches(patches, dest_path, image_name)

            count += len(patches)
            # ">=" stops as soon as the target is met; ">" needed one
            # patch more than requested.
            if count >= number_of_images:
                break

In [19]:
# Expose background_images_generator through ipywidgets' interact_manual.
# number_of_images is given as the tuple (10, 4000, 10), presumably a
# (min, max, step) slider spec per the ipywidgets abbreviations; path and
# dest_path are wrapped in fixed() and take the module-level values.
# The call is left as the last expression so its result is the cell output.
interact_manual(background_images_generator, 
                number_of_images=(10,4000,10),
                path=fixed(path),
                dest_path=fixed(dest_path))


Out[19]:
<function __main__.background_images_generator>