In [ ]:
# THIS REQUIRES PYTHON 3 AND IPYTHON 3
# here's a pip freeze of the necessary packages:
"""
astropy==1.0
ipython==3.0.0
matplotlib==1.4.3
numpy==1.9.2
"""

# delete any old variables as we are, presumably, running the entire notebook.
%reset -f

import matplotlib.pyplot as plt
import numpy as np
import numpy.random as rng

from astropy.io import fits
from math import e as E

from collections import namedtuple, deque
from itertools import product
from functools import partial as f
from os.path import isdir

%matplotlib inline

Conventions

Our main data structure is a Box, which represents a bounding box somewhere in the image.

Our coordinate system has its origin in the bottom-left of the image (with positive up and right), and all coordinates are specified as (y, x). A box is specified as (y, x, h, w): its bottom-left corner, then its height and width.

Define our boxes.


In [ ]:
class Box:
    def __init__(self, y, x, h, w):
        self.pos = (y, x)
        self.size = (h, w)
    
    @property
    def pos(self):
        return self._pos
    @pos.setter
    def pos(self, value):
        assert len(value) == 2
        self._pos = np.array(value, dtype=int)
        
    @property
    def size(self):
        return self._size
    @size.setter
    def size(self, value):
        assert len(value) == 2
        assert all( value[i] > 0 for i in range(len(value)) )
        self._size = np.array(value, dtype=int)

    def __str__(self):
        return '(pos={} size={})'.format(self.pos, self.size)
    
    def __eq__(self, other):
        return np.all(self.pos == other.pos) and np.all(self.size == other.size)
    
    def _get_integral_image_elt(self, pos, integral_image):
        """Cumulative per-bin counts of the region strictly below and to the left of pos."""
        M, ny, nx = integral_image.shape
        cpos = np.clip(pos, [0, 0], [ny, nx])

        # a zero coordinate means there is nothing below/left of this corner.
        if np.any(pos == 0):
            return np.zeros(M)
        return integral_image[:, cpos[0]-1, cpos[1]-1]

    def counts(self, integral_image):
        """Per-bin pixel counts inside the box, via the four-corner integral-image sum."""
        corners = (self.pos,
                   self.pos + [self.size[0], 0],
                   self.pos + [self.size[0], self.size[1]],
                   self.pos + [0, self.size[1]])

        return (self._get_integral_image_elt(corners[0], integral_image)
                - self._get_integral_image_elt(corners[1], integral_image)
                + self._get_integral_image_elt(corners[2], integral_image)
                - self._get_integral_image_elt(corners[3], integral_image))
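
A quick sanity check of the (y, x, h, w) convention: the sketch below (an addition, not part of the original notebook) builds a one-bin integral image for a tiny all-ones image by hand and confirms that Box.counts recovers the number of pixels inside a box. The names are illustrative only.


In [ ]:
# Sketch (assumption): a hand-rolled check of Box.counts, not from the original notebook.
tiny = np.ones((1, 4, 4), dtype=bool)                   # shape (M, ny, nx) with M = 1 bin
tiny_iimg = np.cumsum(np.cumsum(tiny, axis=2), axis=1)  # cumulative counts from the bottom-left

b = Box(1, 1, 2, 3)                                     # y=1, x=1, h=2, w=3
print(b, b.counts(tiny_iimg))                           # a 2x3 box covers 6 pixels, so expect [6]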

Define helper functions.


In [ ]:
from matplotlib.patches import Rectangle
SAVE_DIR = '../output'

# represents loaded data. The truth field may be None.
Data = namedtuple('Data', ['img','truth'])

def as_boxes(*args):
    """
    Turns parameter tuples (y, x, h, w) into boxes. Handles all of the following:
      as_boxes( [(10,0,10,10), (20,50,10,10)] )
      as_boxes( (10,0,10,10), (20,50,10,10) )
      as_boxes(10,0,10,10)
    """
    if type(args[0]) in [float, int]:
        return [ Box(*args) ]
    elif type(args[0]) == tuple:
        return [ Box(*t) for t in args ]
    elif type(args[0]) == list:
        return [ Box(*t) for t in args[0] ]
    
def show(image, *args, file=None, figsize=(8,8), dpi=250, pad_inches=0):
    """
    Show the given image. The varargs can be used to pass models (i.e. lists of boxes) to draw on top
    of the image. This can take two forms:
    
        1. pass in a single dict of the form { 'colour1':model1, 'colour2':model2 }
        2. pass in up to 6 models as individual parameters, and they will be automatically coloured.
    """
    
    plt.figure(figsize=figsize, dpi=dpi)
    COLOURS = ['blue', 'red', 'yellow', 'orange', 'purple', 'green']
    
    plt.gray()
    plt.gca().get_xaxis().set_visible(False)
    plt.gca().get_yaxis().set_visible(False)
    plt.imshow(image, interpolation='nearest', origin='lower')
    
    if len(args) > 0 and type(args[0]) == list: # [ model1, model2, ...]
        for colour, model in zip(COLOURS, args):
            for box in model:
                plt.gca().add_patch(Rectangle(tuple(reversed(box.pos)), box.size[1], box.size[0], alpha=0.9, facecolor='None', edgecolor=colour))
    elif len(args) == 1 and type(args[0]) == dict: # { 'red':model1, 'blue':model2 }
        for colour, model in args[0].items():
            for box in model:
                plt.gca().add_patch(Rectangle(tuple(reversed(box.pos)), box.size[1], box.size[0], alpha=0.9, facecolor='None', edgecolor=colour))
            
    if file is not None:
        if isdir(SAVE_DIR):
            plt.savefig('{}/{}'.format(SAVE_DIR, file), bbox_inches='tight', pad_inches=pad_inches)
        else:
            print("output directory '{}' does not exist.".format(SAVE_DIR))
        
def fake_data (sources:'[(y, x, h, w, brightness, variance)]'=[], size:'(y, x)'=(1000,1000)):
    img = rng.uniform(size=size)
    boxes = []
    
    # define each source by making a Box, and manipulating the image.
    for (y, x, h, w, b, v) in sources:
        boxes.append( Box(y, x, h, w) )
        img[y:y+h, x:x+w] = np.clip((img[y:y+h, x:x+w] + b) * v, 0, 1)
    
    return Data(img, boxes)

def real_data (filename='frame-r-000094-1-0131.fits.gz'):
    hdulist = fits.open('../data/{}'.format(filename))
    img = hdulist[0].data
    hdulist.close()
    return Data(img, None)
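
A brief usage sketch (an addition, not part of the original notebook): generate a small fake image containing one dim source and display it with both calling forms of show. The source parameters here are made up for illustration.


In [ ]:
# Sketch (assumption): demonstrating fake_data / as_boxes / show; parameter values are illustrative.
demo = fake_data(sources=[(40, 20, 30, 50, -0.1, 1)], size=(200, 200))

# form 2: models passed as individual positional arguments, coloured automatically.
show(demo.img, demo.truth, figsize=(4, 4))

# form 1: a single dict mapping colours to models.
show(demo.img, {'red': as_boxes(40, 20, 30, 50)}, figsize=(4, 4))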

Integral image

These functions turn a raw image into a discretised integral image suitable for computing histograms over arbitrary boxes. The main function of interest is integral_image, which takes an image and a number of bins and does all the work.

Why not equal-occupancy?

Good question. If we discretise our image using equal-occupancy bins, we lose any knowledge of the background distribution. For example, suppose the background strictly follows a uniform distribution and the image contains a single very bright source. On binning, the top bins will be filled almost entirely by the source, leaving little room for the background; this means the discrete version of the background will not follow a uniform distribution over the bins, as it lacks presence in the top ones.

But we do want to maintain information on the distribution of the background, so instead we use equal-range bins. There is probably an improvement on this, for example using manually specified unequal-range bins that suit our images better.
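
The sketch below (an addition, not part of the original notebook) illustrates the point with numpy alone: on a uniform background with one large bright patch, several of the top equal-occupancy edges collapse onto the source's value, while the equal-range edges stay evenly spread.


In [ ]:
# Sketch (assumption): comparing equal-range and equal-occupancy bin edges on made-up data.
demo_img = rng.uniform(size=(100, 100))
demo_img[20:80, 20:80] = 1.0   # one large, saturated "source" (~36% of the pixels)

M_demo = 8
equal_range     = np.linspace(demo_img.min(), demo_img.max(), M_demo + 1)
equal_occupancy = np.percentile(demo_img, np.linspace(0, 100, M_demo + 1))

print('equal-range edges:    ', np.round(equal_range, 3))
print('equal-occupancy edges:', np.round(equal_occupancy, 3))   # top edges pile up at 1.0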


In [ ]:
def as_bins(img, M):
    """ 
    Discretises an image into M equal-range bins, the output is an array of the same dimensions.
    Assumes data is in [0,1]?
    """
    
    lo, hi = np.min(img), np.max(img)
    f = np.vectorize(lambda x: (int)(M * (x-lo) / (hi-lo)) if x < hi else M-1, otypes=[np.int])
    return f(img)

def as_booleans(dimg, M):
    """
    Takes a discrete image and a number of blocks, and returns a 3D array. The 2D array at index i contains
    True's at each coordinate where the discrete image contained bin index i. There are M indices, of course.
    """
    
    ny, nx = dimg.shape
    N = ny * nx
    bimg = np.zeros((M, ny, nx),dtype=bool)
    for m in range(M):
        bimg[m, :, :] = (dimg == m)
    return bimg

def as_accumulation(bimg):
    """
    Given a boolean image array, creates the integral image by summing up each 2D array from bottom-left to
    top-right.
    """
    
    return np.cumsum(np.cumsum(bimg, axis=2), axis=1)

def integral_image(img, M): 
    return as_accumulation(as_booleans(as_bins(img, M), M))
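
As a quick check (an addition, not part of the original notebook), the per-bin counts that Box.counts reads off the integral image should agree with counting bin membership directly in the discretised image:


In [ ]:
# Sketch (assumption): verifying integral_image + Box.counts against a direct per-bin count.
check_img  = rng.uniform(size=(50, 50))
check_iimg = integral_image(check_img, 4)

check_box = Box(10, 5, 20, 30)                      # rows 10..29, columns 5..34
region    = as_bins(check_img, 4)[10:30, 5:35]
direct    = np.array([np.sum(region == m) for m in range(4)])

print('integral image:', check_box.counts(check_iimg))
print('direct count:  ', direct)                    # the two should be identical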

The algorithm

fast repeatedly splits each active box into four quadrants, keeps only the quadrants whose bin counts cannot be explained by a scaled copy of the background histogram, and stops splitting a box once its area is at most min_area; those surviving boxes are returned.


In [ ]:
def fast(iimg, background, min_area=1000, threshold=0.0000001):
    _, h, w = iimg.shape
    
    def is_odd(box):
        counts, area = box.counts(iimg), box.size[0] * box.size[1]

        # the largest multiple of the background histogram that fits under the observed counts...
        scale = min(counts[i]/background[i] if background[i] > 0 else np.inf for i in range(len(counts)))
        # ...and the counts left over once that much background has been explained away.
        dist_diff = [counts[i] - scale*background[i] for i in range(len(counts))]
        src_proportion = sum(dist_diff)/area

        return src_proportion > threshold

    count = 0
    inactive, result = [], []
    active = deque([ Box(0,0,h,w) ])
    #active.extend([Box(0,0,w/4,h/4), Box(w/4,0,w/2,h/4), Box(3*w/4,0,w/4,h/4),
    #               Box(0,h/4,w/4,h/2), Box(w/4,h/4,w/2,h/2), Box(3*w/4,h/4,w/4,h/2),
    #               Box(0,3*h/4,w/4,h/4), Box(w/4,3*h/4,w/2,h/4), Box(3*w/4,3*h/4,w/4,h/4)])
    #TODO add third grid?
    
    while len(active) > 0:  
        box = active.popleft()
        
        if box.size[0]*box.size[1] <= min_area:
            result.append(box)
            continue
        
        # split the box into four quadrants and keep only those that still look odd.
        sub_boxes = [ Box(box.pos[0] + dy*(box.size[0]//2), box.pos[1] + dx*(box.size[1]//2), box.size[0]//2, box.size[1]//2)
                      for dy in range(2) for dx in range(2) ]
        sub_boxes = [ t for t in sub_boxes if is_odd(t) ]
        
        count += 4
        if len(sub_boxes) == 0:
            inactive.append(box)

        active.extend(sub_boxes)

    print('evaluations: {}'.format(count))
    return result
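
To make the split test concrete, here is the statistic inside is_odd worked through on made-up numbers (an addition, not part of the original notebook): the scale is the largest multiple of the background histogram that fits under the observed counts, and whatever is left over, as a proportion of the box's area, is treated as source.


In [ ]:
# Sketch (assumption): the is_odd statistic on illustrative numbers only.
toy_background = [0.5, 0.3, 0.15, 0.05]   # expected bin proportions of the background
toy_counts     = [40, 30, 20, 10]         # observed bin counts inside a 100-pixel box
toy_area       = 100

# largest multiple of the background histogram that fits under the observed counts
toy_scale = min(c/b for c, b in zip(toy_counts, toy_background))                         # 40/0.5 = 80
# pixels not explained by the background, as a proportion of the box
toy_excess = sum(c - toy_scale*b for c, b in zip(toy_counts, toy_background)) / toy_area
print(toy_scale, toy_excess)              # 80.0 and 0.2: about 20% of the box looks like source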

Testing fake data


In [ ]:
BINS = 16
background = [1/BINS for _ in range(BINS)]

sources = [(100,200,50,400,-0.05,1), (300,700,200,200,-0.05,1), (800,600,100,200,0,0.9), (500,200,100,100,0,0.8)]

img, truth = fake_data(sources)
iimg = integral_image(img, BINS)

result = fast(iimg, background, threshold=0.05)
show(img, result, figsize=(10,10), dpi=500, file='fake_{}_bins.png'.format(BINS))

Testing real data


In [ ]:
BINS = 32

# good for 32 bins
#background = [0.7648, 0.1940, 0.03642, 0.0047] + [0 for _ in range(BINS-4)]
background = [1] + [0]*(BINS - 1)
background = [b/sum(background) for b in background]

img, truth = real_data()
iimg = integral_image(img, BINS)

result = fast(iimg, background, threshold=0.000001)
show(img, result, figsize=(20,20), dpi=500, file='real.png')
