In [ ]:
# THIS REQUIRES PYTHON 3 AND IPYTHON 3
# here's a pip freeze of the necessary packages:
"""
astropy==1.0
ipython==3.0.0
matplotlib==1.4.3
numpy==1.9.2
"""
# delete any old variables as we are, presumably, running the entire notebook.
%reset -f
import matplotlib.pyplot as plt
import numpy as np
import numpy.random as rng
from astropy.io import fits
from math import e as E
from collections import namedtuple, deque
from itertools import product
from functools import partial as f
from os.path import isdir
%matplotlib inline
In [ ]:
class Box:
def __init__(self, y, x, h, w):
self.pos = (y, x)
self.size = (h, w)
@property
def pos(self):
return self._pos
@pos.setter
def pos(self, value):
assert len(value) == 2
self._pos = np.array(value, dtype=int)
@property
def size(self):
return self._size
@size.setter
def size(self, value):
assert len(value) == 2
assert all( value[i] > 0 for i in range(len(value)) )
self._size = np.array(value, dtype=int)
def __str__(self):
return '(pos={} sze={})'.format(self.pos , self.size)
def __eq__(self, other):
return np.all(self.pos == other.pos) and np.all(self.size == other.size)
def _get_integral_image_elt(self, pos, integral_image):
M, ny, nx = integral_image.shape
cpos = np.clip(pos, [0, 0], [ny, nx])
if np.any(pos == 0):
return np.zeros(M)
return integral_image[:, cpos[0]-1, cpos[1]-1]
def counts(self, integral_image):
corners = (self.pos,
self.pos + [self.size[0], 0],
self.pos + [self.size[0], self.size[1]],
self.pos + [0, self.size[1]])
return (self._get_integral_image_elt(corners[0], integral_image)
- self._get_integral_image_elt(corners[1], integral_image)
+ self._get_integral_image_elt(corners[2], integral_image)
- self._get_integral_image_elt(corners[3], integral_image))
In [ ]:
from matplotlib.patches import Rectangle
SAVE_DIR = '../output'
# represents loaded data. the truth field may be none.
Data = namedtuple('Data', ['img','truth'])
def as_boxes(*args):
"""
Turns some parameter triplets into boxes. Handles all of the following:
as_boxes( [(10,0,10,10), (20,50,10,10)] )
as_boxes( (10,0,10,10), (20,50,10,10) )
as_boxes(10,0,10,10)
"""
if type(args[0]) in [float, int]:
return [ Box(*args) ]
elif type(args[0]) == tuple:
return [ Box(*t) for t in args ]
elif type(args[0]) == list:
return [ Box(*t) for t in args[0] ]
def show (image, *args, file=None, figsize=(8,8), dpi=250, pad_inches=0):
"""
Show the given image. The varargs can be used to pass models (ie. lists of boxes) to draw on top
of the image. This can take two forms:
1. pass in a single dict of the form { 'colour1':model1, 'colour2':model2 }
2. pass in up to 6 models as individual parameters, and they will be automatically coloured.
"""
plt.figure(figsize=figsize, dpi=dpi)
COLOURS = ['blue', 'red', 'yellow', 'orange', 'purple', 'green']
plt.gray()
plt.gca().get_xaxis().set_visible(False)
plt.gca().get_yaxis().set_visible(False)
plt.imshow(image, interpolation='nearest', origin='lower')
if len(args) > 0 and type(args[0]) == list: # [ model1, model2, ...]
for colour, model in zip(COLOURS, args):
for box in model:
plt.gca().add_patch(Rectangle(tuple(reversed(box.pos)), box.size[1], box.size[0], alpha=0.9, facecolor='None', edgecolor=colour))
elif len(args) == 1 and type(args[0]) == dict: # { 'red':model1, 'blue':model2 }
for colour, model in args[0].items():
for box in model:
plt.gca().add_patch(Rectangle(tuple(reversed(box.pos)), box.size[1], box.size[0], alpha=0.9, facecolor='None', edgecolor=colour))
if file != None:
if isdir(SAVE_DIR):
plt.savefig('{}/{}'.format(SAVE_DIR, file), bbox_inches='tight', pad_inches=pad_inches)
else:
print("output directory '{}' does not exist.".format(SAVE_DIR))
def fake_data (sources:'[(y, x, h, w, brightness, variance)]'=[], size:'(y, x)'=(1000,1000)):
img = rng.uniform(size=size)
boxes = []
# define each source by making a Box, and manipulating the image.
for (y, x, h, w, b, v) in sources:
boxes.append( Box(y, x, h, w) )
img[y:y+h, x:x+w] = np.clip((img[y:y+h, x:x+w] + b) * v, 0, 1)
return Data(img, boxes)
def real_data (filename='frame-r-000094-1-0131.fits.gz'):
hdulist = fits.open('../data/{}'.format(filename))
img = hdulist[0].data
hdulist.close()
return Data(img, None)
These functions turn a raw image into a discretised integral image suitable for finding histograms with. The main function of interest is integral_image, which takes an image and a number of bins and does all the work.
Good question. If we discretise our image using equal-occupancy bins, we lose any knowledge of the background distribution. For example, suppose the background strictly follows a uniform distribution, and the image contains a single very bright source. On binning, the top bins will be almost entirely filled with the source leaving little room for the background; this means the discrete version of the background will not follow a uniform distribution over the bins, as it lacks presence in the top ones.
But we do want to maintain information on the distribution of the background, so instead we use equal-range bins. There is probably an improvement on this, for example using manually specified unequal-range bins that suit our images better.
In [ ]:
def as_bins(img, M):
"""
Discretises an image into M equal-range bins, the output is an array of the same dimensions.
Assumes data is in [0,1]?
"""
lo, hi = np.min(img), np.max(img)
f = np.vectorize(lambda x: (int)(M * (x-lo) / (hi-lo)) if x < hi else M-1, otypes=[np.int])
return f(img)
def as_booleans(dimg, M):
"""
Takes a discrete image and a number of blocks, and returns a 3D array. The 2D array at index i contains
True's at each coordinate where the discrete image contained bin index i. There are M indices, of course.
"""
ny, nx = dimg.shape
N = ny * nx
bimg = np.zeros((M, ny, nx),dtype=bool)
for m in range(M):
bimg[m, :, :] = (dimg == m)
return bimg
def as_accumulation(bimg):
"""
Given a boolean image array, creates the integral image by summing up each 2D array from bottom-left to
top-right.
"""
return np.cumsum(np.cumsum(bimg, axis=2), axis=1)
def integral_image(img, M):
return as_accumulation(as_booleans(as_bins(img, M), M))
In [ ]:
def fast(iimg, background, min_area=1000, threshold=0.0000001):
_, h, w = iimg.shape
def is_odd(box):
counts, area = box.counts(iimg), box.size[0] * box.size[1]
scale = min(counts[i]/background[i] if background[i] > 0 else np.inf for i in range(len(counts)))
dist_diff = [counts[i] - scale*background[i] for i in range(len(counts))]
src_proportion = sum(dist_diff)/area
return src_proportion > threshold
count = 0
inactive, result = [], []
active = deque([ Box(0,0,h,w) ])
#active.extend([Box(0,0,w/4,h/4), Box(w/4,0,w/2,h/4), Box(3*w/4,0,w/4,h/4),
# Box(0,h/4,w/4,h/2), Box(w/4,h/4,w/2,h/2), Box(3*w/4,h/4,w/4,h/2),
# Box(0,3*h/4,w/4,h/4), Box(w/4,3*h/4,w/2,h/4), Box(3*w/4,3*h/4,w/4,h/4)])
#TODO add third grid?
while len(active) > 0:
box = active.popleft()
if box.size[0]*box.size[1] <= min_area:
result.append(box)
continue
sub_boxes = [ Box(box.pos[0] + dy*box.size[0]/2, box.pos[1] + dx*box.size[1]/2, box.size[0]/2, box.size[1]/2)
for dy in range(2) for dx in range(2) ]
sub_boxes = [ t for t in sub_boxes if is_odd(t) ]
count += 4
if len(sub_boxes) == 0:
inactive.append(box)
active.extend(sub_boxes)
print('evaluations: {}'.format(count))
return result
In [ ]:
BINS = 16
background = [1/BINS for _ in range(BINS)]
sources = [(100,200,50,400,-0.05,1), (300,700,200,200,-0.05,1), (800,600,100,200,0,0.9), (500,200,100,100,0,0.8)]
img, truth = fake_data(sources)
iimg = integral_image(img, BINS)
result = fast(iimg, background, threshold=0.05)
show(img, result, figsize=(10,10), dpi=500, file='fake_{}_bins.png'.format(BINS))
In [ ]:
BINS = 32
# good for 32 bins
#background = [0.7648, 0.1940, 0.03642, 0.0047] + [0 for _ in range(BINS-1)]
background = [1] + [0]*(BINS - 1)
background = [b/sum(background) for b in background]
img, truth = real_data()
iimg = integral_image(img, BINS)
result = fast(iimg, background, threshold=0.000001)
show(img, result, figsize=(20,20), dpi=500, file='real.png')
In [ ]: