In [172]:
import pylab as plt
import numpy as np
import numpy.random as rng
from astropy.io import fits
from matplotlib.patches import Rectangle

In [173]:
def show_data_and_model(BG_image, BoundingBoxes=()):
    """Show an image, overplotting any bounding boxes."""
    plt.imshow(BG_image, interpolation='nearest', origin='lower')
    for BB in BoundingBoxes:
        # Rectangle wants (x, y); BB positions are stored (y, x)
        plt.gca().add_artist(Rectangle((BB.get_pos()[1], BB.get_pos()[0]), BB.get_width(), BB.get_width(),
                                       alpha=0.9, facecolor='None', edgecolor='blue'))

In [174]:
class OddityBB:
    def __init__(self, pos, width):
        """
        Warning: this sets pos and width to be ints; truncation.
        """
        self.set_pos(pos)
        self.set_width(width)
    
    def set_pos(self, pos):
        assert len(pos) == 2
        self.pos = np.array(pos, dtype=int)
        
    def set_width(self, width):
        self.width = int(width)
        assert self.width > 0

    def __str__(self):
        return "Oddity BB, pos %4d %4d, width %4d" % (self.get_pos()[0], self.get_pos()[1], self.get_width())
    
    def __eq__(self, other):
        return np.all(self.get_pos() == other.get_pos()) and self.get_width() == other.get_width()

    def is_overlapping(self, other):
        # On each axis, overlap requires the separation to be smaller than the
        # width of whichever box starts first on that axis.
        dy, dx = self.pos - other.pos
        if dy > 0.:
            ywidth = other.width
        else:
            ywidth = self.width
        if dx > 0.:
            xwidth = other.width
        else:
            xwidth = self.width
        return np.abs(dy) < ywidth and np.abs(dx) < xwidth
    
    def _get_corners(self):
        """
        We're defining what we mean by corners here! 
        These are the 'corners' relevant to the output of intensity_to_cumulated_block.
        """
        return (self.pos,
                self.pos + [self.width, 0],
                self.pos + [self.width, self.width],
                self.pos + [0, self.width])
    
    def get_pos(self):
        return self.pos  
    
    def get_width(self):
        return self.width
    
    def _get_cumulated_block_elt(self, pos, cumulated_block):
        M, nyplus1, nxplus1 = cumulated_block.shape
        cpos = np.clip(pos, [0, 0], [nyplus1-1, nxplus1-1])
        return cumulated_block[:, cpos[0], cpos[1]]
    
    def get_counts(self, cumulated_block):
        corners = self._get_corners()
        return (self._get_cumulated_block_elt(corners[0], cumulated_block)
                - self._get_cumulated_block_elt(corners[1], cumulated_block)
                + self._get_cumulated_block_elt(corners[2], cumulated_block)
                - self._get_cumulated_block_elt(corners[3], cumulated_block))
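
A quick illustration of the truncation and overlap semantics (my addition, not part of the original run):

In [ ]:
b1 = OddityBB([10, 10], 20)
b2 = OddityBB([25, 25], 20)
print b1.is_overlapping(b2)               # True: b1 spans 10..30 on each axis and b2 starts at 25
print b1 == OddityBB([10.9, 10.2], 20.5)  # True: the constructor truncates pos and width to ints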

In [175]:
def intensity_to_m(img, M):
    """Rank-transform the image into M (nearly) equal-population integer bins 0..M-1."""
    ny, nx = img.shape
    N = ny * nx
    mimgvec = np.zeros(N, dtype=int)
    mimgvec[np.argsort(img.reshape(N))] = np.floor((float(M) / float(N)) * np.arange(N)).astype(int)
    return mimgvec.reshape((ny, nx))

In [176]:
def intensity_to_boolean_block(img, M):
    """Return an (M, ny+1, nx+1) boolean block; plane m flags the pixels in bin m.
    The extra leading row and column of zeros make the cumulative sums easy to index."""
    mimg = intensity_to_m(img, M)
    ny, nx = mimg.shape
    bb = np.zeros((M, ny+1, nx+1), dtype=bool)
    for m in range(M):
        bb[m, 1:, 1:] = (mimg == m)
    return bb
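
A quick sanity check of the binning (my addition, on a hypothetical tiny image): the bins have equal populations by construction, and each pixel lights up exactly one boolean plane.

In [ ]:
tiny = rng.normal(size=(4, 4))
print np.bincount(intensity_to_m(tiny, 4).ravel())  # [4 4 4 4]: equal-population bins
bb = intensity_to_boolean_block(tiny, 4)
print bb.shape                                      # (4, 5, 5): M planes, zero-padded by one row and column
print np.all(bb[:, 1:, 1:].sum(axis=0) == 1)        # True: each pixel is in exactly one bin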

The Dirichlet-multinomial distribution is

$$ P(Z | \alpha) = \frac{\Gamma(\sum_m \alpha_m)}{\Gamma(\sum_m [n_m + \alpha_m])} \prod_{m=1}^M \frac{\Gamma(n_m + \alpha_m)}{\Gamma(\alpha_m)} $$

So its log is

$$ \log P(Z | \alpha) = \log \Gamma(A) - \log \Gamma(N+A) \;\; + \sum_{m=1}^M \big[ \log \Gamma(n_m + \alpha_m) - \log \Gamma(\alpha_m) \big] $$

where $A \equiv \sum_m \alpha_m$ and $N \equiv \sum_m n_m$. For integer $z$, $\Gamma(z) = (z-1)!$, so $\log \Gamma(z) = \sum_{m=1}^{z-1} \log m$, which is what the lookup table below tabulates.
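
As a sanity check on that identity (my addition; assumes scipy is installed), a cumulative-log table built the same way as in `get_logL_patch()` below matches `scipy.special.gammaln` at integer arguments:

In [ ]:
from scipy.special import gammaln
table = np.append(np.array([np.Inf, 0.]), np.cumsum(np.log(np.arange(1, 99))))
z = np.arange(1, 100)
print np.allclose(table[z], gammaln(z))  # True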


In [177]:
def get_logL_patch(counts, alphas):
    """
    Takes a vector of counts (across bins), and vector of alpha hyperparameters (ditto).
    Returns the log likelihood of those counts under the Dirichlet-multinomial distribution with
    those hyperparameters.
    """
    # initialize internal (static) variables
    try:
        get_logL_patch.Cmax
    except AttributeError:
        get_logL_patch.Cmax = 1
        get_logL_patch.gammaln = np.array([np.Inf])
    N, A = np.sum(counts), np.sum(alphas)
    # extend the log-gamma lookup table if necessary; valid indices run 0 .. Cmax-1,
    # so we must also rebuild when N + A == Cmax, hence >= not >
    if N + A >= get_logL_patch.Cmax:
        print "get_logL_patch(): Calculating expensive lookup shit"
        get_logL_patch.Cmax = 2 * (N + A)
        get_logL_patch.gammaln = np.append(np.array([np.Inf, 0.]), np.cumsum(np.log(np.arange(get_logL_patch.Cmax - 2) + 1)))
    # now the actual LF
    return (  get_logL_patch.gammaln[A] 
            - get_logL_patch.gammaln[N + A] 
            + np.sum(get_logL_patch.gammaln[counts + alphas]) 
            - np.sum(get_logL_patch.gammaln[alphas]) 
            )

#print get_logL_patch(np.array([1,1,1]), 100*np.array([1,2,3]))
#print get_logL_patch(np.array([31,351,21]), 100*np.array([1,2,3]))
#print get_logL_patch(np.array([1000,10000,1000]), 100*np.array([1,2,3]))

In [178]:
def intensity_to_cumulated_block(img, M):
    # a per-bin summed-area table (integral image); get_counts() does inclusion-exclusion on it
    return np.cumsum(np.cumsum(intensity_to_boolean_block(img, M), axis=2), axis=1)
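
A check (my addition) that the inclusion-exclusion corner trick in `get_counts()` recovers brute-force per-bin counts for an interior box:

In [ ]:
tiny = rng.normal(size=(32, 32))
tiny_cbb = intensity_to_cumulated_block(tiny, 4)
direct = np.bincount(intensity_to_m(tiny, 4)[5:15, 7:17].ravel(), minlength=4)
print np.all(OddityBB([5, 7], 10).get_counts(tiny_cbb) == direct)  # True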

In [179]:
def get_logL(cumulated_block, BBs, alphas_S, alphas_B):
    """
    ## inputs
    - `BBs` is a list of `OddityBB` objects.
    """
    logL = 0.
    source_counts = 0
    for BB in BBs:
        this_counts = BB.get_counts(cumulated_block)
        logL += get_logL_patch(this_counts, alphas_S)
        source_counts += this_counts
    background_counts = cumulated_block[:, -1, -1].ravel() - source_counts
    logL += get_logL_patch(background_counts, alphas_B)
    return logL

In [180]:
def get_logPrior(BBs):
    logP = 0.
    if len(BBs) == 0:
        return logP
    for i in range(len(BBs)):
        logP += -2. # MAGIC NUMBER, the cost of adding a new source.
        if BBs[i].get_width() > 500: # MAGIC NUMBER, the biggest "source" we believe in.
            logP += -1. * (BBs[i].width - 500)
        if BBs[i].get_width() < 9: # MAGIC NUMBER, the minimum source size we find interesting
            #print 'get_logPrior() : bounding box %s is too narrow' %(BBs[i])
            return -np.Inf
    for i in range(len(BBs) - 1):
        for j in range(i + 1, len(BBs)):
            if BBs[i].is_overlapping(BBs[j]):
                #print 'get_logPrior() : bounding boxes %s and %s overlap' %(BBs[i], BBs[j])
                return -np.Inf
    return logP
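
A quick check (my addition) that the hard constraints behave as intended:

In [ ]:
print get_logPrior([OddityBB([0, 0], 50)])                          # -2.0, just the per-source cost
print get_logPrior([OddityBB([0, 0], 5)])                           # -inf: below the minimum width of 9
print get_logPrior([OddityBB([0, 0], 50), OddityBB([20, 20], 50)])  # -inf: the boxes overlap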

In [181]:
def get_logPosterior(cbb, BBs, alphas_S, alphas_B):
    logP = get_logPrior(BBs)
    if np.isinf(logP):
        #print 'get_logPosterior() : infinite logP' 
        return -np.Inf
    return logP + get_logL(cbb, BBs, alphas_S, alphas_B)

In [182]:
def MH_step(cbb, i, BBs, alphas_S, alphas_B, temperature = 1., logP = None):
    """
    Metropolis-Hastings move on BBs[i]; return the new log posterior.
    Also changes BBs in place, so FUCKING BE CAREFUL.
    Note: uses the global `img` for the image bounds.
    """
    if logP is None:
        logP = get_logPosterior(cbb, BBs, alphas_S, alphas_B)
    pos = BBs[i].get_pos()
    width = BBs[i].get_width()
    jump_size = 2 ** rng.randint(int(np.log2(np.min(img.shape) / 8)))
    # 1, 2, 4, and so on...
    new_pos = pos + jump_size * rng.randint(-1, 2, size=2)
    # but new_pos should stay in bounds
    new_pos = np.maximum(np.minimum(new_pos, img.shape), [0, 0])
    new_width = min(max(width + jump_size * rng.randint(-1, 2), 1), min(img.shape)) # keep the width positive and in bounds
    BBs[i].set_pos(new_pos)
    BBs[i].set_width(new_width)
    new_logP = get_logPosterior(cbb, BBs, alphas_S, alphas_B)
    # M-H step:
    # We should accept the proposal with probability P_accept = min(1, newP / P),
    # e.g. draw a random uniform in [0, 1] and accept if it is < exp(new_logP - logP),
    # or take the log of the draw and accept if that is < (new_logP - logP).
    # But we've ALREADY made the change, so we have to UNDO it if we reject.
    if not (np.log(rng.uniform()) < (new_logP - logP) / temperature):
        # Reject the proposal, so go back to the old values.
        BBs[i].set_pos(pos)
        BBs[i].set_width(width)
        return logP
    return new_logP
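
For the record (my addition): the proposal scale above is a random power of two; for the image used below, int(np.log2(min(img.shape)/8)) is 7, so jumps are 1, 2, ..., 64 pixels, log-uniform over octaves:

In [ ]:
sizes = 2 ** rng.randint(0, 7, size=10000)    # the same distribution MH_step draws from for this image
print np.bincount(np.log2(sizes).astype(int)) # roughly flat: each octave equally likely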

Get down to business


In [229]:
# make real data
hdulist = fits.open('../data/frame-r-000094-1-0131.fits.gz')
realimg = hdulist[0].data
hdulist.close()

# make fake data
fakeimg = rng.normal(size=realimg.shape)
ny, nx = fakeimg.shape
ygrid, xgrid = np.mgrid[:ny, :nx]
fakeimg += 0.0 * xgrid / nx  # optionally add a tiny gradient as a test (coefficient set to zero here)
startrow, startcol = 150, 350
halfrow, halfcol = np.array(fakeimg.shape)/2 + [200, 250]

tmpBBs = []
fakeimg[startrow:startrow+100,startcol:startcol+100] += 0.5 # a patch of slightly brighter pixels.
tmpBBs.append( OddityBB([startrow,startcol], 100) )  

fakeimg[halfrow:halfrow+200,halfcol:halfcol+200] *= 1.3 # a patch of slightly more variable pixels.
tmpBBs.append( OddityBB([halfrow,halfcol], 200) )  

fakeimg[startrow:startrow+250,halfcol:halfcol+250] -= 0.2 # a patch of slightly darker pixels.
tmpBBs.append( OddityBB([startrow,halfcol], 250) )  

img = fakeimg # we decide which data we're working with.
plt.gray()
show_data_and_model(img, tmpBBs)
plt.savefig('truth.png', dpi=150)



In [230]:
show_data_and_model(img)



In [231]:
M = 16 # number of histogram bins
show_data_and_model(intensity_to_m(img, M))



In [232]:
show_data_and_model(intensity_to_boolean_block(img, M)[-1])



In [233]:
cbb = intensity_to_cumulated_block(img, M)
# spot-check the corner arithmetic, including clipping at the image edges
assert OddityBB((cbb.shape[1]/2,cbb.shape[2]/2), 9).get_counts(cbb).sum() == 9*9
assert OddityBB((cbb.shape[1]/2,-8), 9).get_counts(cbb).sum() == 9
assert OddityBB((cbb.shape[1]-1,cbb.shape[2]), 9).get_counts(cbb).sum() == 0
# show_data_and_model(cbb[-1])

testing MCMC


In [234]:
halfrow, halfcol = np.array(img.shape)/2
# initialising MCMC with some BBs
BBs = []
initsize = min(halfrow,halfcol) # deliberately so the BBs are huge but each enclose one true source.
BBs.append( OddityBB([0,0], initsize) )  
BBs.append( OddityBB([0,initsize], initsize) )  
BBs.append( OddityBB([initsize,0], initsize) )  
BBs.append( OddityBB([initsize,initsize], initsize) )  

#BBs.append( OddityBB([rng.randint(1,img.shape[0]), rng.randint(1,img.shape[1])], 950) ) # random BB
#BBs.append( OddityBB([rng.randint(1,img.shape[0]), rng.randint(1,img.shape[1])], 950) ) # random BB

#BBs.append( OddityBB([ybest, xbest], 250) ) # corner is on the brightest pixel

In [235]:
show_data_and_model(img, BBs)
plt.savefig('start.png', dpi=150)



In [236]:
indices = range(len(BBs))
logP = MH_step(cbb, 0, BBs, alphas_S, alphas_B)
T = 0
for t in range(7):
    T_inner = 2 ** (2*t) # geometric schedule: report at T = 1, 5, 21, 85, 341, 1365, 5461
    for tt in range(T_inner):
        for i in indices:
            logP = MH_step(cbb, i, BBs, alphas_S, alphas_B, temperature=1., logP=logP)
    T += T_inner
    print '%5d' % (T), 
    for i in indices:
        print BBs[i],
    print '%.1f' % (logP)

#top_bin_img = intensity_to_boolean_block(img, M)[-1]
show_data_and_model(img, BBs)
plt.savefig('end.png', dpi=150)


    1 Oddity BB, pos    0    0, width  744 Oddity BB, pos    0  744, width  744 Oddity BB, pos  744    0, width  736 Oddity BB, pos  744  744, width  744 -8456075.9
    5 Oddity BB, pos    0    0, width  743 Oddity BB, pos    0  744, width  744 Oddity BB, pos  744    0, width  728 Oddity BB, pos  808  744, width  712 -8456039.4
   21 Oddity BB, pos   80    0, width  563 Oddity BB, pos   10  833, width  719 Oddity BB, pos  734    0, width  691 Oddity BB, pos  774  804, width  503 -8455612.0
   85 Oddity BB, pos   35  201, width  251 Oddity BB, pos   53 1052, width  532 Oddity BB, pos  535   63, width  457 Oddity BB, pos  658  621, width  381 -8454802.6
  341 Oddity BB, pos  151  352, width   98 Oddity BB, pos  151 1276, width  249 Oddity BB, pos  583  257, width    9 Oddity BB, pos  441  867, width   12 -8452768.3
 1365 Oddity BB, pos  150  350, width  100 Oddity BB, pos  150 1274, width  250 Oddity BB, pos  476  121, width    9 Oddity BB, pos  573  861, width    9 -8452698.8
 5461 Oddity BB, pos  150  350, width  100 Oddity BB, pos  150 1274, width  250 Oddity BB, pos  297    8, width    9 Oddity BB, pos  582  850, width   10 -8452703.4

In [145]:
"""
ny, nx = img.shape
ys, xs = np.mgrid[0:ny,0:nx]
logL_grid = np.zeros((ny, nx))
M, foo, bar = cbb.shape
counter = 0
for y,x in zip(ys.ravel(), xs.ravel()):
    BB = OddityBB([y, x], 7)
    logL_grid[y, x] = get_logL(cbb, [BB, ], alphas_S, alphas_B)
    counter += 1
    assert counter < 1000
"""



In [146]:
widths = np.arange(1,2*max(img.shape),2)
logLs = np.zeros(len(widths))
countss = np.zeros(len(widths))

alphas_S = np.ones(M, dtype=int) # integer alphas: get_logL_patch() indexes its lookup table with them
alphas_B = 2 * alphas_S

logL0 = get_logL(cbb, [], alphas_S, alphas_B)

#ybest, xbest = np.where(img == img.max())
#ybest, xbest = ybest[0], xbest[0]
yTop, xTop = img.shape
ybest, xbest = yTop/2, xTop/2
print ybest, xbest
print img.shape
for i, width in enumerate(widths):
    hw = (width-1)/2 # the half-width
    BB = OddityBB([ybest-hw, xbest-hw], width)
    countss[i] = np.sum(BB.get_counts(cbb))
    logLs[i] = get_logPosterior(cbb, [BB, ], alphas_S, alphas_B) # posterior, so widths below 9 come out -inf

plt.subplot(211)
plt.plot(widths, logLs-logL0,'.b')
plt.xlim(0,600)

plt.subplot(212)
plt.plot(widths, countss,'.r')
print logLs


744 1024
(1489, 2048)
[             -inf              -inf              -inf ...,
 -8458617.15196883 -8458619.15196883 -8458621.15196883]

So... what I just did was start four BBs, each "as large as it can be", between them covering every true source. And they found the sources almost immediately.

If I don't do this and (say) start them at random positions and widths, or start them with half the field each, then sure they'll discover the sources eventually, but it's a bloody long time.

Suggests to me that whenever you think you've found one, you might place a newly initialised BB that's as large as possible, in the biggest gap there is. Something like that; there's a sketch below.
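
Here's a rough sketch of that birth move (my addition; `propose_birth_BB` is a hypothetical helper): scan a coarse grid of corners, and from each, take the biggest in-bounds square that clears the current BBs, halving the width until it fits.

In [ ]:
def propose_birth_BB(shape, BBs, step=64):
    """Return the largest grid-aligned OddityBB that avoids all current BBs, or None."""
    best = None
    for y in range(0, shape[0], step):
        for x in range(0, shape[1], step):
            w = min(shape[0] - y, shape[1] - x)  # biggest in-bounds square from this corner
            while w >= 9:  # the prior's minimum width
                cand = OddityBB([y, x], w)
                if not any(cand.is_overlapping(BB) for BB in BBs):
                    if best is None or w > best.get_width():
                        best = cand
                    break
                w /= 2
    return best

In a full sampler this would be one half of a reversible-jump birth/death pair; here it would just be a cheap re-initialisation heuristic.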

Below, I'm trying one box, really big to start with. I've added a slight (actually pretty severe) penalty for overly large boxes.


In [129]:
BBs = []
border = np.max(img.shape) # deliberately huge, so the single BB starts out covering the whole field.
BBs.append( OddityBB([0,0], border) )  
logP = MH_step(cbb, 0, BBs, alphas_S, alphas_B)
T = 0
i = 0
for t in range(7):
    T_inner = 2 ** (2*t)
    for tt in range(T_inner):
        logP = MH_step(cbb, i, BBs, alphas_S, alphas_B, temperature=1., logP=logP)
    T += T_inner
    print '%5d' % (T), 
    print BBs[i],
    print '%.1f' % (logP)
show_data_and_model(img, BBs)


    1 Oddity BB, pos   32    0, width 1489 -8455087.1
    5 Oddity BB, pos    0    0, width 1457 -8454973.5
   21 Oddity BB, pos    1   73, width 1358 -8454289.1
   85 Oddity BB, pos   86  177, width 1251 -8453506.9
  341 Oddity BB, pos   58  350, width 1075 -8451712.8
 1365 Oddity BB, pos  110  350, width 1074 -8451688.4
 5461 Oddity BB, pos  112  350, width 1074 -8451689.5

testing optimization

This is currently exhaustive and toy.


In [21]:
def optimize_step(cbb, i, BBs, alphas_S, alphas_B):
    """
    Check out the 27 options for improvements on BBs[i]; return True iff staying put is best.
    Also changes BBs in place, so FUCKING BE CAREFUL.
    Warning: set_width() asserts width > 0, so this crashes when the current width is 1 and
    we try the width-0 option (see the traceback below).
    """
    y, x = 1 * BBs[i].get_pos()
    w = 1 * BBs[i].get_width()
    # all 27 combinations of {-1, 0, +1} offsets in (width, y, x)
    woffsets, yoffsets, xoffsets = [g.ravel() for g in np.mgrid[-1:2, -1:2, -1:2]]
    logPs = []
    for j in range(len(xoffsets)):
        BBs[i].set_pos([y + yoffsets[j], x + xoffsets[j]])
        BBs[i].set_width(w + woffsets[j])
        # print 'optimize_step() : %d, %s, %f ' % (j, BBs[i], get_logPrior(BBs))
        logPs.append(get_logPosterior(cbb, BBs, alphas_S, alphas_B))
    j = np.argmax(np.array(logPs))
    #print 'optimize_step() : ', logPs, logPs[j]
    BBs[i].set_pos([y + yoffsets[j], x + xoffsets[j]])
    BBs[i].set_width(w + woffsets[j])
    if j == 13: return True # FUCK YEAH: index 13 is the all-zero offset, i.e. a local max
    return False

In [22]:
def optimize(cbb, i, BBs, alphas_S, alphas_B):
    while not optimize_step(cbb, i, BBs, alphas_S, alphas_B):
        print 'optimize() : ', BBs[i], get_logPosterior(cbb, BBs, alphas_S, alphas_B)

logL0 = get_logPosterior(cbb, [], alphas_S, alphas_B)
# NB: width 6 is below the prior's minimum of 9, so every one of the 27 candidates scores -inf;
# argmax then always returns j = 0 (shrink pos and width by 1), and the box shrinks until
# set_width() asserts -- see the traceback below.
BB = OddityBB([ybest-100, xbest], 6)  # initialising the optimization
BBs = [BB, ]
i = 0
optimize(cbb, i, BBs, alphas_S, alphas_B)
for BB in BBs: print BB
logL1 = get_logPosterior(cbb, BBs, alphas_S, alphas_B)
print logL1 - logL0

BBs.append(OddityBB([ybest, xbest+200], 6))  # initialising the optimization
i = 1
optimize(cbb, i, BBs, alphas_S, alphas_B)
for BB in BBs: print BB
logL2 = get_logPosterior(cbb, BBs, alphas_S, alphas_B)
print logL2 - logL1


optimize() :  Oddity BB, pos  643 1023, width    5 -inf
optimize() :  Oddity BB, pos  642 1022, width    4 -inf
optimize() :  Oddity BB, pos  641 1021, width    3 -inf
optimize() :  Oddity BB, pos  640 1020, width    2 -inf
optimize() :  Oddity BB, pos  639 1019, width    1 -inf
---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
<ipython-input-22-b28b5bd61a58> in <module>()
      7 BBs = [BB, ]
      8 i = 0
----> 9 optimize(cbb, i, BBs, alphas_S, alphas_B)
     10 for BB in BBs: print BB
     11 logL1 = get_logPosterior(cbb, BBs, alphas_S, alphas_B)

<ipython-input-22-b28b5bd61a58> in optimize(cbb, i, BBs, alphas_S, alphas_B)
      1 def optimize(cbb, i, BBs, alphas_S, alphas_B):
----> 2     while not optimize_step(cbb, i, BBs, alphas_S, alphas_B):
      3         print 'optimize() : ', BBs[i], get_logPosterior(cbb, BBs, alphas_S, alphas_B)
      4 
      5 logL0 = get_logPosterior(cbb, [], alphas_S, alphas_B)

<ipython-input-21-76a6519d65ff> in optimize_step(cbb, i, BBs, alphas_S, alphas_B)
     14     for j in range(len(xoffsets)):
     15         BBs[i].set_pos([y + yoffsets[j], x + xoffsets[j]])
---> 16         BBs[i].set_width(w + woffsets[j])
     17         # print 'optimize_step() : %d, %s, %f ' % (j, BBs[i], get_logPrior(BBs))
     18         logPs.append(get_logPosterior(cbb, BBs, alphas_S, alphas_B))

<ipython-input-4-003617c40a71> in set_width(self, width)
     13     def set_width(self, width):
     14         self.width = int(width)
---> 15         assert self.width > 0
     16 
     17     def __str__(self):

AssertionError: 

In [ ]: