In [170]:
import os.path
import pprint
import sys
import astropy.io.fits
import matplotlib.colors
import matplotlib.pyplot
import numpy
import pymongo
import requests
sys.path.insert(1, '..')
import crowdastro.rgz_analysis.consensus
%matplotlib inline
# Use a greyscale colour map for every image shown in this notebook.
matplotlib.pyplot.rcParams['image.cmap'] = 'gray'
# MongoDB connection settings (consumed by the MongoClient call below).
HOST = 'localhost'
PORT = 27017
DB_NAME = 'radio'
# Root directory for data files, relative to this notebook.
DATA_PATH = os.path.join('..', 'data')
# Location of the ATLAS catalogue file inside DATA_PATH.
ATLAS_CATALOGUE_PATH = os.path.join(DATA_PATH, 'ATLASDR3_cmpcat_23July2015.dat')
# Name of the sub-directory (under each field) that open_fits reads tiles from.
TILE_SIZE = '2x2'
# Dimensions of the FITS images and of the images that clicks were recorded on.
# NOTE(review): presumably in pixels - confirm against the data.
FITS_IMAGE_WIDTH = 200
FITS_IMAGE_HEIGHT = 200
CLICK_IMAGE_WIDTH = 500
CLICK_IMAGE_HEIGHT = 500
# Scale factors that convert click-image coordinates into FITS-image coordinates.
CLICK_TO_FITS_X = FITS_IMAGE_WIDTH / CLICK_IMAGE_WIDTH
CLICK_TO_FITS_Y = FITS_IMAGE_HEIGHT / CLICK_IMAGE_HEIGHT
# Setup Mongo DB.
client = pymongo.MongoClient(HOST, PORT)
db = client[DB_NAME]
The FITS images have filenames of the form CID_{ir, radio}.fits. The mapping between IAU name and CID (component ID) is in the ATLAS catalogue file. Where in the database do the IAU names appear?
In [25]:
# Fetch one document from the radio_subjects collection (no filter) to inspect its structure.
db.radio_subjects.find_one()
Out[25]:
It looks like the subject we found is part of the FIRST survey rather than the ATLAS survey. Let's try to find an ATLAS image.
In [26]:
# Fetch one subject whose metadata.survey field is 'atlas'.
db.radio_subjects.find_one({'metadata.survey': 'atlas'})
Out[26]:
It seems that the database itself actually contains the CID for the ATLAS survey (but not for the FIRST survey). Knowing this, we can write a function to load FITS images.
In [211]:
def open_fits(zid, field, wavelength):
    """Opens a FITS image.

    zid: Zooniverse ID.
    field: 'elais' or 'cdfs'.
    wavelength: 'ir' or 'radio'.
    -> FITS image file handle.

    Raises ValueError if field or wavelength is not an accepted value, or if
    no subject with the given Zooniverse ID exists in the database. Raises
    AssertionError if the subject is not from the ATLAS survey.
    """
    if field not in {'elais', 'cdfs'}:
        raise ValueError('field must be either "elais" or "cdfs".')

    if wavelength not in {'ir', 'radio'}:
        raise ValueError('wavelength must be either "ir" or "radio".')

    subject = db.radio_subjects.find_one({'zooniverse_id': zid})
    # find_one returns None when nothing matches; fail with a clear message
    # instead of an opaque TypeError from subscripting None below.
    if subject is None:
        raise ValueError('No subject found with Zooniverse ID {!r}.'.format(zid))

    assert subject['metadata']['survey'] == 'atlas', 'Subject not from ATLAS survey.'

    # The subject's metadata.source value is used as the file name prefix.
    cid = subject['metadata']['source']
    filename = '{}_{}.fits'.format(cid, wavelength)
    path = os.path.join(DATA_PATH, field, TILE_SIZE, filename)

    return astropy.io.fits.open(path, ignore_blank=True)
def imshow(im, contrast=0.05):
    """Helper function for showing an image on a logarithmic colour scale.

    im: Image as a numpy array.
    contrast: Offset added after shifting the image so its minimum is zero;
        this becomes the smallest displayed value and keeps the log scale
        away from zero.
    -> Whatever matplotlib.pyplot.imshow returns.

    NaN pixels are ignored when finding the minimum and maximum, so a few
    blank pixels do not turn the whole image (and the colour limits) into NaN.
    """
    shifted = im - numpy.nanmin(im) + contrast
    log_norm = matplotlib.colors.LogNorm(
        vmin=numpy.nanmin(shifted),
        vmax=numpy.nanmax(shifted),
    )
    return matplotlib.pyplot.imshow(shifted, origin='lower', norm=log_norm)
In [212]:
# Sanity check: load and display both wavelengths for one known subject.
example_zid = 'ARG0003r17'

with open_fits(example_zid, 'cdfs', 'ir') as hdu_list:
    ir = hdu_list[0].data
    imshow(ir)
    matplotlib.pyplot.show()

with open_fits(example_zid, 'cdfs', 'radio') as hdu_list:
    radio = hdu_list[0].data
    imshow(radio)
    matplotlib.pyplot.show()
Now, let's try to get the plurality click using the consensus module from willettk/rgz-analysis. I've updated this to Python 3, extracted all the consensus-related code, rewritten the main code to use ATLAS instead of FIRST, and assembled it into a module.
In [227]:
# Compute the consensus for one subject and pretty-print it for inspection.
# NOTE(review): later cells index the result as cons['answer'][<key>], so it is
# presumably a dict with an 'answer' mapping - confirm against the consensus module.
cons = crowdastro.rgz_analysis.consensus.consensus('ARG0003r17')
pprint.pprint(cons)
In [228]:
# Overlay the consensus output on the IR image of the same subject.
# NOTE(review): the float keys below are hard-coded; presumably they were copied
# from the consensus printed in the previous cell - confirm before reusing.
with open_fits('ARG0003r17', 'cdfs', 'ir') as hdu_list:
    ir = hdu_list[0].data
    imshow(ir)

    # White crosses: the 'ir_x' / 'ir_y' coordinates of the first answer.
    first_answer = cons['answer'][121.757]
    click_xs = CLICK_TO_FITS_X * numpy.array(first_answer['ir_x'])
    click_ys = CLICK_TO_FITS_Y * numpy.array(first_answer['ir_y'])
    matplotlib.pyplot.scatter([click_xs], [click_ys], c='w', marker='+')

    # Red cross: the 'ir_peak' of the first answer.
    first_peak = cons['answer'][121.757]['ir_peak']
    matplotlib.pyplot.scatter(
        [CLICK_TO_FITS_X * first_peak[0]],
        [CLICK_TO_FITS_Y * first_peak[1]],
        c='r', marker='x')

    # Green cross: the 'ir' entry of the second answer.
    second_ir = cons['answer'][126.439]['ir']
    matplotlib.pyplot.scatter(
        [CLICK_TO_FITS_X * second_ir[0]],
        [CLICK_TO_FITS_Y * second_ir[1]],
        c='g', marker='x')

    matplotlib.pyplot.show()
There are two peaks in the output. I've plotted them in red and green. I think red is the plurality peak; I have no idea what the other peak is. I think that the white marks represent places people clicked.
Let's try doing this again on a few more subjects.
Note: For some reason, the contours and scatter plots flip to whatever the opposite orientation of the image is. I've manually flipped them back. There should be a better way, but all resources I've found tell me that setting `origin='lower'` on the `imshow` call should flip the image correctly...
In [220]:
def plot_contours(contours, colour='gray'):
    """Draws each contour in a nested contour structure with pyplot.

    contours: Iterable of rows; each row is an iterable of dicts whose 'arr'
        entry is a sequence of points with 'x' and 'y' keys.
    colour: Line colour handed to matplotlib.
    """
    for contour_row in contours:
        for contour in contour_row:
            points = [(point['x'], point['y']) for point in contour['arr']]
            xs = [x for x, _ in points]
            ys = numpy.array([y for _, y in points])
            # The y values are subtracted from the image height to flip them vertically.
            matplotlib.pyplot.plot(xs, FITS_IMAGE_HEIGHT - ys, c=colour)
In [221]:
# Plot the consensus IR peak of each answer on the IR and radio images of the
# first few ATLAS subjects, one row of two panels per answer.
limit = 10
for subject in db.radio_subjects.find({'metadata.survey': 'atlas'}).limit(limit):
    matplotlib.pyplot.figure(figsize=(15, 15))

    zid = subject['zooniverse_id']
    cons = crowdastro.rgz_analysis.consensus.consensus(zid)
    contours = requests.get(subject['location']['contours']).json()['contours']
    print('=' * 40, zid, '=' * 40)

    # Not all answers seem to contain valid peaks. Keep only those that do, so
    # that subplot rows are numbered consecutively; numbering by position among
    # *all* answers can exceed the n_answers x 2 grid and make subplot() raise.
    peaked_answers = [answer for answer in cons['answer'].values() if 'ir_peak' in answer]
    n_answers = len(peaked_answers)

    # NOTE(review): the field is hard-coded to 'cdfs'; open_fits also accepts
    # 'elais' - confirm that every ATLAS subject visited here is a CDFS one.
    with open_fits(zid, 'cdfs', 'ir') as fits_file:
        ir = fits_file[0].data
    with open_fits(zid, 'cdfs', 'radio') as fits_file:
        radio = fits_file[0].data

    for answer_index, answer in enumerate(peaked_answers):
        # Scale the peak from click to FITS coordinates and flip y, matching
        # the vertical flip applied in plot_contours.
        peak_x = CLICK_TO_FITS_X * answer['ir_peak'][0]
        peak_y = FITS_IMAGE_HEIGHT - CLICK_TO_FITS_Y * answer['ir_peak'][1]

        # Left panel: IR image. Right panel: radio image. Same overlays on both.
        for column, (label, image) in enumerate((('IR', ir), ('Radio', radio)), start=1):
            matplotlib.pyplot.subplot(n_answers, 2, answer_index * 2 + column)
            matplotlib.pyplot.title('{} ({}): ind = {}'.format(zid, label, answer['ind']))
            imshow(image)
            plot_contours(contours, colour='green')
            matplotlib.pyplot.scatter([peak_x], [peak_y], c='r', marker='o')

    matplotlib.pyplot.show()
The extra indices refer to extra sets of contours. How many objects in the dataset are "simple", i.e., have just one set of contours? I'll just run a counter on a subset.
In [225]:
# Count how many of the first `limit` ATLAS subjects have exactly one answer
# that contains an 'ir_peak'.
limit = 1000
atlas_subjects = db.radio_subjects.find({'metadata.survey': 'atlas'}).limit(limit)

n_one = 0
for subject in atlas_subjects:
    zid = subject['zooniverse_id']
    cons = crowdastro.rgz_analysis.consensus.consensus(zid)
    n_answers = len([answer for answer in cons['answer'].values() if 'ir_peak' in answer])
    # A bool adds as 0 or 1, so this increments only for single-answer subjects.
    n_one += (n_answers == 1)

print('Number of subjects with one set of contours:', n_one)
So about 30%. I think that's a reasonable first attempt at a training data set. I'll try and assemble that in another notebook.
In [ ]: