This notebook generates a table of radio components in the CDFS and ELAIS-S1 fields, according to various incarnations of the ATLAS survey. To run it, you will need a MongoDB server with the Radio Galaxy Zoo (RGZ) database loaded. The crowdastro output file and the Fan et al. (2015) CSV are optional local files; all other data is fetched from the internet.
In the following cell, specify the MongoDB server details:
In [1]:
# Host name and port handed to pymongo.MongoClient when the RGZ subjects are loaded.
MONGO_HOST = 'localhost'
MONGO_PORT = 27017
In this cell, specify whether you have access to a crowdastro output file (crowdastro.h5), and if so, where it is:
In [2]:
# Whether a crowdastro output file is available on this machine.
USING_CROWDASTRO = True
# Location of the crowdastro HDF5 file; it is opened read-only with h5py.
CROWDASTRO_PATH = 'crowdastro-swire.h5'
# To get this file, run `crowdastro import_data --ir swire`.
In this cell, specify whether you have access to a CSV of the Fan et al. (2015) cross-identifications, and if so, where it is:
In [3]:
# Whether a CSV of the Fan et al. (2015) cross-identifications is available.
USING_FAN = True
# Location of that CSV.
# NOTE(review): this is an absolute, machine-specific path; point it at your own copy.
FAN_PATH = 'J:/repos/crowdastro/data/fan_2015.csv'
Next, we will fetch the resources we need.
In [4]:
# Remote text tables downloaded by the loading cells.
# Norris tables (fetched with requests and parsed as fixed-width text):
NORRIS_COMPONENTS_URI = 'http://www.atnf.csiro.au/people/rnorris/papers/n202/tab4.txt'
NORRIS_CROSS_IDENTIFICATIONS_URI = 'http://www.atnf.csiro.au/people/rnorris/papers/n202/tab6.txt'
# Middelberg tables (read directly by astropy's ASCII reader):
MIDDELBERG_COMPONENTS_URI = 'http://iopscience.iop.org/article/10.1086/508275/fulltext/datafile4.txt'
MIDDELBERG_CROSS_IDENTIFICATIONS_URI = 'http://iopscience.iop.org/article/10.1086/508275/fulltext/datafile6.txt'
In [5]:
# Load Norris components.
import requests, io, astropy.io.ascii as asc, astropy.table, pandas
# Download the Norris component table and parse it as fixed-width text.
_norris_components_response = requests.get(NORRIS_COMPONENTS_URI)
# Fail loudly on an HTTP error instead of parsing an error page as a table.
_norris_components_response.raise_for_status()

# One string per column of the fixed-width table. Only the *length* of each
# string is used (as the column width), so the whitespace inside these
# literals is significant.
_norris_component_column_templates = [
    ' # ',
    'Name ',
    'Radio RA ',
    'Radio dec ',
    'err(RA) ',
    'err(dec) ',
    'Peak Flux ',
    'Int flux ',
    'Bmaj ',
    'Bmin ',
    ' Bpa ',
    ' rms ',
]

norris_components = astropy.table.Table.from_pandas(
    pandas.read_fwf(
        io.StringIO(_norris_components_response.text),
        # Lines 0 and 2 are dropped; the first remaining line is the header.
        skiprows=[0, 2],
        header=0,
        # A concrete list (not a one-shot iterator) of column widths.
        widths=[len(template) for template in _norris_component_column_templates],
    )
)
norris_components
Out[5]:
In [7]:
# Load Norris cross-identifications.
# This table has inconsistent tabs, so we will have to convert them to "soft tabs".
def replace_tabs(s, tabstop=8):
    """Convert tabs to spaces.

    Each tab character in ``s`` is replaced by as many spaces as are needed to
    reach the next multiple of ``tabstop`` columns. The column counter starts
    at zero and is never reset, so ``s`` is treated as a single line.

    Parameters
    ----------
    s : str
        Text that may contain tab characters.
    tabstop : int, optional
        Distance between tab stops, in characters. Defaults to 8.

    Returns
    -------
    str
        ``s`` with every tab expanded to spaces.
    """
    pieces = []
    column = 0
    for c in s:
        if c == '\t':
            # Between 1 and tabstop spaces: a tab that already sits on a tab
            # stop advances by a full tabstop.
            width = tabstop - column % tabstop
            pieces.append(' ' * width)
            column += width
        else:
            pieces.append(c)
            column += 1
    # Join once at the end rather than growing a string character by character.
    return ''.join(pieces)
# Sanity checks for replace_tabs on a sample line of the cross-identification table.
test_input = ('S001 ATCDFS_J032602.78-284709.0 C001 SWIRE3_J032603.15-284708.5 3:26:02.785 -28:47:09.06 1.4 33.8 21.1 -1.0 -1.0 -1.0 4 looks like a group in irac 1')
test_output = ('S001 ATCDFS_J032602.78-284709.0 C001 SWIRE3_J032603.15-284708.5 3:26:02.785 -28:47:09.06 1.4 33.8 21.1 -1.0 -1.0 -1.0 4 looks like a group in irac 1')
assert test_output == replace_tabs(test_input)
# Additional cases written with explicit '\t' escapes, so the check does not
# depend on literal tab characters surviving inside the strings above.
assert replace_tabs('a\tb') == 'a' + ' ' * 7 + 'b'
assert replace_tabs('12345678\tx') == '12345678' + ' ' * 8 + 'x'
assert replace_tabs('ab\tc', 4) == 'ab  c'
# Download the Norris cross-identification table, expand its tabs line by
# line, and parse the result as fixed-width text.
_norris_xid_response = requests.get(NORRIS_CROSS_IDENTIFICATIONS_URI)
# Fail loudly on an HTTP error instead of parsing an error page as a table.
_norris_xid_response.raise_for_status()

norris_cross_identifications = astropy.table.Table.from_pandas(
    pandas.read_fwf(
        io.StringIO(
            '\n'.join(
                # replace_tabs never resets its column counter, so it must see
                # one line at a time. splitlines() handles any line-ending
                # convention the server uses, not just '\r\n'.
                replace_tabs(line, 8)
                for line in _norris_xid_response.text.splitlines()
            )
        ),
        # Lines 0 and 2 are dropped; the first remaining line is the header.
        skiprows=[0, 2],
        header=0,
        # Column widths in characters; all are multiples of the 8-column tab stop.
        widths=[8, 32, 20, 28, 16, 16, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 16, 8, 16]
    )
)
# Show a few rows from the middle of the table as a spot check.
norris_cross_identifications[700:710]
Out[7]:
In [8]:
# Load Middelberg tables.
# Each URI is handed straight to astropy's ASCII reader; no format or column
# options are passed here.
middelberg_components = asc.read(MIDDELBERG_COMPONENTS_URI)
# Print the first row as a quick look at what was parsed.
print(middelberg_components[0])
middelberg_cross_identifications = asc.read(MIDDELBERG_CROSS_IDENTIFICATIONS_URI)
print(middelberg_cross_identifications[0])
In [9]:
# Turn the Middelberg tables into per-component columns. No positional
# matching against another catalogue happens here, so every component is kept.
import astropy.coordinates


def _middelberg_position(record):
    """Build a SkyCoord from a row's RAh/RAm/RAs and DEd/DEm/DEs fields."""
    # The declination degrees are negated before being passed on.
    # NOTE(review): presumably the table stores unsigned degrees and every
    # object is in the southern sky -- confirm against the table description.
    return astropy.coordinates.SkyCoord(
        ra=(record['RAh'], record['RAm'], record['RAs']),
        dec=(-record['DEd'], record['DEm'], record['DEs']),
        unit=('hourangle', 'deg'))


def _per_middelberg_component(mapping):
    """Look up every Middelberg component ID in ``mapping``, in table order."""
    return [mapping[cid] for cid in _middelberg_component_ids]


# Columns taken directly from the component table.
_middelberg_component_ids = middelberg_components['ID']
_middelberg_component_names = middelberg_components['Name']
_middelberg_component_positions = [
    _middelberg_position(record) for record in middelberg_components
]
_middelberg_component_ras = [
    position.ra.deg for position in _middelberg_component_positions
]
_middelberg_component_decs = [
    position.dec.deg for position in _middelberg_component_positions
]
_middelberg_source_ids = middelberg_components['ID']

# Component ID -> attributes of the source (cross-identification row) that lists it.
_middelberg_cid_to_source_id = {}
_middelberg_cid_to_source_name = {}
_middelberg_cid_to_swire = {}
_middelberg_cid_to_source_z = {}
_middelberg_cid_to_source_ra = {}
_middelberg_cid_to_source_dec = {}
for row in middelberg_cross_identifications:
    # The source position is the same for every component of the row.
    pos = _middelberg_position(row)
    # 'CID' holds a comma-separated list of component IDs.
    for component in row['CID'].split(','):
        component = component.strip()
        _middelberg_cid_to_source_id[component] = row['ID']
        _middelberg_cid_to_source_name[component] = row['Name']
        _middelberg_cid_to_swire[component] = row['SName']
        _middelberg_cid_to_source_z[component] = row['z']
        _middelberg_cid_to_source_ra[component] = pos.ra.deg
        _middelberg_cid_to_source_dec[component] = pos.dec.deg

# Source attributes, one entry per component (KeyError if a component has no source).
_middelberg_component_source_ids = _per_middelberg_component(_middelberg_cid_to_source_id)
_middelberg_component_source_names = _per_middelberg_component(_middelberg_cid_to_source_name)
_middelberg_component_swires = _per_middelberg_component(_middelberg_cid_to_swire)
_middelberg_component_source_zs = _per_middelberg_component(_middelberg_cid_to_source_z)
_middelberg_component_source_ras = _per_middelberg_component(_middelberg_cid_to_source_ra)
_middelberg_component_source_decs = _per_middelberg_component(_middelberg_cid_to_source_dec)
In [10]:
# Load RGZ.
import pymongo, numpy
# Connect to the MongoDB server and pull every ATLAS subject from the
# 'radio' database, keeping three parallel lists (source, coords, Zooniverse ID).
client = pymongo.MongoClient(host=MONGO_HOST, port=MONGO_PORT)
db = client['radio']

_atlas_query = {'metadata.survey': 'atlas'}
_rgz_sources, _rgz_zids, _rgz_coord_pairs = [], [], []
for subject in db.radio_subjects.find(_atlas_query):
    source = subject['metadata']['source']
    # 'coords' must unpack into exactly two values.
    ra, dec = subject['coords']
    zid = subject['zooniverse_id']
    _rgz_sources.append(source)
    _rgz_coord_pairs.append((ra, dec))
    _rgz_zids.append(zid)

# Array of (ra, dec) rows, as needed for the KD-tree query later on.
_rgz_coords = numpy.array(_rgz_coord_pairs)
In [11]:
# Load consensuses from crowdastro.
# Defaults used when no crowdastro file is available: with an empty mapping,
# every later lookup by Zooniverse ID falls back to its blank default.
_crowdastro_consensus_objects = []
_crowdastro_zids = []
_crowdastro_swire_names = []
_crowdastro_zid_to_swire = {}  # Maps Zooniverse ID -> SWIRE name

# Honour the USING_CROWDASTRO switch instead of opening the file unconditionally.
if USING_CROWDASTRO:
    import h5py

    with h5py.File(CROWDASTRO_PATH, 'r') as crowdastro_h5:
        # Rows of (atlas_i, ir_i, success, percentage). Read fully into memory
        # so the data stays usable after the file is closed.
        _crowdastro_consensus_objects = crowdastro_h5['/atlas/cdfs/consensus_objects'][()]
        _crowdastro_zids = [r[0].decode('ascii') for r in crowdastro_h5['/atlas/cdfs/string']]
        _crowdastro_swire_names = [r.decode('ascii') for r in crowdastro_h5['/swire/cdfs/string']]

    # atlas_i and ir_i index into the two name lists read above.
    for atlas_i, ir_i, success, percentage in _crowdastro_consensus_objects:
        _crowdastro_zid_to_swire[_crowdastro_zids[int(atlas_i)]] = _crowdastro_swire_names[int(ir_i)]
In [12]:
# Match RGZ to Norris.
import scipy.spatial
# Largest nearest-neighbour distance that still counts as a match.
# Distances are Euclidean in the (RA, Dec) degree values given to the KD-tree.
_norris_match_radius = 3 / 60 / 60

_norris_cids = [component['#'] for component in norris_components]

# Parse the sexagesimal positions, then flatten them to (ra, dec) in degrees.
_norris_skycoords = [
    astropy.coordinates.SkyCoord(
        ra=component['Radio RA'],
        dec=component['Radio dec'],
        unit=('hourangle', 'deg'))
    for component in norris_components
]
_norris_coords = numpy.array(
    [(position.ra.deg, position.dec.deg) for position in _norris_skycoords])
_norris_tree = scipy.spatial.KDTree(_norris_coords)

# Assume that there are no situations where one Norris component maps to
# multiple RGZ components (and vice versa).
_dists, _indices = _norris_tree.query(_rgz_coords)
_matches = _dists < _norris_match_radius

# Maps ZID -> Norris CID, for RGZ subjects whose nearest Norris component is close enough.
_rgz_zid_to_norris = {
    zid: _norris_cids[nearest]
    for zid, is_match, nearest in zip(_rgz_zids, _matches, _indices)
    if is_match
}
# Inverse mapping, Norris CID -> ZID. If two ZIDs share a CID, the later one wins.
_norris_to_rgz_zid = {cid: zid for zid, cid in _rgz_zid_to_norris.items()}
In [13]:
# Load Fan.
# Defaults used when no Fan et al. (2015) CSV is available: with empty
# mappings, every later lookup by component ID falls back to its blank default.
fan_cross_identifications = None
_fan_source_ids = []
_fan_id_to_swire = {}  # Maps Fan source ID -> SWIRE name
# Assuming that CID in Fan = CID in Norris.
_fan_component_to_source = {}  # Maps component ID -> Fan source ID
_fan_component_to_swire = {}  # Maps component ID -> SWIRE name

# Honour the USING_FAN switch instead of reading the file unconditionally.
if USING_FAN:
    fan_cross_identifications = asc.read(FAN_PATH, header_start=0, delimiter=',')
    _fan_source_ids = fan_cross_identifications['id']
    for row in fan_cross_identifications:
        _fan_id_to_swire[row['id']] = row['swire']
        # 'radios' holds a comma-separated list of radio component IDs.
        for component in row['radios'].split(','):
            component = component.strip()
            _fan_component_to_source[component] = row['id']
            _fan_component_to_swire[component] = row['swire']
Now, we can construct the table. We will have the following columns:
In [14]:
# Output column names, grouped by the catalogue they come from. The order
# here must match the order of the data lists used to build the table.
columns = [
    # Row index.
    'Key',
    # Norris
    'Component ID (Norris)',
    'Source ID (Norris)',
    'Source Name (Norris)',
    'SWIRE Name (Norris)',
    'RA (Norris)',
    'Dec (Norris)',
    'Source RA (Norris)',
    'Source Dec (Norris)',
    # RGZ
    'Component ID (RGZ)',
    'Zooniverse ID (RGZ)',
    'SWIRE Name (RGZ)',
    'RA (RGZ)',
    'Dec (RGZ)',
    # Fan
    'Source ID (Fan)',
    'SWIRE Name (Fan)',
    # Middelberg
    'Component ID (Middelberg)',
    'Component Name (Middelberg)',
    'RA (Middelberg)',
    'Dec (Middelberg)',
    'Source ID (Middelberg)',
    'Source Name (Middelberg)',
    'SWIRE Name (Middelberg)',
    'Source RA (Middelberg)',
    'Source Dec (Middelberg)',
    'Source Redshift (Middelberg)',
]
In [15]:
import astropy.coordinates


def _radio_position(record):
    """Return a SkyCoord parsed from a record's 'Radio RA' and 'Radio dec' fields."""
    return astropy.coordinates.SkyCoord(
        ra=record['Radio RA'],
        dec=record['Radio dec'],
        unit=('hourangle', 'deg'))


# The table is assembled in three stacked sections of rows:
#   1. one row per Norris component (with its RGZ match, if any),
#   2. one row per RGZ subject that matched no Norris component,
#   3. one row per Middelberg component.
# Every column is a plain list; cells that do not apply to a row hold ''.

# ---- Section 1: Norris components ----

# Component ID (Norris)
component_ids_norris = [component['#'] for component in norris_components]

# Source ID (Norris): each cross-identification row lists its components as a
# comma-separated string.
_component_to_source = {
    cid.strip(): xid['#']
    for xid in norris_cross_identifications
    for cid in xid['Component'].split(',')
}
source_ids_norris = [_component_to_source[cid] for cid in component_ids_norris]

# Source Name (Norris)
_source_to_name = {xid['#']: xid['Name'] for xid in norris_cross_identifications}
source_names_norris = [_source_to_name[sid] for sid in source_ids_norris]

# SWIRE Name (Norris)
_source_to_swire_norris = {xid['#']: xid['SWIRE'] for xid in norris_cross_identifications}
swire_names_norris = [_source_to_swire_norris[sid] for sid in source_ids_norris]

# RA (Norris), Dec (Norris)
_positions_norris = [_radio_position(component) for component in norris_components]
ras_norris = [position.ra.deg for position in _positions_norris]
decs_norris = [position.dec.deg for position in _positions_norris]

# Source RA (Norris), Source Dec (Norris)
_source_positions_norris = [_radio_position(xid) for xid in norris_cross_identifications]
_source_id_to_position_norris = dict(
    zip(norris_cross_identifications['#'], _source_positions_norris))
source_ras_norris = [_source_id_to_position_norris[sid].ra.deg for sid in source_ids_norris]
source_decs_norris = [_source_id_to_position_norris[sid].dec.deg for sid in source_ids_norris]

# Zooniverse ID (RGZ): blank where no RGZ subject matched the component.
zooniverse_ids_rgz = [_norris_to_rgz_zid.get(cid, '') for cid in component_ids_norris]

# Component ID (RGZ)
_zid_to_cid = dict(zip(_rgz_zids, _rgz_sources))
_zid_to_coord = dict(zip(_rgz_zids, _rgz_coords))
component_ids_rgz = [_zid_to_cid.get(zid, '') for zid in zooniverse_ids_rgz]

# ---- Section 2: RGZ subjects with no corresponding Norris object ----

_zid_no_norris = [zid for zid in _rgz_zids if zid not in _rgz_zid_to_norris]
_cid_no_norris = [_zid_to_cid.get(zid, '') for zid in _zid_no_norris]
_blank_no_norris = [''] * len(_zid_no_norris)
# The Norris columns get blanks for these rows...
_norris_columns = (
    component_ids_norris, source_ids_norris, source_names_norris,
    swire_names_norris, ras_norris, decs_norris, source_ras_norris,
    source_decs_norris,
)
for column in _norris_columns:
    column.extend(_blank_no_norris)
# ...while the RGZ identifier columns get the real values.
zooniverse_ids_rgz.extend(_zid_no_norris)
component_ids_rgz.extend(_cid_no_norris)

# The remaining RGZ and Fan columns are derived from the already-extended
# lists, so they cover sections 1 and 2 in one go.

# RA (RGZ), Dec (RGZ)
_rgz_positions = [_zid_to_coord.get(zid, ('', '')) for zid in zooniverse_ids_rgz]
ras_rgz = [position[0] for position in _rgz_positions]
decs_rgz = [position[1] for position in _rgz_positions]

# SWIRE Name (RGZ)
swire_names_rgz = [_crowdastro_zid_to_swire.get(zid, '') for zid in zooniverse_ids_rgz]

# Source ID (Fan)
fan_source_ids = [_fan_component_to_source.get(cid, '') for cid in component_ids_norris]

# SWIRE Name (Fan)
fan_swire_names = [_fan_component_to_swire.get(cid, '') for cid in component_ids_norris]

# ---- Section 3: Middelberg components ----

# Middelberg columns: blanks for every row so far, then the real values.
_blank_before_middelberg = [''] * len(component_ids_norris)
middelberg_component_ids = _blank_before_middelberg + list(_middelberg_component_ids)
middelberg_component_names = _blank_before_middelberg + list(_middelberg_component_names)
middelberg_component_ras = _blank_before_middelberg + list(_middelberg_component_ras)
middelberg_component_decs = _blank_before_middelberg + list(_middelberg_component_decs)
middelberg_component_source_ids = _blank_before_middelberg + list(_middelberg_component_source_ids)
middelberg_component_source_names = _blank_before_middelberg + list(_middelberg_component_source_names)
middelberg_component_swires = _blank_before_middelberg + list(_middelberg_component_swires)
middelberg_component_source_ras = _blank_before_middelberg + list(_middelberg_component_source_ras)
middelberg_component_source_decs = _blank_before_middelberg + list(_middelberg_component_source_decs)
middelberg_component_source_zs = _blank_before_middelberg + list(_middelberg_component_source_zs)

# All other columns: blanks for the Middelberg rows.
_blank_for_middelberg = [''] * len(_middelberg_component_ids)
_non_middelberg_columns = (
    component_ids_norris, source_ids_norris, source_names_norris,
    swire_names_norris, ras_norris, decs_norris, component_ids_rgz,
    zooniverse_ids_rgz, swire_names_rgz, ras_rgz, decs_rgz,
    fan_source_ids, fan_swire_names, source_ras_norris, source_decs_norris,
)
for column in _non_middelberg_columns:
    column.extend(_blank_for_middelberg)

# ---- Assemble ----

# Key: a running row number across all three sections.
keys = list(range(len(component_ids_norris)))

# The data lists are given in the same order as the names in `columns`.
_table_data = [
    keys,
    component_ids_norris, source_ids_norris, source_names_norris,
    swire_names_norris, ras_norris, decs_norris,
    source_ras_norris, source_decs_norris,
    component_ids_rgz, zooniverse_ids_rgz, swire_names_rgz, ras_rgz, decs_rgz,
    fan_source_ids, fan_swire_names,
    middelberg_component_ids, middelberg_component_names,
    middelberg_component_ras, middelberg_component_decs,
    middelberg_component_source_ids, middelberg_component_source_names,
    middelberg_component_swires, middelberg_component_source_ras,
    middelberg_component_source_decs, middelberg_component_source_zs,
]
table = astropy.table.Table(data=_table_data, names=columns)
table
Out[15]:
In [16]:
# Save the combined table as plain ASCII text. overwrite=True lets the
# notebook be re-run from a fresh kernel: a previous output file is replaced
# rather than causing the write to fail.
table.write('one-table-to-rule-them-all.tbl', format='ascii', overwrite=True)
In [ ]: