In [46]:
import numpy as np
import pandas as pd

from astropy import coordinates
from astropy.coordinates import match_coordinates_sky
import astropy.units as u
import astroquery
from astroquery.irsa import Irsa
from astroquery.vizier import Vizier
from astropy.table import Table, join

from astropy.units import Quantity
from astroquery.gaia import Gaia

Irsa.ROW_LIMIT = -1
Vizier.ROW_LIMIT = -1

from astropy.io import ascii as asci

import matplotlib.pyplot as plt

# Suppress warnings. Comment this out if you wish to see the warning messages
import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

Get the data

2MASS => J, H K, angular resolution ~4"

WISE => 3.4, 4.6, 12, and 22 μm (W1, W2, W3, W4) with an angular resolution of 6.1", 6.4", 6.5", & 12.0"

GALEX imaging => Five imaging surveys in a Far UV band (1350-1750Å) and Near UV band (1750-2800Å) with 6-8 arcsecond resolution (80% encircled energy) and 1 arcsecond astrometry, and a cosmic UV background map.


In [47]:
from astroquery.gaia import Gaia
tables = Gaia.load_tables(only_names=True)
for table in (tables):
    print (table.get_qualified_name())


Retrieving tables...
Parsing tables...
Done.
external.external.gaiadr2_geometric_distance
public.public.dual
public.public.hipparcos
public.public.hipparcos_newreduction
public.public.hubble_sc
public.public.igsl_source
public.public.igsl_source_catalog_ids
public.public.tycho2
tap_schema.tap_schema.columns
tap_schema.tap_schema.key_columns
tap_schema.tap_schema.keys
tap_schema.tap_schema.schemas
tap_schema.tap_schema.tables
gaiadr1.gaiadr1.aux_qso_icrf2_match
gaiadr1.gaiadr1.ext_phot_zero_point
gaiadr1.gaiadr1.allwise_best_neighbour
gaiadr1.gaiadr1.allwise_neighbourhood
gaiadr1.gaiadr1.gsc23_best_neighbour
gaiadr1.gaiadr1.gsc23_neighbourhood
gaiadr1.gaiadr1.ppmxl_best_neighbour
gaiadr1.gaiadr1.ppmxl_neighbourhood
gaiadr1.gaiadr1.sdss_dr9_best_neighbour
gaiadr1.gaiadr1.sdss_dr9_neighbourhood
gaiadr1.gaiadr1.tmass_best_neighbour
gaiadr1.gaiadr1.tmass_neighbourhood
gaiadr1.gaiadr1.ucac4_best_neighbour
gaiadr1.gaiadr1.ucac4_neighbourhood
gaiadr1.gaiadr1.urat1_best_neighbour
gaiadr1.gaiadr1.urat1_neighbourhood
gaiadr1.gaiadr1.cepheid
gaiadr1.gaiadr1.phot_variable_time_series_gfov
gaiadr1.gaiadr1.phot_variable_time_series_gfov_statistical_parameters
gaiadr1.gaiadr1.rrlyrae
gaiadr1.gaiadr1.variable_summary
gaiadr1.gaiadr1.allwise_original_valid
gaiadr1.gaiadr1.gsc23_original_valid
gaiadr1.gaiadr1.ppmxl_original_valid
gaiadr1.gaiadr1.sdssdr9_original_valid
gaiadr1.gaiadr1.tmass_original_valid
gaiadr1.gaiadr1.ucac4_original_valid
gaiadr1.gaiadr1.urat1_original_valid
gaiadr1.gaiadr1.gaia_source
gaiadr1.gaiadr1.tgas_source
gaiadr2.gaiadr2.aux_allwise_agn_gdr2_cross_id
gaiadr2.gaiadr2.aux_iers_gdr2_cross_id
gaiadr2.gaiadr2.aux_sso_orbit_residuals
gaiadr2.gaiadr2.aux_sso_orbits
gaiadr2.gaiadr2.dr1_neighbourhood
gaiadr2.gaiadr2.allwise_best_neighbour
gaiadr2.gaiadr2.allwise_neighbourhood
gaiadr2.gaiadr2.apassdr9_best_neighbour
gaiadr2.gaiadr2.apassdr9_neighbourhood
gaiadr2.gaiadr2.gsc23_best_neighbour
gaiadr2.gaiadr2.gsc23_neighbourhood
gaiadr2.gaiadr2.hipparcos2_best_neighbour
gaiadr2.gaiadr2.hipparcos2_neighbourhood
gaiadr2.gaiadr2.panstarrs1_best_neighbour
gaiadr2.gaiadr2.panstarrs1_neighbourhood
gaiadr2.gaiadr2.ppmxl_best_neighbour
gaiadr2.gaiadr2.ppmxl_neighbourhood
gaiadr2.gaiadr2.ravedr5_best_neighbour
gaiadr2.gaiadr2.ravedr5_neighbourhood
gaiadr2.gaiadr2.sdssdr9_best_neighbour
gaiadr2.gaiadr2.sdssdr9_neighbourhood
gaiadr2.gaiadr2.tmass_best_neighbour
gaiadr2.gaiadr2.tmass_neighbourhood
gaiadr2.gaiadr2.tycho2_best_neighbour
gaiadr2.gaiadr2.tycho2_neighbourhood
gaiadr2.gaiadr2.urat1_best_neighbour
gaiadr2.gaiadr2.urat1_neighbourhood
gaiadr2.gaiadr2.sso_observation
gaiadr2.gaiadr2.sso_source
gaiadr2.gaiadr2.vari_cepheid
gaiadr2.gaiadr2.vari_classifier_class_definition
gaiadr2.gaiadr2.vari_classifier_definition
gaiadr2.gaiadr2.vari_classifier_result
gaiadr2.gaiadr2.vari_long_period_variable
gaiadr2.gaiadr2.vari_rotation_modulation
gaiadr2.gaiadr2.vari_rrlyrae
gaiadr2.gaiadr2.vari_short_timescale
gaiadr2.gaiadr2.vari_time_series_statistics
gaiadr2.gaiadr2.panstarrs1_original_valid
gaiadr2.gaiadr2.gaia_source

In [48]:
#obj = ["3C 454.3", 343.49062, 16.14821, 1.0]
obj = ["PKS J0006-0623", 1.55789, -6.39315, 1]
#obj = ["M87", 187.705930, 12.391123, 1.0]
#### name, ra, dec, radius of cone (in deg)

obj_name = obj[0]
obj_ra   = obj[1]
obj_dec  = obj[2]
cone_radius  = obj[3]

In [49]:
obj_coord = coordinates.SkyCoord(ra=obj_ra, dec=obj_dec, unit=(u.deg, u.deg), frame="icrs")

Try GAIA with 2MASS

  • in gaiadr1 and gaiadr2 they also provide 2mass, allwise, sdss "best neigbour" pairs
  • catalogs provided by GAIA:
    • gaiadr1.gaiadr1.allwise_original_valid
    • gaiadr1.gaiadr1.gsc23_original_valid
    • gaiadr1.gaiadr1.ppmxl_original_valid
    • gaiadr1.gaiadr1.sdssdr9_original_valid
    • gaiadr1.gaiadr1.tmass_original_valid
    • gaiadr1.gaiadr1.ucac4_original_valid
    • gaiadr1.gaiadr1.urat1_original_valid

In [64]:
# Query data
# 2MASS
# data_2mass = Irsa.query_region(obj_coord, catalog="fp_psc", radius=cone_radius * u.deg)

# # WISE
# data_wise  = Irsa.query_region(obj_coord, catalog="allwise_p3as_psd", radius=cone_radius * u.deg)

# # GALEX
# __data_galex = Vizier.query_region(obj_coord, catalog='II/335', radius=cone_radius * u.deg)
# data_galex = __data_galex[0]

# cmd = "SELECT * \
# FROM gaiadr2.gaia_source \
# WHERE CONTAINS(POINT('ICRS',gaiadr2.gaia_source.ra,gaiadr2.gaia_source.dec), \
# CIRCLE('ICRS'," + str(obj_ra) + "," + str(obj_dec) + "," + str(cone_radius) + "))=1;"

cmd = "SELECT * FROM gaiadr2.gaia_source AS g, \
gaiadr2.tmass_best_neighbour AS tbest, \
gaiadr1.tmass_original_valid AS tmass \
WHERE g.source_id = tbest.source_id AND tbest.tmass_oid = tmass.tmass_oid AND CONTAINS(POINT('ICRS',g.ra,g.dec),\
CIRCLE('ICRS'," + str(obj_ra) + "," + str(obj_dec) + "," + str(cone_radius) + "))=1;"

print(cmd)


job = Gaia.launch_job_async(cmd, dump_to_file=True)
print (job)


# GAIA


SELECT * FROM gaiadr2.gaia_source AS g, gaiadr2.tmass_best_neighbour AS tbest, gaiadr1.tmass_original_valid AS tmass WHERE g.source_id = tbest.source_id AND tbest.tmass_oid = tmass.tmass_oid AND CONTAINS(POINT('ICRS',g.ra,g.dec),CIRCLE('ICRS',1.55789,-6.39315,1))=1;
Jobid: 1542987284378O
Phase: COMPLETED
Owner: None
Output file: async_20181123123444.vot
Results: None

In [65]:
r = job.get_results()
print(len(r['source_id']))


4207

In [66]:
print(r['phot_g_mean_mag', 'phot_bp_mean_mag', 'phot_rp_mean_mag', 'j_m', 'h_m', 'ks_m', 'tmass_oid'])


phot_g_mean_mag phot_bp_mean_mag phot_rp_mean_mag  j_m    h_m    ks_m  tmass_oid
      mag             mag              mag         mag    mag    mag            
--------------- ---------------- ---------------- ------ ------ ------ ---------
       19.06598        20.061134         17.98525 16.697 15.994 15.868 271970634
      19.486813        20.899199         18.17955 16.568 15.761 15.252 271273659
      18.801517        19.611595        17.864454 16.587 15.862 15.314 271981619
      17.345057        17.782911         16.73697 15.999 15.509 15.318 271223089
      18.668016        19.498903        17.653946 16.393 15.711 15.498 271208327
      16.346586         16.66519         15.86856 15.294 15.064 14.885 271919488
      15.356233        15.818361        14.730853 13.953 13.453 13.377 271965018
      18.073174         18.70632        17.315163  16.36 15.802 15.282 271944076
      18.862864         20.19449        17.644482 15.971 15.529 15.401 272134874
       15.78973        16.219479        15.201209 14.468 13.947 13.925 272121146
            ...              ...              ...    ...    ...    ...       ...
      16.288784        16.684328        15.732504 15.069 14.773  14.83 275141737
      20.113007        21.604626        18.714922 16.819  15.98 15.415 275456305
      16.895649         17.23442        16.367863 15.797 15.523 15.498 275210999
      16.956244        17.329174        16.433014 15.818 15.426 15.421 275427448
      16.232542        17.150475        15.284848 14.094 13.431 13.279 275330290
      18.788189        20.031862        17.572582 15.981 15.453 14.988 275722821
      14.346713        15.176543        13.422222 12.254 11.605 11.435 275400255
      16.468006        16.834158        15.946133 15.339 14.944 14.931 275448072
      18.911425        19.716862        17.749294 16.406 16.053  15.66 275526325
      19.092836        19.876394        18.084078 16.966 16.069  15.94 275429919
Length = 4207 rows

Try GAIA and WISE


In [54]:
cmd = "SELECT * FROM gaiadr2.gaia_source AS g, \
gaiadr2.allwise_best_neighbour AS wbest, \
gaiadr1.allwise_original_valid AS allwise \
WHERE g.source_id = wbest.source_id AND wbest.allwise_oid = allwise.allwise_oid AND CONTAINS(POINT('ICRS',g.ra,g.dec),\
CIRCLE('ICRS'," + str(obj_ra) + "," + str(obj_dec) + "," + str(cone_radius) + "))=1;"

print(cmd)


job = Gaia.launch_job_async(cmd, dump_to_file=True)
print(job)


SELECT * FROM gaiadr2.gaia_source AS g, gaiadr2.allwise_best_neighbour AS wbest, gaiadr1.allwise_original_valid AS allwise WHERE g.source_id = wbest.source_id AND wbest.allwise_oid = allwise.allwise_oid AND CONTAINS(POINT('ICRS',g.ra,g.dec),CIRCLE('ICRS',1.55789,-6.39315,1))=1;
Jobid: 1542985794627O
Phase: COMPLETED
Owner: None
Output file: async_20181123120954.vot
Results: None

In [55]:
r = job.get_results()
print(len(r['source_id']))


6152

In [58]:
print(r['w1mpro', 'w2mpro', 'w3mpro', 'w4mpro'])


w1mpro w2mpro w3mpro w4mpro
 mag    mag    mag    mag  
------ ------ ------ ------
15.766 15.452 11.957   8.91
15.922 15.838  12.46  8.822
16.123 15.152 12.408  8.396
15.369 15.248 11.925  8.983
15.798 15.462  11.91  8.776
 15.51 15.567 12.442  9.049
15.331 15.483 11.678  8.909
14.854 14.875 12.385  8.721
16.925 16.794 12.452  8.799
13.355 13.393  11.93  9.003
   ...    ...    ...    ...
15.359 15.442 12.364  8.839
15.481 15.436 12.206   8.99
13.177 13.201 12.346  8.767
14.993 14.874 11.824   8.97
11.363 11.362 11.735  8.306
14.932  15.03  12.47  8.785
15.519 15.373 12.242  8.563
15.736 15.711 12.458  9.077
15.918  15.89 12.268  8.849
16.654 16.701 12.389  8.602
Length = 6152 rows

Try GAIA-WISE-2MASS directly

  • I check there is tmass_key in gaiadr1.allwise_original_valid (ALLWISE catalog)

Desc of tmass_key:

2MASS PSC association. Unique identifier of the closest source in the 2MASS Point Source Catalog (PSC) that falls within 3 arcsec of the non-motion fit position of this WISE source. This is equivalent to the pts_key in the 2MASS PSC entry. This column is “null” if there is no 2MASS PSC source within 3 arcsec of the WISE source position.


In [68]:
cmd = "SELECT * FROM gaiadr2.gaia_source AS g, \
gaiadr2.allwise_best_neighbour AS wbest, \
gaiadr1.allwise_original_valid AS allwise, \
gaiadr1.tmass_original_valid AS tmass \
WHERE g.source_id = wbest.source_id AND wbest.allwise_oid = allwise.allwise_oid AND CONTAINS(POINT('ICRS',g.ra,g.dec),\
CIRCLE('ICRS'," + str(obj_ra) + "," + str(obj_dec) + "," + str(cone_radius) + "))=1\
AND allwise.tmass_key IS NOT NULL \
AND allwise.tmass_key = tmass.tmass_oid;"

print(cmd)


job = Gaia.launch_job_async(cmd, dump_to_file=True)
print(job)


SELECT * FROM gaiadr2.gaia_source AS g, gaiadr2.allwise_best_neighbour AS wbest, gaiadr1.allwise_original_valid AS allwise, gaiadr1.tmass_original_valid AS tmass WHERE g.source_id = wbest.source_id AND wbest.allwise_oid = allwise.allwise_oid AND CONTAINS(POINT('ICRS',g.ra,g.dec),CIRCLE('ICRS',1.55789,-6.39315,1))=1AND allwise.tmass_key IS NOT NULL AND allwise.tmass_key = tmass.tmass_oid;
Jobid: 1542987462189O
Phase: COMPLETED
Owner: None
Output file: async_20181123123742.vot
Results: None

In [69]:
r = job.get_results()
print(len(r['source_id']))


2990

In [74]:
r['ra', 'dec', 'phot_g_mean_mag', 'j_m', 'w1mpro', 'tmass_key', 'tmass_oid']


Out[74]:
Table masked=True length=2990
radecphot_g_mean_magj_mw1mprotmass_keytmass_oid
degdegmagmagmag
float64float64float32float32float64int64int64
2.390732751830467-6.54605259299479819.0659814.9615.766125705984125705984
2.4119940994386777-6.75483186611919.48681316.3815.369125706276125706276
2.5428858650859794-6.542754299597989518.80151716.47215.798125722213125722213
2.4657746243804772-6.77002608966310817.34505713.8615.51125721895125721895
2.3905290941368316-6.7743107353979418.66801611.32615.331125706301125706301
2.3086280130046433-6.56126974637592816.34658616.6314.854125704003125704003
2.5481475582031994-6.54761332345948115.35623316.30413.355125722207125722207
2.471145860590163-6.55387316136739318.07317416.21415.543125722199125722199
2.3531584034045467-6.49669407031052818.86286415.75615.123125705917125705917
.....................
1.0653507106747422-6.1464224421085213.62128612.70411.423197406677197406677
0.8479615508290679-6.25223756946433418.42384111.99414.728197387460197387460
1.0724744505610586-6.21156784984986519.51312614.55314.33197406582197406582
0.6018944211121126-6.43651703965303916.74536915.75912.965197366938197366938
0.6671745919113613-6.35939428744452513.98038214.7512.439197369756197369756
0.6606070450355732-6.411185283466017517.08797615.87814.108197369818197369818
1.0389233265011801-5.966480023368867520.44596314.32116.241197406970197406970
0.8307729230385131-5.95065243688518718.60759715.11115.513197387954197387954
1.0232729605527533-6.08971305088290518.9120316.17413.489197406768197406768
0.9370722898659803-5.99765935549356519.08854315.37915.864197388330197388330

In [77]:
r.colnames


Out[77]:
['solution_id',
 'designation',
 'source_id',
 'random_index',
 'ref_epoch',
 'ra',
 'ra_error',
 'dec',
 'dec_error',
 'parallax',
 'parallax_error',
 'parallax_over_error',
 'pmra',
 'pmra_error',
 'pmdec',
 'pmdec_error',
 'ra_dec_corr',
 'ra_parallax_corr',
 'ra_pmra_corr',
 'ra_pmdec_corr',
 'dec_parallax_corr',
 'dec_pmra_corr',
 'dec_pmdec_corr',
 'parallax_pmra_corr',
 'parallax_pmdec_corr',
 'pmra_pmdec_corr',
 'astrometric_n_obs_al',
 'astrometric_n_obs_ac',
 'astrometric_n_good_obs_al',
 'astrometric_n_bad_obs_al',
 'astrometric_gof_al',
 'astrometric_chi2_al',
 'astrometric_excess_noise',
 'astrometric_excess_noise_sig',
 'astrometric_params_solved',
 'astrometric_primary_flag',
 'astrometric_weight_al',
 'astrometric_pseudo_colour',
 'astrometric_pseudo_colour_error',
 'mean_varpi_factor_al',
 'astrometric_matched_observations',
 'visibility_periods_used',
 'astrometric_sigma5d_max',
 'frame_rotator_object_type',
 'matched_observations',
 'duplicated_source',
 'phot_g_n_obs',
 'phot_g_mean_flux',
 'phot_g_mean_flux_error',
 'phot_g_mean_flux_over_error',
 'phot_g_mean_mag',
 'phot_bp_n_obs',
 'phot_bp_mean_flux',
 'phot_bp_mean_flux_error',
 'phot_bp_mean_flux_over_error',
 'phot_bp_mean_mag',
 'phot_rp_n_obs',
 'phot_rp_mean_flux',
 'phot_rp_mean_flux_error',
 'phot_rp_mean_flux_over_error',
 'phot_rp_mean_mag',
 'phot_bp_rp_excess_factor',
 'phot_proc_mode',
 'bp_rp',
 'bp_g',
 'g_rp',
 'radial_velocity',
 'radial_velocity_error',
 'rv_nb_transits',
 'rv_template_teff',
 'rv_template_logg',
 'rv_template_fe_h',
 'phot_variable_flag',
 'l',
 'b',
 'ecl_lon',
 'ecl_lat',
 'priam_flags',
 'teff_val',
 'teff_percentile_lower',
 'teff_percentile_upper',
 'a_g_val',
 'a_g_percentile_lower',
 'a_g_percentile_upper',
 'e_bp_min_rp_val',
 'e_bp_min_rp_percentile_lower',
 'e_bp_min_rp_percentile_upper',
 'flame_flags',
 'radius_val',
 'radius_percentile_lower',
 'radius_percentile_upper',
 'lum_val',
 'lum_percentile_lower',
 'lum_percentile_upper',
 'datalink_url',
 'epoch_photometry_url',
 'source_id_2',
 'original_ext_source_id',
 'angular_distance',
 'gaia_astrometric_params',
 'allwise_oid',
 'number_of_neighbours',
 'number_of_mates',
 'best_neighbour_multiplicity',
 'allwise_oid_2',
 'designation_2',
 'ra_2',
 'dec_2',
 'ra_error_2',
 'dec_error_2',
 'radec_co_error',
 'w1mpro',
 'w1mpro_error',
 'w2mpro',
 'w2mpro_error',
 'w3mpro',
 'w3mpro_error',
 'w4mpro',
 'w4mpro_error',
 'cc_flags',
 'ext_flag',
 'var_flag',
 'ph_qual',
 'w1mjd_mean',
 'w2mjd_mean',
 'w3mjd_mean',
 'w4mjd_mean',
 'w1gmag',
 'w1gmag_error',
 'w2gmag',
 'w2gmag_error',
 'w3gmag',
 'w3gmag_error',
 'w4gmag',
 'w4gmag_error',
 'tmass_key',
 'ph_qual_2',
 'tmass_oid',
 'designation_3',
 'ra_3',
 'dec_3',
 'err_maj',
 'err_min',
 'err_ang',
 'j_m',
 'j_msigcom',
 'h_m',
 'h_msigcom',
 'ks_m',
 'ks_msigcom',
 'ext_key',
 'j_date']

In [ ]:
num_2mass = len(data_2mass)
num_wise  = len(data_wise)
num_galex = len(data_galex)

print("Number of object in (2MASS, WISE, GALEX): ", num_2mass, num_wise, num_galex)

Matching coordinates


In [6]:
# use only coordinate columns
ra_2mass = data_2mass['ra']
dec_2mass = data_2mass['dec']
c_2mass = coordinates.SkyCoord(ra=ra_2mass, dec=dec_2mass, unit=(u.deg, u.deg), frame="icrs")

ra_wise  = data_wise['ra']
dec_wise = data_wise['dec']
c_wise = coordinates.SkyCoord(ra=ra_wise, dec=dec_wise, unit=(u.deg, u.deg), frame="icrs")

ra_galex  = data_galex['RAJ2000']
dec_galex = data_galex['DEJ2000']
c_galex = coordinates.SkyCoord(ra=ra_galex, dec=dec_galex, unit=(u.deg, u.deg), frame="icrs")

In [7]:
####
sep_min = 1.0 * u.arcsec # minimum separation in arcsec

In [8]:
# Only 2MASS and WISE matching 
#
idx_2mass, idx_wise, d2d, d3d = c_wise.search_around_sky(c_2mass, sep_min)
# select only one nearest if there are more in the search reagion (minimum seperation parameter)!

print("Only 2MASS and WISE: ", len(idx_2mass))


Only 2MASS and WISE:  4292

Plot $W_1-J$ vs $W_1$


In [9]:
# from matching of 2 cats (2MASS and WISE) coordinate
data_2mass_matchwith_wise = data_2mass[idx_2mass]
data_wise_matchwith_2mass = data_wise[idx_wise] # WISE dataset

w1 = data_wise_matchwith_2mass['w1mpro']
j = data_2mass_matchwith_wise['j_m']
w1j = w1-j

cutw1j = -1.7 # https://academic.oup.com/mnras/article/448/2/1305/1055284

# WISE galaxy data -> from cut
galaxy = data_wise_matchwith_2mass[w1j < cutw1j] 
print("Number of galaxy from cut W1-J:", len(galaxy))


Number of galaxy from cut W1-J: 455

In [10]:
w1j_galaxy = w1j[w1j<cutw1j]
w1_galaxy = w1[w1j<cutw1j]

plt.scatter(w1j, w1, marker='o', color='blue')
plt.scatter(w1j_galaxy, w1_galaxy, marker='.', color="red")
          
plt.axvline(x=cutw1j) # https://academic.oup.com/mnras/article/448/2/1305/1055284


Out[10]:
<matplotlib.lines.Line2D at 0x7f46cf1425f8>
  • W1-J < -1.7 => galaxy
  • W1-J > -1.7 => stars

Filter all Cats


In [11]:
# GALEX
###
# coord of object in 2mass which match wise (first objet/nearest in sep_min region)
c_2mass_matchwith_wise = c_2mass[idx_2mass]
c_wise_matchwith_2mass = c_wise[idx_wise]

#Check with 2mass cut
idx_2mass_wise_galex, idx_galex1, d2d, d3d = c_galex.search_around_sky(c_2mass_matchwith_wise, sep_min)
num_galex1 = len(idx_galex1)
#Check with wise cut
idx_wise_2mass_galex, idx_galex2, d2d, d3d = c_galex.search_around_sky(c_wise_matchwith_2mass, sep_min)
num_galex2 = len(idx_galex2)

print("Number of GALEX match in 2MASS cut (with WISE): ", num_galex1)
print("Number of GALEX match in WISE cut (with 2MASS): ", num_galex2)

# diff/average
print("Confusion level: ", abs(num_galex1 - num_galex2)/np.mean([num_galex1, num_galex2])*100, "%")


Number of GALEX match in 2MASS cut (with WISE):  805
Number of GALEX match in WISE cut (with 2MASS):  814
Confusion level:  1.1117974058060531 %

In [12]:
# Choose which one is smaller!
if num_galex1 < num_galex2:
    select_from_galex = idx_galex1
    
    match_galex = data_galex[select_from_galex]
    c_selected_galex = c_galex[select_from_galex]
    
    # 2MASS from GALEX_selected
    _idx_galex1, _idx_2mass, d2d, d3d = c_2mass.search_around_sky(c_selected_galex, sep_min)
    match_2mass = data_2mass[_idx_2mass]
    
    # WISE from 2MASS_selected
    _ra_match_2mass = match_2mass['ra']
    _dec_match_2mass = match_2mass['dec']
    _c_match_2mass = coordinates.SkyCoord(ra=_ra_match_2mass, dec=_dec_match_2mass, unit=(u.deg, u.deg), frame="icrs")

    _idx, _idx_wise, d2d, d3d = c_wise.search_around_sky(_c_match_2mass, sep_min)
    match_wise = data_wise[_idx_wise]
    
else:
    select_from_galex = idx_galex2

    match_galex = data_galex[select_from_galex]
    c_selected_galex = c_galex[select_from_galex]
    
    # WISE from GALEX_selected
    _idx_galex1, _idx_wise, d2d, d3d = c_wise.search_around_sky(c_selected_galex, sep_min)
    match_wise = data_wise[_idx_wise]
    
    # 2MASS from WISE_selected
    _ra_match_wise = match_wise['ra']
    _dec_match_wise = match_wise['dec']
    _c_match_wise = coordinates.SkyCoord(ra=_ra_match_wise, dec=_dec_match_wise, unit=(u.deg, u.deg), frame="icrs")

    _idx, _idx_2mass, d2d, d3d = c_2mass.search_around_sky(_c_match_wise, sep_min)
    match_2mass = data_2mass[_idx_2mass]

print("Number of match in GALEX: ", len(match_galex))
print("Number of match in 2MASS: ", len(match_2mass))
print("Number of match in WISE : ", len(match_wise))


Number of match in GALEX:  805
Number of match in 2MASS:  805
Number of match in WISE :  805

Collect relevant data


In [13]:
joindata = np.array([match_2mass['j_m'], 
                  match_2mass['j_m']-match_2mass['h_m'], 
                  match_2mass['j_m']-match_2mass['k_m'],
                  match_2mass['j_m']-match_wise['w1mpro'], 
                  match_2mass['j_m']-match_wise['w2mpro'],
                  match_2mass['j_m']-match_wise['w3mpro'],
                  match_2mass['j_m']-match_wise['w4mpro'],
                  match_2mass['j_m']-match_galex['NUVmag']])

joindata = joindata.T

Analysis

we can try:

  • dimensionality reduction
  • clustering
  • classification
  • data embedding

PCA


In [14]:
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale

X = scale(joindata)

pca = PCA(n_components=4)
X_r = pca.fit(X).transform(X)

print(pca.components_)
print(pca.explained_variance_)


[[ 0.33139607  0.32186432  0.38093203  0.38167798  0.38327615  0.40630151
   0.36211013  0.22953746]
 [-0.43879002  0.40462767  0.26939914  0.31632385  0.29689673 -0.20503478
  -0.37587224 -0.4468067 ]
 [-0.3538287  -0.25778796  0.00967304  0.21562806  0.26159256 -0.11903955
  -0.31146701  0.76298677]
 [ 0.00260929  0.80945746 -0.17860143 -0.25460733 -0.27162836 -0.1280133
  -0.08106047  0.38898365]]
[5.22483684 1.72122199 0.59943592 0.20892579]

In [15]:
# plot PCA result
# Plot data using PC1 vs PC2
plt.scatter(X_r[:,0], X_r[:,1], marker='o', color='blue')

# overplot galaxy selected using cut W1-J
for i, name in enumerate(match_wise['designation']):
    for galaxyname in galaxy['designation']:
        if name == galaxyname:
            plt.scatter(X_r[i,0], X_r[i,1], marker=".", color="red")



In [16]:
# plot PCA result
# Plot data using PC1 vs PC2
plt.scatter(X_r[:,0], X_r[:,2], marker='o', color='blue')

# overplot galaxy selected using cut W1-J
for i, name in enumerate(match_wise['designation']):
    for galaxyname in galaxy['designation']:
        if name == galaxyname:
            plt.scatter(X_r[i,0], X_r[i,2], marker=".", color="red")



In [17]:
# plot PCA result
# Plot data using PC1 vs PC2
plt.scatter(X_r[:,0], X_r[:,3], marker='o', color='blue')

# overplot galaxy selected using cut W1-J
for i, name in enumerate(match_wise['designation']):
    for galaxyname in galaxy['designation']:
        if name == galaxyname:
            plt.scatter(X_r[i,0], X_r[i,3], marker=".", color="red")



In [18]:
# plot PCA result
# Plot data using PC1 vs PC2
plt.scatter(X_r[:,1], X_r[:,2], marker='o', color='blue')

# overplot galaxy selected using cut W1-J
for i, name in enumerate(match_wise['designation']):
    for galaxyname in galaxy['designation']:
        if name == galaxyname:
            plt.scatter(X_r[i,1], X_r[i,2], marker=".", color="red")



In [19]:
# plot PCA result
# Plot data using PC1 vs PC2
plt.scatter(X_r[:,1], X_r[:,3], marker='o', color='blue')

# overplot galaxy selected using cut W1-J
for i, name in enumerate(match_wise['designation']):
    for galaxyname in galaxy['designation']:
        if name == galaxyname:
            plt.scatter(X_r[i,1], X_r[i,3], marker=".", color="red")



In [20]:
# plot PCA result
# Plot data using PC1 vs PC2
plt.scatter(X_r[:,2], X_r[:,3], marker='o', color='blue')

# overplot galaxy selected using cut W1-J
for i, name in enumerate(match_wise['designation']):
    for galaxyname in galaxy['designation']:
        if name == galaxyname:
            plt.scatter(X_r[i,2], X_r[i,3], marker=".", color="red")


DBSCAN


In [21]:
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler

X = scale(joindata)

db = DBSCAN(eps=1, min_samples=3).fit(X)

core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
labels = db.labels_

# Number of clusters in labels, ignoring noise if present.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

print('Estimated number of clusters: %d' % n_clusters_)
#print(labels)


Estimated number of clusters: 5

Plot $W_1 - J$ vs $J$


In [22]:
# Black removed and is used for noise instead.
unique_labels = set(labels)
colors = [plt.cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))]
for k, col in zip(unique_labels, colors):
    if k == -1:
        # Black used for noise.
        col = [0, 0, 0, 1]

    class_member_mask = (labels == k)

    
    ## J vs J-W1
    
    xy = X[class_member_mask & core_samples_mask]
    plt.plot(xy[:, 3], xy[:, 0], 'o', markerfacecolor=tuple(col), markeredgecolor='k', markersize=14)

    xy = X[class_member_mask & ~core_samples_mask]
    plt.plot(xy[:, 3], xy[:, 0], 'o', markerfacecolor=tuple(col), markeredgecolor='k', markersize=8)
    
for i, name in enumerate(match_wise['designation']):
    for galaxyname in galaxy['designation']:
        if name == galaxyname:
            plt.plot(X[i,3], X[i,0], marker="X", markerfacecolor='red', markeredgecolor='none', markersize=8)  

plt.title('Estimated number of clusters: %d' % n_clusters_)
plt.show()


t-SNE


In [25]:
from sklearn.manifold import TSNE
X = scale(joindata)
X_r = TSNE(n_components=2).fit_transform(X)

In [26]:
plt.scatter(X_r[:,0], X_r[:,1], marker='o', color="blue")
for i, name in enumerate(match_wise['designation']):
    for galaxyname in galaxy['designation']:
        if name == galaxyname:
            plt.scatter(X_r[i,0], X_r[i,1], marker='.', color="red")



In [ ]: