In [1]:
import numpy as np
import numpy.ma as ma
import pandas as pd

from astropy import coordinates
from astropy.coordinates import match_coordinates_sky
import astropy.units as u
import astroquery
from astroquery.irsa import Irsa
from astroquery.vizier import Vizier
from astropy.table import Table, join, Column, hstack

from astropy.units import Quantity
from astroquery.gaia import Gaia

Irsa.ROW_LIMIT = -1
Vizier.ROW_LIMIT = -1

from astropy.io import ascii as asci

import matplotlib.pyplot as plt

# Suppress warnings. Comment this out if you wish to see the warning messages
import warnings
warnings.filterwarnings('ignore')

%matplotlib inline


Created TAP+ (v1.0.1) - Connection:
	Host: gea.esac.esa.int
	Use HTTPS: False
	Port: 80
	SSL Port: 443

Get the data


In [2]:
#obj = ["3C 454.3", 343.49062, 16.14821, 1.0]
#obj = ["PKS J0006-0623", 1.55789, -6.39315, 1]
obj = ["M87", 187.705930, 12.391123, 1.0]
#### name, ra, dec, radius of cone (in deg)

obj_name = obj[0]
obj_ra   = obj[1]
obj_dec  = obj[2]
cone_radius  = obj[3]

In [3]:
obj_coord = coordinates.SkyCoord(ra=obj_ra, dec=obj_dec, unit=(u.deg, u.deg), frame="icrs")

Try GAIA and 2MASS

  • in gaiadr1 and gaiadr2 they also provide 2mass, allwise, sdss "best neigbour" pairs
  • catalogs provided by GAIA:
    • gaiadr1.gaiadr1.allwise_original_valid
    • gaiadr1.gaiadr1.gsc23_original_valid
    • gaiadr1.gaiadr1.ppmxl_original_valid
    • gaiadr1.gaiadr1.sdssdr9_original_valid
    • gaiadr1.gaiadr1.tmass_original_valid
    • gaiadr1.gaiadr1.ucac4_original_valid
    • gaiadr1.gaiadr1.urat1_original_valid

In [4]:
cmd = "SELECT * FROM gaiadr2.gaia_source AS g, \
gaiadr2.tmass_best_neighbour AS tbest, \
gaiadr1.tmass_original_valid AS tmass \
WHERE g.source_id = tbest.source_id AND tbest.tmass_oid = tmass.tmass_oid AND CONTAINS(POINT('ICRS',g.ra,g.dec),\
CIRCLE('ICRS'," + str(obj_ra) + "," + str(obj_dec) + "," + str(cone_radius) + "))=1;"

print(cmd)

job1 = Gaia.launch_job_async(cmd, dump_to_file=True)
print (job1)


SELECT * FROM gaiadr2.gaia_source AS g, gaiadr2.tmass_best_neighbour AS tbest, gaiadr1.tmass_original_valid AS tmass WHERE g.source_id = tbest.source_id AND tbest.tmass_oid = tmass.tmass_oid AND CONTAINS(POINT('ICRS',g.ra,g.dec),CIRCLE('ICRS',187.70593,12.391123,1.0))=1;
Jobid: 1543155001763O
Phase: COMPLETED
Owner: None
Output file: async_20181125111001.vot
Results: None

In [5]:
gaia_2mass = job1.get_results()
print(len(gaia_2mass['source_id']))

# print(gaia_2mass['ra', 'dec', 'ra_2', 'dec_2', 'phot_g_mean_mag', 'j_m'])


3772

Try GAIA and WISE


In [6]:
cmd = "SELECT * FROM gaiadr2.gaia_source AS g, \
gaiadr2.allwise_best_neighbour AS wbest, \
gaiadr1.allwise_original_valid AS allwise \
WHERE g.source_id = wbest.source_id AND wbest.allwise_oid = allwise.allwise_oid AND CONTAINS(POINT('ICRS',g.ra,g.dec),\
CIRCLE('ICRS'," + str(obj_ra) + "," + str(obj_dec) + "," + str(cone_radius) + "))=1;"

print(cmd)

job2 = Gaia.launch_job_async(cmd, dump_to_file=True)
print(job2)


SELECT * FROM gaiadr2.gaia_source AS g, gaiadr2.allwise_best_neighbour AS wbest, gaiadr1.allwise_original_valid AS allwise WHERE g.source_id = wbest.source_id AND wbest.allwise_oid = allwise.allwise_oid AND CONTAINS(POINT('ICRS',g.ra,g.dec),CIRCLE('ICRS',187.70593,12.391123,1.0))=1;
Jobid: 1543155028052O
Phase: COMPLETED
Owner: None
Output file: async_20181125111028.vot
Results: None

In [7]:
gaia_wise = job2.get_results()
print(len(gaia_wise['source_id']))

# print(gaia_wise['ra', 'dec', 'ra_2', 'dec_2', 'phot_g_mean_mag'])


6001

Combine


In [8]:
sep_min = 1.0 * u.arcsec # minimum separation in arcsec

In [9]:
############
# Using GAIA coord
# ref_epoch: J2015.5

ra_2mass = gaia_2mass['ra']
dec_2mass = gaia_2mass['dec']
c_2mass = coordinates.SkyCoord(ra=ra_2mass, dec=dec_2mass, unit=(u.deg, u.deg), frame="icrs")

ra_wise  = gaia_wise['ra']
dec_wise = gaia_wise['dec']
c_wise = coordinates.SkyCoord(ra=ra_wise, dec=dec_wise, unit=(u.deg, u.deg), frame="icrs")

In [10]:
idx_2mass1, idx_wise1, d2d, d3d = c_wise.search_around_sky(c_2mass, sep_min)
# select only one nearest if there are more in the search reagion (minimum seperation parameter)!

print("GAIA + 2MASS + WISE (using gaia coord): ", len(idx_2mass1))


GAIA + 2MASS + WISE (using gaia coord):  3420

In [11]:
############
# Using 2MASS and WISE coord
# ra_2 and dec_2 here are RA and DEC of 2MASS and WISE accordingly

ra_2mass = gaia_2mass['ra_2']
dec_2mass = gaia_2mass['dec_2']
c_2mass = coordinates.SkyCoord(ra=ra_2mass, dec=dec_2mass, unit=(u.deg, u.deg), frame="icrs")

ra_wise  = gaia_wise['ra_2']
dec_wise = gaia_wise['dec_2']
c_wise = coordinates.SkyCoord(ra=ra_wise, dec=dec_wise, unit=(u.deg, u.deg), frame="icrs")

In [12]:
idx_2mass2, idx_wise2, d2d, d3d = c_wise.search_around_sky(c_2mass, sep_min)
# select only one nearest if there are more in the search reagion (minimum seperation parameter)!

print("GAIA + 2MASS + WISE (using 2MASS-WISE coord): ", len(idx_2mass2))


GAIA + 2MASS + WISE (using 2MASS-WISE coord):  3328

In [13]:
print("Confusion level :", abs(len(idx_2mass1) - len(idx_2mass2))/(0.5*(len(idx_2mass1) + len(idx_2mass2))) * 100,'%')


Confusion level : 2.7267338470657974 %

In [14]:
# Combine dataset
# GAIA-WISE
# GAIA-2MASS

# result of match with each other
gaia_2mass___wise = gaia_2mass[idx_2mass2] 
gaia_wise___2mass = gaia_wise[idx_wise2]

print("gaia_2mass___wise: ", len(gaia_2mass___wise))
print("gaia_wise___2mass: ", len(gaia_wise___2mass))


#Check
if np.all(gaia_2mass___wise['solution_id'] == gaia_wise___2mass['solution_id']):
    # remove duplicating data 
    # remove duplicating IDs in each dataset
    gaia_wise___2mass.remove_columns(['source_id_2', 'designation_2', 'allwise_oid_2'])
    gaia_2mass___wise.remove_columns(['source_id_2', 'designation_2', 'tmass_oid_2'])
    
    # remove gaia data from gaia_wise___2mass before combining
    gaia_wise___2mass.remove_columns(['solution_id', 'designation', 'source_id', 'random_index', 'ref_epoch', 'ra', 'ra_error', 'dec', 'dec_error', 'parallax', 'parallax_error', 'parallax_over_error', 'pmra', 'pmra_error', 'pmdec', 'pmdec_error', 'ra_dec_corr', 'ra_parallax_corr', 'ra_pmra_corr', 'ra_pmdec_corr', 'dec_parallax_corr', 'dec_pmra_corr', 'dec_pmdec_corr', 'parallax_pmra_corr', 'parallax_pmdec_corr', 'pmra_pmdec_corr', 'astrometric_n_obs_al', 'astrometric_n_obs_ac', 'astrometric_n_good_obs_al', 'astrometric_n_bad_obs_al', 'astrometric_gof_al', 'astrometric_chi2_al', 'astrometric_excess_noise', 'astrometric_excess_noise_sig', 'astrometric_params_solved', 'astrometric_primary_flag', 'astrometric_weight_al', 'astrometric_pseudo_colour', 'astrometric_pseudo_colour_error', 'mean_varpi_factor_al', 'astrometric_matched_observations', 'visibility_periods_used', 'astrometric_sigma5d_max', 'frame_rotator_object_type', 'matched_observations', 'duplicated_source', 'phot_g_n_obs', 'phot_g_mean_flux', 'phot_g_mean_flux_error', 'phot_g_mean_flux_over_error', 'phot_g_mean_mag', 'phot_bp_n_obs', 'phot_bp_mean_flux', 'phot_bp_mean_flux_error', 'phot_bp_mean_flux_over_error', 'phot_bp_mean_mag', 'phot_rp_n_obs', 'phot_rp_mean_flux', 'phot_rp_mean_flux_error', 'phot_rp_mean_flux_over_error', 'phot_rp_mean_mag', 'phot_bp_rp_excess_factor', 'phot_proc_mode', 'bp_rp', 'bp_g', 'g_rp', 'radial_velocity', 'radial_velocity_error', 'rv_nb_transits', 'rv_template_teff', 'rv_template_logg', 'rv_template_fe_h', 'phot_variable_flag', 'l', 'b', 'ecl_lon', 'ecl_lat', 'priam_flags', 'teff_val', 'teff_percentile_lower', 'teff_percentile_upper', 'a_g_val', 'a_g_percentile_lower', 'a_g_percentile_upper', 'e_bp_min_rp_val', 'e_bp_min_rp_percentile_lower', 'e_bp_min_rp_percentile_upper', 'flame_flags', 'radius_val', 'radius_percentile_lower', 'radius_percentile_upper', 'lum_val', 'lum_percentile_lower', 'lum_percentile_upper', 'datalink_url', 'epoch_photometry_url'])

    # select only important GAIA data from gaia_2mass___wise
#     ___gaia_2mass___wise = gaia_2mass___wise['solution_id', 'designation', 'source_id', 'random_index', 
#                                              'ref_epoch', 'ra', 'ra_error', 'dec', 'dec_error', 
#                                              'parallax', 'parallax_error', 'parallax_over_error', 
#                                              'pmra', 'pmra_error', 'pmdec', 'pmdec_error',
#                                              'phot_g_mean_mag', 'phot_bp_mean_mag', 'phot_rp_mean_mag']
    
    # merge (hstack)
    gaia_2mass_wise = hstack([gaia_2mass___wise, gaia_wise___2mass], 
                           table_names=['2mass', 'wise'],
                           uniq_col_name='{table_name}_{col_name}')
    
    num_gaia_2mass_wise = len(gaia_2mass_wise)
    
    print("Merge...")
    print(num_gaia_2mass_wise)
    
else:
    print("Big Error: not match")


gaia_2mass___wise:  3328
gaia_wise___2mass:  3328
Merge...
3328

In [15]:
print(gaia_2mass_wise.colnames)


['solution_id', 'designation', 'source_id', 'random_index', 'ref_epoch', 'ra', 'ra_error', 'dec', 'dec_error', 'parallax', 'parallax_error', 'parallax_over_error', 'pmra', 'pmra_error', 'pmdec', 'pmdec_error', 'ra_dec_corr', 'ra_parallax_corr', 'ra_pmra_corr', 'ra_pmdec_corr', 'dec_parallax_corr', 'dec_pmra_corr', 'dec_pmdec_corr', 'parallax_pmra_corr', 'parallax_pmdec_corr', 'pmra_pmdec_corr', 'astrometric_n_obs_al', 'astrometric_n_obs_ac', 'astrometric_n_good_obs_al', 'astrometric_n_bad_obs_al', 'astrometric_gof_al', 'astrometric_chi2_al', 'astrometric_excess_noise', 'astrometric_excess_noise_sig', 'astrometric_params_solved', 'astrometric_primary_flag', 'astrometric_weight_al', 'astrometric_pseudo_colour', 'astrometric_pseudo_colour_error', 'mean_varpi_factor_al', 'astrometric_matched_observations', 'visibility_periods_used', 'astrometric_sigma5d_max', 'frame_rotator_object_type', 'matched_observations', 'duplicated_source', 'phot_g_n_obs', 'phot_g_mean_flux', 'phot_g_mean_flux_error', 'phot_g_mean_flux_over_error', 'phot_g_mean_mag', 'phot_bp_n_obs', 'phot_bp_mean_flux', 'phot_bp_mean_flux_error', 'phot_bp_mean_flux_over_error', 'phot_bp_mean_mag', 'phot_rp_n_obs', 'phot_rp_mean_flux', 'phot_rp_mean_flux_error', 'phot_rp_mean_flux_over_error', 'phot_rp_mean_mag', 'phot_bp_rp_excess_factor', 'phot_proc_mode', 'bp_rp', 'bp_g', 'g_rp', 'radial_velocity', 'radial_velocity_error', 'rv_nb_transits', 'rv_template_teff', 'rv_template_logg', 'rv_template_fe_h', 'phot_variable_flag', 'l', 'b', 'ecl_lon', 'ecl_lat', 'priam_flags', 'teff_val', 'teff_percentile_lower', 'teff_percentile_upper', 'a_g_val', 'a_g_percentile_lower', 'a_g_percentile_upper', 'e_bp_min_rp_val', 'e_bp_min_rp_percentile_lower', 'e_bp_min_rp_percentile_upper', 'flame_flags', 'radius_val', 'radius_percentile_lower', 'radius_percentile_upper', 'lum_val', 'lum_percentile_lower', 'lum_percentile_upper', 'datalink_url', 'epoch_photometry_url', '2mass_original_ext_source_id', '2mass_angular_distance', '2mass_gaia_astrometric_params', 'tmass_oid', '2mass_number_of_neighbours', '2mass_number_of_mates', '2mass_best_neighbour_multiplicity', '2mass_ph_qual', '2mass_ra_2', '2mass_dec_2', 'err_maj', 'err_min', 'err_ang', 'j_m', 'j_msigcom', 'h_m', 'h_msigcom', 'ks_m', 'ks_msigcom', 'ext_key', 'j_date', 'wise_original_ext_source_id', 'wise_angular_distance', 'wise_gaia_astrometric_params', 'allwise_oid', 'wise_number_of_neighbours', 'wise_number_of_mates', 'wise_best_neighbour_multiplicity', 'wise_ra_2', 'wise_dec_2', 'ra_error_2', 'dec_error_2', 'radec_co_error', 'w1mpro', 'w1mpro_error', 'w2mpro', 'w2mpro_error', 'w3mpro', 'w3mpro_error', 'w4mpro', 'w4mpro_error', 'cc_flags', 'ext_flag', 'var_flag', 'wise_ph_qual', 'w1mjd_mean', 'w2mjd_mean', 'w3mjd_mean', 'w4mjd_mean', 'w1gmag', 'w1gmag_error', 'w2gmag', 'w2gmag_error', 'w3gmag', 'w3gmag_error', 'w4gmag', 'w4gmag_error', 'tmass_key']

Make tracer of galaxy and star


In [16]:
# Make a tracer of galaxies using W1-J cut
cutw1j = -1.7
galaxy_tracer = Column((gaia_2mass_wise['w1mpro'] - gaia_2mass_wise['j_m']) < cutw1j, name='galaxy_tracer')
gaia_2mass_wise.add_column(galaxy_tracer)

print("Number of galaxy tracer: ", len(np.where(gaia_2mass_wise['galaxy_tracer']==True)[0]))


Number of galaxy tracer:  147

In [17]:
# Make a tracer of stars using large proper motion
cutpm = 10
star_tracer = Column(np.sqrt(gaia_2mass_wise['pmra']**2 + gaia_2mass_wise['pmdec']**2) > cutpm, name='star_tracer')
gaia_2mass_wise.add_column(star_tracer)

print("Number of star tracer: ", len(np.where(gaia_2mass_wise['star_tracer']==True)[0]))


Number of star tracer:  2109

Plot W1-J vs J


In [18]:
# all
plt.scatter(gaia_2mass_wise['w1mpro'] - gaia_2mass_wise['j_m'], gaia_2mass_wise['j_m'], 
            marker='.', color='gray', alpha=0.9)

# galaxy tracer
g_tracer = gaia_2mass_wise[gaia_2mass_wise['galaxy_tracer']]
plt.scatter(g_tracer['w1mpro'] - g_tracer['j_m'], g_tracer['j_m'], 
            marker='s', color='red', alpha=0.1)

# star tracer
s_tracer = gaia_2mass_wise[gaia_2mass_wise['star_tracer']]
plt.scatter(s_tracer['w1mpro'] - s_tracer['j_m'], s_tracer['j_m'], 
            marker='o', color='blue', alpha=0.1)

plt.gca().invert_yaxis()



In [19]:
# list of parameters
params = ['j_m', 'h_m', 'ks_m', 
          'w1mpro', 'w2mpro', 'w3mpro', 'w4mpro', 
          'phot_g_mean_mag', 'phot_bp_mean_mag', 'phot_rp_mean_mag', 'bp_g', 'g_rp']

Check Masked Value, replace with mean value of parameters


In [20]:
## check Masked value
# unmasked value = 0 
# NaN, INF

ma_p = np.zeros(len(params))
print("Number of masked value (NULL): ")
print(gaia_2mass_wise['w4mpro'][gaia_2mass_wise['w4mpro'] == ma.masked].data)
for i,p in enumerate(params):
    mean_p = np.mean(gaia_2mass_wise[p])
    
    idx_masked = np.where(gaia_2mass_wise[p] == ma.masked)[0]
    
    gaia_2mass_wise.add_column((gaia_2mass_wise[p] == ma.masked).data, name=p+'_mask')
    
    ma_p[i] = len(idx_masked)
    for idx in idx_masked:
        gaia_2mass_wise[p] = mean_p

    print(p, '  ', ma_p[i], '  ', ma_p[i]/num_gaia_2mass_wise*100, '%')


Number of masked value (NULL): 
[]
j_m    0.0    0.0 %
h_m    0.0    0.0 %
ks_m    0.0    0.0 %
w1mpro    0.0    0.0 %
w2mpro    0.0    0.0 %
w3mpro    0.0    0.0 %
w4mpro    0.0    0.0 %
phot_g_mean_mag    0.0    0.0 %
phot_bp_mean_mag    97.0    2.9146634615384617 %
phot_rp_mean_mag    97.0    2.9146634615384617 %
bp_g    97.0    2.9146634615384617 %
g_rp    97.0    2.9146634615384617 %

Select variables


In [21]:
# only color
variables = np.array([gaia_2mass_wise['j_m'] - gaia_2mass_wise['h_m'], 
                      gaia_2mass_wise['j_m'] - gaia_2mass_wise['ks_m'], 
                      gaia_2mass_wise['h_m'] - gaia_2mass_wise['ks_m'],
                      gaia_2mass_wise['w1mpro'] - gaia_2mass_wise['w2mpro'],
                      gaia_2mass_wise['w1mpro'] - gaia_2mass_wise['w3mpro'],
                      gaia_2mass_wise['w1mpro'] - gaia_2mass_wise['w4mpro'],
                      gaia_2mass_wise['w2mpro'] - gaia_2mass_wise['w3mpro'],
                      gaia_2mass_wise['w2mpro'] - gaia_2mass_wise['w4mpro'],
                      gaia_2mass_wise['w3mpro'] - gaia_2mass_wise['w4mpro'],
                      gaia_2mass_wise['bp_g'],
                      gaia_2mass_wise['g_rp'],
                      gaia_2mass_wise['j_m'] - gaia_2mass_wise['w1mpro'], 
                      gaia_2mass_wise['j_m'] - gaia_2mass_wise['w2mpro'], 
                      gaia_2mass_wise['j_m'] - gaia_2mass_wise['w3mpro'], 
                      gaia_2mass_wise['j_m'] - gaia_2mass_wise['w4mpro'], 
                      gaia_2mass_wise['h_m'] - gaia_2mass_wise['w1mpro'], 
                      gaia_2mass_wise['h_m'] - gaia_2mass_wise['w2mpro'], 
                      gaia_2mass_wise['h_m'] - gaia_2mass_wise['w3mpro'], 
                      gaia_2mass_wise['h_m'] - gaia_2mass_wise['w4mpro'],
                      gaia_2mass_wise['ks_m'] - gaia_2mass_wise['w1mpro'], 
                      gaia_2mass_wise['ks_m'] - gaia_2mass_wise['w2mpro'], 
                      gaia_2mass_wise['ks_m'] - gaia_2mass_wise['w3mpro'], 
                      gaia_2mass_wise['ks_m'] - gaia_2mass_wise['w4mpro']
                     ])

# variables = np.array([gaia_2mass_wise['j_m'] - gaia_2mass_wise['w1mpro'], 
#                       gaia_2mass_wise['j_m'] - gaia_2mass_wise['w2mpro'], 
#                       gaia_2mass_wise['j_m'] - gaia_2mass_wise['w3mpro'], 
#                       gaia_2mass_wise['j_m'] - gaia_2mass_wise['w4mpro'], 
#                       gaia_2mass_wise['h_m'] - gaia_2mass_wise['w1mpro'], 
#                       gaia_2mass_wise['h_m'] - gaia_2mass_wise['w2mpro'], 
#                       gaia_2mass_wise['h_m'] - gaia_2mass_wise['w3mpro'], 
#                       gaia_2mass_wise['h_m'] - gaia_2mass_wise['w4mpro'],
#                       gaia_2mass_wise['ks_m'] - gaia_2mass_wise['w1mpro'], 
#                       gaia_2mass_wise['ks_m'] - gaia_2mass_wise['w2mpro'], 
#                       gaia_2mass_wise['ks_m'] - gaia_2mass_wise['w3mpro'], 
#                       gaia_2mass_wise['ks_m'] - gaia_2mass_wise['w4mpro'],
#                       gaia_2mass_wise['j_m'] - gaia_2mass_wise['phot_g_mean_mag'], 
#                       gaia_2mass_wise['j_m'] - gaia_2mass_wise['phot_bp_mean_mag'], 
#                       gaia_2mass_wise['j_m'] - gaia_2mass_wise['phot_rp_mean_mag'], 
#                       gaia_2mass_wise['h_m'] - gaia_2mass_wise['phot_g_mean_mag'], 
#                       gaia_2mass_wise['h_m'] - gaia_2mass_wise['phot_bp_mean_mag'], 
#                       gaia_2mass_wise['h_m'] - gaia_2mass_wise['phot_rp_mean_mag'],
#                       gaia_2mass_wise['ks_m'] - gaia_2mass_wise['phot_g_mean_mag'], 
#                       gaia_2mass_wise['ks_m'] - gaia_2mass_wise['phot_bp_mean_mag'], 
#                       gaia_2mass_wise['ks_m'] - gaia_2mass_wise['phot_rp_mean_mag'],
#                       gaia_2mass_wise['w1mpro'] - gaia_2mass_wise['phot_g_mean_mag'],
#                       gaia_2mass_wise['w1mpro'] - gaia_2mass_wise['phot_bp_mean_mag'],
#                       gaia_2mass_wise['w1mpro'] - gaia_2mass_wise['phot_rp_mean_mag'],
#                       gaia_2mass_wise['w2mpro'] - gaia_2mass_wise['phot_g_mean_mag'],
#                       gaia_2mass_wise['w2mpro'] - gaia_2mass_wise['phot_bp_mean_mag'],
#                       gaia_2mass_wise['w2mpro'] - gaia_2mass_wise['phot_rp_mean_mag'],
#                       gaia_2mass_wise['w3mpro'] - gaia_2mass_wise['phot_g_mean_mag'],
#                       gaia_2mass_wise['w3mpro'] - gaia_2mass_wise['phot_bp_mean_mag'],
#                       gaia_2mass_wise['w3mpro'] - gaia_2mass_wise['phot_rp_mean_mag'],
#                       gaia_2mass_wise['w4mpro'] - gaia_2mass_wise['phot_g_mean_mag'],
#                       gaia_2mass_wise['w4mpro'] - gaia_2mass_wise['phot_bp_mean_mag'],
#                       gaia_2mass_wise['w4mpro'] - gaia_2mass_wise['phot_rp_mean_mag']
#                      ])


# variables = np.array([gaia_2mass_wise['j_m'] - gaia_2mass_wise['w1mpro'], 
#                       gaia_2mass_wise['j_m'] - gaia_2mass_wise['w2mpro'], 
#                       gaia_2mass_wise['j_m'] - gaia_2mass_wise['w3mpro'],
#                       gaia_2mass_wise['h_m'] - gaia_2mass_wise['w1mpro'], 
#                       gaia_2mass_wise['h_m'] - gaia_2mass_wise['w2mpro'], 
#                       gaia_2mass_wise['h_m'] - gaia_2mass_wise['w3mpro'],
#                       gaia_2mass_wise['ks_m'] - gaia_2mass_wise['w1mpro'], 
#                       gaia_2mass_wise['ks_m'] - gaia_2mass_wise['w2mpro'], 
#                       gaia_2mass_wise['ks_m'] - gaia_2mass_wise['w3mpro'],
#                       gaia_2mass_wise['j_m'] - gaia_2mass_wise['phot_g_mean_mag']
#                      ])

# variables = np.array([gaia_2mass_wise['j_m'] - gaia_2mass_wise['w1mpro'], 
#                       gaia_2mass_wise['j_m'] - gaia_2mass_wise['phot_g_mean_mag'],
#                       gaia_2mass_wise['w1mpro'] - gaia_2mass_wise['phot_g_mean_mag']
#                      ])


variables = variables.T

Analysis

PCA


In [22]:
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale
# from sklearn.impute import SimpleImputer
# imp = SimpleImputer(missing_values=np.nan, strategy='mean')


# X = imp.transform(variables)
X = scale(variables)

n_pcacomp = 4
pca = PCA(n_components=n_pcacomp)
X_pca = pca.fit(X).transform(X)

print(pca.components_)
print(pca.explained_variance_)


[[-1.02082077e-01 -1.41925725e-01 -9.12092828e-02 -8.40596392e-02
  -2.89966541e-01 -2.90501449e-01 -2.79062490e-01 -2.81560413e-01
  -1.50066616e-01 -4.55775451e-51 -2.36759157e-50 -1.29100316e-01
  -1.31963367e-01 -3.05427826e-01 -3.08349409e-01 -9.18252490e-02
  -1.03874893e-01 -3.02492038e-01 -3.05038793e-01 -2.14151698e-02
  -5.66018146e-02 -2.95536113e-01 -2.97799752e-01]
 [ 1.29166822e-01  2.05607751e-01  1.47674867e-01  2.60818421e-01
  -1.17220016e-01 -1.44232222e-01 -1.56302188e-01 -1.75206115e-01
  -1.27724590e-01  3.27812287e-50  2.82859360e-49  3.56628800e-01
   3.74396532e-01  3.62780353e-03 -4.53779521e-02  3.56064534e-01
   3.76293501e-01 -1.89419583e-02 -6.55531311e-02  2.78650146e-01
   3.37010177e-01 -5.24716718e-02 -9.37213511e-02]
 [-4.70370595e-02 -4.90044819e-01 -5.68487225e-01  5.31938860e-02
   1.64524566e-02  2.69663054e-02  8.68998662e-03  2.08234569e-02
   3.47506696e-02  1.88785127e-46  9.70534914e-46 -6.30698213e-02
  -3.19880937e-02 -4.55545451e-03  9.40756829e-03 -4.67583253e-02
  -1.38441950e-02  3.50419724e-03  1.64932798e-02  4.84804693e-01
   3.94220427e-01  1.29541231e-01  1.17994912e-01]
 [-7.74934972e-01 -2.47577629e-01  3.36379203e-01  9.57121372e-02
   3.43613829e-02  1.42272957e-02  2.04188779e-02  3.06842695e-03
  -3.27631511e-02  1.49890867e-44  7.71762692e-44 -1.97584723e-01
  -1.24654573e-01 -2.99900535e-02 -3.71036146e-02  2.74039288e-01
   2.46887221e-01  1.05011900e-01  7.25827472e-02  2.96962265e-03
   4.81619436e-02  3.51221025e-02  1.49441784e-02]]
[10.19502464  5.66501508  1.97307518  1.32077966]

In [23]:
# change X_pca to Table
pca_names = ['PC'+str(i) for i in range(n_pcacomp)]
table_pca = Table(X_pca, names=pca_names)

# add X_pca table to DATA table
gaia_2mass_wise.add_columns(table_pca.columns.values())

In [24]:
# PC_plot 
def plot_pc(pc_x='PC0', pc_y='PC1'):
    plt.scatter(gaia_2mass_wise[pc_x], gaia_2mass_wise[pc_y], marker='.', color='gray', alpha=0.9)

    ## Overplot from tracer
    g_tracer = gaia_2mass_wise[gaia_2mass_wise['galaxy_tracer']]
    plt.scatter(g_tracer[pc_x], g_tracer[pc_y], marker='s', color='red', alpha=0.1)

    s_tracer = gaia_2mass_wise[gaia_2mass_wise['star_tracer']]
    plt.scatter(s_tracer[pc_x], s_tracer[pc_y], marker='s', color='blue', alpha=0.1)
    
    plt.xlabel(pc_x)
    plt.ylabel(pc_y)

In [25]:
plot_pc('PC0', 'PC1')



In [26]:
plot_pc('PC0', 'PC2')



In [27]:
plot_pc('PC0', 'PC3')



In [28]:
plot_pc('PC1', 'PC2')



In [29]:
plot_pc('PC1', 'PC3')



In [30]:
plot_pc('PC2', 'PC3')


t-SNE


In [31]:
from sklearn.manifold import TSNE
X = scale(variables)
n_tsnecomp = 2
X_tsne = TSNE(n_components=n_tsnecomp).fit_transform(X)

In [32]:
# change X_tsne to table
tsne_names = ['tSNE'+str(i) for i in range(n_tsnecomp)]
table_tsne = Table(X_tsne, names=tsne_names)

# add X_pca table to DATA table
gaia_2mass_wise.add_columns(table_tsne.columns.values())

In [33]:
def plot_tsne(pc_x='tSNE0', pc_y='tSNE1'):
    plt.scatter(gaia_2mass_wise[pc_x], gaia_2mass_wise[pc_y], marker='.', color='gray', alpha=0.9)

    ## Overplot from tracer
    g_tracer = gaia_2mass_wise[gaia_2mass_wise['galaxy_tracer']]
    plt.scatter(g_tracer[pc_x], g_tracer[pc_y], marker='.', color='red', alpha=0.1)

    s_tracer = gaia_2mass_wise[gaia_2mass_wise['star_tracer']]
    plt.scatter(s_tracer[pc_x], s_tracer[pc_y], marker='.', color='blue', alpha=0.1)
    
    m_tracer = gaia_2mass_wise[gaia_2mass_wise['phot_bp_mean_mag_mask']]
    plt.scatter(m_tracer[pc_x], m_tracer[pc_y], marker='.', color='green', alpha=0.1)
    
    plt.xlabel(pc_x)
    plt.ylabel(pc_y)

In [34]:
plot_tsne()


DBSCAN for tSNE result


In [35]:
from sklearn.cluster import DBSCAN
from scipy.spatial import Voronoi, voronoi_plot_2d
from scipy.stats import sigmaclip

In [36]:
X_dbscan = np.array([gaia_2mass_wise['tSNE0'], gaia_2mass_wise['tSNE1']]).T

Voronoi to find best eps


In [37]:
vor = Voronoi(X_dbscan)

voronoi_plot_2d(vor, show_points=False, point_size=1, show_vertices=False)
plt.show()



In [38]:
array_length = []

for vpair in vor.ridge_vertices:
    if vpair[0] >= 0 and vpair[1] >= 0:
        v0 = vor.vertices[vpair[0]]
        v1 = vor.vertices[vpair[1]]
        
        length = np.sqrt((v0[0]-v1[0])**2 + (v0[1]-v1[1])**2)
        array_length.append(length)

# plt.hist(array_length, bins=25);
# Sigma clip: 5 sigma, before calculating median
arr, low, upp = sigmaclip(array_length, 5, 5)

plt.hist(arr, bins=25);
        
print("Median: ", np.median(arr))
print("Mean: ", np.mean(arr))


Median:  1.0086923474999017
Mean:  1.1949036320147106

In [39]:
hist, bin_edges = np.histogram(arr, density=True)

In [40]:
# just to easily estimate
# often need to be adjusted manually
epsilon = 3 * np.median(arr) 
minsamp = 6
print("DBSCAN Epsilon: ", epsilon)
print("DBSCAN minPts: ", minsamp)

db = DBSCAN(eps=epsilon, min_samples=minsamp).fit(X_dbscan)

core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
labels = db.labels_

# Number of clusters in labels, ignoring noise if present.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
n_noise_ = list(labels).count(-1)

print('Estimated number of clusters: %d' % n_clusters_)
print('Estimated number of noise points: %d' % n_noise_)

# print(labels)


DBSCAN Epsilon:  3.026077042499705
DBSCAN minPts:  6
Estimated number of clusters: 7
Estimated number of noise points: 38

In [41]:
# add X_pca table to DATA table
gaia_2mass_wise.add_column(Column(labels), name="dbscan_label")

In [42]:
####
# Black removed and is used for noise instead.
unique_labels = set(labels)
colors = [plt.cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))]

plt.figure(figsize=(12, 7))
for k, col in zip(unique_labels, colors):
    if k == -1:
        # Black used for noise.
        col = [0, 0, 0, 1]

    class_member_mask = (labels == k)

    xy = X_dbscan[class_member_mask & core_samples_mask]
    plt.plot(xy[:, 0], xy[:, 1], 'o', 
             markerfacecolor=tuple(col), markeredgecolor='black', 
             markersize=8, alpha=0.5, label=k)

    xy = X_dbscan[class_member_mask & ~core_samples_mask]
    plt.plot(xy[:, 0], xy[:, 1], 'o', 
             markerfacecolor=tuple(col), markeredgecolor='black', 
             markersize=4, alpha=0.5)
    
plt.legend(loc="best")


Out[42]:
<matplotlib.legend.Legend at 0x7f3497f8b278>

Plot t-SNE result based on other parameters


In [43]:
def plot_tsne_par(pc_x='tSNE0', pc_y='tSNE1', par=gaia_2mass_wise['w1mpro'] - gaia_2mass_wise['j_m']):
    co = (((par - np.min(par))) / (np.max(par) - np.min(par)))
    plt.scatter(gaia_2mass_wise[pc_x], gaia_2mass_wise[pc_y], 
                marker='o', s=10,
                c=co, cmap=plt.cm.get_cmap('plasma'), alpha=0.5)
    
    plt.xlim([np.min(gaia_2mass_wise[pc_x])-10, np.max(gaia_2mass_wise[pc_x])+10])
    plt.ylim([np.min(gaia_2mass_wise[pc_y])-10, np.max(gaia_2mass_wise[pc_y])+10])

In [44]:
plot_tsne_par(par=gaia_2mass_wise['w1mpro'] - gaia_2mass_wise['j_m'])



In [45]:
plot_tsne_par(par=gaia_2mass_wise['phot_g_mean_mag'])



In [46]:
plot_tsne_par(par=gaia_2mass_wise['j_m'])



In [47]:
plot_tsne_par(par=gaia_2mass_wise['w1mpro'])



In [48]:
plot_tsne_par(par=gaia_2mass_wise['pmra'])



In [49]:
plot_tsne_par(par=gaia_2mass_wise['parallax'])



In [50]:
plot_tsne_par(par=gaia_2mass_wise['bp_rp'])



In [51]:
plot_tsne_par(par=gaia_2mass_wise['bp_g'])



In [52]:
plot_tsne_par(par=gaia_2mass_wise['g_rp'])



In [53]:
plot_tsne_par(par=(gaia_2mass_wise['phot_g_mean_mag'] - gaia_2mass_wise['phot_rp_mean_mag']))



In [54]:
plot_tsne_par(par=gaia_2mass_wise['teff_val'])



In [55]:
plot_tsne_par(par=gaia_2mass_wise['radius_val'])



In [56]:
plot_tsne_par(par=gaia_2mass_wise['rv_template_logg'])



In [57]:
plot_tsne_par(par=gaia_2mass_wise['rv_template_fe_h'])



In [58]:
plot_tsne_par(par=gaia_2mass_wise['dec'])



In [59]:
plot_tsne_par(par=gaia_2mass_wise['b'])



In [60]:
plot_tsne_par(par=gaia_2mass_wise['ecl_lat'])



In [61]:
plot_tsne_par(par=gaia_2mass_wise['radial_velocity'])



In [62]:
plot_tsne_par(par=gaia_2mass_wise['astrometric_n_obs_ac'])



In [63]:
## Select only galaxy based on t-SNE

In [64]:
galaxy_tsne = gaia_2mass_wise[gaia_2mass_wise['dbscan_label']==2]

In [65]:
def plot_galaxy_tsne_par(pc_x='tSNE0', pc_y='tSNE1', par=galaxy_tsne['w1mpro'] - galaxy_tsne['j_m']):
    co = (((par - np.min(par))) / (np.max(par) - np.min(par)))
    plt.scatter(galaxy_tsne[pc_x], galaxy_tsne[pc_y], 
                marker='o', s=10,
                c=co, cmap=plt.cm.get_cmap('plasma'), alpha=0.5)
    
    plt.xlim([np.min(galaxy_tsne[pc_x])-10, np.max(galaxy_tsne[pc_x])+10])
    plt.ylim([np.min(galaxy_tsne[pc_y])-10, np.max(galaxy_tsne[pc_y])+10])

In [66]:
plot_galaxy_tsne_par(par=galaxy_tsne['w1mpro'] - galaxy_tsne['j_m'])



In [67]:
plot_galaxy_tsne_par(par=galaxy_tsne['j_m'])



In [68]:
plot_galaxy_tsne_par(par=galaxy_tsne['parallax'])



In [69]:
plot_galaxy_tsne_par(par=galaxy_tsne['pmdec'])



In [70]:
plot_galaxy_tsne_par(par=galaxy_tsne['g_rp'])



In [71]:
plot_galaxy_tsne_par(par=(galaxy_tsne['w1mpro'] - galaxy_tsne['j_m'])%-1.7)



In [72]:
plt.plot(galaxy_tsne['parallax'], galaxy_tsne['parallax_error'], 'r.');



In [73]:
plt.hist(galaxy_tsne['w1mpro'] - galaxy_tsne['j_m'], bins=20);



In [74]:
plt.plot(gaia_2mass_wise['bp_g'], gaia_2mass_wise['g_rp'], 'o')
plt.plot(galaxy_tsne['bp_g'], galaxy_tsne['g_rp'], '.');



In [75]:
plt.plot(gaia_2mass_wise['j_m'] - gaia_2mass_wise['w2mpro'], 
         gaia_2mass_wise['w2mpro'] - gaia_2mass_wise['phot_g_mean_mag'], 'o')
plt.plot(galaxy_tsne['j_m'] - galaxy_tsne['w2mpro'], 
         galaxy_tsne['w2mpro'] - galaxy_tsne['phot_g_mean_mag'], '.')


Out[75]:
[<matplotlib.lines.Line2D at 0x7f3497d3bb00>]

In [76]:
plt.plot(gaia_2mass_wise['j_m'] - gaia_2mass_wise['w2mpro'], 
         gaia_2mass_wise['w2mpro'] - gaia_2mass_wise['phot_g_mean_mag'], 'o')
plt.plot(galaxy_tsne['j_m'] - galaxy_tsne['w2mpro'], 
         galaxy_tsne['w2mpro'] - galaxy_tsne['phot_g_mean_mag'], '.')


Out[76]:
[<matplotlib.lines.Line2D at 0x7f34979f5550>]