Run the x-match using the GC method

The "great-circle" x-matching method was implemented on booq, but merging the matched catalog with the A,B source ones is showing some problems. We go now through a step-by-step onto the process to find out what's going on.



In [1]:

    
import booq



In [2]:

    
%matplotlib inline

from matplotlib import pyplot as plt
from matplotlib import cm

import numpy

plt.rcParams['figure.figsize'] = (10.0, 10.0)



In [3]:

    
# let's define some parameters for our mock catalogs

# to be real fake, we have to fake evrything... :(
# let's start with the size of the supposed images
# from where the sources are coming from...
sx = 300
sy = 300

# number of sources to be relative to the size of the image
num_sources = int( 0.1 * (sx*sy)/(sx+sy) )

# and, say there is an error radius
error_radius = 20



In [20]:

    
from booq.data import mock
from booq import coordinates

# Generate catalog A
A_image_catalog = mock.D2.generate_catalog(num_sources, error_radius, (sy,sx))

A = A_image_catalog.rename(columns={'ID':'id'})
A.loc[:,'ra']  = coordinates.pixel_to_sky(A_image_catalog.x, 0, 1.0/sx)
A.loc[:,'dec'] = coordinates.pixel_to_sky(A_image_catalog.y, 0, 1.0/sy)

# Generate catalog B
B_image_catalog = mock.D2.generate_catalog_around(A_image_catalog.sample(frac=0.75),
                                                  0.75, (sy,sx))

B = B_image_catalog.rename(columns={'ID':'id'})
B.loc[:,'ra']  = coordinates.pixel_to_sky(B_image_catalog.x, 0, 1.0/sx)
B.loc[:,'dec'] = coordinates.pixel_to_sky(B_image_catalog.y, 0, 1.0/sy)



In [21]:

    
from matplotlib import pyplot as plt
from booq import image
imgA = image.pil.draw_sources(A,(sy,sx),colormap='blue')
imgA = image.pil.pil_2_array(imgA)
plt.imshow(image.rgb_2_mono(imgA,'B'), cmap='Blues')









    Out[21]:





<matplotlib.image.AxesImage at 0x7f9a867efe50>



In [22]:

    
from matplotlib import pyplot as plt
from booq import image
imgB = image.pil.draw_sources(B,(sy,sx),colormap='red')
imgB = image.pil.pil_2_array(imgB)
plt.imshow(image.rgb_2_mono(imgB,'R'), cmap='Reds')









    Out[22]:





<matplotlib.image.AxesImage at 0x7f9a86727290>



In [23]:

    
plt.imshow( image.pil.array_2_pil( imgA+imgB ) )









    Out[23]:





<matplotlib.image.AxesImage at 0x7f9a8665a1d0>



In [24]:

    
A.describe()



In [25]:

    
B.describe()



In [26]:

    
from booq.coordinates.skycoords import skycoords
A_coord = skycoords(A.ra.values, A.dec.values)
B_coord = skycoords(B.ra.values, B.dec.values)



In [27]:

    
from booq.catalogs import xmatch
_Aidx,_Asep = xmatch.nn(A_coord,B_coord)
_Bidx,_Bsep = xmatch.nn(B_coord,A_coord)
_sep = numpy.append(_Asep.arcmin,_Bsep.arcmin)
from booq.utils import stats
_sts = stats.basic(_sep)
from booq import utils
utils.pprint(_sts)
del _Aidx,_Asep,_Bidx,_Bsep,_sep,_sts









    



length : 26
min : 1.39982213021
max : 32.0006233512
mean : 8.72629855259
std : 8.0704735842
quantiles : (4.0049966738216174, 6.105388417332942, 9.8974916837027163)



In [28]:

    
from astropy.units import Quantity
radius = Quantity(8,'arcmin')



In [29]:

    
from booq.catalogs import xmatch
match_A_gc_idx, match_B_gc_idx, match_gc_sep = xmatch.gc(A_coord, B_coord, radius)



In [30]:

    
from booq import utils
utils.pprint(utils.stats.basic(match_gc_sep.arcmin))









    



length : 9
min : 1.39982213021
max : 7.62364710421
mean : 4.24802897442
std : 1.89081900191
quantiles : (2.9998976288148471, 4.3862818990480994, 5.0034464770214324)



In [31]:

    
from booq.pipelines import xmatch_nn
reload(xmatch_nn)
df_matched_idx = xmatch_nn.select_pairs(match_A_gc_idx, match_B_gc_idx, match_gc_sep.arcmin)
df_matched_idx









    Out[31]:






  
    
      
      A_idx
      B_duplicates
      B_idx
      separation
      to_drop
    
  
  
    
      0
      0
      None
      7
      4.999999
      False
    
    
      1
      3
      None
      2
      2.999898
      False
    
    
      2
      6
      None
      9
      5.003446
      False
    
    
      3
      7
      None
      3
      4.386282
      False
    
    
      4
      8
      None
      0
      7.623647
      False
    
    
      5
      9
      None
      6
      1.399822
      False
    
    
      6
      11
      None
      10
      6.105388
      False
    
    
      7
      12
      None
      7
      4.004997
      False
    
    
      8
      14
      None
      3
      1.708781
      False



In [32]:

    
# def recover_id(indexes,df,id_column):
# #     print "input:",indexes
#     inds = [ int(n) for n in indexes.split(';') ]
# #     print "toint:",inds
#     from numpy import unique
#     ids = ';'.join(unique([ str(n) for n in df.loc[inds,id_column] ]))
# #     print "toids:",ids
# #     print
#     return ids

# def pkg_recover_id(df,id_column):
#     def pkg(indexes,df=df,id_column=id_column):
#         return recover_id(indexes,df,id_column)
#     return pkg

# recover_id_pkg = pkg_recover_id(B_matched,'id')



In [33]:

    
# AB_match = DataFrame( {'dist':df_matched_idx.separation.values},
#                         index=df_matched_idx.A_idx.values )
# if 'B_duplicates' in df_matched_idx.columns:
#     for i,row in df_matched_idx.iterrows():
#         entry = row.B_duplicates
#         if entry is None:
#             continue
#         inds = [ int(n) for n in entry.split(';') ]
#         ids = ';'.join([ str(n) for n in B.loc[inds,'id'] ])
#         df_matched_idx.loc[i,'B_duplicates'] = ids
#     AB_match.loc[:,'B_duplicates'] = df_matched_idx.B_duplicates.values

# B_matched = B.iloc[df_matched_idx.B_idx]
# B_matched.loc[:,'A_idx'] = df_matched_idx.A_idx.values

# B_matched = B_matched.set_index('A_idx')

# from pandas import DataFrame,concat
# df = concat([ A, B_matched, AB_match ], axis=1, keys=['A','B','AB'])



In [34]:

    
df = matched_catalog = xmatch_nn.merge_catalogs(A, B, df_matched_idx)



In [35]:

    
df









    Out[35]:






  
    
      
      A
      B
      AB
    
    
      
      id
      r
      x
      y
      ra
      dec
      id
      r
      x
      y
      ra
      dec
      dist
      B_duplicates
    
  
  
    
      0
      22
      20
      97
      25
      0.323333
      0.083333
      1.0
      20.0
      112.0
      5.0
      0.373333
      0.016667
      4.999999
      None
    
    
      1
      18
      20
      12
      267
      0.040000
      0.890000
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      None
    
    
      2
      24
      20
      255
      58
      0.850000
      0.193333
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      None
    
    
      3
      14
      20
      128
      142
      0.426667
      0.473333
      13.0
      20.0
      113.0
      142.0
      0.376667
      0.473333
      2.999898
      None
    
    
      4
      29
      20
      129
      42
      0.430000
      0.140000
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      None
    
    
      5
      23
      20
      272
      6
      0.906667
      0.020000
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      None
    
    
      6
      11
      20
      268
      255
      0.893333
      0.850000
      12.0
      20.0
      293.0
      256.0
      0.976667
      0.853333
      5.003446
      None
    
    
      7
      7
      20
      69
      210
      0.230000
      0.700000
      11.0
      20.0
      60.0
      230.0
      0.200000
      0.766667
      4.386282
      None
    
    
      8
      2
      20
      1
      43
      0.003333
      0.143333
      4.0
      20.0
      4.0
      81.0
      0.013333
      0.270000
      7.623647
      None
    
    
      9
      21
      20
      181
      274
      0.603333
      0.913333
      17.0
      20.0
      174.0
      274.0
      0.580000
      0.913333
      1.399822
      None
    
    
      10
      10
      20
      268
      50
      0.893333
      0.166667
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      None
    
    
      11
      16
      20
      158
      207
      0.526667
      0.690000
      3.0
      20.0
      184.0
      223.0
      0.613333
      0.743333
      6.105388
      None
    
    
      12
      4
      20
      132
      6
      0.440000
      0.020000
      1.0
      20.0
      112.0
      5.0
      0.373333
      0.016667
      4.004997
      None
    
    
      13
      28
      20
      43
      48
      0.143333
      0.160000
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      None
    
    
      14
      8
      20
      57
      238
      0.190000
      0.793333
      11.0
      20.0
      60.0
      230.0
      0.200000
      0.766667
      1.708781
      None



In [ ]:

	id	r	x	y	ra	dec
count	15.000000	15.0	15.000000	15.000000	15.000000	15.000000
mean	15.800000	20.0	138.000000	124.733333	0.460000	0.415778
std	8.620573	0.0	94.333754	104.965618	0.314446	0.349885
min	2.000000	20.0	1.000000	6.000000	0.003333	0.020000
25%	9.000000	20.0	63.000000	42.500000	0.210000	0.141667
50%	16.000000	20.0	129.000000	58.000000	0.430000	0.193333
75%	22.500000	20.0	218.000000	224.000000	0.726667	0.746667
max	29.000000	20.0	272.000000	274.000000	0.906667	0.913333

	id	r	x	y	ra	dec
count	11.000000	11.0	11.000000	11.000000	11.000000	11.000000
mean	10.363636	20.0	151.000000	174.000000	0.503333	0.580000
std	5.971143	0.0	99.464567	81.481286	0.331549	0.271604
min	1.000000	20.0	4.000000	5.000000	0.013333	0.016667
25%	6.000000	20.0	80.000000	129.000000	0.266667	0.430000
50%	11.000000	20.0	113.000000	192.000000	0.376667	0.640000
75%	14.000000	20.0	224.000000	226.500000	0.746667	0.755000
max	20.000000	20.0	297.000000	274.000000	0.990000	0.913333

	A_idx	B_duplicates	B_idx	separation	to_drop
0	0	None	7	4.999999	False
1	3	None	2	2.999898	False
2	6	None	9	5.003446	False
3	7	None	3	4.386282	False
4	8	None	0	7.623647	False
5	9	None	6	1.399822	False
6	11	None	10	6.105388	False
7	12	None	7	4.004997	False
8	14	None	3	1.708781	False

	A						B						AB
	id	r	x	y	ra	dec	id	r	x	y	ra	dec	dist	B_duplicates
0	22	20	97	25	0.323333	0.083333	1.0	20.0	112.0	5.0	0.373333	0.016667	4.999999	None
1	18	20	12	267	0.040000	0.890000	NaN	NaN	NaN	NaN	NaN	NaN	NaN	None
2	24	20	255	58	0.850000	0.193333	NaN	NaN	NaN	NaN	NaN	NaN	NaN	None
3	14	20	128	142	0.426667	0.473333	13.0	20.0	113.0	142.0	0.376667	0.473333	2.999898	None
4	29	20	129	42	0.430000	0.140000	NaN	NaN	NaN	NaN	NaN	NaN	NaN	None
5	23	20	272	6	0.906667	0.020000	NaN	NaN	NaN	NaN	NaN	NaN	NaN	None
6	11	20	268	255	0.893333	0.850000	12.0	20.0	293.0	256.0	0.976667	0.853333	5.003446	None
7	7	20	69	210	0.230000	0.700000	11.0	20.0	60.0	230.0	0.200000	0.766667	4.386282	None
8	2	20	1	43	0.003333	0.143333	4.0	20.0	4.0	81.0	0.013333	0.270000	7.623647	None
9	21	20	181	274	0.603333	0.913333	17.0	20.0	174.0	274.0	0.580000	0.913333	1.399822	None
10	10	20	268	50	0.893333	0.166667	NaN	NaN	NaN	NaN	NaN	NaN	NaN	None
11	16	20	158	207	0.526667	0.690000	3.0	20.0	184.0	223.0	0.613333	0.743333	6.105388	None
12	4	20	132	6	0.440000	0.020000	1.0	20.0	112.0	5.0	0.373333	0.016667	4.004997	None
13	28	20	43	48	0.143333	0.160000	NaN	NaN	NaN	NaN	NaN	NaN	NaN	None
14	8	20	57	238	0.190000	0.793333	11.0	20.0	60.0	230.0	0.200000	0.766667	1.708781	None