Run the x-match using the GC method

The "great-circle" x-matching method was implemented on booq, now we have tested and fixed the errors we should run on a small sample of real data (cs82 and vla)



In [1]:

    
import booq



In [2]:

    
%matplotlib inline

from matplotlib import pyplot as plt
from matplotlib import cm

import numpy

plt.rcParams['figure.figsize'] = (10.0, 5.0)



In [3]:

    
from booq.io import fits

cs82 = fits.open('data/cs82_luckSquare.fits')
svla = fits.open('data/vla_luckSquare.fits')

colsA = {'ra':'ALPHA_J2000','dec':'DELTA_J2000','id':'SE_ID'}
catA = cs82.read( columns=colsA.values())

colsB = {'ra':'RA','dec':'DEC','id':'Str82'}
catB = svla.read(columns=colsB.values())

from booq.table import ATable
A = ATable(catA.data).to_pandas()
B = ATable(catB.data).to_pandas()



In [4]:

    
A.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 528137 entries, 0 to 528136
Data columns (total 3 columns):
SE_ID          528137 non-null int32
ALPHA_J2000    528137 non-null float64
DELTA_J2000    528137 non-null float64
dtypes: float64(2), int32(1)
memory usage: 10.1 MB



In [5]:

    
B.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 879 entries, 0 to 878
Data columns (total 3 columns):
RA       879 non-null float64
DEC      879 non-null float64
Str82    879 non-null object
dtypes: float64(2), object(1)
memory usage: 20.7+ KB



In [6]:

    
from booq.coordinates.skycoords import skycoords
A_coord = skycoords(A.ALPHA_J2000.values, A.DELTA_J2000.values)
B_coord = skycoords(B.RA.values, B.DEC.values)



In [7]:

    
from booq.catalogs import xmatch
_Aidx,_Asep = xmatch.nn(A_coord,B_coord)
_Bidx,_Bsep = xmatch.nn(B_coord,A_coord)

import numpy
_sep = numpy.append(_Asep.arcsec,_Bsep.arcsec)

from booq.utils import stats
_sts = stats.basic(_sep)

from booq import utils
utils.pprint(_sts)

import seaborn
p = seaborn.distplot(_sep)
plt.show(p)

del _Aidx,_Asep,_Bidx,_Bsep,_sts,_sep









    



length : 529016
min : 0.0150427097942
max : 1049.70394273
mean : 197.894605307
std : 149.923869294
quantiles : (99.642187302709303, 161.29729778795013, 243.39168509551419)



In [8]:

    
from astropy.units import Quantity
radius = Quantity(2,'arcsec')



In [9]:

    
from booq.catalogs import xmatch
match_A_gc_idx, match_B_gc_idx, match_gc_sep = xmatch.gc(A_coord, B_coord, radius)



In [10]:

    
from booq import utils
utils.pprint(utils.stats.basic(match_gc_sep.arcmin))









    



length : 401
min : 0.000250711829904
max : 0.0333253957801
mean : 0.00935545145988
std : 0.00920822610898
quantiles : (0.0029152712569407808, 0.005501477874166787, 0.012965165870444608)



In [11]:

    
from booq.pipelines import xmatch_nn
reload(xmatch_nn)
df_matched_idx = xmatch_nn.select_pairs(match_A_gc_idx, match_B_gc_idx, match_gc_sep.arcmin)
df_matched_idx.describe(include='all')









    Out[11]:






  
    
      
      A_idx
      B_duplicates
      B_idx
      separation
    
  
  
    
      count
      395.000000
      6
      395.000000
      395.000000
    
    
      unique
      NaN
      6
      NaN
      NaN
    
    
      top
      NaN
      401
      NaN
      NaN
    
    
      freq
      NaN
      1
      NaN
      NaN
    
    
      mean
      301284.564557
      NaN
      422.653165
      0.009053
    
    
      std
      151902.361069
      NaN
      237.207332
      0.008949
    
    
      min
      2085.000000
      NaN
      50.000000
      0.000251
    
    
      25%
      160266.000000
      NaN
      218.000000
      0.002868
    
    
      50%
      334993.000000
      NaN
      409.000000
      0.005408
    
    
      75%
      425480.000000
      NaN
      600.500000
      0.011394
    
    
      max
      523644.000000
      NaN
      878.000000
      0.033311



In [12]:

    
from booq.pipelines import xmatch_nn
reload(xmatch_nn)
df = matched_catalog = xmatch_nn.merge_catalogs(A, B, df_matched_idx, 'Str82')









    



/home/chbrandt/.conda/envs/booq-dev/lib/python2.7/site-packages/pandas/core/indexing.py:288: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[key] = _infer_fill_value(value)
/home/chbrandt/.conda/envs/booq-dev/lib/python2.7/site-packages/pandas/core/indexing.py:465: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s



In [13]:

    
df.describe(include='all')









    



/home/chbrandt/.conda/envs/booq-dev/lib/python2.7/site-packages/numpy/lib/function_base.py:3823: RuntimeWarning: Invalid value encountered in percentile
  RuntimeWarning)






    Out[13]:






  
    
      
      A
      B
      AB
    
    
      
      SE_ID
      ALPHA_J2000
      DELTA_J2000
      RA
      DEC
      Str82
      dist
      B_duplicates
    
  
  
    
      count
      528137.000000
      528137.000000
      528137.000000
      395.000000
      395.000000
      395
      395.000000
      6
    
    
      unique
      NaN
      NaN
      NaN
      NaN
      NaN
      360
      NaN
      6
    
    
      top
      NaN
      NaN
      NaN
      NaN
      NaN
      J005444.9-002752
      NaN
      J005200.1-004831
    
    
      freq
      NaN
      NaN
      NaN
      NaN
      NaN
      3
      NaN
      1
    
    
      mean
      67316.181656
      13.021675
      0.040702
      13.128274
      0.138412
      NaN
      0.009053
      NaN
    
    
      std
      36346.045266
      0.836532
      0.563889
      0.699448
      0.526262
      NaN
      0.008949
      NaN
    
    
      min
      1372.000000
      11.594873
      -0.999989
      11.787167
      -0.998853
      NaN
      0.000251
      NaN
    
    
      25%
      36981.000000
      12.232602
      -0.464393
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      50%
      66892.000000
      13.098465
      0.111200
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      75%
      96582.000000
      13.742887
      0.528693
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      max
      150430.000000
      14.405139
      0.940626
      14.232887
      0.909142
      NaN
      0.033311
      NaN

Now, the pipeline



In [14]:

    
import booq
logging = booq.log.init(verbose=True,debug=True)

from booq.io import fits

cs82 = fits.open('data/cs82_luckSquare.fits')
svla = fits.open('data/vla_luckSquare.fits')

colsA = {'ra':'ALPHA_J2000','dec':'DELTA_J2000','id':'SE_ID'}
catA = cs82.read( columns=colsA.values())

colsB = {'ra':'RA','dec':'DEC','id':'Str82'}
catB = svla.read(columns=colsB.values())

from booq.table import ATable
A = ATable(catA.data).to_pandas()
B = ATable(catB.data).to_pandas()



In [15]:

    
from booq.pipelines import xmatch_nn
matched_catalog = xmatch_nn.xmatch(A,B,columns_A=colsA, columns_B=colsB, radius=2, method='gc')



In [16]:

    
matched_catalog.describe(include='all')









    Out[16]:






  
    
      
      A
      B
      AB
    
    
      
      SE_ID
      ALPHA_J2000
      DELTA_J2000
      RA
      DEC
      Str82
      dist
      B_duplicates
    
  
  
    
      count
      528137.000000
      528137.000000
      528137.000000
      395.000000
      395.000000
      395
      395.000000
      6
    
    
      unique
      NaN
      NaN
      NaN
      NaN
      NaN
      360
      NaN
      6
    
    
      top
      NaN
      NaN
      NaN
      NaN
      NaN
      J005444.9-002752
      NaN
      J005200.1-004831
    
    
      freq
      NaN
      NaN
      NaN
      NaN
      NaN
      3
      NaN
      1
    
    
      mean
      67316.181656
      13.021675
      0.040702
      13.128274
      0.138412
      NaN
      0.543193
      NaN
    
    
      std
      36346.045266
      0.836532
      0.563889
      0.699448
      0.526262
      NaN
      0.536912
      NaN
    
    
      min
      1372.000000
      11.594873
      -0.999989
      11.787167
      -0.998853
      NaN
      0.015043
      NaN
    
    
      25%
      36981.000000
      12.232602
      -0.464393
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      50%
      66892.000000
      13.098465
      0.111200
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      75%
      96582.000000
      13.742887
      0.528693
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      max
      150430.000000
      14.405139
      0.940626
      14.232887
      0.909142
      NaN
      1.998689
      NaN



In [ ]:

	A_idx	B_duplicates	B_idx	separation
count	395.000000	6	395.000000	395.000000
unique	NaN	6	NaN	NaN
top	NaN	401	NaN	NaN
freq	NaN	1	NaN	NaN
mean	301284.564557	NaN	422.653165	0.009053
std	151902.361069	NaN	237.207332	0.008949
min	2085.000000	NaN	50.000000	0.000251
25%	160266.000000	NaN	218.000000	0.002868
50%	334993.000000	NaN	409.000000	0.005408
75%	425480.000000	NaN	600.500000	0.011394
max	523644.000000	NaN	878.000000	0.033311

	A			B			AB
	SE_ID	ALPHA_J2000	DELTA_J2000	RA	DEC	Str82	dist	B_duplicates
count	528137.000000	528137.000000	528137.000000	395.000000	395.000000	395	395.000000	6
unique	NaN	NaN	NaN	NaN	NaN	360	NaN	6
top	NaN	NaN	NaN	NaN	NaN	J005444.9-002752	NaN	J005200.1-004831
freq	NaN	NaN	NaN	NaN	NaN	3	NaN	1
mean	67316.181656	13.021675	0.040702	13.128274	0.138412	NaN	0.009053	NaN
std	36346.045266	0.836532	0.563889	0.699448	0.526262	NaN	0.008949	NaN
min	1372.000000	11.594873	-0.999989	11.787167	-0.998853	NaN	0.000251	NaN
25%	36981.000000	12.232602	-0.464393	NaN	NaN	NaN	NaN	NaN
50%	66892.000000	13.098465	0.111200	NaN	NaN	NaN	NaN	NaN
75%	96582.000000	13.742887	0.528693	NaN	NaN	NaN	NaN	NaN
max	150430.000000	14.405139	0.940626	14.232887	0.909142	NaN	0.033311	NaN