The goal of this program is to match the NSA galaxies to the AGC catalogs.

Ideally, there will be a one-to-one match, but of course, nothing is ideal...

So what do we do with galaxies that don't have an NSA match? We will need to get their SDSS colors another way so that we can properly separate them into blue and red, and so we can estimate their stellar masses.


In [1]:
from matplotlib import pyplot as plt
import numpy as np
from astropy.io import fits
import glob
%matplotlib inline

In [2]:
def findnearest(x1,y1,x2,y2,delta):#use where command
    matchflag=1
    nmatch=0
    d=np.sqrt((x1-x2)**2 + (y1-y2)**2)#x2 and y2 are arrays
    index=np.arange(len(d))
    t=index[d<delta]
    matches=t
    if len(matches) > 0:
        nmatch=len(matches)
        if nmatch > 1:
            imatch=index[(d == min(d[t]))]
        else:
            imatch=matches[0]
    else:
        imatch = 0
        matchflag = 0

    return imatch, matchflag,nmatch

In [3]:
def compare_samples(x1,y1,x2,y2):
    plt.figure()
    plt.plot(x1,y1,'bo')
    plt.plot(x2,y2,'ks')

In [6]:
# match radiuss = 3 arcsec
matchradius=3./3600 # convert to degrees

    
clusterfiles=glob.glob('*_AGC.fits')
for cfile in clusterfiles:
    agcdat=fits.getdata(cfile)
    name=cfile.split('_')[0]
    nsafile=name+'_NSA.fits'
    print nsafile
    nsadat=fits.getdata(nsafile)
    imatch=np.zeros(len(agcdat.RA),'i')
    matchflag=np.zeros(len(agcdat.RA),'bool')
    nmatch=np.zeros(len(agcdat.RA),'i')
    for i in range(len(agcdat.RA)):
        imatch[i],matchflag[i],nmatch[i] = findnearest(agcdat.RA[i],agcdat.DEC[i],nsadat.RA,nsadat.DEC,matchradius)
    
    outfile=name+'_matchNSAtoAGC.fits'
    
    orig_cols = agcdat.columns
    new_cols = []
    for col in nsadat.columns.names:
        print col
        base=nsadat[col]
        print base.shape,matchflag.shape
        a=nsadat[col][imatch]*matchflag + np.zeros(len(nsadat[col]))*matchflag
        #new_cols.append(a)
        #print a.shape
                                        
        t=fits.Column(name=col, array=a)
        new_cols.append(t)
    fits.ColDefs([new_cols])
    hdu = fits.BinTableHDU.from_columns(orig_cols + new_cols)
    
    hdu.writeto(outfile,clobber='yes')


Abell2063_NSA.fits
IAUNAME
(949,) (992,)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-6-fc8cfcdc6f1c> in <module>()
     24         base=nsadat[col]
     25         print base.shape,matchflag.shape
---> 26         a=nsadat[col][imatch]*matchflag + np.zeros(len(nsadat[col]))*matchflag
     27         #new_cols.append(a)
     28         #print a.shape

/Users/rfinn/Ureka/variants/common/lib/python2.7/site-packages/numpy/core/defchararray.pyc in __mul__(self, i)
   1954         multiply
   1955         """
-> 1956         return asarray(multiply(self, i))
   1957 
   1958     def __rmul__(self, i):

/Users/rfinn/Ureka/variants/common/lib/python2.7/site-packages/numpy/core/defchararray.pyc in multiply(a, i)
    308     i_arr = numpy.asarray(i)
    309     if not issubclass(i_arr.dtype.type, integer):
--> 310         raise ValueError("Can only multiply by integers")
    311     out_size = _get_num_chars(a_arr) * max(long(i_arr.max()), 0)
    312     return _vec_string(

ValueError: Can only multiply by integers

In [5]:
plt.figure()
plt.hist(agcdat.VOPT,color='b',bins=np.arange(0,12000,500),label='AGC')
plt.hist(nsadat.ZDIST*3.e5,color='g',bins=np.arange(0,12000,500),label='NSA')
plt.xlabel('Recession Velocity')


Out[5]:
<matplotlib.text.Text at 0x109ecff50>

In [ ]: