Correlation function of the DR72 SDSS VAGC catalog

First, import all the modules needed for the analysis, such as healpix_util (healpy), astropy, numpy, scipy, and scikit-learn.


In [1]:
import healpix_util as hu
import astropy as ap
import numpy as np
from astropy.io import fits
from astropy.table import Table
import astropy.io.ascii as ascii
from astropy.constants import c
import matplotlib.pyplot as plt
import math as m
from math import pi
import scipy.special as sp
from astroML.decorators import pickle_results
from scipy import integrate
import warnings
from sklearn.neighbors import BallTree
import pickle
import multiprocessing as mp
import time
from cython_metric import *
from progressbar import *
#from astroML.datasets import fetch_sdss_specgals
#from astroML.correlation import bootstrap_two_point_angular
%matplotlib inline

A random catalog, created based on the survey limitations, is also taken from http://cosmo.nyu.edu/~eak306/SDSS-LRG.html


In [2]:
dataR=fits.open("/Users/rohin/Downloads/random-DR7-Full.fits")

In [3]:
dataR


Out[3]:
[<astropy.io.fits.hdu.image.PrimaryHDU object at 0x10eac5910>, <astropy.io.fits.hdu.table.BinTableHDU object at 0x10941ffd0>]

In [4]:
dataR=dataR[1].data

In [5]:
len(dataR)


Out[5]:
1664948

In [6]:
dataR


Out[6]:
FITS_rec([ (37.837992999999997, -0.62006700000000003, 28631, 0.025870252, 731, 0.4343251833233247, 1.0, 5.3768849, 1.9245105),
       (209.089259, 17.276520999999999, 607527, 0.027706305, 8282, 0.29326452640687256, 1.0, 12.520595, 0.84796357),
       (258.46493900000002, 28.215748000000001, 198135, 0.058957841, 2379, 0.22274562266367232, 1.0, 16.267647, 0.6556018),
       ...,
       (229.83728400000001, 32.709685999999998, 306299, 0.017827285, 4524, 0.29215485317197082, 0.99242425, 12.497682, 0.84948772),
       (132.83980500000001, 35.648184000000001, 242314, 0.036420144, 2914, 0.18659365873670164, 0.9952153, 18.512594, 0.57715893),
       (192.403007, 43.611770999999997, 352971, 0.016170749, 4240, 0.307947945236697, 0.94999999, 13.008629, 0.81675106)], 
      dtype=(numpy.record, [('RA', '>f8'), ('DEC', '>f8'), ('ILSS', '>i4'), ('EBV', '>f4'), ('SECTOR', '>i4'), ('Z', '>f8'), ('SECTOR_COMPLETENESS', '>f4'), ('COMOV_DENSITY', '>f4'), ('RADIAL_WEIGHT', '>f4')]))

In [7]:
dataR.columns


Out[7]:
ColDefs(
    name = 'RA'; format = 'D'
    name = 'DEC'; format = 'D'
    name = 'ILSS'; format = 'J'
    name = 'EBV'; format = 'E'
    name = 'SECTOR'; format = 'J'
    name = 'Z'; format = 'D'
    name = 'SECTOR_COMPLETENESS'; format = 'E'
    name = 'COMOV_DENSITY'; format = 'E'
    name = 'RADIAL_WEIGHT'; format = 'E'
)
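
(In FITS binary-table notation, format 'D' is a 64-bit float, 'E' a 32-bit float, and 'J' a 32-bit signed integer, matching the numpy record dtype shown in Out[6].)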

In [8]:
dataR['Z']


Out[8]:
array([ 0.43432518,  0.29326453,  0.22274562, ...,  0.29215485,
        0.18659366,  0.30794795])

In [9]:
dataR['RA']


Out[9]:
array([  37.837993,  209.089259,  258.464939, ...,  229.837284,
        132.839805,  192.403007])

In [10]:
dataR['DEC']


Out[10]:
array([ -0.620067,  17.276521,  28.215748, ...,  32.709686,  35.648184,
        43.611771])

In [11]:
# Comoving distance of a point from its redshift, in units of the Hubble
# distance c/H0, for a flat LCDM cosmology with Omega_m = 0.3 and
# Omega_Lambda = 0.7. The constant c/H0 prefactor cancels in the pair-count
# ratios, so it is not included here.
Ez = lambda x: 1.0/m.sqrt(0.3*(1.0 + x)**3 + 0.7)

def DC_LCDM(z):
    return integrate.quad(Ez, 0, z)[0]

DC_LCDM = np.vectorize(DC_LCDM)
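
For reference, the quantity returned by DC_LCDM is the standard flat-$\Lambda$CDM line-of-sight comoving distance in units of the Hubble distance,

$$\frac{D_C(z)}{c/H_0} = \int_0^z \frac{dz'}{E(z')}, \qquad E(z) = \sqrt{\Omega_m (1+z)^3 + \Omega_\Lambda},$$

with $\Omega_m = 0.3$ and $\Omega_\Lambda = 0.7$ as in the code above.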

In [14]:
rdr7f = open("./output/randDR7srarf.dat", 'w')
rdr7f.write("z\t ra\t dec\t s\t rar\t decr \n")

# Write z, RA, DEC, the dimensionless comoving distance s, and RA/DEC in
# radians for each random point. Note that range(0, len(dataR)-1) skips the
# last entry, so the output catalogue has len(dataR)-1 = 1664947 rows.
for i in range(0, len(dataR)-1):
    rdr7f.write("%f\t " % dataR['Z'][i])
    rdr7f.write("%f\t %f\t " % (dataR['RA'][i], dataR['DEC'][i]))
    rdr7f.write("%f\t " % DC_LCDM(dataR['Z'][i]))
    rdr7f.write("%f\t %f\n" % (dataR['RA'][i]*pi/180.0, dataR['DEC'][i]*pi/180.0))
rdr7f.close()
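
The explicit Python write loop above can also be replaced by a single numpy call. The sketch below is not part of the original notebook; it assumes the same column layout and the same off-by-one slice, and still evaluates DC_LCDM point by point through np.vectorize:

# Vectorized sketch of the cell above (illustration only).
z   = dataR['Z'][:-1]
ra  = dataR['RA'][:-1]
dec = dataR['DEC'][:-1]
cols = np.column_stack([z, ra, dec, DC_LCDM(z), ra*pi/180.0, dec*pi/180.0])
np.savetxt("./output/randDR7srarf.dat", cols, fmt="%f", delimiter="\t",
           header="z\t ra\t dec\t s\t rar\t decr", comments="")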

In [15]:
data=ascii.read("./output/randDR7srarf.dat")

In [16]:
data


Out[16]:
<Table length=1664947>
    z         ra        dec        s        rar      decr
 float64    float64    float64  float64   float64   float64
-------- ---------- ---------- -------- -------- ---------
0.434325  37.837993  -0.620067 0.389815 0.660398 -0.010822
0.293265 209.089259  17.276521 0.273097 3.649296  0.301532
0.222746 258.464939  28.215748 0.211182 4.511064  0.492458
0.279301 183.927141  37.379032 0.261027 3.210134  0.652387
0.324714 170.682777   1.206543 0.299939 2.978976  0.021058
  0.3653 172.780433  32.981495 0.333879 3.015587  0.575636
0.431968 154.275288  32.160072 0.387941 2.692612  0.561299
0.276107 187.272243  16.128839 0.258254 3.268517  0.281501
0.241847 130.655784   38.45825  0.22819 2.280374  0.671223
0.270847 146.681736  53.845964 0.253675 2.560079  0.939789
     ...        ...        ...      ...      ...       ...
0.380025  22.479891 -10.801148 0.345999 0.392348 -0.188516
0.376247 241.167565  43.744758   0.3429 4.209168   0.76349
 0.43682 202.263266  39.420732 0.391795  3.53016  0.688022
0.341468 165.964837  38.414029 0.314044 2.896633  0.670451
0.275896  136.73796   6.622873  0.25807 2.386528  0.115591
0.320891 158.155643  25.049336 0.296701 2.760337  0.437193
0.394642  148.92197   8.516993  0.35793 2.599179   0.14865
0.232127 203.894397  24.951558 0.219557 3.558629  0.435487
0.292155 229.837284  32.709686 0.272141 4.011417  0.570892
0.186594 132.839805  35.648184 0.178512 2.318492  0.622178

In [17]:
len(data)


Out[17]:
1664947

In [18]:
data.remove_column('z')
data.remove_column('ra')
data.remove_column('dec')

In [19]:
data


Out[19]:
<Table length=1664947>
   s       rar      decr
float64  float64   float64
-------- -------- ---------
0.389815 0.660398 -0.010822
0.273097 3.649296  0.301532
0.211182 4.511064  0.492458
0.261027 3.210134  0.652387
0.299939 2.978976  0.021058
0.333879 3.015587  0.575636
0.387941 2.692612  0.561299
0.258254 3.268517  0.281501
 0.22819 2.280374  0.671223
0.253675 2.560079  0.939789
     ...      ...       ...
0.345999 0.392348 -0.188516
  0.3429 4.209168   0.76349
0.391795  3.53016  0.688022
0.314044 2.896633  0.670451
 0.25807 2.386528  0.115591
0.296701 2.760337  0.437193
 0.35793 2.599179   0.14865
0.219557 3.558629  0.435487
0.272141 4.011417  0.570892
0.178512 2.318492  0.622178

In [20]:
s=np.array(data['s'])
rar=np.array(data['rar'])
decr=np.array(data['decr'])

In [21]:
dat=np.array([s,rar,decr])

In [22]:
dat.reshape(3, len(data['s']))  # dat already has shape (3, N); this just displays a view, dat itself is unchanged


Out[22]:
array([[ 0.389815,  0.273097,  0.211182, ...,  0.219557,  0.272141,
         0.178512],
       [ 0.660398,  3.649296,  4.511064, ...,  3.558629,  4.011417,
         2.318492],
       [-0.010822,  0.301532,  0.492458, ...,  0.435487,  0.570892,
         0.622178]])

In [23]:
dat = dat.transpose()  # BallTree expects shape (n_points, n_features) = (N, 3)

In [24]:
dat


Out[24]:
array([[ 0.389815,  0.660398, -0.010822],
       [ 0.273097,  3.649296,  0.301532],
       [ 0.211182,  4.511064,  0.492458],
       ..., 
       [ 0.219557,  3.558629,  0.435487],
       [ 0.272141,  4.011417,  0.570892],
       [ 0.178512,  2.318492,  0.622178]])

In [25]:
LCDMmetric(dat[0], dat[1])  # pair separation between the first two random points


Out[25]:
0.6543341323471775
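
The cython_metric module itself is not listed in this notebook. For reference, LCDMmetric appears to compute the comoving-space chord separation between two (s, rar, decr) points via the law of cosines; the pure-Python sketch below reproduces the value in Out[25], although the actual Cython implementation may differ in details.

def lcdm_metric_py(p1, p2):
    # Reference separation for points (s, rar, decr):
    # sqrt(s1^2 + s2^2 - 2 s1 s2 cos(theta)), with theta the angular
    # separation on the sky. Assumed equivalent to cython_metric.LCDMmetric.
    s1, ra1, dec1 = p1
    s2, ra2, dec2 = p2
    costheta = (m.sin(dec1)*m.sin(dec2)
                + m.cos(dec1)*m.cos(dec2)*m.cos(ra1 - ra2))
    return m.sqrt(s1*s1 + s2*s2 - 2.0*s1*s2*costheta)

lcdm_metric_py(dat[0], dat[1])   # ~0.654334, consistent with Out[25]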

In [26]:
bins=np.arange(0.,0.08,0.005)

In [27]:
print bins


[ 0.     0.005  0.01   0.015  0.02   0.025  0.03   0.035  0.04   0.045
  0.05   0.055  0.06   0.065  0.07   0.075]

In [28]:
Nbins=len(bins)

In [29]:
Nbins


Out[29]:
16
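
Since the comoving distances s are kept in units of the Hubble distance c/H0 (about 2998 h^-1 Mpc), these dimensionless bins span roughly 0 to 225 h^-1 Mpc in steps of about 15 h^-1 Mpc. A quick conversion, for illustration only and not part of the original analysis:

D_H = 2997.92          # Hubble distance c/H0 in h^-1 Mpc (assumed for illustration)
print bins * D_H       # ~ [0, 15, 30, ..., 225] h^-1 Mpc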

In [30]:
# Build a BallTree on the (s, rar, decr) points using the user-supplied
# LCDM pair-separation metric, then copy the tree via a pickle round-trip.
BT_R = BallTree(dat, metric='pyfunc', func=LCDMmetric)
temp = pickle.dumps(BT_R)
BTR = pickle.loads(temp)
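
As a quick sanity check of the tree and its metric (a usage sketch, not a cell from the original run), query_radius can count neighbours of a small subsample; with a Python callback metric this is slow, so only a few points are queried:

# Count, for each of the first 5 catalogue points, how many points lie
# within a separation of 0.02 (same dimensionless units as `s`).
BTR.query_radius(dat[:5], 0.02, count_only=True)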

In [ ]:
pb=ProgressBar()

In [ ]:
start_time = time.time()

@pickle_results("RDR72RR.pkl")
def rrcal(BTR, dat, bins, Nbins):
    # counts_RR[i] is the cumulative number of random-random pairs with
    # separation < bins[i]; differencing gives the pair counts per bin.
    counts_RR = np.zeros(Nbins)
    for i in pb(range(Nbins)):
        counts_RR[i] = np.sum(BTR.query_radius(dat, bins[i], count_only=True))
    RR = np.diff(counts_RR)
    print counts_RR
    print RR
    return RR

# Optional multiprocessing version (left disabled):
#def mf_wrap(args):
#    return rrcal(*args)
#pool = mp.Pool(8)
#arg = [(BTR, dat, bins, Nbins)]
#RRresult = pool.map(mf_wrap, arg)

RRresult = rrcal(BTR, dat, bins, Nbins)
end_time = time.time()
tottime = end_time - start_time
print "Total run time:"
print tottime


@pickle_results: computing results and saving to 'RDR72RR.pkl'
 25% |##################                                                      |
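
As a small illustration of the counts_RR -> RR step above (with made-up numbers, not output from this run), cumulative counts within each radius turn into per-bin pair counts after differencing:

counts = np.array([0., 10., 25., 45.])   # cumulative counts within bins[0..3]
np.diff(counts)                          # -> array([10., 15., 20.]) pairs per bin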

In [ ]:
# rrcal returns the per-bin RR counts (length Nbins-1), so plot them
# against the upper edge of each bin.
plt.plot(bins[1:], RRresult, 'ro')

In [ ]:
RRresult

In [ ]:


In [ ]: