Correlation function of DR12G SDSS CMASS Catalog

First import all the modules such as healpy and astropy needed for analyzing the structure


In [1]:
import healpix_util as hu
import astropy as ap
import numpy as np
from astropy.io import fits
from astropy.table import Table
import astropy.io.ascii as ascii
from astropy.io import fits
from astropy.constants import c
import matplotlib.pyplot as plt
import math as m
from math import pi
#from scipy.constants import c
import scipy.special as sp
from astroML.decorators import pickle_results
from scipy import integrate
import warnings
from sklearn.neighbors import BallTree
import pickle
import multiprocessing as mp
import time
from cython_metric import *
from lcdmmetric import *
from progressbar import *
from tqdm import *
from functools import partial
import pymangle
#from astroML.datasets import fetch_sdss_specgals
#from astroML.correlation import bootstrap_two_point_angular
%matplotlib inline

Read the data file (taken from http://cosmo.nyu.edu/~eak306/SDSS-LRG.html ) converted to ascii with comoving distance etc. in V01 reading from pkl files for faster read


In [4]:
data=ascii.read("./output/dr12gcmnsrarf.dat")

In [5]:
data


Out[5]:
<Table length=618806>
zradecsrardecr
float64float64float64float64float64float64
0.54253129.17618848.9464990.4730782.254550.854278
0.399682117.41696339.2767590.362022.0493130.685509
0.537702116.91272439.4433110.4694762.0405120.688416
0.519172116.95017239.4907690.4555522.0411660.689244
0.543191117.52847140.1764930.4735712.0512590.701212
0.589608123.81615946.6367840.507672.1610.813965
0.548197127.60127749.7759370.4772932.2270620.868754
0.555224122.81641843.9408640.4825012.1435510.766913
0.380021122.93780444.0331840.3459962.1456690.768524
0.562535123.13237844.2008120.4878962.1490650.77145
..................
0.482233225.49998458.3616070.4273363.9357171.018602
0.476738172.85207360.0851870.4230853.0168381.048684
0.474484173.44870860.3325440.4213393.0272511.053002
0.456862173.66728260.1818360.4075973.0310661.050371
0.471463174.14012460.819710.4189923.0393191.061504
0.526395213.81728764.4620380.4609983.7318161.125075
0.479835215.48384663.981420.4254833.7609031.116686
0.448259218.30272763.1587470.4008373.8101011.102328
0.473205216.81682964.0369890.4203463.7841681.117656
0.485066215.16677965.0414420.4295223.7553691.135187

In [6]:
data.remove_column('z')
data.remove_column('ra')
data.remove_column('dec')

In [7]:
data


Out[7]:
<Table length=618806>
srardecr
float64float64float64
0.4730782.254550.854278
0.362022.0493130.685509
0.4694762.0405120.688416
0.4555522.0411660.689244
0.4735712.0512590.701212
0.507672.1610.813965
0.4772932.2270620.868754
0.4825012.1435510.766913
0.3459962.1456690.768524
0.4878962.1490650.77145
.........
0.4273363.9357171.018602
0.4230853.0168381.048684
0.4213393.0272511.053002
0.4075973.0310661.050371
0.4189923.0393191.061504
0.4609983.7318161.125075
0.4254833.7609031.116686
0.4008373.8101011.102328
0.4203463.7841681.117656
0.4295223.7553691.135187

In [8]:
rs=np.array(data['s'])
rrar=np.array(data['rar'])
rdecr=np.array(data['decr'])

In [9]:
dat=np.array([rs,rrar,rdecr])

In [10]:
dat


Out[10]:
array([[ 0.473078,  0.36202 ,  0.469476, ...,  0.400837,  0.420346,
         0.429522],
       [ 2.25455 ,  2.049313,  2.040512, ...,  3.810101,  3.784168,
         3.755369],
       [ 0.854278,  0.685509,  0.688416, ...,  1.102328,  1.117656,
         1.135187]])

In [12]:
dat.reshape(3,len(data))


Out[12]:
array([[ 0.473078,  0.36202 ,  0.469476, ...,  0.400837,  0.420346,
         0.429522],
       [ 2.25455 ,  2.049313,  2.040512, ...,  3.810101,  3.784168,
         3.755369],
       [ 0.854278,  0.685509,  0.688416, ...,  1.102328,  1.117656,
         1.135187]])

In [13]:
dat=dat.transpose()

In [14]:
dat


Out[14]:
array([[ 0.473078,  2.25455 ,  0.854278],
       [ 0.36202 ,  2.049313,  0.685509],
       [ 0.469476,  2.040512,  0.688416],
       ..., 
       [ 0.400837,  3.810101,  1.102328],
       [ 0.420346,  3.784168,  1.117656],
       [ 0.429522,  3.755369,  1.135187]])

In [15]:
# Saving the objects:
with open('dr12gcmnLCDM.pkl', 'w') as f:  # Python 3: open(..., 'wb')
    pickle.dump(dat, f)

In [3]:
# Getting back the objects:
with open('../pkl/dr12gcmnLCDM.pkl') as f:  # Python 3: open(..., 'rb')
    dat = pickle.load(f)
dat


Out[3]:
array([[ 0.473078,  2.25455 ,  0.854278],
       [ 0.36202 ,  2.049313,  0.685509],
       [ 0.469476,  2.040512,  0.688416],
       ..., 
       [ 0.400837,  3.810101,  1.102328],
       [ 0.420346,  3.784168,  1.117656],
       [ 0.429522,  3.755369,  1.135187]])

In [4]:
# Getting back the objects:
with open('../pkl/rdr12gcmnLCDM.pkl') as f:  # Python 3: open(..., 'rb')
    datR = pickle.load(f)
datR


Out[4]:
array([[ 0.473078,  4.351359,  0.720315],
       [ 0.36202 ,  2.647925,  0.308903],
       [ 0.469475,  3.890172,  1.066587],
       ..., 
       [ 0.400837,  2.035658,  0.814292],
       [ 0.420346,  2.734821,  1.023698],
       [ 0.429522,  4.468994,  0.481731]])

In [5]:
LCDMmetricsq(dat[0],datR[1])


Out[5]:
0.0783923333355121

In [21]:
%%time
BT_D = BallTree(dat,metric='pyfunc',func=LCDMmetricsq) 

with open('BTDdr12gcmnsLCDM.pkl', 'w') as f:
    pickle.dump(BT_D,f)


CPU times: user 31.4 s, sys: 513 ms, total: 31.9 s
Wall time: 32.5 s

In [8]:
with open('../pkl/BTDdr12gcmnsLCDM.pkl') as f:
    BTD = pickle.load(f)
    
BTD


Out[8]:
<sklearn.neighbors.ball_tree.BinaryTree at 0x7fd7945e16c0>

In [16]:
bins=np.arange(0.004,0.084,0.004)
bins


Out[16]:
array([ 0.004,  0.008,  0.012,  0.016,  0.02 ,  0.024,  0.028,  0.032,
        0.036,  0.04 ,  0.044,  0.048,  0.052,  0.056,  0.06 ,  0.064,
        0.068,  0.072,  0.076,  0.08 ])

In [17]:
binsq=bins**2
binsq


Out[17]:
array([  1.60000000e-05,   6.40000000e-05,   1.44000000e-04,
         2.56000000e-04,   4.00000000e-04,   5.76000000e-04,
         7.84000000e-04,   1.02400000e-03,   1.29600000e-03,
         1.60000000e-03,   1.93600000e-03,   2.30400000e-03,
         2.70400000e-03,   3.13600000e-03,   3.60000000e-03,
         4.09600000e-03,   4.62400000e-03,   5.18400000e-03,
         5.77600000e-03,   6.40000000e-03])

In [18]:
%%time
start_time=time.time()
counts_DD=BTD.two_point_correlation(dat,binsq)
print counts_DD
end_time=time.time()
tottime=end_time-start_time
print "Total run time:"
print tottime

with open('../pkl/BTDdr12gcmnsDDLCDM.pkl', 'w') as f:
    pickle.dump(counts_DD,f)

with open('../pkl/BTDdr12gcmnsDDLCDM.pkl') as f:
    counts_DD = pickle.load(f)
    
counts_DD


[   3254425   12392190   32334164   67664922  123025144  204096285
  317424713  468737219  663426786  905172030 1197901623 1545301400
 1950639388 2417101102 2947350432 3543620412 4209458629 4944876350
 5753245695 6636628986]
Total run time:
31638.8450139
CPU times: user 8h 37min 31s, sys: 1min 32s, total: 8h 39min 3s
Wall time: 8h 47min 18s

In [19]:
DD=np.diff(counts_DD)
DD


Out[19]:
array([  9137765,  19941974,  35330758,  55360222,  81071141, 113328428,
       151312506, 194689567, 241745244, 292729593, 347399777, 405337988,
       466461714, 530249330, 596269980, 665838217, 735417721, 808369345,
       883383291])

In [ ]:


In [20]:
%%time
start_time=time.time()
counts_DR=BTD.two_point_correlation(datR,binsq)
print counts_DR
end_time=time.time()
tottime=end_time-start_time
print "Total run time:"
print tottime

with open('../pkl/BTDdr12gcmnsDRLCDM.pkl', 'w') as f:
    pickle.dump(counts_DR,f)

with open('../pkl/BTDdr12gcmnsDRLCDM.pkl') as f:
    counts_DR = pickle.load(f)
    
counts_DR


[    736505    5675630   18546250   42674926   81353880  138446090
  218612406  326088177  464442075  636625801  845383147 1093223208
 1382369815 1715275271 2093553664 2519357481 2993768066 3518471096
 4094492719 4723198652]
Total run time:
15914.482981
CPU times: user 3h 36min 44s, sys: 31.6 s, total: 3h 37min 15s
Wall time: 4h 25min 14s

In [21]:
counts_DR


Out[21]:
array([    736505,    5675630,   18546250,   42674926,   81353880,
        138446090,  218612406,  326088177,  464442075,  636625801,
        845383147, 1093223208, 1382369815, 1715275271, 2093553664,
       2519357481, 2993768066, 3518471096, 4094492719, 4723198652])

In [22]:
DR=np.diff(counts_DR)

In [23]:
DR


Out[23]:
array([  4939125,  12870620,  24128676,  38678954,  57092210,  80166316,
       107475771, 138353898, 172183726, 208757346, 247840061, 289146607,
       332905456, 378278393, 425803817, 474410585, 524703030, 576021623,
       628705933])

In [24]:
with open('../pkl/rBTDdr12gcmnsDDLCDM.pkl') as f:
    counts_RR = pickle.load(f)
    
counts_RR


Out[24]:
array([   1358459,    6273635,   19012588,   42810633,   80777592,
        136780874,  215684501,  321583256,  457878569,  627472414,
        832603166, 1075487654, 1358410654, 1682811804, 2050435215,
       2463042572, 2922567634, 3430652477, 3987726170, 4593614344])

In [25]:
RR=np.diff(counts_DR)

In [26]:
corrells=(DD+RR-2.0*DR)/RR

In [27]:
plt.plot(bins[1:len(bins)],corrells,'bo-')


Out[27]:
[<matplotlib.lines.Line2D at 0x11ad47750>]

In [29]:
plt.yscale('log')
plt.plot(bins[1:len(bins)],corrells,'bo-')


Out[29]:
[<matplotlib.lines.Line2D at 0x11acf1650>]

In [30]:
plt.yscale('log')
plt.xscale('log')
plt.plot(bins[1:len(bins)],corrells,'bo-')


Out[30]:
[<matplotlib.lines.Line2D at 0x11b0f67d0>]

In [28]:
corrells


Out[28]:
array([ 0.8500777 ,  0.54941829,  0.46426426,  0.43127505,  0.42000355,
        0.41366641,  0.40787551,  0.40718527,  0.40399589,  0.40224811,
        0.40170954,  0.40184245,  0.40118375,  0.40174364,  0.40033968,
        0.40350624,  0.40158848,  0.40336632,  0.40508184])

In [44]:
plt.plot(bins[1:len(bins)],correl,'go-')


Out[44]:
[<matplotlib.lines.Line2D at 0x113c4c5d0>]

In [45]:
plt.plot(bins[1:len(bins)],bins[1:len(bins)]*bins[1:len(bins)]*correl,'go-')


Out[45]:
[<matplotlib.lines.Line2D at 0x113d5fed0>]

In [46]:
plt.plot(bins[2:len(bins)],bins[2:len(bins)]*bins[2:len(bins)]*correl[1:len(bins)],'go-')


Out[46]:
[<matplotlib.lines.Line2D at 0x113e8ba90>]

In [47]:
plt.plot(bins[2:len(bins)],correl[1:len(bins)],'go-')


Out[47]:
[<matplotlib.lines.Line2D at 0x11054ac90>]

In [48]:
plt.plot(bins[2:len(bins)],correl[1:len(bins)],'go-')
plt.savefig("correl2x.pdf")



In [53]:
plt.plot(bins[2:len(bins)]*c/1e5,correl[1:len(bins)],'bo-')
plt.savefig("correl2x1.pdf")



In [52]:
plt.yscale('log')
plt.plot(bins[2:len(bins)]*c/1e5,correl[1:len(bins)],'ro-')


Out[52]:
[<matplotlib.lines.Line2D at 0x1142e1f10>]

In [54]:
%%time
start_time=time.time()
counts_DR=BTR.two_point_correlation(dat,bins)
print counts_DR
end_time=time.time()
tottime=end_time-start_time
print "Total run time:"
print tottime

with open('BTR200kcDRLCDM.pkl', 'w') as f:
    pickle.dump(counts_DR,f)


[        0    236152   1835676   6024256  13930163  26635863  45156095
  70428809 103344608 144719046 195306049 255767295 326645884 408522145
 501808240 606822439]
Total run time:
5362.83386302
CPU times: user 1h 29min 5s, sys: 7.79 s, total: 1h 29min 13s
Wall time: 1h 29min 23s

In [55]:
with open('BTR200kcDRLCDM.pkl') as f:
    counts_DR = pickle.load(f)
    
counts_DR


Out[55]:
array([        0,    236152,   1835676,   6024256,  13930163,  26635863,
        45156095,  70428809, 103344608, 144719046, 195306049, 255767295,
       326645884, 408522145, 501808240, 606822439])

In [56]:
DR=np.diff(counts_DR)

In [57]:
corrells=(4.0 * DD - 4.0 * DR + RR) / RR

In [58]:
DR


Out[58]:
array([   236152,   1599524,   4188580,   7905907,  12705700,  18520232,
        25272714,  32915799,  41374438,  50587003,  60461246,  70878589,
        81876261,  93286095, 105014199])

In [59]:
corrells


Out[59]:
array([ 1.7052646 ,  0.29978504,  0.10500558,  0.05797354,  0.03719729,
        0.02859312,  0.02948462,  0.02686032,  0.02325382,  0.02064449,
        0.01721195,  0.01703756,  0.01286895,  0.01164703,  0.01089049])

In [60]:
plt.plot(bins[1:len(bins)],corrells,'go-')


Out[60]:
[<matplotlib.lines.Line2D at 0x114929c10>]

In [61]:
plt.plot(bins[1:len(bins)],bins[1:len(bins)]*bins[1:len(bins)]*corrells*(c*1e-5)**2,'go-')


Out[61]:
[<matplotlib.lines.Line2D at 0x1143fea50>]

In [72]:
plt.yscale('log')
plt.plot(bins[1:len(bins)]*c/1e5,corrells,'bo-')
plt.savefig("correllsfiglog.pdf")



In [71]:
plt.yscale('log')
plt.plot(bins[2:len(bins)]*c/1e5,corrells[1:len(bins)],'ro-')
plt.savefig("correllslog2x.pdf")



In [73]:
plt.yscale('log')
plt.xscale('log')
plt.plot(bins[1:len(bins)]*c/1e5,corrells,'bo-')
plt.savefig("correllsloglog.pdf")