Correlation function of DR72 SDSS VAGC Catalog

First import all the modules such as healpy and astropy needed for analyzing the structure


In [1]:
import healpix_util as hu
import astropy as ap
import numpy as np
from astropy.io import fits
from astropy.table import Table
import astropy.io.ascii as ascii
from astropy.io import fits
from astropy.constants import c
import matplotlib.pyplot as plt
import math as m
from math import pi
#from scipy.constants import c
import scipy.special as sp
from astroML.decorators import pickle_results
from scipy import integrate
import warnings
from sklearn.neighbors import BallTree
import pickle
import multiprocessing as mp
import time
from lcometric import *
from progressbar import *
from tqdm import *
from functools import partial
import pymangle
#from astroML.datasets import fetch_sdss_specgals
#from astroML.correlation import bootstrap_two_point_angular
%matplotlib inline

In [2]:
# Getting back the objects:
with open('datsLCf.pkl') as f:  # Python 3: open(..., 'rb')
    dat = pickle.load(f)
dat


Out[2]:
array([[ 0.37169 ,  0.980185, -0.003863],
       [ 0.312739,  1.016617,  0.003776],
       [ 0.345944,  0.950251,  0.010821],
       ..., 
       [ 0.290404,  2.829918,  0.173514],
       [ 0.313432,  2.830242,  0.172112],
       [ 0.199924,  2.831786,  0.173661]])

In [3]:
bins=np.arange(0.,0.08,0.005)

In [4]:
print bins


[ 0.     0.005  0.01   0.015  0.02   0.025  0.03   0.035  0.04   0.045
  0.05   0.055  0.06   0.065  0.07   0.075]

In [5]:
Nbins=len(bins)

In [6]:
Nbins


Out[6]:
16

In [7]:
binsq=(bins*0.01)**2

In [8]:
binsq


Out[8]:
array([  0.00000000e+00,   2.50000000e-09,   1.00000000e-08,
         2.25000000e-08,   4.00000000e-08,   6.25000000e-08,
         9.00000000e-08,   1.22500000e-07,   1.60000000e-07,
         2.02500000e-07,   2.50000000e-07,   3.02500000e-07,
         3.60000000e-07,   4.22500000e-07,   4.90000000e-07,
         5.62500000e-07])

In [9]:
LCometric(dat[0],dat[1])


Out[9]:
3.63548187745412e-07

In [10]:
%%time
BT_DLCo = BallTree(dat,metric='pyfunc',func=LCometric,leaf_size=5) 

with open('BTDdatsLCo.pkl', 'w') as f:
    pickle.dump(BT_DLCo,f)


CPU times: user 5.56 s, sys: 60.4 ms, total: 5.62 s
Wall time: 5.66 s

In [11]:
with open('BTDdatsLCo.pkl') as f:
    BTDLCo = pickle.load(f)
    
BTDLCo


Out[11]:
<sklearn.neighbors.ball_tree.BinaryTree at 0x7fa07b76ad00>

In [12]:
%%time
start_time=time.time()
counts_DD=BTDLCo.two_point_correlation(dat,binsq)
print counts_DD
end_time=time.time()
tottime=end_time-start_time
print "Total run time:"
print tottime

with open('BTDcDDLCo.pkl', 'w') as f:
    pickle.dump(counts_DD,f)


[   106799    437432   1554444   4083474   8769621  16354253  27569263
  43066792  63403858  89002211 120261350 157532554 201166258 251520499
 308882029 373506425]
Total run time:
974.912760973
CPU times: user 16min 11s, sys: 1.79 s, total: 16min 13s
Wall time: 16min 14s

In [13]:
with open('BTDcDDLCo.pkl') as f:
    counts_DD = pickle.load(f)
    
counts_DD


Out[13]:
array([   106799,    437432,   1554444,   4083474,   8769621,  16354253,
        27569263,  43066792,  63403858,  89002211, 120261350, 157532554,
       201166258, 251520499, 308882029, 373506425])

In [14]:
DD=np.diff(counts_DD)

In [15]:
DD


Out[15]:
array([  330633,  1117012,  2529030,  4686147,  7584632, 11215010,
       15497529, 20337066, 25598353, 31259139, 37271204, 43633704,
       50354241, 57361530, 64624396])

In [16]:
plt.plot(bins[1:len(bins)],DD,'ro-')


Out[16]:
[<matplotlib.lines.Line2D at 0x1144c0110>]

BallTree.two_point_correlation works almost 10 times faster! with leaf_size=5 Going with it to the random catalog


In [17]:
# Getting back the objects:
with open('rDR7200kLCsrarf.pkl') as f:  # Python 3: open(..., 'rb')
    datR = pickle.load(f)
datR


Out[17]:
array([[ 0.37169 ,  2.992549,  0.248026],
       [ 0.312739,  2.737752,  0.711236],
       [ 0.345944,  2.367643,  0.341962],
       ..., 
       [ 0.311286,  3.228052,  0.199013],
       [ 0.283066,  3.218296,  0.052979],
       [ 0.188215,  4.056418,  0.170602]])

In [18]:
%%time
BT_RLCo = BallTree(datR,metric='pyfunc',func=LCometric,leaf_size=5) 

with open('BTR200kdatsLCo.pkl', 'w') as f:
    pickle.dump(BT_RLCo,f)


CPU times: user 10.9 s, sys: 83.5 ms, total: 11 s
Wall time: 11.1 s

In [19]:
with open('BTR200kdatsLCo.pkl') as f:
    BTRLCo = pickle.load(f)
    
BTRLCo


Out[19]:
<sklearn.neighbors.ball_tree.BinaryTree at 0x7fa07db1a750>

In [20]:
%%time
start_time=time.time()
counts_RR=BTRLCo.two_point_correlation(datR,binsq)
print counts_RR
end_time=time.time()
tottime=end_time-start_time
print "Total run time:"
print tottime

with open('BTR200kcRRLCo.pkl', 'w') as f:
    pickle.dump(counts_RR,f)

with open('BTR200kcRRLCo.pkl') as f:
    counts_RR = pickle.load(f)
    
counts_RR


[    202790     754913    4271170   13651425   31895794   61592027
  104974351  164237406  241743552  339760409  460032675  603903245
  771837674  964350605 1182622758 1428111723]
Total run time:
2786.71320391
CPU times: user 41min 57s, sys: 3.14 s, total: 42min
Wall time: 46min 26s

In [21]:
counts_RR


Out[21]:
array([    202790,     754913,    4271170,   13651425,   31895794,
         61592027,  104974351,  164237406,  241743552,  339760409,
        460032675,  603903245,  771837674,  964350605, 1182622758,
       1428111723])

In [22]:
RR=np.diff(counts_RR)

In [23]:
RR


Out[23]:
array([   552123,   3516257,   9380255,  18244369,  29696233,  43382324,
        59263055,  77506146,  98016857, 120272266, 143870570, 167934429,
       192512931, 218272153, 245488965])

In [24]:
plt.plot(bins[1:len(bins)],RR,'bo-')


Out[24]:
[<matplotlib.lines.Line2D at 0x112e91710>]

In [25]:
RR_zero = (RR == 0)
RR[RR_zero] = 1

In [41]:
%%time
start_time=time.time()
counts_DR=BTRLCo.two_point_correlation(dat,binsq)
print counts_DR
end_time=time.time()
tottime=end_time-start_time
print "Total run time:"
print tottime

with open('BTR200kcDRLCo.pkl', 'w') as f:
    pickle.dump(counts_DR,f)


[      578    262224   1988575   6641678  15715747  30491972  52144012
  81791765 120708346 170063804 230750557 303568424 388721898 486531864
 597642497 722951924]
Total run time:
1290.79253507
CPU times: user 21min 22s, sys: 3.73 s, total: 21min 26s
Wall time: 21min 30s

In [42]:
with open('BTR200kcDRLCo.pkl') as f:
    counts_DR = pickle.load(f)
    
counts_DR


Out[42]:
array([      578,    262224,   1988575,   6641678,  15715747,  30491972,
        52144012,  81791765, 120708346, 170063804, 230750557, 303568424,
       388721898, 486531864, 597642497, 722951924])

In [28]:
DR=np.diff(counts_DR)

In [29]:
DR


Out[29]:
array([   261646,   1726351,   4653103,   9074069,  14776225,  21652040,
        29647753,  38916581,  49355458,  60686753,  72817867,  85153474,
        97809966, 111110633, 125309427])

In [30]:
corrells=(4.0 * DD - 4.0 * DR + RR) / RR

In [31]:
corrells


Out[31]:
array([ 1.49979443,  0.30683224,  0.09423656,  0.03796684,  0.03131242,
        0.03766981,  0.04492106,  0.04113333,  0.03048901,  0.02130009,
        0.0117044 ,  0.01104806,  0.01397325,  0.0150076 ,  0.01119741])

In [32]:
plt.plot(bins[1:len(bins)],corrells,'go-')


Out[32]:
[<matplotlib.lines.Line2D at 0x114678f50>]

In [33]:
plt.plot(bins[1:len(bins)],bins[1:len(bins)]*bins[1:len(bins)]*corrells*(c*1e-5)**2,'go-')


Out[33]:
[<matplotlib.lines.Line2D at 0x114bd8b90>]

In [34]:
plt.plot(bins[2:len(bins)],bins[2:len(bins)]*bins[2:len(bins)]*corrells[1:len(bins)]*(c*1e-5)**2,'go-')


Out[34]:
[<matplotlib.lines.Line2D at 0x11a0612d0>]

In [35]:
plt.plot(bins[2:len(bins)],corrells[1:len(bins)],'go-')


Out[35]:
[<matplotlib.lines.Line2D at 0x11a1768d0>]

In [36]:
plt.plot(bins[2:len(bins)],corrells[1:len(bins)],'go-')
plt.savefig("correl2xlsLCo.pdf")



In [37]:
plt.plot(bins[2:len(bins)]*c/1e5,corrells[1:len(bins)],'bo-')
plt.savefig("correl2x1lsLCo.pdf")



In [38]:
plt.yscale('log')
plt.plot(bins[1:len(bins)]*c/1e5,corrells,'bo-')
plt.savefig("correllsfiglogLCo.pdf")



In [39]:
plt.yscale('log')
plt.plot(bins[2:len(bins)]*c/1e5,corrells[1:len(bins)],'ro-')
plt.savefig("correllslog2xLCo.pdf")



In [40]:
plt.yscale('log')
plt.xscale('log')
plt.plot(bins[1:len(bins)]*c/1e5,corrells,'bo-')
plt.savefig("correllsloglogLCo.pdf")