Correlation function of DR72 SDSS VAGC Catalog

First import all the modules such as healpy and astropy needed for analyzing the structure


In [10]:
import healpix_util as hu
import astropy as ap
import numpy as np
from astropy.io import fits
from astropy.table import Table
import astropy.io.ascii as ascii
from astropy.io import fits
from astropy.constants import c
import matplotlib.pyplot as plt
import math as m
from math import pi
#from scipy.constants import c
import scipy.special as sp
from astroML.decorators import pickle_results
from scipy import integrate
import warnings
from sklearn.neighbors import BallTree
import pickle
import multiprocessing as mp
import time
from lcck0metric import *
from progressbar import *
from tqdm import *
from functools import partial
import pymangle
#from astroML.datasets import fetch_sdss_specgals
#from astroML.correlation import bootstrap_two_point_angular
%matplotlib inline

In [2]:
# Getting back the objects:
with open('datsLCf.pkl') as f:  # Python 3: open(..., 'rb')
    dat = pickle.load(f)
dat


Out[2]:
array([[ 0.37169 ,  0.980185, -0.003863],
       [ 0.312739,  1.016617,  0.003776],
       [ 0.345944,  0.950251,  0.010821],
       ..., 
       [ 0.290404,  2.829918,  0.173514],
       [ 0.313432,  2.830242,  0.172112],
       [ 0.199924,  2.831786,  0.173661]])

In [3]:
bins=np.arange(0.,0.08,0.005)

In [4]:
print bins


[ 0.     0.005  0.01   0.015  0.02   0.025  0.03   0.035  0.04   0.045
  0.05   0.055  0.06   0.065  0.07   0.075]

In [5]:
Nbins=len(bins)

In [6]:
Nbins


Out[6]:
16

In [7]:
binsq=(bins*0.007)**2

In [8]:
binsq


Out[8]:
array([  0.00000000e+00,   1.22500000e-09,   4.90000000e-09,
         1.10250000e-08,   1.96000000e-08,   3.06250000e-08,
         4.41000000e-08,   6.00250000e-08,   7.84000000e-08,
         9.92250000e-08,   1.22500000e-07,   1.48225000e-07,
         1.76400000e-07,   2.07025000e-07,   2.40100000e-07,
         2.75625000e-07])

In [11]:
LCck0metric(dat[0],dat[1])


Out[11]:
1.7824210044123225e-07

In [12]:
%%time
BT_DLCc = BallTree(dat,metric='pyfunc',func=LCck0metric,leaf_size=5) 

with open('BTDdatsLCck0.pkl', 'w') as f:
    pickle.dump(BT_DLCc,f)


CPU times: user 6.54 s, sys: 60.4 ms, total: 6.6 s
Wall time: 6.69 s

In [13]:
with open('BTDdatsLCck0.pkl') as f:
    BTDLCc = pickle.load(f)
    
BTDLCc


Out[13]:
<sklearn.neighbors.ball_tree.BinaryTree at 0x7fa57502c410>

In [14]:
%%time
start_time=time.time()
counts_DD=BTDLCc.two_point_correlation(dat,binsq)
print counts_DD
end_time=time.time()
tottime=end_time-start_time
print "Total run time:"
print tottime

with open('BTDcDDLCck0.pkl', 'w') as f:
    pickle.dump(counts_DD,f)


[        0    425339   1537983   4060819   8740339  16317084  27524164
  43014629  63345540  88938287 120192055 157458958 201089427 251439502
 308796226 373418518]
Total run time:
1697.91238689
CPU times: user 18min 40s, sys: 1.52 s, total: 18min 42s
Wall time: 28min 17s

In [15]:
with open('BTDcDDLCck0.pkl') as f:
    counts_DD = pickle.load(f)
    
counts_DD


Out[15]:
array([        0,    425339,   1537983,   4060819,   8740339,  16317084,
        27524164,  43014629,  63345540,  88938287, 120192055, 157458958,
       201089427, 251439502, 308796226, 373418518])

In [16]:
DD=np.diff(counts_DD)

In [17]:
DD


Out[17]:
array([  425339,  1112644,  2522836,  4679520,  7576745, 11207080,
       15490465, 20330911, 25592747, 31253768, 37266903, 43630469,
       50350075, 57356724, 64622292])

In [18]:
plt.plot(bins[1:len(bins)],DD,'ro-')


Out[18]:
[<matplotlib.lines.Line2D at 0x10d973110>]

BallTree.two_point_correlation works almost 10 times faster! with leaf_size=5 Going with it to the random catalog


In [19]:
# Getting back the objects:
with open('rDR7200kLCsrarf.pkl') as f:  # Python 3: open(..., 'rb')
    datR = pickle.load(f)
datR


Out[19]:
array([[ 0.37169 ,  2.992549,  0.248026],
       [ 0.312739,  2.737752,  0.711236],
       [ 0.345944,  2.367643,  0.341962],
       ..., 
       [ 0.311286,  3.228052,  0.199013],
       [ 0.283066,  3.218296,  0.052979],
       [ 0.188215,  4.056418,  0.170602]])

In [20]:
%%time
BT_RLCc = BallTree(datR,metric='pyfunc',func=LCck0metric,leaf_size=5) 

with open('BTR200kdatsLCck0.pkl', 'w') as f:
    pickle.dump(BT_RLCc,f)


CPU times: user 13.4 s, sys: 107 ms, total: 13.5 s
Wall time: 13.6 s

In [21]:
with open('BTR200kdatsLCck0.pkl') as f:
    BTRLCc = pickle.load(f)
    
BTRLCc


Out[21]:
<sklearn.neighbors.ball_tree.BinaryTree at 0x7fa5719057c0>

In [22]:
%%time
start_time=time.time()
counts_RR=BTRLCc.two_point_correlation(datR,binsq)
print counts_RR
end_time=time.time()
tottime=end_time-start_time
print "Total run time:"
print tottime

with open('BTR200kcRRLCck0.pkl', 'w') as f:
    pickle.dump(counts_RR,f)


[         0     728533    4221283   13572913   31788620   61461112
  104820551  164058738  241540965  339531069  459785854  603638312
  771564159  964064012 1182327754 1427804504]
Total run time:
2999.19329596
CPU times: user 49min 46s, sys: 7.32 s, total: 49min 53s
Wall time: 49min 59s

In [23]:
with open('BTR200kcRRLCck0.pkl') as f:
    counts_RR = pickle.load(f)
    
counts_RR


Out[23]:
array([         0,     728533,    4221283,   13572913,   31788620,
         61461112,  104820551,  164058738,  241540965,  339531069,
        459785854,  603638312,  771564159,  964064012, 1182327754,
       1427804504])

In [24]:
RR=np.diff(counts_RR)

In [25]:
RR


Out[25]:
array([   728533,   3492750,   9351630,  18215707,  29672492,  43359439,
        59238187,  77482227,  97990104, 120254785, 143852458, 167925847,
       192499853, 218263742, 245476750])

In [26]:
plt.plot(bins[1:len(bins)],RR,'bo-')


Out[26]:
[<matplotlib.lines.Line2D at 0x10d8a1550>]

In [27]:
RR_zero = (RR == 0)
RR[RR_zero] = 1

In [28]:
%%time
start_time=time.time()
counts_DR=BTRLCc.two_point_correlation(dat,binsq)
print counts_DR
end_time=time.time()
tottime=end_time-start_time
print "Total run time:"
print tottime

with open('BTR200kcDRLCck0.pkl', 'w') as f:
    pickle.dump(counts_DR,f)


[        0    248696   1963312   6600612  15660169  30422390  52063315
  81698582 120600538 169941397 230622094 303428099 388575295 486380096
 597484697 722785364]
Total run time:
1472.67572522
CPU times: user 24min 30s, sys: 4.68 s, total: 24min 35s
Wall time: 24min 32s

In [29]:
with open('BTR200kcDRLCck0.pkl') as f:
    counts_DR = pickle.load(f)
    
counts_DR


Out[29]:
array([        0,    248696,   1963312,   6600612,  15660169,  30422390,
        52063315,  81698582, 120600538, 169941397, 230622094, 303428099,
       388575295, 486380096, 597484697, 722785364])

In [30]:
DR=np.diff(counts_DR)

In [31]:
DR


Out[31]:
array([   248696,   1714616,   4637300,   9059557,  14762221,  21640925,
        29635267,  38901956,  49340859,  60680697,  72806005,  85147196,
        97804801, 111104601, 125300667])

In [32]:
corrells=(4.0 * DD - 4.0 * DR + RR) / RR

In [33]:
corrells


Out[33]:
array([ 1.96985586,  0.31060397,  0.09557414,  0.03818457,  0.03136198,
        0.03745572,  0.04488623,  0.04127459,  0.03059142,  0.0211806 ,
        0.01179021,  0.01107   ,  0.01392702,  0.01499211,  0.01125667])

In [34]:
plt.plot(bins[1:len(bins)],corrells,'go-')


Out[34]:
[<matplotlib.lines.Line2D at 0x10f79ecd0>]

In [35]:
plt.plot(bins[1:len(bins)],bins[1:len(bins)]*bins[1:len(bins)]*corrells*(c*1e-5)**2,'go-')


Out[35]:
[<matplotlib.lines.Line2D at 0x10f83fe50>]

In [36]:
plt.plot(bins[2:len(bins)],bins[2:len(bins)]*bins[2:len(bins)]*corrells[1:len(bins)]*(c*1e-5)**2,'go-')


Out[36]:
[<matplotlib.lines.Line2D at 0x112632cd0>]

In [37]:
plt.plot(bins[2:len(bins)],corrells[1:len(bins)],'go-')


Out[37]:
[<matplotlib.lines.Line2D at 0x112742d90>]

In [38]:
plt.plot(bins[2:len(bins)],corrells[1:len(bins)],'go-')
plt.savefig("correl2xlsLCck0.pdf")



In [39]:
plt.plot(bins[2:len(bins)]*c/1e5,corrells[1:len(bins)],'bo-')
plt.savefig("correl2x1lsLCck0.pdf")



In [40]:
plt.yscale('log')
plt.plot(bins[1:len(bins)]*c/1e5,corrells,'bo-')
plt.savefig("correllsfiglogLCck0.pdf")



In [41]:
plt.yscale('log')
plt.plot(bins[2:len(bins)]*c/1e5,corrells[1:len(bins)],'ro-')
plt.savefig("correllslog2xLCck0.pdf")



In [42]:
plt.yscale('log')
plt.xscale('log')
plt.plot(bins[1:len(bins)]*c/1e5,corrells,'bo-')
plt.savefig("correllsloglogLCck0.pdf")



In [ ]: