Correlation function of DR12G SDSS CMASS Catalog

First import all the modules such as healpy and astropy needed for analyzing the structure


In [1]:
import healpix_util as hu
import astropy as ap
import numpy as np
from astropy.io import fits
from astropy.table import Table
import astropy.io.ascii as ascii
from astropy.io import fits
from astropy.constants import c
import matplotlib.pyplot as plt
import math as m
from math import pi
#from scipy.constants import c
import scipy.special as sp
from astroML.decorators import pickle_results
from scipy import integrate
import warnings
from sklearn.neighbors import BallTree
import pickle
import multiprocessing as mp
import time
from cython_metric import *
from lcdmmetric import *
from progressbar import *
from tqdm import *
from functools import partial
import pymangle
#from astroML.datasets import fetch_sdss_specgals
#from astroML.correlation import bootstrap_two_point_angular
%matplotlib inline

Read the data file (taken from http://cosmo.nyu.edu/~eak306/SDSS-LRG.html ) converted to ascii with comoving distance etc. in V01 reading from pkl files for faster read


In [4]:
data=ascii.read("./output/dr12gcmnsrarf.dat")

In [5]:
data


Out[5]:
<Table length=618806>
zradecsrardecr
float64float64float64float64float64float64
0.54253129.17618848.9464990.4730782.254550.854278
0.399682117.41696339.2767590.362022.0493130.685509
0.537702116.91272439.4433110.4694762.0405120.688416
0.519172116.95017239.4907690.4555522.0411660.689244
0.543191117.52847140.1764930.4735712.0512590.701212
0.589608123.81615946.6367840.507672.1610.813965
0.548197127.60127749.7759370.4772932.2270620.868754
0.555224122.81641843.9408640.4825012.1435510.766913
0.380021122.93780444.0331840.3459962.1456690.768524
0.562535123.13237844.2008120.4878962.1490650.77145
..................
0.482233225.49998458.3616070.4273363.9357171.018602
0.476738172.85207360.0851870.4230853.0168381.048684
0.474484173.44870860.3325440.4213393.0272511.053002
0.456862173.66728260.1818360.4075973.0310661.050371
0.471463174.14012460.819710.4189923.0393191.061504
0.526395213.81728764.4620380.4609983.7318161.125075
0.479835215.48384663.981420.4254833.7609031.116686
0.448259218.30272763.1587470.4008373.8101011.102328
0.473205216.81682964.0369890.4203463.7841681.117656
0.485066215.16677965.0414420.4295223.7553691.135187

In [6]:
data.remove_column('z')
data.remove_column('ra')
data.remove_column('dec')

In [7]:
data


Out[7]:
<Table length=618806>
srardecr
float64float64float64
0.4730782.254550.854278
0.362022.0493130.685509
0.4694762.0405120.688416
0.4555522.0411660.689244
0.4735712.0512590.701212
0.507672.1610.813965
0.4772932.2270620.868754
0.4825012.1435510.766913
0.3459962.1456690.768524
0.4878962.1490650.77145
.........
0.4273363.9357171.018602
0.4230853.0168381.048684
0.4213393.0272511.053002
0.4075973.0310661.050371
0.4189923.0393191.061504
0.4609983.7318161.125075
0.4254833.7609031.116686
0.4008373.8101011.102328
0.4203463.7841681.117656
0.4295223.7553691.135187

In [8]:
rs=np.array(data['s'])
rrar=np.array(data['rar'])
rdecr=np.array(data['decr'])

In [9]:
dat=np.array([rs,rrar,rdecr])

In [10]:
dat


Out[10]:
array([[ 0.473078,  0.36202 ,  0.469476, ...,  0.400837,  0.420346,
         0.429522],
       [ 2.25455 ,  2.049313,  2.040512, ...,  3.810101,  3.784168,
         3.755369],
       [ 0.854278,  0.685509,  0.688416, ...,  1.102328,  1.117656,
         1.135187]])

In [12]:
dat.reshape(3,len(data))


Out[12]:
array([[ 0.473078,  0.36202 ,  0.469476, ...,  0.400837,  0.420346,
         0.429522],
       [ 2.25455 ,  2.049313,  2.040512, ...,  3.810101,  3.784168,
         3.755369],
       [ 0.854278,  0.685509,  0.688416, ...,  1.102328,  1.117656,
         1.135187]])

In [13]:
dat=dat.transpose()

In [14]:
dat


Out[14]:
array([[ 0.473078,  2.25455 ,  0.854278],
       [ 0.36202 ,  2.049313,  0.685509],
       [ 0.469476,  2.040512,  0.688416],
       ..., 
       [ 0.400837,  3.810101,  1.102328],
       [ 0.420346,  3.784168,  1.117656],
       [ 0.429522,  3.755369,  1.135187]])

In [15]:
# Saving the objects:
with open('dr12gcmnLCDM.pkl', 'w') as f:  # Python 3: open(..., 'wb')
    pickle.dump(dat, f)

In [3]:
# Getting back the objects:
with open('../pkl/dr12gcmnLCDM.pkl') as f:  # Python 3: open(..., 'rb')
    dat = pickle.load(f)
dat


Out[3]:
array([[ 0.473078,  2.25455 ,  0.854278],
       [ 0.36202 ,  2.049313,  0.685509],
       [ 0.469476,  2.040512,  0.688416],
       ..., 
       [ 0.400837,  3.810101,  1.102328],
       [ 0.420346,  3.784168,  1.117656],
       [ 0.429522,  3.755369,  1.135187]])

In [4]:
# Getting back the objects:
with open('../pkl/rdr12gcmnLCDM.pkl') as f:  # Python 3: open(..., 'rb')
    datR = pickle.load(f)
datR


Out[4]:
array([[ 0.473078,  4.351359,  0.720315],
       [ 0.36202 ,  2.647925,  0.308903],
       [ 0.469475,  3.890172,  1.066587],
       ..., 
       [ 0.400837,  2.035658,  0.814292],
       [ 0.420346,  2.734821,  1.023698],
       [ 0.429522,  4.468994,  0.481731]])

In [5]:
LCDMmetricsq(dat[0],datR[1])


Out[5]:
0.0783923333355121

In [21]:
%%time
BT_D = BallTree(dat,metric='pyfunc',func=LCDMmetricsq) 

with open('BTDdr12gcmnsLCDM.pkl', 'w') as f:
    pickle.dump(BT_D,f)


CPU times: user 31.4 s, sys: 513 ms, total: 31.9 s
Wall time: 32.5 s

In [8]:
with open('../pkl/BTDdr12gcmnsLCDM.pkl') as f:
    BTD = pickle.load(f)
    
BTD


Out[8]:
<sklearn.neighbors.ball_tree.BinaryTree at 0x7fd7945e16c0>

In [16]:
bins=np.arange(0.004,0.084,0.004)
bins


Out[16]:
array([ 0.004,  0.008,  0.012,  0.016,  0.02 ,  0.024,  0.028,  0.032,
        0.036,  0.04 ,  0.044,  0.048,  0.052,  0.056,  0.06 ,  0.064,
        0.068,  0.072,  0.076,  0.08 ])

In [17]:
binsq=bins**2
binsq


Out[17]:
array([  1.60000000e-05,   6.40000000e-05,   1.44000000e-04,
         2.56000000e-04,   4.00000000e-04,   5.76000000e-04,
         7.84000000e-04,   1.02400000e-03,   1.29600000e-03,
         1.60000000e-03,   1.93600000e-03,   2.30400000e-03,
         2.70400000e-03,   3.13600000e-03,   3.60000000e-03,
         4.09600000e-03,   4.62400000e-03,   5.18400000e-03,
         5.77600000e-03,   6.40000000e-03])

In [18]:
%%time
start_time=time.time()
counts_DD=BTD.two_point_correlation(dat,binsq)
print counts_DD
end_time=time.time()
tottime=end_time-start_time
print "Total run time:"
print tottime

with open('../pkl/BTDdr12gcmnsDDLCDM.pkl', 'w') as f:
    pickle.dump(counts_DD,f)

with open('../pkl/BTDdr12gcmnsDDLCDM.pkl') as f:
    counts_DD = pickle.load(f)
    
counts_DD


[   3254425   12392190   32334164   67664922  123025144  204096285
  317424713  468737219  663426786  905172030 1197901623 1545301400
 1950639388 2417101102 2947350432 3543620412 4209458629 4944876350
 5753245695 6636628986]
Total run time:
31638.8450139
CPU times: user 8h 37min 31s, sys: 1min 32s, total: 8h 39min 3s
Wall time: 8h 47min 18s

In [19]:
DD=np.diff(counts_DD)
DD


Out[19]:
array([  9137765,  19941974,  35330758,  55360222,  81071141, 113328428,
       151312506, 194689567, 241745244, 292729593, 347399777, 405337988,
       466461714, 530249330, 596269980, 665838217, 735417721, 808369345,
       883383291])

In [ ]:


In [20]:
%%time
start_time=time.time()
counts_DR=BTD.two_point_correlation(datR,binsq)
print counts_DR
end_time=time.time()
tottime=end_time-start_time
print "Total run time:"
print tottime

with open('../pkl/BTDdr12gcmnsDRLCDM.pkl', 'w') as f:
    pickle.dump(counts_DR,f)

with open('../pkl/BTDdr12gcmnsDRLCDM.pkl') as f:
    counts_DR = pickle.load(f)
    
counts_DR


[    736505    5675630   18546250   42674926   81353880  138446090
  218612406  326088177  464442075  636625801  845383147 1093223208
 1382369815 1715275271 2093553664 2519357481 2993768066 3518471096
 4094492719 4723198652]
Total run time:
15914.482981
CPU times: user 3h 36min 44s, sys: 31.6 s, total: 3h 37min 15s
Wall time: 4h 25min 14s

In [21]:
counts_DR


Out[21]:
array([    736505,    5675630,   18546250,   42674926,   81353880,
        138446090,  218612406,  326088177,  464442075,  636625801,
        845383147, 1093223208, 1382369815, 1715275271, 2093553664,
       2519357481, 2993768066, 3518471096, 4094492719, 4723198652])

In [22]:
DR=np.diff(counts_DR)

In [23]:
DR


Out[23]:
array([  4939125,  12870620,  24128676,  38678954,  57092210,  80166316,
       107475771, 138353898, 172183726, 208757346, 247840061, 289146607,
       332905456, 378278393, 425803817, 474410585, 524703030, 576021623,
       628705933])

In [24]:
with open('../pkl/rBTDdr12gcmnsDDLCDM.pkl') as f:
    counts_RR = pickle.load(f)
    
counts_RR


Out[24]:
array([   1358459,    6273635,   19012588,   42810633,   80777592,
        136780874,  215684501,  321583256,  457878569,  627472414,
        832603166, 1075487654, 1358410654, 1682811804, 2050435215,
       2463042572, 2922567634, 3430652477, 3987726170, 4593614344])

In [25]:
RR=np.diff(counts_DR)

In [26]:
corrells=(DD+RR-2.0*DR)/RR

In [27]:
plt.plot(bins[1:len(bins)],corrells,'bo-')


Out[27]:
[<matplotlib.lines.Line2D at 0x11ad47750>]

In [31]:
plt.yscale('log')
plt.plot(bins[1:len(bins)]*c/1e5,corrells,'bo-')


Out[31]:
[<matplotlib.lines.Line2D at 0x11b4e5210>]

In [33]:
plt.yscale('log')
plt.xscale('log')
plt.plot(bins[3:len(bins)]*c/1e5,corrells[2:len(corrells)],'bo-')


Out[33]:
[<matplotlib.lines.Line2D at 0x11b8e0890>]

In [28]:
corrells


Out[28]:
array([ 0.8500777 ,  0.54941829,  0.46426426,  0.43127505,  0.42000355,
        0.41366641,  0.40787551,  0.40718527,  0.40399589,  0.40224811,
        0.40170954,  0.40184245,  0.40118375,  0.40174364,  0.40033968,
        0.40350624,  0.40158848,  0.40336632,  0.40508184])

In [34]:
err=(1.0+corrells)/np.sqrt(DD)

In [35]:
err


Out[35]:
array([  6.12026145e-04,   3.46964151e-04,   2.46344565e-04,
         1.92364173e-04,   1.57708931e-04,   1.32793710e-04,
         1.14452911e-04,   1.00850938e-04,   9.02998165e-05,
         8.19580369e-05,   7.52044017e-05,   6.96290634e-05,
         6.48764517e-05,   6.08735359e-05,   5.73471609e-05,
         5.43913689e-05,   5.16836853e-05,   4.93589749e-05,
         4.72745095e-05])

In [67]:
#plt.errorbar(bins[1:len(bins)]*c/1e5,corrells,yerr=err)
#plt.plot(bins[1:len(bins)]*c/1e5,err,'ro-')
plt.errorbar(c.value/1e5*bins[1:len(bins)],corrells,yerr=err,fmt='ro-')


Out[67]:
<Container object of 3 artists>

In [45]:
bins[1:len(bins)]


Out[45]:
array([ 0.008,  0.012,  0.016,  0.02 ,  0.024,  0.028,  0.032,  0.036,
        0.04 ,  0.044,  0.048,  0.052,  0.056,  0.06 ,  0.064,  0.068,
        0.072,  0.076,  0.08 ])

In [58]:
plt.yscale('log')
plt.xscale('log')
plt.errorbar(bins[1:len(bins)],corrells,yerr=err,fmt='bo-')


Out[58]:
<Container object of 3 artists>

In [66]:
plt.yscale('log')
plt.xscale('log')
plt.errorbar(c.value/1e5*bins[3:len(bins)],corrells[2:len(corrells)],yerr=err[2:len(err)],fmt='bo-')


Out[66]:
<Container object of 3 artists>

In [75]:
#plt.yscale('log')
plt.xscale('log')
plt.errorbar(c.value/1e5*bins[3:len(bins)],corrells[2:len(corrells)],yerr=err[2:len(err)],fmt='ro-')


Out[75]:
<Container object of 3 artists>

In [73]:
plt.yscale('log')
plt.xscale('log')
plt.errorbar(c.value/1e5*bins[2:len(bins)],(bins[2:len(bins)])**1.5*(corrells[1:len(corrells)]),yerr=err[1:len(err)],fmt='bo-')


Out[73]:
<Container object of 3 artists>

In [ ]:


In [64]:
c


Out[64]:
$2.9979246 \times 10^{8} \; \mathrm{\frac{m}{s}}$

In [65]:
c.value


Out[65]:
299792458.0

In [44]:
plt.plot(bins[1:len(bins)],correl,'go-')


Out[44]:
[<matplotlib.lines.Line2D at 0x113c4c5d0>]

In [39]:
help(plt.errorbar)


Help on function errorbar in module matplotlib.pyplot:

errorbar(x, y, yerr=None, xerr=None, fmt=u'', ecolor=None, elinewidth=None, capsize=None, barsabove=False, lolims=False, uplims=False, xlolims=False, xuplims=False, errorevery=1, capthick=None, hold=None, data=None, **kwargs)
    Plot an errorbar graph.
    
    Plot x versus y with error deltas in yerr and xerr.
    Vertical errorbars are plotted if yerr is not None.
    Horizontal errorbars are plotted if xerr is not None.
    
    x, y, xerr, and yerr can all be scalars, which plots a
    single error bar at x, y.
    
    Parameters
    ----------
    x : scalar
    y : scalar
    
    xerr/yerr : scalar or array-like, shape(n,1) or shape(2,n), optional
        If a scalar number, len(N) array-like object, or an Nx1
        array-like object, errorbars are drawn at +/-value relative
        to the data. Default is None.
    
        If a sequence of shape 2xN, errorbars are drawn at -row1
        and +row2 relative to the data.
    
    fmt : plot format string, optional, default: None
        The plot format symbol. If fmt is 'none' (case-insensitive),
        only the errorbars are plotted.  This is used for adding
        errorbars to a bar plot, for example.  Default is '',
        an empty plot format string; properties are
        then identical to the defaults for :meth:`plot`.
    
    ecolor : mpl color, optional, default: None
        A matplotlib color arg which gives the color the errorbar lines;
        if None, use the color of the line connecting the markers.
    
    elinewidth : scalar, optional, default: None
        The linewidth of the errorbar lines. If None, use the linewidth.
    
    capsize : scalar, optional, default: None
        The length of the error bar caps in points; if None, it will
        take the value from ``errorbar.capsize``
        :data:`rcParam<matplotlib.rcParams>`.
    
    capthick : scalar, optional, default: None
        An alias kwarg to markeredgewidth (a.k.a. - mew). This
        setting is a more sensible name for the property that
        controls the thickness of the error bar cap in points. For
        backwards compatibility, if mew or markeredgewidth are given,
        then they will over-ride capthick. This may change in future
        releases.
    
    barsabove : bool, optional, default: False
        if True , will plot the errorbars above the plot
        symbols. Default is below.
    
    lolims / uplims / xlolims / xuplims : bool, optional, default:None
        These arguments can be used to indicate that a value gives
        only upper/lower limits. In that case a caret symbol is
        used to indicate this. lims-arguments may be of the same
        type as *xerr* and *yerr*.  To use limits with inverted
        axes, :meth:`set_xlim` or :meth:`set_ylim` must be called
        before :meth:`errorbar`.
    
    errorevery : positive integer, optional, default:1
        subsamples the errorbars. e.g., if errorevery=5, errorbars for
        every 5-th datapoint will be plotted. The data plot itself still
        shows all data points.
    
    Returns
    -------
    plotline : :class:`~matplotlib.lines.Line2D` instance
        x, y plot markers and/or line
    caplines : list of :class:`~matplotlib.lines.Line2D` instances
        error bar cap
    barlinecols : list of :class:`~matplotlib.collections.LineCollection`
        horizontal and vertical error ranges.
    
    Other Parameters
    ----------------
    kwargs : All other keyword arguments are passed on to the plot
        command for the markers. For example, this code makes big red
        squares with thick green edges::
    
            x,y,yerr = rand(3,10)
            errorbar(x, y, yerr, marker='s', mfc='red',
                     mec='green', ms=20, mew=4)
    
        where mfc, mec, ms and mew are aliases for the longer
        property names, markerfacecolor, markeredgecolor, markersize
        and markeredgewidth.
    
        valid kwargs for the marker properties are
    
          agg_filter: unknown
      alpha: float (0.0 transparent through 1.0 opaque) 
      animated: [True | False] 
      antialiased or aa: [True | False] 
      axes: an :class:`~matplotlib.axes.Axes` instance 
      clip_box: a :class:`matplotlib.transforms.Bbox` instance 
      clip_on: [True | False] 
      clip_path: [ (:class:`~matplotlib.path.Path`, :class:`~matplotlib.transforms.Transform`) | :class:`~matplotlib.patches.Patch` | None ] 
      color or c: any matplotlib color 
      contains: a callable function 
      dash_capstyle: ['butt' | 'round' | 'projecting'] 
      dash_joinstyle: ['miter' | 'round' | 'bevel'] 
      dashes: sequence of on/off ink in points 
      drawstyle: ['default' | 'steps' | 'steps-pre' | 'steps-mid' | 'steps-post'] 
      figure: a :class:`matplotlib.figure.Figure` instance 
      fillstyle: ['full' | 'left' | 'right' | 'bottom' | 'top' | 'none'] 
      gid: an id string 
      label: string or anything printable with '%s' conversion. 
      linestyle or ls: ['solid' | 'dashed', 'dashdot', 'dotted' | (offset, on-off-dash-seq) | ``'-'`` | ``'--'`` | ``'-.'`` | ``':'`` | ``'None'`` | ``' '`` | ``''``]
      linewidth or lw: float value in points 
      marker: :mod:`A valid marker style <matplotlib.markers>`
      markeredgecolor or mec: any matplotlib color 
      markeredgewidth or mew: float value in points 
      markerfacecolor or mfc: any matplotlib color 
      markerfacecoloralt or mfcalt: any matplotlib color 
      markersize or ms: float 
      markevery: [None | int | length-2 tuple of int | slice | list/array of int | float | length-2 tuple of float]
      path_effects: unknown
      picker: float distance in points or callable pick function ``fn(artist, event)`` 
      pickradius: float distance in points 
      rasterized: [True | False | None] 
      sketch_params: unknown
      snap: unknown
      solid_capstyle: ['butt' | 'round' |  'projecting'] 
      solid_joinstyle: ['miter' | 'round' | 'bevel'] 
      transform: a :class:`matplotlib.transforms.Transform` instance 
      url: a url string 
      visible: [True | False] 
      xdata: 1D array 
      ydata: 1D array 
      zorder: any number 
    
    Examples
    --------
    .. plot:: mpl_examples/statistics/errorbar_demo.py
    
    .. note::
        In addition to the above described arguments, this function can take a
        **data** keyword argument. If such a **data** argument is given, the
        following arguments are replaced by **data[<arg>]**:
    
        * All arguments with the following names: 'x', 'xerr', 'y', 'yerr'.


In [45]:
plt.plot(bins[1:len(bins)],bins[1:len(bins)]*bins[1:len(bins)]*correl,'go-')


Out[45]:
[<matplotlib.lines.Line2D at 0x113d5fed0>]

In [46]:
plt.plot(bins[2:len(bins)],bins[2:len(bins)]*bins[2:len(bins)]*correl[1:len(bins)],'go-')


Out[46]:
[<matplotlib.lines.Line2D at 0x113e8ba90>]

In [47]:
plt.plot(bins[2:len(bins)],correl[1:len(bins)],'go-')


Out[47]:
[<matplotlib.lines.Line2D at 0x11054ac90>]

In [48]:
plt.plot(bins[2:len(bins)],correl[1:len(bins)],'go-')
plt.savefig("correl2x.pdf")



In [53]:
plt.plot(bins[2:len(bins)]*c/1e5,correl[1:len(bins)],'bo-')
plt.savefig("correl2x1.pdf")



In [52]:
plt.yscale('log')
plt.plot(bins[2:len(bins)]*c/1e5,correl[1:len(bins)],'ro-')


Out[52]:
[<matplotlib.lines.Line2D at 0x1142e1f10>]

In [54]:
%%time
start_time=time.time()
counts_DR=BTR.two_point_correlation(dat,bins)
print counts_DR
end_time=time.time()
tottime=end_time-start_time
print "Total run time:"
print tottime

with open('BTR200kcDRLCDM.pkl', 'w') as f:
    pickle.dump(counts_DR,f)


[        0    236152   1835676   6024256  13930163  26635863  45156095
  70428809 103344608 144719046 195306049 255767295 326645884 408522145
 501808240 606822439]
Total run time:
5362.83386302
CPU times: user 1h 29min 5s, sys: 7.79 s, total: 1h 29min 13s
Wall time: 1h 29min 23s

In [55]:
with open('BTR200kcDRLCDM.pkl') as f:
    counts_DR = pickle.load(f)
    
counts_DR


Out[55]:
array([        0,    236152,   1835676,   6024256,  13930163,  26635863,
        45156095,  70428809, 103344608, 144719046, 195306049, 255767295,
       326645884, 408522145, 501808240, 606822439])

In [56]:
DR=np.diff(counts_DR)

In [57]:
corrells=(4.0 * DD - 4.0 * DR + RR) / RR

In [58]:
DR


Out[58]:
array([   236152,   1599524,   4188580,   7905907,  12705700,  18520232,
        25272714,  32915799,  41374438,  50587003,  60461246,  70878589,
        81876261,  93286095, 105014199])

In [59]:
corrells


Out[59]:
array([ 1.7052646 ,  0.29978504,  0.10500558,  0.05797354,  0.03719729,
        0.02859312,  0.02948462,  0.02686032,  0.02325382,  0.02064449,
        0.01721195,  0.01703756,  0.01286895,  0.01164703,  0.01089049])

In [60]:
plt.plot(bins[1:len(bins)],corrells,'go-')


Out[60]:
[<matplotlib.lines.Line2D at 0x114929c10>]

In [61]:
plt.plot(bins[1:len(bins)],bins[1:len(bins)]*bins[1:len(bins)]*corrells*(c*1e-5)**2,'go-')


Out[61]:
[<matplotlib.lines.Line2D at 0x1143fea50>]

In [72]:
plt.yscale('log')
plt.plot(bins[1:len(bins)]*c/1e5,corrells,'bo-')
plt.savefig("correllsfiglog.pdf")



In [71]:
plt.yscale('log')
plt.plot(bins[2:len(bins)]*c/1e5,corrells[1:len(bins)],'ro-')
plt.savefig("correllslog2x.pdf")



In [73]:
plt.yscale('log')
plt.xscale('log')
plt.plot(bins[1:len(bins)]*c/1e5,corrells,'bo-')
plt.savefig("correllsloglog.pdf")