Correlation function of DR72 SDSS VAGC Catalog

First import all the modules such as healpy and astropy needed for analyzing the structure


In [1]:
import healpix_util as hu
import astropy as ap
import numpy as np
from astropy.io import fits
from astropy.table import Table
import astropy.io.ascii as ascii
from astropy.io import fits
from astropy.constants import c
import matplotlib.pyplot as plt
import math as m
from math import pi
import scipy.special as sp
from astroML.decorators import pickle_results
from scipy import integrate
import warnings
from sklearn.neighbors import BallTree
import pickle
import multiprocessing as mp
import time
#from cython_metric import *
from lcdmmetric import *
from progressbar import *
from tqdm import *
from functools import partial
import pymangle
#from astroML.datasets import fetch_sdss_specgals
#from astroML.correlation import bootstrap_two_point_angular
%matplotlib inline

Read the data file (taken from http://cosmo.nyu.edu/~eak306/SDSS-LRG.html ) converted to ascii with comoving distance etc. in V01 reading from pkl files for faster read


In [2]:
# Getting back the objects:
with open('../output/datsLCDM.pkl') as f:  # Python 3: open(..., 'rb')
    dat = pickle.load(f)
dat


Out[2]:
array([[ 0.402352,  0.980185, -0.003863],
       [ 0.335419,  1.016617,  0.003776],
       [ 0.373033,  0.950251,  0.010821],
       ..., 
       [ 0.310267,  2.829918,  0.173514],
       [ 0.336202,  2.830242,  0.172112],
       [ 0.209897,  2.831786,  0.173661]])

In [3]:
bins=np.arange(0.001,0.0801,0.001)

In [4]:
print bins


[ 0.001  0.002  0.003  0.004  0.005  0.006  0.007  0.008  0.009  0.01
  0.011  0.012  0.013  0.014  0.015  0.016  0.017  0.018  0.019  0.02
  0.021  0.022  0.023  0.024  0.025  0.026  0.027  0.028  0.029  0.03
  0.031  0.032  0.033  0.034  0.035  0.036  0.037  0.038  0.039  0.04
  0.041  0.042  0.043  0.044  0.045  0.046  0.047  0.048  0.049  0.05
  0.051  0.052  0.053  0.054  0.055  0.056  0.057  0.058  0.059  0.06
  0.061  0.062  0.063  0.064  0.065  0.066  0.067  0.068  0.069  0.07
  0.071  0.072  0.073  0.074  0.075  0.076  0.077  0.078  0.079  0.08 ]

In [5]:
binsq=bins**2
binsq


Out[5]:
array([  1.00000000e-06,   4.00000000e-06,   9.00000000e-06,
         1.60000000e-05,   2.50000000e-05,   3.60000000e-05,
         4.90000000e-05,   6.40000000e-05,   8.10000000e-05,
         1.00000000e-04,   1.21000000e-04,   1.44000000e-04,
         1.69000000e-04,   1.96000000e-04,   2.25000000e-04,
         2.56000000e-04,   2.89000000e-04,   3.24000000e-04,
         3.61000000e-04,   4.00000000e-04,   4.41000000e-04,
         4.84000000e-04,   5.29000000e-04,   5.76000000e-04,
         6.25000000e-04,   6.76000000e-04,   7.29000000e-04,
         7.84000000e-04,   8.41000000e-04,   9.00000000e-04,
         9.61000000e-04,   1.02400000e-03,   1.08900000e-03,
         1.15600000e-03,   1.22500000e-03,   1.29600000e-03,
         1.36900000e-03,   1.44400000e-03,   1.52100000e-03,
         1.60000000e-03,   1.68100000e-03,   1.76400000e-03,
         1.84900000e-03,   1.93600000e-03,   2.02500000e-03,
         2.11600000e-03,   2.20900000e-03,   2.30400000e-03,
         2.40100000e-03,   2.50000000e-03,   2.60100000e-03,
         2.70400000e-03,   2.80900000e-03,   2.91600000e-03,
         3.02500000e-03,   3.13600000e-03,   3.24900000e-03,
         3.36400000e-03,   3.48100000e-03,   3.60000000e-03,
         3.72100000e-03,   3.84400000e-03,   3.96900000e-03,
         4.09600000e-03,   4.22500000e-03,   4.35600000e-03,
         4.48900000e-03,   4.62400000e-03,   4.76100000e-03,
         4.90000000e-03,   5.04100000e-03,   5.18400000e-03,
         5.32900000e-03,   5.47600000e-03,   5.62500000e-03,
         5.77600000e-03,   5.92900000e-03,   6.08400000e-03,
         6.24100000e-03,   6.40000000e-03])

In [9]:
%%time
BT_D = BallTree(dat,metric='pyfunc',func=LCDMmetricsq,leaf_size=5) 

with open('../pkl/BTDdatsLCDMsq.pkl', 'w') as f:
    pickle.dump(BT_D,f)

with open('../pkl/BTDdatsLCDMsq.pkl') as f:
    BTD = pickle.load(f)
    
BTD


CPU times: user 5.88 s, sys: 105 ms, total: 5.98 s
Wall time: 6.3 s

In [11]:
%%time
start_time=time.time()
counts_DD=BTD.two_point_correlation(dat,binsq)
print counts_DD
end_time=time.time()
tottime=end_time-start_time
print "Total run time:"
print tottime

with open('../pkl/BTDR72sDDLCDM.pkl', 'w') as f:
    pickle.dump(counts_DD,f)


[   120359    161290    218775    293390    389054    508471    657729
    836581   1053407   1309830   1612064   1961551   2364928   2827630
   3355304   3950137   4616908   5362000   6191387   7104872   8107724
   9210814  10415902  11727209  13147935  14683043  16336892  18110060
  20017428  22056497  24230687  26557413  29027844  31640018  34406049
  37329549  40409130  43648867  47058511  50636022  54388292  58311573
  62413549  66704196  71170633  75829823  80674283  85709067  90951344
  96373311 101985260 107812675 113839249 120062139 126504299 133149770
 140022402 147097820 154401439 161915340 169670590 177611901 185777031
 194169740 202779640 211622078 220705681 230018354 239572052 249368640
 259374682 269636247 280125617 290856761 301826910 313078370 324563356
 336309742 348290810 360519829]
Total run time:
1121.55524492
CPU times: user 18min 37s, sys: 2.19 s, total: 18min 39s
Wall time: 18min 41s

In [12]:
with open('../pkl/BTDR72sDDLCDM.pkl') as f:
    counts_DD = pickle.load(f)
    
counts_DD


Out[12]:
array([   120359,    161290,    218775,    293390,    389054,    508471,
          657729,    836581,   1053407,   1309830,   1612064,   1961551,
         2364928,   2827630,   3355304,   3950137,   4616908,   5362000,
         6191387,   7104872,   8107724,   9210814,  10415902,  11727209,
        13147935,  14683043,  16336892,  18110060,  20017428,  22056497,
        24230687,  26557413,  29027844,  31640018,  34406049,  37329549,
        40409130,  43648867,  47058511,  50636022,  54388292,  58311573,
        62413549,  66704196,  71170633,  75829823,  80674283,  85709067,
        90951344,  96373311, 101985260, 107812675, 113839249, 120062139,
       126504299, 133149770, 140022402, 147097820, 154401439, 161915340,
       169670590, 177611901, 185777031, 194169740, 202779640, 211622078,
       220705681, 230018354, 239572052, 249368640, 259374682, 269636247,
       280125617, 290856761, 301826910, 313078370, 324563356, 336309742,
       348290810, 360519829])

In [14]:
DD=np.diff(counts_DD)
DD


Out[14]:
array([   40931,    57485,    74615,    95664,   119417,   149258,
         178852,   216826,   256423,   302234,   349487,   403377,
         462702,   527674,   594833,   666771,   745092,   829387,
         913485,  1002852,  1103090,  1205088,  1311307,  1420726,
        1535108,  1653849,  1773168,  1907368,  2039069,  2174190,
        2326726,  2470431,  2612174,  2766031,  2923500,  3079581,
        3239737,  3409644,  3577511,  3752270,  3923281,  4101976,
        4290647,  4466437,  4659190,  4844460,  5034784,  5242277,
        5421967,  5611949,  5827415,  6026574,  6222890,  6442160,
        6645471,  6872632,  7075418,  7303619,  7513901,  7755250,
        7941311,  8165130,  8392709,  8609900,  8842438,  9083603,
        9312673,  9553698,  9796588, 10006042, 10261565, 10489370,
       10731144, 10970149, 11251460, 11484986, 11746386, 11981068, 12229019])

In [15]:
len(dat)


Out[15]:
105830

BallTree.two_point_correlation works almost 10 times faster! with leaf_size=5 Going with it to the random catalog


In [16]:
# Getting back the objects:
with open('../output/datRs100kLCDM.pkl') as f:  # Python 3: open(..., 'rb')
    datR = pickle.load(f)
datR


Out[16]:
array([[  3.99386000e-01,   3.50575800e+00,   5.25069000e-01],
       [  1.81160000e-01,   3.05099200e+00,   1.35846000e-01],
       [  2.90664000e-01,   2.71814000e+00,   2.41300000e-03],
       ..., 
       [  3.28141000e-01,   2.20094200e+00,   5.23620000e-02],
       [  2.32659000e-01,   2.95958900e+00,   1.14364000e-01],
       [  3.67697000e-01,   2.96577700e+00,   3.87508000e-01]])

In [17]:
%%time
BT_R = BallTree(dat,metric='pyfunc',func=LCDMmetricsq,leaf_size=5) 

with open('../pkl/BTRdatsLCDMsq.pkl', 'w') as f:
    pickle.dump(BT_R,f)

with open('../pkl/BTRdatsLCDMsq.pkl') as f:
    BTR = pickle.load(f)
    
BTR


CPU times: user 5.82 s, sys: 124 ms, total: 5.94 s
Wall time: 6.12 s

In [18]:
%%time
start_time=time.time()
counts_RR=BTR.two_point_correlation(datR,binsq)
print counts_RR
end_time=time.time()
tottime=end_time-start_time
print "Total run time:"
print tottime

with open('../pkl/BTRDR72100kRRLCDMsq.pkl', 'w') as f:
    pickle.dump(counts_RR,f)

with open('../pkl/BTRDR72100kRRLCDMsq.pkl') as f:
    counts_RR = pickle.load(f)
    
counts_RR


[      856      6868     22778     53022    102707    175892    278112
    412689    584420    798001   1057333   1365554   1729990   2154432
   2644393   3201884   3833172   4541419   5336165   6213636   7185698
   8256623   9429444  10707376  12094501  13596820  15215997  16956838
  18827403  20826667  22962718  25241300  27665203  30233374  32941518
  35806210  38830820  42016433  45370565  48889094  52572202  56430732
  60464225  64673158  69070603  73648870  78413295  83371016  88519999
  93872933  99399885 105140264 111073461 117223289 123574550 130121490
 136897063 143867851 151062052 158459131 166074450 173894077 181949649
 190221088 198720389 207449167 216389174 225561159 234984199 244594965
 254476253 264580730 274912619 285476794 296282727 307325575 318614973
 330151100 341929031 353942601]
Total run time:
1125.35932922
CPU times: user 18min 41s, sys: 2.35 s, total: 18min 43s
Wall time: 18min 45s

In [19]:
RR=np.diff(counts_RR)
RR


Out[19]:
array([    6012,    15910,    30244,    49685,    73185,   102220,
         134577,   171731,   213581,   259332,   308221,   364436,
         424442,   489961,   557491,   631288,   708247,   794746,
         877471,   972062,  1070925,  1172821,  1277932,  1387125,
        1502319,  1619177,  1740841,  1870565,  1999264,  2136051,
        2278582,  2423903,  2568171,  2708144,  2864692,  3024610,
        3185613,  3354132,  3518529,  3683108,  3858530,  4033493,
        4208933,  4397445,  4578267,  4764425,  4957721,  5148983,
        5352934,  5526952,  5740379,  5933197,  6149828,  6351261,
        6546940,  6775573,  6970788,  7194201,  7397079,  7615319,
        7819627,  8055572,  8271439,  8499301,  8728778,  8940007,
        9171985,  9423040,  9610766,  9881288, 10104477, 10331889,
       10564175, 10805933, 11042848, 11289398, 11536127, 11777931, 12013570])

In [41]:
%%time
start_time=time.time()
counts_DR=BTD.two_point_correlation(datR,binsq)
end_time=time.time()
tottime=end_time-start_time
print "Total run time:"
print tottime

with open('../pkl/BTRDR72100kDRLCDMsq_.pkl', 'w') as f:
    pickle.dump(counts_DR,f)


Total run time:
1112.736691
CPU times: user 18min 26s, sys: 3.37 s, total: 18min 30s
Wall time: 18min 32s

In [42]:
with open('../pkl/BTRDR72100kDRLCDMsq_.pkl') as f:
    counts_DR = pickle.load(f)
counts_DR


Out[42]:
array([      856,      6868,     22778,     53022,    102707,    175892,
          278112,    412689,    584420,    798001,   1057333,   1365554,
         1729990,   2154432,   2644393,   3201884,   3833172,   4541419,
         5336165,   6213636,   7185698,   8256623,   9429444,  10707376,
        12094501,  13596820,  15215997,  16956838,  18827403,  20826667,
        22962718,  25241300,  27665203,  30233374,  32941518,  35806210,
        38830820,  42016433,  45370565,  48889094,  52572202,  56430732,
        60464225,  64673158,  69070603,  73648870,  78413295,  83371016,
        88519999,  93872933,  99399885, 105140264, 111073461, 117223289,
       123574550, 130121490, 136897063, 143867851, 151062052, 158459131,
       166074450, 173894077, 181949649, 190221088, 198720389, 207449167,
       216389174, 225561159, 234984199, 244594965, 254476253, 264580730,
       274912619, 285476794, 296282727, 307325575, 318614973, 330151100,
       341929031, 353942601])

In [43]:
DR=np.diff(counts_DR)
DR


Out[43]:
array([    6012,    15910,    30244,    49685,    73185,   102220,
         134577,   171731,   213581,   259332,   308221,   364436,
         424442,   489961,   557491,   631288,   708247,   794746,
         877471,   972062,  1070925,  1172821,  1277932,  1387125,
        1502319,  1619177,  1740841,  1870565,  1999264,  2136051,
        2278582,  2423903,  2568171,  2708144,  2864692,  3024610,
        3185613,  3354132,  3518529,  3683108,  3858530,  4033493,
        4208933,  4397445,  4578267,  4764425,  4957721,  5148983,
        5352934,  5526952,  5740379,  5933197,  6149828,  6351261,
        6546940,  6775573,  6970788,  7194201,  7397079,  7615319,
        7819627,  8055572,  8271439,  8499301,  8728778,  8940007,
        9171985,  9423040,  9610766,  9881288, 10104477, 10331889,
       10564175, 10805933, 11042848, 11289398, 11536127, 11777931, 12013570])

In [44]:
correl100k=(DD-2.0*DR+RR)/RR
correl100k


Out[44]:
array([ 5.8082169 ,  2.61313639,  1.46710091,  0.92541008,  0.63171415,
        0.46016435,  0.32899381,  0.26259091,  0.200589  ,  0.16543273,
        0.13388445,  0.10685278,  0.09014188,  0.07697143,  0.06698225,
        0.05620731,  0.05202281,  0.04358751,  0.04104295,  0.03167493,
        0.03003478,  0.0275123 ,  0.02611641,  0.02422348,  0.02182559,
        0.02141335,  0.01856976,  0.0196748 ,  0.01990983,  0.01785491,
        0.02112893,  0.01919549,  0.01713398,  0.02137516,  0.02052856,
        0.01817457,  0.01699014,  0.01655033,  0.01676326,  0.01877816,
        0.01678126,  0.01697858,  0.01941442,  0.01568911,  0.01767547,
        0.01679846,  0.01554404,  0.01811892,  0.01289629,  0.01537864,
        0.01516207,  0.01573806,  0.01188033,  0.01431196,  0.01504993,
        0.01432484,  0.01500978,  0.01520919,  0.01579299,  0.01837494,
        0.01556136,  0.01360028,  0.01466129,  0.01301272,  0.0130213 ,
        0.01606218,  0.01533888,  0.0138658 ,  0.01933478,  0.01262528,
        0.01554638,  0.01524223,  0.01580521,  0.01519684,  0.01889114,
        0.01732493,  0.01822613,  0.01724726,  0.0179338 ])

In [45]:
plt.plot(bins[1:len(bins)],correl100k)


Out[45]:
[<matplotlib.lines.Line2D at 0x113ff40d0>]

In [47]:
plt.yscale('log')
plt.plot(c*1e-5*bins[1:len(bins)],correl100k)


Out[47]:
[<matplotlib.lines.Line2D at 0x11714a290>]

In [48]:
plt.xscale('log')
plt.yscale('log')
plt.plot(c*1e-5*bins[1:len(bins)],correl100k)


Out[48]:
[<matplotlib.lines.Line2D at 0x1140483d0>]

In [32]:
(DD*1.0/RR)


Out[32]:
array([ 6.8082169 ,  3.61313639,  2.46710091,  1.92541008,  1.63171415,
        1.46016435,  1.32899381,  1.26259091,  1.200589  ,  1.16543273,
        1.13388445,  1.10685278,  1.09014188,  1.07697143,  1.06698225,
        1.05620731,  1.05202281,  1.04358751,  1.04104295,  1.03167493,
        1.03003478,  1.0275123 ,  1.02611641,  1.02422348,  1.02182559,
        1.02141335,  1.01856976,  1.0196748 ,  1.01990983,  1.01785491,
        1.02112893,  1.01919549,  1.01713398,  1.02137516,  1.02052856,
        1.01817457,  1.01699014,  1.01655033,  1.01676326,  1.01877816,
        1.01678126,  1.01697858,  1.01941442,  1.01568911,  1.01767547,
        1.01679846,  1.01554404,  1.01811892,  1.01289629,  1.01537864,
        1.01516207,  1.01573806,  1.01188033,  1.01431196,  1.01504993,
        1.01432484,  1.01500978,  1.01520919,  1.01579299,  1.01837494,
        1.01556136,  1.01360028,  1.01466129,  1.01301272,  1.0130213 ,
        1.01606218,  1.01533888,  1.0138658 ,  1.01933478,  1.01262528,
        1.01554638,  1.01524223,  1.01580521,  1.01519684,  1.01889114,
        1.01732493,  1.01822613,  1.01724726,  1.0179338 ])

In [35]:
(DR*1.0)/RR


Out[35]:
array([ 6.8082169 ,  3.61313639,  2.46710091,  1.92541008,  1.63171415,
        1.46016435,  1.32899381,  1.26259091,  1.200589  ,  1.16543273,
        1.13388445,  1.10685278,  1.09014188,  1.07697143,  1.06698225,
        1.05620731,  1.05202281,  1.04358751,  1.04104295,  1.03167493,
        1.03003478,  1.0275123 ,  1.02611641,  1.02422348,  1.02182559,
        1.02141335,  1.01856976,  1.0196748 ,  1.01990983,  1.01785491,
        1.02112893,  1.01919549,  1.01713398,  1.02137516,  1.02052856,
        1.01817457,  1.01699014,  1.01655033,  1.01676326,  1.01877816,
        1.01678126,  1.01697858,  1.01941442,  1.01568911,  1.01767547,
        1.01679846,  1.01554404,  1.01811892,  1.01289629,  1.01537864,
        1.01516207,  1.01573806,  1.01188033,  1.01431196,  1.01504993,
        1.01432484,  1.01500978,  1.01520919,  1.01579299,  1.01837494,
        1.01556136,  1.01360028,  1.01466129,  1.01301272,  1.0130213 ,
        1.01606218,  1.01533888,  1.0138658 ,  1.01933478,  1.01262528,
        1.01554638,  1.01524223,  1.01580521,  1.01519684,  1.01889114,
        1.01732493,  1.01822613,  1.01724726,  1.0179338 ])

In [39]:
plt.plot(bins[1:len(bins)],(DR*1.0)/RR-1.0)


Out[39]:
[<matplotlib.lines.Line2D at 0x1134aba90>]

In [40]:
#plt.xscale('log')
plt.yscale('log')
plt.plot(c*1e-5*bins[1:len(bins)],(DR*1.0)/RR-1.0)


Out[40]:
[<matplotlib.lines.Line2D at 0x1134d0f50>]

In [ ]:


In [36]:
DD-DR


Out[36]:
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [37]:
DD


Out[37]:
array([   40931,    57485,    74615,    95664,   119417,   149258,
         178852,   216826,   256423,   302234,   349487,   403377,
         462702,   527674,   594833,   666771,   745092,   829387,
         913485,  1002852,  1103090,  1205088,  1311307,  1420726,
        1535108,  1653849,  1773168,  1907368,  2039069,  2174190,
        2326726,  2470431,  2612174,  2766031,  2923500,  3079581,
        3239737,  3409644,  3577511,  3752270,  3923281,  4101976,
        4290647,  4466437,  4659190,  4844460,  5034784,  5242277,
        5421967,  5611949,  5827415,  6026574,  6222890,  6442160,
        6645471,  6872632,  7075418,  7303619,  7513901,  7755250,
        7941311,  8165130,  8392709,  8609900,  8842438,  9083603,
        9312673,  9553698,  9796588, 10006042, 10261565, 10489370,
       10731144, 10970149, 11251460, 11484986, 11746386, 11981068, 12229019])

In [38]:
DR


Out[38]:
array([   40931,    57485,    74615,    95664,   119417,   149258,
         178852,   216826,   256423,   302234,   349487,   403377,
         462702,   527674,   594833,   666771,   745092,   829387,
         913485,  1002852,  1103090,  1205088,  1311307,  1420726,
        1535108,  1653849,  1773168,  1907368,  2039069,  2174190,
        2326726,  2470431,  2612174,  2766031,  2923500,  3079581,
        3239737,  3409644,  3577511,  3752270,  3923281,  4101976,
        4290647,  4466437,  4659190,  4844460,  5034784,  5242277,
        5421967,  5611949,  5827415,  6026574,  6222890,  6442160,
        6645471,  6872632,  7075418,  7303619,  7513901,  7755250,
        7941311,  8165130,  8392709,  8609900,  8842438,  9083603,
        9312673,  9553698,  9796588, 10006042, 10261565, 10489370,
       10731144, 10970149, 11251460, 11484986, 11746386, 11981068, 12229019])

In [ ]:


In [49]:
dataR=ascii.read("/Users/rohin/Downloads/random-DR7-Full.ascii")

In [50]:
Ez = lambda x: 1/m.sqrt(0.3*(1+x)**3+0.7)

np.vectorize(Ez)
#Calculate comoving distance of a data point using the Redshift - This definition is based on the cosmology model we take. Here the distance for E-dS universe is considered. Also note that c/H0 ratio is cancelled in the equations and hence not taken.

def DC_LCDM(z):
    return integrate.quad(Ez, 0, z)[0]
DC_LCDM=np.vectorize(DC_LCDM)

dr12gcmn = open("../output/rdr12gcmnsrarf.dat",'w')
dr12gcmn.write("z\t ra\t dec\t s\t rar\t decr \n")

for i in range(0,len(zr)):
    dr12gcmn.write("%f\t " %zr[i])
    dr12gcmn.write("%f\t %f\t " %(rar[i],decr[i]))
    dr12gcmn.write("%f\t " %DC_LCDM(zr[i]))
    dr12gcmn.write("%f\t %f\n " %(rar[i]*pi/180.0,decr[i]*pi/180.0))
dr12gcmn.close()


Out[50]:
<Table length=1664948>
col1col2col3col4col5col6col7col8
float64float64float64float64float64float64int64int64
37.837993-0.6200670.4343251833231.05.37688491.924510528631731
209.08925917.2765210.2932645264071.012.52059460.84796366075278282
258.46493928.2157480.2227456226641.016.26764680.65560181981352379
183.92714137.3790320.2793006790121.012.55551150.84565144676986459
170.6827771.2065430.3247144808041.014.69675640.724504979476323
172.78043332.9814950.3652999495971.012.4197750.85471125197436242
154.27528832.1600720.4319681312171.05.51946831.87694924986226159
187.27224316.1288390.2761071223881.012.61100670.84200244472125607
130.65578438.458250.241847030251.014.84859180.71721922133322595
146.68173653.8459640.2708473635461.012.737690.83378951862632179
........................
241.16756543.7447580.3762474899870.984815610.91693690.96973763142513461
202.26326639.4207320.4368201562930.97142865.21125221.98287834607036262
165.96483738.4140290.3414682434670.99009915.29548070.69660114548056352
136.737966.6228730.2758957997320.989361712.61503890.84173852749803533
158.15564325.0493360.3208910382410.964912314.23914240.74738725476857526
148.9219728.5169930.3946423491850.98275868.6568261.2158093228373762
203.89439724.9515580.2321268722370.986111115.92791560.66936925869678270
229.83728432.7096860.2921548531720.992424212.49768160.84948773062994524
132.83980535.6481840.1865936587370.995215318.51259420.57715892423142914
192.40300743.6117710.3079479452370.9513.00862880.81675113529714240

In [51]:
import pyfits
dataRf=pyfits.open("/Users/rohin/Downloads/random-DR7-Full.fits")
dataRf=dataRf[1].data
dataRf.columns


Out[51]:
ColDefs(
    name = 'RA'; format = 'D'
    name = 'DEC'; format = 'D'
    name = 'ILSS'; format = 'J'
    name = 'EBV'; format = 'E'
    name = 'SECTOR'; format = 'J'
    name = 'Z'; format = 'D'
    name = 'SECTOR_COMPLETENESS'; format = 'E'
    name = 'COMOV_DENSITY'; format = 'E'
    name = 'RADIAL_WEIGHT'; format = 'E'
)

In [52]:
z=dataRf['Z']
ra=dataRf['RA']
dec=dataRf['DEC']

In [53]:
Ez = lambda x: 1/m.sqrt(0.3*(1+x)**3+0.7)

np.vectorize(Ez)
#Calculate comoving distance of a data point using the Redshift - This definition is based on the cosmology model we take. Here the distance for E-dS universe is considered. Also note that c/H0 ratio is cancelled in the equations and hence not taken.

def DC_LCDM(z):
    return integrate.quad(Ez, 0, z)[0]
DC_LCDM=np.vectorize(DC_LCDM)

In [54]:
dr72r = open("../output/DR72srandf16M.dat",'w')
dr72r.write("z\t ra\t dec\t s\t rar\t decr \n")

for i in range(0,len(z)):
    dr72r.write("%f\t " %z[i])
    dr72r.write("%f\t %f\t " %(ra[i],dec[i]))
    dr72r.write("%f\t " %DC_LCDM(z[i]))
    dr72r.write("%f\t %f\n " %(ra[i]*pi/180.0,dec[i]*pi/180.0))
dr72r.close()

In [55]:
z


Out[55]:
array([ 0.43432518,  0.29326453,  0.22274562, ...,  0.29215485,
        0.18659366,  0.30794795])

In [6]:
dataR=ascii.read("../output/DR72srandf16M.dat")

In [7]:
dataR


Out[7]:
<Table length=1664948>
zradecsrardecr
float64float64float64float64float64float64
0.43432537.837993-0.6200670.3898150.660398-0.010822
0.293265209.08925917.2765210.2730973.6492960.301532
0.222746258.46493928.2157480.2111824.5110640.492458
0.279301183.92714137.3790320.2610273.2101340.652387
0.324714170.6827771.2065430.2999392.9789760.021058
0.3653172.78043332.9814950.3338793.0155870.575636
0.431968154.27528832.1600720.3879412.6926120.561299
0.276107187.27224316.1288390.2582543.2685170.281501
0.241847130.65578438.458250.228192.2803740.671223
0.270847146.68173653.8459640.2536752.5600790.939789
..................
0.376247241.16756543.7447580.34294.2091680.76349
0.43682202.26326639.4207320.3917953.530160.688022
0.341468165.96483738.4140290.3140442.8966330.670451
0.275896136.737966.6228730.258072.3865280.115591
0.320891158.15564325.0493360.2967012.7603370.437193
0.394642148.9219728.5169930.357932.5991790.14865
0.232127203.89439724.9515580.2195573.5586290.435487
0.292155229.83728432.7096860.2721414.0114170.570892
0.186594132.83980535.6481840.1785122.3184920.622178
0.307948192.40300743.6117710.2856883.3580660.761169

In [8]:
dataR.remove_column('z')
dataR.remove_column('ra')
dataR.remove_column('dec')

In [9]:
dataR


Out[9]:
<Table length=1664948>
srardecr
float64float64float64
0.3898150.660398-0.010822
0.2730973.6492960.301532
0.2111824.5110640.492458
0.2610273.2101340.652387
0.2999392.9789760.021058
0.3338793.0155870.575636
0.3879412.6926120.561299
0.2582543.2685170.281501
0.228192.2803740.671223
0.2536752.5600790.939789
.........
0.34294.2091680.76349
0.3917953.530160.688022
0.3140442.8966330.670451
0.258072.3865280.115591
0.2967012.7603370.437193
0.357932.5991790.14865
0.2195573.5586290.435487
0.2721414.0114170.570892
0.1785122.3184920.622178
0.2856883.3580660.761169

In [14]:
rs=np.array(dataR['s'])
rrar=np.array(dataR['rar'])
rdecr=np.array(dataR['decr'])

In [15]:
datR=np.array([rs,rrar,rdecr])

In [16]:
datR


Out[16]:
array([[ 0.389815,  0.273097,  0.211182, ...,  0.272141,  0.178512,
         0.285688],
       [ 0.660398,  3.649296,  4.511064, ...,  4.011417,  2.318492,
         3.358066],
       [-0.010822,  0.301532,  0.492458, ...,  0.570892,  0.622178,
         0.761169]])

In [17]:
datR.reshape(3,len(dataR))


Out[17]:
array([[ 0.389815,  0.273097,  0.211182, ...,  0.272141,  0.178512,
         0.285688],
       [ 0.660398,  3.649296,  4.511064, ...,  4.011417,  2.318492,
         3.358066],
       [-0.010822,  0.301532,  0.492458, ...,  0.570892,  0.622178,
         0.761169]])

In [18]:
datR=datR.transpose()

In [19]:
datR


Out[19]:
array([[ 0.389815,  0.660398, -0.010822],
       [ 0.273097,  3.649296,  0.301532],
       [ 0.211182,  4.511064,  0.492458],
       ..., 
       [ 0.272141,  4.011417,  0.570892],
       [ 0.178512,  2.318492,  0.622178],
       [ 0.285688,  3.358066,  0.761169]])

In [66]:
# Saving the objects:
with open('../pkl/datRs16MLCDM.pkl', 'w') as f:  # Python 3: open(..., 'wb')
    pickle.dump(datR, f)

In [6]:
with open('../pkl/datRs16MLCDM.pkl') as f:  # Python 3: open(..., 'wb')
    datR=pickle.load(f)

In [7]:
len(datR)


Out[7]:
1664948

In [20]:
%%time
BTR = BallTree(datR,metric='pyfunc',func=LCDMmetricsq,leaf_size=5)


CPU times: user 1min 53s, sys: 591 ms, total: 1min 54s
Wall time: 1min 54s

In [ ]:
with open('../pkl/BTR16MdatsLCDM.pkl', 'w') as f:
    pickle.dump(BT_R2,f)

In [8]:
with open('../pkl/BTR16MdatsLCDM.pkl') as f:
    BTR = pickle.load(f)
    
BTR


Out[8]:
<sklearn.neighbors.ball_tree.BinaryTree at 0x7f9e568214e0>

In [ ]:
%%time
start_time=time.time()
counts_RR=BTR.two_point_correlation(datR,binsq)
print counts_RR
end_time=time.time()
tottime=end_time-start_time
print "Total run time:"
print tottime

with open('../pkl/BTR16MRRLCDM.pkl', 'w') as f:
    pickle.dump(counts_RR,f)

In [22]:
with open('../pkl/BTR16MRRLCDM.pkl') as f:
    counts_RR = pickle.load(f)
    
counts_RR


Out[22]:
array([    1887274,     3372199,     7266132,    14721793,    26946281,
          45202302,    70798847,   104989532,   149022683,   204035720,
         271140354,   351416636,   446022991,   556048066,   682695202,
         827058405,   990267218,  1173327163,  1377219150,  1602944345,
        1851367023,  2123467438,  2420058932,  2742288808,  3091110964,
        3467774815,  3873280817,  4308721870,  4775274947,  5273944512,
        5805814885,  6372099654,  6973416207,  7611190498,  8285709449,
        8998163113,  9749323406, 10540068465, 11370495629, 12241455204,
       13154285784, 14108858789, 15107080462, 16148904638, 17235381003,
       18367335185, 19545914648, 20771694841, 22046049915, 23369112209,
       24740881827, 26162281292, 27633338854, 29154835284, 30726983424,
       32349997787, 34023883635, 35750235064, 37528917094, 39359956138,
       41245095325, 43184624235, 45179975050, 47230957600, 49336943873,
       51500581797, 53721497333, 55998502436, 58332075820, 60722812465,
       63169355810, 65673395678, 68233479653, 70848790989, 73520096233,
       76248985938, 79035033522, 81879029412, 84780026255, 87741240370])

In [23]:
RR=np.diff(counts_RR)
RR


Out[23]:
array([   1484925,    3893933,    7455661,   12224488,   18256021,
         25596545,   34190685,   44033151,   55013037,   67104634,
         80276282,   94606355,  110025075,  126647136,  144363203,
        163208813,  183059945,  203891987,  225725195,  248422678,
        272100415,  296591494,  322229876,  348822156,  376663851,
        405506002,  435441053,  466553077,  498669565,  531870373,
        566284769,  601316553,  637774291,  674518951,  712453664,
        751160293,  790745059,  830427164,  870959575,  912830580,
        954573005,  998221673, 1041824176, 1086476365, 1131954182,
       1178579463, 1225780193, 1274355074, 1323062294, 1371769618,
       1421399465, 1471057562, 1521496430, 1572148140, 1623014363,
       1673885848, 1726351429, 1778682030, 1831039044, 1885139187,
       1939528910, 1995350815, 2050982550, 2105986273, 2163637924,
       2220915536, 2277005103, 2333573384, 2390736645, 2446543345,
       2504039868, 2560083975, 2615311336, 2671305244, 2728889705,
       2786047584, 2843995890, 2900996843, 2961214115])

In [ ]:
%%time
start_time=time.time()
counts_DR=BTR.two_point_correlation(dat,binsq)
print counts_DR
end_time=time.time()
tottime=end_time-start_time
print "Total run time:"
print tottime

with open('../pkl/BTR16MDRLCDM.pkl', 'w') as f:
    pickle.dump(counts_DR,f)

In [24]:
with open('../pkl/BTR16MDRLCDM.pkl') as f:
    counts_DR = pickle.load(f)
    
counts_DR


Out[24]:
array([     13454,     105871,     351719,     824797,    1604158,
          2766136,    4397059,    6580479,    9395344,   12904231,
         17189402,   22317589,   28361376,   35408055,   43514219,
         52748994,   63184815,   74898709,   87934399,  102388365,
        118296328,  135731305,  154746952,  175417991,  197778782,
        221910899,  247916864,  275839103,  305771972,  337741674,
        371865195,  408158104,  446657562,  487570105,  530789155,
        576460418,  624560296,  675133046,  728359242,  784107969,
        842474383,  903672922,  967647122, 1034436760, 1104271799,
       1176901535, 1252556036, 1331284426, 1413071686, 1497873692,
       1585849828, 1677014441, 1771479112, 1869086642, 1970044912,
       2074342278, 2181917097, 2292932525, 2407123738, 2524984452,
       2646010183, 2770843115, 2899324883, 3031240700, 3166720040,
       3306033477, 3448715285, 3594764007, 3744812031, 3898281306,
       4055652549, 4216804926, 4381431947, 4549751140, 4721545382,
       4897120374, 5076127067, 5259154741, 5445649412, 5636143169])

In [25]:
DR=np.diff(counts_DR)
DR


Out[25]:
array([    92417,    245848,    473078,    779361,   1161978,   1630923,
         2183420,   2814865,   3508887,   4285171,   5128187,   6043787,
         7046679,   8106164,   9234775,  10435821,  11713894,  13035690,
        14453966,  15907963,  17434977,  19015647,  20671039,  22360791,
        24132117,  26005965,  27922239,  29932869,  31969702,  34123521,
        36292909,  38499458,  40912543,  43219050,  45671263,  48099878,
        50572750,  53226196,  55748727,  58366414,  61198539,  63974200,
        66789638,  69835039,  72629736,  75654501,  78728390,  81787260,
        84802006,  87976136,  91164613,  94464671,  97607530, 100958270,
       104297366, 107574819, 111015428, 114191213, 117860714, 121025731,
       124832932, 128481768, 131915817, 135479340, 139313437, 142681808,
       146048722, 150048024, 153469275, 157371243, 161152377, 164627021,
       168319193, 171794242, 175574992, 179006693, 183027674, 186494671,
       190493757])

In [26]:
with open('../pkl/BTDR72sDDLCDM.pkl') as f:
    counts_DD = pickle.load(f)
    
counts_DD


Out[26]:
array([   120359,    161290,    218775,    293390,    389054,    508471,
          657729,    836581,   1053407,   1309830,   1612064,   1961551,
         2364928,   2827630,   3355304,   3950137,   4616908,   5362000,
         6191387,   7104872,   8107724,   9210814,  10415902,  11727209,
        13147935,  14683043,  16336892,  18110060,  20017428,  22056497,
        24230687,  26557413,  29027844,  31640018,  34406049,  37329549,
        40409130,  43648867,  47058511,  50636022,  54388292,  58311573,
        62413549,  66704196,  71170633,  75829823,  80674283,  85709067,
        90951344,  96373311, 101985260, 107812675, 113839249, 120062139,
       126504299, 133149770, 140022402, 147097820, 154401439, 161915340,
       169670590, 177611901, 185777031, 194169740, 202779640, 211622078,
       220705681, 230018354, 239572052, 249368640, 259374682, 269636247,
       280125617, 290856761, 301826910, 313078370, 324563356, 336309742,
       348290810, 360519829])

In [27]:
DD=np.diff(counts_DD)
DD


Out[27]:
array([   40931,    57485,    74615,    95664,   119417,   149258,
         178852,   216826,   256423,   302234,   349487,   403377,
         462702,   527674,   594833,   666771,   745092,   829387,
         913485,  1002852,  1103090,  1205088,  1311307,  1420726,
        1535108,  1653849,  1773168,  1907368,  2039069,  2174190,
        2326726,  2470431,  2612174,  2766031,  2923500,  3079581,
        3239737,  3409644,  3577511,  3752270,  3923281,  4101976,
        4290647,  4466437,  4659190,  4844460,  5034784,  5242277,
        5421967,  5611949,  5827415,  6026574,  6222890,  6442160,
        6645471,  6872632,  7075418,  7303619,  7513901,  7755250,
        7941311,  8165130,  8392709,  8609900,  8842438,  9083603,
        9312673,  9553698,  9796588, 10006042, 10261565, 10489370,
       10731144, 10970149, 11251460, 11484986, 11746386, 11981068, 12229019])

In [37]:
len(dataR)


Out[37]:
1664948

In [38]:
len(dat)


Out[38]:
105830

In [39]:
NrbyNd=1.0*len(dataR)/len(dat)
NrbyNd


Out[39]:
15.732287631106491

In [30]:
corrells16m=(DD*1.0-2.0*DR+RR*1.0)/RR

In [31]:
corrells16m


Out[31]:
array([ 0.90309073,  0.88849038,  0.88310346,  0.88031744,  0.87924318,
        0.87839812,  0.87751085,  0.87707207,  0.87709548,  0.87678782,
        0.87659011,  0.8764967 ,  0.87611319,  0.87615469,  0.87618232,
        0.87620233,  0.87609143,  0.87619919,  0.87597996,  0.87596513,
        0.87590293,  0.87583526,  0.87576952,  0.87586552,  0.87593945,
        0.87581422,  0.87582404,  0.87577326,  0.87586903,  0.87577264,
        0.87592975,  0.87605782,  0.87579789,  0.87595298,  0.87589505,
        0.87603155,  0.87618543,  0.87591597,  0.87609076,  0.87623053,
        0.87588818,  0.87593294,  0.87590168,  0.87555768,  0.87578978,
        0.8757279 ,  0.87565308,  0.875755  ,  0.87590755,  0.87582439,
        0.87582533,  0.8756658 ,  0.87578533,  0.87566415,  0.87557149,
        0.87557275,  0.8754857 ,  0.87570639,  0.8753672 ,  0.8757141 ,
        0.87536945,  0.87531095,  0.87545534,  0.87542712,  0.8753098 ,
        0.87560085,  0.87580846,  0.87549466,  0.87571113,  0.87544204,
        0.8753841 ,  0.87548663,  0.87538492,  0.87548471,  0.87544439,
        0.87562007,  0.87541861,  0.87555716,  0.87547051])

In [33]:
plt.plot(bins[1:len(bins)],corrells16m,'bo-')


Out[33]:
[<matplotlib.lines.Line2D at 0x125b68fd0>]

In [35]:
plt.yscale('log')
plt.plot(bins[1:len(bins)],corrells16m,'b.-')


Out[35]:
[<matplotlib.lines.Line2D at 0x125cbf610>]

In [36]:
plt.xscale('log')
plt.yscale('log')
plt.plot(c*1e-5*bins[1:len(bins)],corrells16m,'b.-')


Out[36]:
[<matplotlib.lines.Line2D at 0x113d95990>]

In [42]:
corrells16mr=(DD*NrbyNd**2-2.0*NrbyNd*DR+RR*1.0)/RR

In [43]:
corrells16mr


Out[43]:
array([  5.86405734e+00,   2.66728980e+00,   1.48049098e+00,
         9.30880004e-01,   6.16298936e-01,   4.38431359e-01,
         2.85371405e-01,   2.07346306e-01,   1.46753218e-01,
         1.05476124e-01,   6.75142394e-02,   4.52288609e-02,
         2.56788147e-02,   1.73080556e-02,   7.05856300e-03,
        -7.40094003e-04,  -6.00269337e-03,  -4.87089803e-03,
        -1.31607644e-02,  -1.57146002e-02,  -1.27290487e-02,
        -1.16746722e-02,  -1.12364268e-02,  -8.92725854e-03,
        -7.15823484e-03,  -8.44612029e-03,  -9.76622407e-03,
        -6.83535466e-03,  -5.14170851e-03,  -6.93608756e-03,
         3.88193380e-04,   2.31354491e-03,  -4.69711686e-03,
        -1.10292832e-03,  -1.39339382e-03,  -9.40288311e-05,
         1.70176054e-03,  -4.92489900e-04,   2.64187716e-03,
         5.54458625e-03,   1.90018906e-05,   5.60692829e-04,
         2.18139494e-03,  -4.95965521e-03,  -1.21363792e-04,
        -2.40103897e-03,  -4.27611686e-03,  -1.22196307e-03,
        -2.44398653e-03,  -5.37801946e-03,  -3.33667358e-03,
        -6.54410091e-03,  -6.23559650e-03,  -6.35750838e-03,
        -8.54674871e-03,  -5.91451490e-03,  -8.97929527e-03,
        -3.71886851e-03,  -9.65088162e-03,  -1.81517620e-03,
        -1.17415599e-02,  -1.32126998e-02,  -1.09490048e-02,
        -1.22609542e-02,  -1.44449749e-02,  -9.12728696e-03,
        -5.89543863e-03,  -9.87204409e-03,  -5.60732152e-03,
        -1.16620901e-02,  -1.06882776e-02,  -9.24388276e-03,
        -9.47119952e-03,  -7.09850208e-03,  -3.92527924e-03,
        -1.33951993e-03,  -2.67382436e-03,  -5.53617170e-04,
        -1.97528136e-03])

In [ ]:


In [46]:
plt.yscale('log')
plt.plot(bins[1:len(bins)],(1.0+corrells16mr),'b.-')


Out[46]:
[<matplotlib.lines.Line2D at 0x1142a7190>]

In [ ]:


In [ ]:


In [ ]:


In [53]:
RR_zero = (RR == 0)
RR[RR_zero] = 1

In [54]:
correl=1.0*DD/RR-1.0

In [55]:
correl


Out[55]:
array([ 1.35830876,  0.28122978,  0.08986654,  0.04728624,  0.03223924,
        0.02651578,  0.02913773,  0.02925552,  0.02856214,  0.02910658,
        0.02825571,  0.02974039,  0.02806518,  0.02901232,  0.02976213])

In [64]:
plt.plot(bins[1:len(bins)],correl,'go-')


Out[64]:
[<matplotlib.lines.Line2D at 0x1142d6d50>]

In [67]:
plt.yscale('log')
plt.plot(bins[1:len(bins)],correl,'go-')


Out[67]:
[<matplotlib.lines.Line2D at 0x11442a850>]

In [63]:
plt.plot(bins[1:len(bins)],bins[1:len(bins)]**2*correl,'go-')


Out[63]:
[<matplotlib.lines.Line2D at 0x1141b6790>]

In [58]:
plt.plot(bins[2:len(bins)],bins[2:len(bins)]*bins[2:len(bins)]*correl[1:len(bins)],'go')


Out[58]:
[<matplotlib.lines.Line2D at 0x113557210>]

In [59]:
plt.plot(bins[2:len(bins)],correl[1:len(bins)],'go')


Out[59]:
[<matplotlib.lines.Line2D at 0x113673c10>]

In [60]:
plt.plot(bins[2:len(bins)],correl[1:len(bins)],'go-')
plt.savefig("correl1.pdf")



In [61]:
plt.plot(bins[2:len(bins)]*299792.458/100,correl[1:len(bins)],'bo-')


Out[61]:
[<matplotlib.lines.Line2D at 0x113bf2c90>]

In [ ]:


In [ ]:


In [ ]:
start_time=time.time()
arg=[(dat,bins)]
pool=mp.Pool(8)
#%timeit
#@pickle_results("DR72DD2ptc.pkl")
def mf_wrap(args):
    return BTD.two_point_correlation(*args)

%timeit counts_DD=pool.map(mf_wrap,arg)

end_time=time.time()
tottime=end_time-start_time
print "Total run time:"
print tottime

In [ ]:
from functools import partial

def harvester(text, case):
    X = case[0]
    return text + str(X)


partial_harvester = partial(harvester, case=RAW_DATASET)

partial_qr=partial(BTD.query_radius,count_only=True)

if __name__ == '__main__':
    pool = multiprocessing.Pool(processes=6)
    case_data = RAW_DATASET
    pool.map(partial_harvester, case_data, 1)
    pool.close()
    pool.join()

mapfunc = partial(BTD.query_radius, count_only=True)
map(mapfunc, volume_ids)

In [ ]:
#ascii.write("DR72DDbinned.dat",(bins[1:len(bins)],DDresult))
start_time=time.time()
@pickle_results("DR72DDmp1.pkl")
def ddcal(BTD,dat,bins,Nbins):
    counts_DD=np.zeros(Nbins)
    for i in tqdm(range(Nbins)):
        counts_DD[i]=np.sum(BTD.query_radius(dat, bins[i],count_only=True))
    DD = np.diff(counts_DD)
    print counts_DD
    print DD
    return DD

def mf_wrap(args):
    return ddcal(*args)

pool=mp.Pool(8)

arg=[(BTD,dat,bins,Nbins)]
%timeit DDresult=pool.map(mf_wrap,arg) 
#DDresult = ddcal(BTD,dat,bins,Nbins)
end_time=time.time()
tottime=end_time-start_time
print "Total run time:"
print tottime

In [ ]:
%timeit dat

In [ ]:
DDresult[0]

In [ ]:
DDresult[1]

In [ ]:
plt.plot(bins[1:len(bins)],DDresult[0],'ro')

In [ ]:


In [ ]:


In [ ]:
def myfun(a,b):
  print a + b
  return a+b

def mf_wrap(args):
  return myfun(*args)

p = mp.Pool(4)

fl = [(a,b) for a in range(3) for b in range(2)]

p.map(mf_wrap, fl)

In [ ]:


In [ ]:


In [ ]:


In [ ]:
counts_DD=np.zeros(Nbins)

for i in range(Nbins):
    counts_DD[i]=np.sum(BTD.query_radius(dat, bins[i],count_only=True))
DD = np.diff(counts_DD)

In [ ]:
print counts_DD
print DD

In [ ]:
plt.plot(bins[1:len(bins)],DD,'ro')

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:
dataR=fits.open("/Users/rohin/Downloads/random-DR7-Full.fits")

In [ ]:
dataR=dataR[1].data

In [ ]:
len(dataR)

In [ ]:


In [ ]:
tdata=np.array(data)

In [ ]:
type(tdata[4])

In [ ]:
tdata.shape

In [ ]:


In [ ]:
tdata.shape

In [ ]:
tdata=np.atleast_d(tdata)

In [ ]:
tdata.shape

In [ ]:
tdata.reshape(len(tdata),3)

In [ ]:
tdata=np.asarray(data)
tdata=tdata.transpose()

In [ ]:


In [ ]:


In [ ]:
tdata

In [ ]:
len(tdata)

In [ ]:


In [ ]:
BTD.two_point_correlationpoint_correlationpoint_correlationpoint_correlationtime
stime=time.time()
tpcf=BTD.two_point_correlation(dat,bins)
print time.time()-stime
print tpcf
plt.plot(bins,tpcf)

In [ ]:
stime=time.time()
tpcfd=BTD.two_point_correlation(dat,bins,dualtree=True)
print time.time()-stime
print tpcfd
plt.plot(bins,tpcfd)

In [ ]:


In [ ]:


In [ ]:


In [ ]:
X

In [ ]:


In [ ]:
np.random.seed(0)
X = np.random.random((30,3))
r = np.linspace(0, 1, 10)
tree = BallTree(X,metric='pyfunc',func=LCDMmetric)                
s = pickle.dumps(tree)                     
treedump = pickle.loads(s) 
treedump.two_point_correlation(X,r)

In [ ]:
BT_D = BallTree(data)
        BT_R = BallTree(data_R)

        counts_DD = np.zeros(Nbins + 1)
        counts_RR = np.zeros(Nbins + 1)

        for i in range(Nbins + 1):
            counts_DD[i] = np.sum(BT_D.query_radius(data, bins[i],
                                                    count_only=True))
            counts_RR[i] = np.sum(BT_R.query_radius(data_R, bins[i],
                                                    count_only=True))

    DD = np.diff(counts_DD)
    RR = np.diff(counts_RR)

    # check for zero in the denominator
    RR_zero = (RR == 0)
    RR[RR_zero] = 1

    if method == 'standard':
        corr = factor ** 2 * DD / RR - 1
    elif method == 'landy-szalay':
        if sklearn_has_two_point:
            counts_DR = KDT_R.two_point_correlation(data, bins)
        else:
            counts_DR = np.zeros(Nbins + 1)
            for i in range(Nbins + 1):
                counts_DR[i] = np.sum(BT_R.query_radius(data, bins[i],
                                                        count_only=True))
        DR = np.diff(counts_DR)

        corr = (factor ** 2 * DD - 2 * factor * DR + RR) / RR

    corr[RR_zero] = np.nan

    return corr

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:
dr7fdat=np.array([data['s'][0:300] data['rar'][0:300] data['decr'][0:300]])
dr7fdat

In [ ]:
dr7fdat[2]

In [ ]:


In [ ]:
def LCDMmetric(p1,p2):
    costheta=m.sin(dec1)*m.sin(dec2)+m.cos(dec1)*m.cos(dec2)*m.cos(ra1-ra2)
    s1=DC_LCDM(z1)
    s2=DC_LCDM(z2)
    return np.sqrt(s1**2+s2**2-2.0*s1*s2*costheta)

In [ ]:


In [ ]:
#fdata=fits.open("/Users/rohin/Downloads/DR7-Full.fits")

In [ ]:
#fdata.writeto("./output/DR7fulltrim.fits")

In [ ]:
fdata=fits.open("./output/DR7fulltrim.fits")

In [ ]:
cols=fdata[1].columns

In [ ]:
cols.del_col('ZTYPE')

In [ ]:
cols.del_col('SECTOR')
cols.del_col('FGOTMAIN')
cols.del_col('QUALITY')
cols.del_col('ISBAD')
cols.del_col('M')
cols.del_col('MMAX')
cols.del_col('ILSS')
cols.del_col('ICOMB')
cols.del_col('VAGC_SELECT')
cols.del_col('LSS_INDEX')
cols.del_col('FIBERWEIGHT')
cols.del_col('PRIMTARGET')
cols.del_col('MG')
cols.del_col('SECTOR_COMPLETENESS')
cols.del_col('COMOV_DENSITY')
cols.del_col('RADIAL_WEIGHT')

In [ ]:
fdata[1].columns

In [ ]:
fdata.writeto("./output/DR7fullzradec.fits")

In [ ]:
fdat=fits.open("./output/DR7fullzradec.fits")

In [ ]:
fdat[1].columns

In [ ]:
fdat[1].data['Z']

In [ ]:
fdat[1].data['RA']

In [ ]:


In [ ]:
comovlcdm=DC_LCDM(fdat[1].data['Z'])

In [ ]:
fdat[1].data['Z']

In [ ]:
comovlcdm

In [ ]:
comovlcdm.dtype

In [ ]:
#cols=fdat[1].columns

In [ ]:
nc=fits.Column(name='COMOV',format='D',array=comovlcdm)

In [ ]:
nc1=fits.Column(name='COMOV',format='D')

In [ ]:
fdata[1].data['Z']

In [ ]:
fdata[1].data['RA']

In [ ]:
nc

In [ ]:
nc.dtype

In [ ]:
#cols.add_col(nc)

In [ ]:
fdat[1].columns

In [ ]:
fdat[1].columns.info()

In [ ]:
fdat[1].columns.add_col(nc1)

In [ ]:
fdat[1].data['COMOV']=comovlcdm

In [ ]:
comovlcdm

In [ ]:
fdat[1].data['Z']

In [ ]:
fdat[1].data['COMOV']

In [ ]:
fdat[1].data['RA']

In [ ]:
fdat[1].data['RA']=fdat[1].data['RA']*pi/180.0

In [ ]:
comovlcdm=DC_LCDM(fdat[1].data['Z'])
comovlcdm

In [ ]:


In [ ]:

Random catalog created based on the survey limitations also taken from http://cosmo.nyu.edu/~eak306/SDSS-LRG.html


In [ ]:
dataR=fits.open("/Users/rohin/Downloads/random-DR7-Full.fits")

In [ ]:
dataR

In [ ]:
dataR=dataR[1].data

In [ ]:
len(dataR)

In [ ]:
NSIDE=512
dr72hpix=hu.HealPix("ring",NSIDE)

In [ ]:
pixdata = open("./output/pixdatadr72VAGCfullrand.dat",'w')
pixdata.write("z\t pix \n")

for i in range(0,len(data)-1):
    pixdata.write("%f\t" %data['z'][i])
    pixdata.write("%d\n" %dr72hpix.eq2pix(dataR['ra'][i],dataR['dec'][i]))
pixdata.close()

In [ ]:
pixdata = ascii.read("./output/pixdatadr72VAGCfullrand.dat")
hpixdata=np.array(np.zeros(hu.nside2npix(NSIDE)))
for j in range(len(pixdata)):
    hpixdata[pixdata[j]['pix']]+=1

In [ ]:
hpixdata

In [ ]:
hu.mollview(hpixdata,rot=180)

In [ ]:
hu.orthview(hpixdata)

In [ ]:
"""
Tools for computing two-point correlation functions.
"""

#from .utils import check_random_state
# From scikit-learn utilities:
def check_random_state(seed):
    """Turn seed into a np.random.RandomState instance

    If seed is None, return the RandomState singleton used by np.random.
    If seed is an int, return a new RandomState instance seeded with seed.
    If seed is already a RandomState instance, return it.
    Otherwise raise ValueError.
    """
    if seed is None or seed is np.random:
        return np.random.mtrand._rand
    if isinstance(seed, (int, np.integer)):
        return np.random.RandomState(seed)
    if isinstance(seed, np.random.RandomState):
        return seed
    raise ValueError('%r cannot be used to seed a numpy.random.RandomState'
                     ' instance' % seed)

# Check if scikit-learn's two-point functionality is available.
# This was added in scikit-learn version 0.14
try:
    from sklearn.neighbors import KDTree
    sklearn_has_two_point = True
except ImportError:
    import warnings
    sklearn_has_two_point = False


def uniform_sphere(RAlim, DEClim, size=1):
    """Draw a uniform sample on a sphere

    Parameters
    ----------
    RAlim : tuple
        select Right Ascension between RAlim[0] and RAlim[1]
        units are degrees
    DEClim : tuple
        select Declination between DEClim[0] and DEClim[1]
    size : int (optional)
        the size of the random arrays to return (default = 1)

    Returns
    -------
    RA, DEC : ndarray
        the random sample on the sphere within the given limits.
        arrays have shape equal to size.
    """
    zlim = np.sin(np.pi * np.asarray(DEClim) / 180.)

    z = zlim[0] + (zlim[1] - zlim[0]) * np.random.random(size)
    DEC = (180. / np.pi) * np.arcsin(z)
    RA = RAlim[0] + (RAlim[1] - RAlim[0]) * np.random.random(size)

    return RA, DEC


def ra_dec_to_xyz(ra, dec):
    """Convert ra & dec to Euclidean points

    Parameters
    ----------
    ra, dec : ndarrays

    Returns
    x, y, z : ndarrays
    """
    sin_ra = np.sin(ra * np.pi / 180.)
    cos_ra = np.cos(ra * np.pi / 180.)

    sin_dec = np.sin(np.pi / 2. - dec * np.pi / 180.)
    cos_dec = np.cos(np.pi / 2. - dec * np.pi / 180.)

    return (cos_ra * sin_dec,
            sin_ra * sin_dec,
            cos_dec)


def angular_dist_to_euclidean_dist(D, r=1):
    """convert angular distances to euclidean distances"""
    return 2 * r * np.sin(0.5 * D * np.pi / 180.)


def two_point(data, bins, method='standard',
              data_R=None, random_state=None):
    """Two-point correlation function

    Parameters
    ----------
    data : array_like
        input data, shape = [n_samples, n_features]
    bins : array_like
        bins within which to compute the 2-point correlation.
        shape = Nbins + 1
    method : string
        "standard" or "landy-szalay".
    data_R : array_like (optional)
        if specified, use this as the random comparison sample
    random_state : integer, np.random.RandomState, or None
        specify the random state to use for generating background

    Returns
    -------
    corr : ndarray
        the estimate of the correlation function within each bin
        shape = Nbins
    """
    data = np.asarray(data)
    bins = np.asarray(bins)
    rng = check_random_state(random_state)

    if method not in ['standard', 'landy-szalay']:
        raise ValueError("method must be 'standard' or 'landy-szalay'")

    if bins.ndim != 1:
        raise ValueError("bins must be a 1D array")

    if data.ndim == 1:
        data = data[:, np.newaxis]
    elif data.ndim != 2:
        raise ValueError("data should be 1D or 2D")

    n_samples, n_features = data.shape
    Nbins = len(bins) - 1

    # shuffle all but one axis to get background distribution
    if data_R is None:
        data_R = data.copy()
        for i in range(n_features - 1):
            rng.shuffle(data_R[:, i])
    else:
        data_R = np.asarray(data_R)
        if (data_R.ndim != 2) or (data_R.shape[-1] != n_features):
            raise ValueError('data_R must have same n_features as data')

    factor = len(data_R) * 1. / len(data)

    if sklearn_has_two_point:
        # Fast two-point correlation functions added in scikit-learn v. 0.14
        KDT_D = KDTree(data)
        KDT_R = KDTree(data_R)

        counts_DD = KDT_D.two_point_correlation(data, bins)
        counts_RR = KDT_R.two_point_correlation(data_R, bins)

    else:
        warnings.warn("Version 0.3 of astroML will require scikit-learn "
                      "version 0.14 or higher for correlation function "
                      "calculations. Upgrade to sklearn 0.14+ now for much "
                      "faster correlation function calculations.")

        BT_D = BallTree(data)
        BT_R = BallTree(data_R)

        counts_DD = np.zeros(Nbins + 1)
        counts_RR = np.zeros(Nbins + 1)

        for i in range(Nbins + 1):
            counts_DD[i] = np.sum(BT_D.query_radius(data, bins[i],
                                                    count_only=True))
            counts_RR[i] = np.sum(BT_R.query_radius(data_R, bins[i],
                                                    count_only=True))

    DD = np.diff(counts_DD)
    RR = np.diff(counts_RR)

    # check for zero in the denominator
    RR_zero = (RR == 0)
    RR[RR_zero] = 1

    if method == 'standard':
        corr = factor ** 2 * DD / RR - 1
    elif method == 'landy-szalay':
        if sklearn_has_two_point:
            counts_DR = KDT_R.two_point_correlation(data, bins)
        else:
            counts_DR = np.zeros(Nbins + 1)
            for i in range(Nbins + 1):
                counts_DR[i] = np.sum(BT_R.query_radius(data, bins[i],
                                                        count_only=True))
        DR = np.diff(counts_DR)

        corr = (factor ** 2 * DD - 2 * factor * DR + RR) / RR

    corr[RR_zero] = np.nan

    return corr


def bootstrap_two_point(data, bins, Nbootstrap=10,
                        method='standard', return_bootstraps=False,
                        random_state=None):
    """Bootstrapped two-point correlation function

    Parameters
    ----------
    data : array_like
        input data, shape = [n_samples, n_features]
    bins : array_like
        bins within which to compute the 2-point correlation.
        shape = Nbins + 1
    Nbootstrap : integer
        number of bootstrap resamples to perform (default = 10)
    method : string
        "standard" or "landy-szalay".
    return_bootstraps: bool
        if True, return full bootstrapped samples
    random_state : integer, np.random.RandomState, or None
        specify the random state to use for generating background

    Returns
    -------
    corr, corr_err : ndarrays
        the estimate of the correlation function and the bootstrap
        error within each bin. shape = Nbins
    """
    data = np.asarray(data)
    bins = np.asarray(bins)
    rng = check_random_state(random_state)

    if method not in ['standard', 'landy-szalay']:
        raise ValueError("method must be 'standard' or 'landy-szalay'")

    if bins.ndim != 1:
        raise ValueError("bins must be a 1D array")

    if data.ndim == 1:
        data = data[:, np.newaxis]
    elif data.ndim != 2:
        raise ValueError("data should be 1D or 2D")

    if Nbootstrap < 2:
        raise ValueError("Nbootstrap must be greater than 1")

    n_samples, n_features = data.shape

    # get the baseline estimate
    corr = two_point(data, bins, method=method, random_state=rng)

    bootstraps = np.zeros((Nbootstrap, len(corr)))

    for i in range(Nbootstrap):
        indices = rng.randint(0, n_samples, n_samples)
        bootstraps[i] = two_point(data[indices, :], bins, method=method,
                                  random_state=rng)

    # use masked std dev in case of NaNs
    corr_err = np.asarray(np.ma.masked_invalid(bootstraps).std(0, ddof=1))

    if return_bootstraps:
        return corr, corr_err, bootstraps
    else:
        return corr, corr_err


def two_point_angular(ra, dec, bins, method='standard', random_state=None):
    """Angular two-point correlation function

    A separate function is needed because angular distances are not
    euclidean, and random sampling needs to take into account the
    spherical volume element.

    Parameters
    ----------
    ra : array_like
        input right ascention, shape = (n_samples,)
    dec : array_like
        input declination
    bins : array_like
        bins within which to compute the 2-point correlation.
        shape = Nbins + 1
    method : string
        "standard" or "landy-szalay".
    random_state : integer, np.random.RandomState, or None
        specify the random state to use for generating background

    Returns
    -------
    corr : ndarray
        the estimate of the correlation function within each bin
        shape = Nbins
    """
    ra = np.asarray(ra)
    dec = np.asarray(dec)
    rng = check_random_state(random_state)

    if method not in ['standard', 'landy-szalay']:
        raise ValueError("method must be 'standard' or 'landy-szalay'")

    if bins.ndim != 1:
        raise ValueError("bins must be a 1D array")

    if (ra.ndim != 1) or (dec.ndim != 1) or (ra.shape != dec.shape):
        raise ValueError('ra and dec must be 1-dimensional '
                         'arrays of the same length')

    n_features = len(ra)
    Nbins = len(bins) - 1

    # draw a random sample with N points
    ra_R, dec_R = uniform_sphere((min(ra), max(ra)),
                                 (min(dec), max(dec)),
                                 2 * len(ra))

    data = np.asarray(ra_dec_to_xyz(ra, dec), order='F').T
    data_R = np.asarray(ra_dec_to_xyz(ra_R, dec_R), order='F').T

    # convert spherical bins to cartesian bins
    bins_transform = angular_dist_to_euclidean_dist(bins)

    return two_point(data, bins_transform, method=method,
                     data_R=data_R, random_state=rng)


def bootstrap_two_point_angular(ra, dec, bins, method='standard',
                                Nbootstraps=10, random_state=None):
    # type: (object, object, object, object, object, object) -> object
    """Angular two-point correlation function

    A separate function is needed because angular distances are not
    euclidean, and random sampling needs to take into account the
    spherical volume element.

    Parameters
    ----------
    ra : array_like
        input right ascention, shape = (n_samples,)
    dec : array_like
        input declination
    bins : array_like
        bins within which to compute the 2-point correlation.
        shape = Nbins + 1
    method : string
        "standard" or "landy-szalay".
    Nbootstraps : int
        number of bootstrap resamples
    random_state : integer, np.random.RandomState, or None
        specify the random state to use for generating background

    Returns
    -------
    corr : ndarray
        the estimate of the correlation function within each bin
        shape = Nbins
    dcorr : ndarray
        error estimate on dcorr (sample standard deviation of
        bootstrap resamples)
    bootstraps : ndarray
        The full sample of bootstraps used to compute corr and dcorr
    """
    ra = np.asarray(ra)
    dec = np.asarray(dec)
    rng = check_random_state(random_state)

    if method not in ['standard', 'landy-szalay']:
        raise ValueError("method must be 'standard' or 'landy-szalay'")

    if bins.ndim != 1:
        raise ValueError("bins must be a 1D array")

    if (ra.ndim != 1) or (dec.ndim != 1) or (ra.shape != dec.shape):
        raise ValueError('ra and dec must be 1-dimensional '
                         'arrays of the same length')

    n_features = len(ra)
    Nbins = len(bins) - 1
    data = np.asarray(ra_dec_to_xyz(ra, dec), order='F').T

    # convert spherical bins to cartesian bins
    bins_transform = angular_dist_to_euclidean_dist(bins)

    bootstraps = []

    for i in range(Nbootstraps):
        # draw a random sample with N points
        ra_R, dec_R = uniform_sphere((min(ra), max(ra)),
                                     (min(dec), max(dec)),
                                     2 * len(ra))

        data_R = np.asarray(ra_dec_to_xyz(ra_R, dec_R), order='F').T

        if i > 0:
            # random sample of the data
            ind = np.random.randint(0, data.shape[0], data.shape[0])
            data_b = data[ind]
        else:
            data_b = data

        bootstraps.append(two_point(data_b, bins_transform, method=method,
                                    data_R=data_R, random_state=rng))

    bootstraps = np.asarray(bootstraps)
    corr = np.mean(bootstraps, 0)
    corr_err = np.std(bootstraps, 0, ddof=1)

    return corr, corr_err, bootstraps

In [ ]:
sklearn_has_two_point

In [ ]:
help(KDTree)

In [ ]:
dataxyz=ra_dec_to_xyz(data['ra'],data['dec'])

In [ ]:
dataxyz=np.asarray(dataxyz)

In [ ]:
dataxyz=dataxyz.transpose()

In [ ]:
dataxyz

In [ ]:
dataxyzR=ra_dec_to_xyz(dataR['ra'],dataR['dec'])

In [ ]:
dataxyzR=np.asarray(dataxyzR)

In [ ]:
dataxyzR=dataxyzR.transpose()

In [ ]:
dataxyzR

In [ ]:
bins=np.arange(0.0,1.05,0.05)

In [ ]:
bins

In [ ]:


In [ ]:
#@pickle_results("tpcf_std.pkl")
tpcf=two_point(dataxyz,bins,method='standard',data_R=dataxyzR, random_state=None)

In [ ]:
tpcf

In [ ]:
plt.plot(bins[1:],tpcf,'ro')

In [ ]:
tpcfam=two_point(dataxyz,bins,method='standard',data_R=None, random_state=None)

In [ ]:
tpcfam

In [ ]:
plt.plot(bins[1:],tpcfam,'bo')

In [ ]:
bins2=np.arange(0.2,0.6,0.02)

In [ ]:
tpcfamb2=two_point(dataxyz,bins2,method='standard',data_R=None, random_state=None)

In [ ]:
plt.plot(bins2[1:],tpcfamb2,'go')

The above doesn't show any BAO feature... It used inbuilt astroML method to generate random catalog... by shuffling the original data's content... That way all of the random points fall in the same survey area and will adhere to all the filtering criteria... the factor or ratio of data pts vs. random pts will be 1... instead of large no. in case if we take existing random catalog or create one


In [ ]:
rng = check_random_state(None)

n_samples, n_features = dataxyz.shape
Nbins = len(bins) - 1

# shuffle all but one axis to get background distribution
data_Rxyz = dataxyz.copy()
print data_Rxyz
for i in range(n_features - 1):
    rng.shuffle(data_Rxyz[:, i])
print data_Rxyz

Lets see how it looks with a healpix map


In [ ]:


In [ ]:
NSIDE=512
dr72hpix=hu.HealPix("ring",NSIDE)

In [ ]:
import math as m

def cart2sph(x,y,z):
    XsqPlusYsq = x**2 + y**2
    r = m.sqrt(XsqPlusYsq + z**2)               # r
    elev = m.atan2(z,m.sqrt(XsqPlusYsq))     # theta
    az = m.atan2(y,x)                           # phi
    return r, elev, az

def cart2sphA(pts):
    return np.array([cart2sph(x,y,z) for x,y,z in pts])

def appendSpherical(xyz):
    np.hstack((xyz, cart2sphA(xyz)))

In [ ]:
ang=cart2sphA(data_Rxyz)

In [ ]:
ang

In [ ]:
ang.shape

In [ ]:
#ang.resize((105831, 2))
np.squeeze(ang, axis=None)

In [ ]:
help(ang.squeeze)

In [ ]:
ang2=ang[:,1:]

In [ ]:
ang2

In [ ]:
ang2.shape

In [ ]:
ang2[2,0]

In [ ]:


In [ ]:
pixdata = open("./output/pixdatadr72VAGCfullrandam.dat",'w')
pixdata.write("pix \n")
for i in range(0,len(ang2)-1):
    #pixdata.write("%f\t" %data['z'][i])
    pixdata.write("%d\n" %dr72hpix.ang2pix(ang2[i,0],ang2[i,1]))
pixdata.close()

In [ ]:
pixdata = ascii.read("./output/pixdatadr72VAGCfullrandam.dat")
hpixdata=np.array(np.zeros(hu.nside2npix(NSIDE)))
for j in range(len(pixdata)):
    hpixdata[pixdata[j]['pix']]+=1

In [ ]:
hpixdata

In [ ]:
hu.mollview(hpixdata,rot=180)

In [ ]:
hu.orthview(hpixdata)

This method doesnt seem to produce right random catalogs...doing it with ra and dec as follows


In [ ]:
data

In [ ]:
data['z'],data['ra'],data['dec']

In [ ]:
datzradec=np.array([data['z'], data['ra'], data['dec']])

In [ ]:
datzradec

In [ ]:
rng = check_random_state(None)

n_features, n_samples  = datzradec.shape

# shuffle all but one axis to get background distribution
data_Rzradec = datzradec.copy()
print data_Rzradec
for i in range(1,n_features):
    rng.shuffle(data_Rzradec[:, i])
print data_Rzradec

In [ ]:
min(data_Rzradec[:, 1])

In [ ]:
max(data_Rzradec[:, 1])

In [ ]:
min(data_Rzradec[:, 2])

In [ ]:
max(data_Rzradec[:, 2])

In [ ]:


In [ ]:
min(datzradec[:, 1])

In [ ]:
max(datzradec[:, 1])

In [ ]:
min(datzradec[:, 2])

In [ ]:
max(datzradec[:, 2])

In [ ]:
range(1,3)

In [ ]:


In [ ]:


In [ ]:
help(rng.shuffle)

In [ ]:
n_samples

In [ ]:
n_features

In [ ]:
data_Rzradec

In [ ]:
data_Rzradec[0][2]

In [ ]:
len(data_Rzradec[0][:])

In [ ]:
data_Rzradec[0][:]

In [ ]:
pixdata = open("./output/pixdatadr72VAGCfullrandamrd.dat",'w')
pixdata.write("z\t pix \n")
for i in range(0,len(data_Rzradec[0][:])-1):
    pixdata.write("%f\t" %data_Rzradec[0][i])
    pixdata.write("%d\n" %dr72hpix.eq2pix(data_Rzradec[1][i],data_Rzradec[2][i]))
pixdata.close()

In [ ]:
pixdata = ascii.read("./output/pixdatadr72VAGCfullrandamrd.dat")
hpixdata=np.array(np.zeros(hu.nside2npix(NSIDE)))
for j in range(len(pixdata)):
    hpixdata[pixdata[j]['pix']]+=1

In [ ]:
hpixdata

In [ ]:
hu.mollview(hpixdata,rot=180)

In [ ]:
hu.orthview(hpixdata)

In [ ]:
dataxyz

In [ ]:
dataxyzR1=ra_dec_to_xyz(data_Rzradec[1][:],data_Rzradec[2][:])

In [ ]:
data_Rzradec[1][:]

In [ ]:


In [ ]:


In [ ]:
dataxyzR1

In [ ]:
dataxyzR1=np.asarray(dataxyzR1)

In [ ]:


In [ ]:
dataxyzR1=dataxyzR1.transpose()

In [ ]:
dataxyzR1

In [ ]:
bins=np.arange(0.025,1.025,0.025)

In [ ]:
bins

In [ ]:


In [ ]:
#@pickle_results("tpcf_std.pkl")
tpcf=two_point(dataxyz,bins,method='standard',data_R=dataxyzR1, random_state=None)

In [ ]:
tpcf

In [ ]:
plt.plot(bins[1:],tpcf,'ro')

In [ ]:
bins=np.arange(0.0,1.05,0.05)

In [ ]:
#@pickle_results("tpcf_std.pkl")
tpcf=two_point(dataxyz,bins,method='standard',data_R=dataxyzR1, random_state=None)

In [ ]:
tpcf

In [ ]:
plt.plot(bins[1:],tpcf,'ro')

In [ ]:
btpcf=bootstrap_two_point(dataxyz, bins, Nbootstrap=10,
                        method='standard', return_bootstraps=False,
                        random_state=None)

In [ ]:
btpcf

In [ ]:
plt.errorbar(bins[1:],btpcf[0],yerr=btpcf[1],fmt='ro-')

In [ ]:
help(plt.errorbar)

In [ ]:
@pickle_results("tpcf_ls.pkl")
tpcfls=two_point(dataxyz,bins,method='landy-szalay',
              data_R=dataxyzR, random_state=None)

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:
#------------------------------------------------------------
# Set up correlation function computation
#  This calculation takes a long time with the bootstrap resampling,
#  so we'll save the results.
@pickle_results("correlation_functionsdr72.pkl")
def compute_results(Nbins=16, Nbootstraps=10,  method='landy-szalay', rseed=0):
    np.random.seed(rseed)
    bins = 10 ** np.linspace(np.log10(1. / 60.), np.log10(6), 16)

    results = [bins]
    for D in [data]:
        results += bootstrap_two_point_angular(D['ra'],
                                               D['dec'],
                                               bins=bins,
                                               method=method,
                                               Nbootstraps=Nbootstraps)

    return results

(bins, r_corr, r_corr_err, r_bootstraps) = compute_results()

bin_centers = 0.5 * (bins[1:] + bins[:-1])

In [ ]:
bins

In [ ]:
r_corr

In [ ]:
r_corr_err

In [ ]:
r_bootstraps

In [ ]:
#------------------------------------------------------------
# Plot the results

label = '$0.15<z<0.25$\n$N=33813$' 

fig = plt.figure(figsize=(6, 6))
plt.xscale('log')
plt.yscale('log')
plt.errorbar(bin_centers, r_corr, r_corr_err,fmt='.k', ecolor='gray', lw=1)
fig.text(0.8, 0.8, label, ha='right', va='top')
plt.xlabel(r'$\theta\ (deg)$')
plt.ylabel(r'$w(\theta)$')
plt.show()

plt.show()
fig.savefig("wth_dr72015025.pdf")

In [ ]:


In [ ]:
data=ascii.read('./input/sdssdr72_sorted_z.dat')

In [ ]:
data

In [ ]:
#m_max = 19

# redshift and magnitude cuts
data = data[data['z'] > 0.05]
data = data[data['z'] < 0.15]
#data = data[data['petroMag_r'] < m_max]

# RA/DEC cuts
RAmin, RAmax = 140, 220
DECmin, DECmax = 5, 45
data = data[data['ra'] < RAmax]
data = data[data['ra'] > RAmin]
data = data[data['dec'] < DECmax]
data = data[data['dec'] > DECmin]

#ur = data['modelMag_u'] - data['modelMag_r']
#flag_red = (ur > 2.22)
#flag_blue = ~flag_red

#datag 

print "data size:"
print "  total gals: ", len(data)
#print "  blue gals:", len(data_blue)

In [ ]:
NSIDE=512
dr72hpix=hu.HealPix("ring",NSIDE)

In [ ]:
pixdata = open("./output/pixdatadr72005015.dat",'w')
pixdata.write("z\t pix \n")

for i in range(0,len(data)-1):
    pixdata.write("%f\t" %data['z'][i])
    pixdata.write("%d\n" %dr72hpix.eq2pix(data['ra'][i],data['dec'][i]))
pixdata.close()

In [ ]:
pixdata = ascii.read("./output/pixdatadr72005015.dat")
hpixdata=np.array(np.zeros(hu.nside2npix(NSIDE)))
for j in range(len(pixdata)):
    hpixdata[pixdata[j]['pix']]+=1

In [ ]:
hpixdata

In [ ]:
hu.mollview(hpixdata,rot=180)

In [ ]:
hu.orthview(hpixdata)

In [ ]:
#------------------------------------------------------------
# Set up correlation function computation
#  This calculation takes a long time with the bootstrap resampling,
#  so we'll save the results.
@pickle_results("correlation_functionsdr720515.pkl")
def compute_results(Nbins=16, Nbootstraps=10,  method='landy-szalay', rseed=0):
    np.random.seed(rseed)
    bins = 10 ** np.linspace(np.log10(1. / 60.), np.log10(6), 16)

    results = [bins]
    for D in [data]:
        results += bootstrap_two_point_angular(D['ra'],
                                               D['dec'],
                                               bins=bins,
                                               method=method,
                                               Nbootstraps=Nbootstraps)

    return results

(bins, r_corr, r_corr_err, r_bootstraps) = compute_results()

bin_centers = 0.5 * (bins[1:] + bins[:-1])

In [ ]:
bins

In [ ]:
r_corr

In [ ]:
r_corr_err

In [ ]:
r_bootstraps

In [ ]:
#------------------------------------------------------------
# Plot the results
label = '$0.05<z<0.15$\n$N=138051$'

fig = plt.figure(figsize=(6, 6))
plt.xscale('log')
plt.yscale('log')
plt.errorbar(bin_centers, r_corr, r_corr_err,fmt='.k', ecolor='gray', lw=1)
fig.text(0.8, 0.8, label, ha='right', va='top')
plt.xlabel(r'$\theta\ (deg)$')
plt.ylabel(r'$w(\theta)$')
plt.show()
fig.savefig("wth_dr72005015.pdf")

In [ ]:
plt.errorbar(bins[0:len(bins)-1],r_corr,r_corr_err)

In [ ]:
data=ascii.read('./input/sdssdr72_sorted_z.dat')

In [ ]:
data

In [ ]:
data['z']

In [ ]:
#m_max = 19

# redshift and magnitude cuts
data = data[data['z'] > 0.05]
data = data[data['z'] <= 0.10]
#data = data[data['petroMag_r'] < m_max]

# RA/DEC cuts
RAmin, RAmax = 140, 220
DECmin, DECmax = 5, 45
data = data[data['ra'] < RAmax]
data = data[data['ra'] > RAmin]
data = data[data['dec'] < DECmax]
data = data[data['dec'] > DECmin]

#ur = data['modelMag_u'] - data['modelMag_r']
#flag_red = (ur > 2.22)
#flag_blue = ~flag_red

#datag 

print "data size:"
print "  total gals: ", len(data)
#print "  blue gals:", len(data_blue)

In [ ]:
NSIDE=512
dr72hpix=hu.HealPix("ring",NSIDE)

In [ ]:
pixdata = open("./output/pixdatadr7200501.dat",'w')
pixdata.write("z\t pix \n")

for i in range(0,len(data)-1):
    pixdata.write("%f\t" %data['z'][i])
    pixdata.write("%d\n" %dr72hpix.eq2pix(data['ra'][i],data['dec'][i]))
pixdata.close()

In [ ]:
pixdata = ascii.read("./output/pixdatadr7200501.dat")
hpixdata=np.array(np.zeros(hu.nside2npix(NSIDE)))
for j in range(len(pixdata)):
    hpixdata[pixdata[j]['pix']]+=1

In [ ]:
hpixdata

In [ ]:
hu.mollview(hpixdata,rot=180)

In [ ]:
hu.orthview(hpixdata)

In [ ]:
#------------------------------------------------------------
# Set up correlation function computation
#  This calculation takes a long time with the bootstrap resampling,
#  so we'll save the results.
@pickle_results("correlation_functionsdr720501.pkl")
def compute_results(Nbins=16, Nbootstraps=10,  method='landy-szalay', rseed=0):
    np.random.seed(rseed)
    bins = 10 ** np.linspace(np.log10(1. / 60.), np.log10(6), 16)

    results = [bins]
    for D in [data]:
        results += bootstrap_two_point_angular(D['ra'],
                                               D['dec'],
                                               bins=bins,
                                               method=method,
                                               Nbootstraps=Nbootstraps)

    return results

(bins, r_corr, r_corr_err, r_bootstraps) = compute_results()

bin_centers = 0.5 * (bins[1:] + bins[:-1])

In [ ]:
bins

In [ ]:
r_corr

In [ ]:
r_corr_err

In [ ]:
r_bootstraps

In [ ]:
#------------------------------------------------------------
# Plot the results

label = '$0.05<z<0.10$\n$N=78939$'

fig = plt.figure(figsize=(6, 6))
plt.xscale('log')
plt.yscale('log')
plt.errorbar(bin_centers, r_corr, r_corr_err,fmt='.k', ecolor='gray', lw=1)
fig.text(0.8, 0.8, label, ha='right', va='top')
plt.xlabel(r'$\theta\ (deg)$')
plt.ylabel(r'$w(\theta)$')
plt.show()
fig.savefig("wth_dr720501.pdf")

In [ ]:
plt.errorbar(bins[0:len(bins)-1],r_corr,r_corr_err)

In [ ]:


In [ ]:
data=ascii.read('./input/sdssdr72_sorted_z.dat')

In [ ]:
data

In [ ]:
data['z']

In [ ]:
#m_max = 19

# redshift and magnitude cuts
data = data[data['z'] > 0.10]
data = data[data['z'] <= 0.15]
#data = data[data['petroMag_r'] < m_max]

# RA/DEC cuts
RAmin, RAmax = 140, 220
DECmin, DECmax = 5, 45
data = data[data['ra'] < RAmax]
data = data[data['ra'] > RAmin]
data = data[data['dec'] < DECmax]
data = data[data['dec'] > DECmin]

#ur = data['modelMag_u'] - data['modelMag_r']
#flag_red = (ur > 2.22)
#flag_blue = ~flag_red

#datag 

print "data size:"
print "  total gals: ", len(data)
#print "  blue gals:", len(data_blue)

In [ ]:
NSIDE=512
dr72hpix=hu.HealPix("ring",NSIDE)

In [ ]:
pixdata = open("./output/pixdatadr72001015.dat",'w')
pixdata.write("z\t pix \n")

for i in range(0,len(data)-1):
    pixdata.write("%f\t" %data['z'][i])
    pixdata.write("%d\n" %dr72hpix.eq2pix(data['ra'][i],data['dec'][i]))
pixdata.close()

In [ ]:
pixdata = ascii.read("./output/pixdatadr72001015.dat")
hpixdata=np.array(np.zeros(hu.nside2npix(NSIDE)))
for j in range(len(pixdata)):
    hpixdata[pixdata[j]['pix']]+=1

In [ ]:
hpixdata

In [ ]:
hu.mollview(hpixdata,rot=180)

In [ ]:
hu.orthview(hpixdata)

In [ ]:
#------------------------------------------------------------
# Set up correlation function computation
#  This calculation takes a long time with the bootstrap resampling,
#  so we'll save the results.
@pickle_results("correlation_functionsdr72001015.pkl")
def compute_results(Nbins=16, Nbootstraps=10,  method='landy-szalay', rseed=0):
    np.random.seed(rseed)
    bins = 10 ** np.linspace(np.log10(1. / 60.), np.log10(6), 16)

    results = [bins]
    for D in [data]:
        results += bootstrap_two_point_angular(D['ra'],
                                               D['dec'],
                                               bins=bins,
                                               method=method,
                                               Nbootstraps=Nbootstraps)

    return results

(bins, r_corr, r_corr_err, r_bootstraps) = compute_results()

bin_centers = 0.5 * (bins[1:] + bins[:-1])

In [ ]:
bins

In [ ]:
r_corr

In [ ]:
r_corr_err

In [ ]:
r_bootstraps

In [ ]:
#------------------------------------------------------------
# Plot the results

label = '$0.10<z<0.15$\n$N=59112$'
fig = plt.figure(figsize=(6, 6))
plt.xscale('log')
plt.yscale('log')
plt.errorbar(bin_centers, r_corr, r_corr_err,fmt='.k', ecolor='gray', lw=1)
fig.text(0.8, 0.8, label, ha='right', va='top')
plt.xlabel(r'$\theta\ (deg)$')
plt.ylabel(r'$w(\theta)$')
plt.show()
fig.savefig("wth_dr7201015.pdf")

In [ ]:
plt.errorbar(bins[0:len(bins)-1],r_corr,r_corr_err)

In [ ]:
hu.mollview(hpixdatab,rot=180)

In [ ]:
hu.orthview(hpixdatab)

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:
help(hu.mollview)

In [ ]:
from astroML.datasets import fetch_sdss_specgals
from astroML.correlation import bootstrap_two_point_angular

In [ ]:
help(bootstrap_two_point_angular)

In [ ]:
help(astroML.correlation)

In [ ]:
import astroML.correlation

In [ ]:
import sklearn.neighbors

In [ ]:
help(sklearn.neighbors)

Sorted and reduced column set data can now be 'read' to reduce RAM requirements of the table reading.


In [ ]:
sdssdr72=ascii.read('./input/dssdr72_sorted_z.dat')

Create a healpix map with NSIDE=64 (no. of pixels = 49152 as $NPIX=12\times NSIDE^2$) because the no. of galaxies in the survey are less. For higher resolution (later for dr12) we will consider NSIDE=512 or even 1024. For now, we will create a 64 NSIDE map.


In [ ]:
NSIDE=64
dt72hpix=hu.HealPix("ring",NSIDE)

We have data of galaxies with redshifts between 0 and 0.5 ($0<z<0.5$). To look at a time slice/at a certain epoch we need to choose the list of galaxies within a redshift window. As, measurement of redshift has $\pm 0.05$ error. We can bin all the data into redshifts with range limited to 0.05 variation each. So, we have 10 databins with almost identical redshifts. We save each databin in a different file.


In [ ]:
j=0
for i in range(1,17):
    pixdata = open("/home/rohin/Desktop/healpix/binned1/pixdata%d_%d.dat"%(NSIDE,i),'w')
    pixdata.write("ra\t dec\t z\t pix \n")
    #for j in range(len(sdssdr72)):
    try:
        while sdssdr72[j]['z']<0.03*i:
            pixdata.write("%f\t" %sdssdr72[j]['ra'])
            pixdata.write("%f\t" %sdssdr72[j]['dec'])
            pixdata.write("%f\t" %sdssdr72[j]['z'])
            pixdata.write("%d\n" %dt72hpix.eq2pix(sdssdr72[j]['ra'],sdssdr72[j]['dec']))
            #print dt72hpix.eq2pix(sdssdr72[j]['ra'],sdssdr72[j]['dec'])
            j=j+1
    except:
        pass
    pixdata.close()

In [ ]:
for i in range(1,17):
    pixdata = ascii.read("/home/rohin/Desktop/healpix/binned1/pixdata%d_%d.dat"%(NSIDE,i))
    mpixdata = open("/home/rohin/Desktop/healpix/binned1/masked/pixdata%d_%d.dat"%(NSIDE,i),'w')
    mpixdata.write("ra\t dec\t z\t pix \n")
    for j in range((len(pixdata)-1)):
        if 100<pixdata[j]['ra']<250:
            mpixdata.write("%f\t" %pixdata[j]['ra'])
            mpixdata.write("%f\t" %pixdata[j]['dec'])
            mpixdata.write("%f\t" %pixdata[j]['z'])
            mpixdata.write("%d\n" %pixdata[j]['pix'])
    #pixdata.write("/home/rohin/Desktop/healpix/binned1/masked/pixdata_%d.dat"%i,format='ascii')
                
                
            #print dt72hpix.eq2pix(sdssdr72[j]['ra'],sdssdr72[j]['dec'])
    mpixdata.close()

We now, take each databin and assign the total no. of galaxies as the value of each pixel. The following routine will calculate the no. of galaxies by couting the occurence of pixel numbers in the file.


In [ ]:
pixdata = ascii.read("/home/rohin/Desktop/healpix/binned1/masked/pixdata%d_2.dat"%NSIDE)
hpixdata=np.array(np.zeros(hu.nside2npix(NSIDE)))
for j in range(len(pixdata)):
    hpixdata[pixdata[j]['pix']]+=1

In [ ]:
hpixdata

In [ ]:
hu.orthview(hpixdata,rot=180)

In [ ]:
pixcl=hu.anafast(hpixdata,lmax=300)
ell = np.arange(len(pixcl))
plt.figure()
plt.plot(ell,np.log(pixcl))
plt.show()

In [ ]:
pixcl=hu.anafast(hpixdata,lmax=300)
ell = np.arange(len(pixcl))
plt.figure()
plt.plot(ell,np.sqrt(ell*(ell+1)*pixcl/(4*math.pi)))
plt.show()

In [ ]:
theta=np.arange(0,np.pi,0.001)

In [ ]:
correldat = np.polynomial.legendre.legval(np.cos(theta),(2*ell+1)*np.absolute(pixcl))/(4*math.pi)

In [ ]:
plt.figure()
plt.plot(theta[0:600]*180/math.pi,correldat[0:600])
plt.show()

In [ ]:
plt.figure()
plt.plot(theta*180/math.pi,correldat)
plt.show()

In [ ]:
randra,randdec=hu.randsphere(2200000)

In [ ]:
randhp=hu.HealPix("RING",NSIDE)

In [ ]:
randhppix=randhp.eq2pix(randra,randdec)

In [ ]:
randpixdat=np.array(np.zeros(hu.nside2npix(NSIDE)))

In [ ]:
for j in range(len(randhppix)):
    randpixdat[randhppix[j]]+=1

In [ ]:
randmaphp=hu.mollview(randpixdat)

In [ ]:
randcl=hu.anafast(randpixdat,lmax=300)
ell = np.arange(len(randcl))
plt.figure()
plt.plot(ell,np.sqrt(ell*(ell+1)*randcl/(4*math.pi)))
plt.show()

In [ ]:
correlrand = np.polynomial.legendre.legval(np.cos(theta),(2*ell+1)*np.absolute(randcl))/(4*math.pi)
plt.figure()
plt.plot(theta[0:600]*180/math.pi,correlrand[0:600])
plt.show()

In [ ]:
finalcorrel=correldat-correlrand
plt.figure()
plt.plot(theta[0:600]*180/math.pi,finalcorrel[0:600])
plt.show()

In [ ]:
finalpix=hpixdata-randpixdat

In [ ]:
hu.mollview(finalpix,rot=180)

In [ ]:
cl=hu.anafast(finalpix,lmax=300)
ell = np.arange(len(cl))
plt.figure()
plt.plot(ell,np.sqrt(ell*(ell+1)*cl/(4*math.pi)))
plt.show()

In [ ]:
correlrand = np.polynomial.legendre.legval(np.cos(theta),(2*ell+1)*np.absolute(cl))/(4*math.pi)
plt.figure()
plt.plot(theta[0:600]*180/math.pi,correlrand[0:600])
plt.show()

In [ ]:
finalcl=pixcl-randcl
correlrand = np.polynomial.legendre.legval(np.cos(theta),(2*ell+1)*np.absolute(finalcl))/(4*math.pi)
plt.figure()
plt.plot(theta[0:600]*180/math.pi,correlrand[0:600])
plt.show()

In [ ]:
help(fits)

In [ ]:
data[1].data['z']

In [ ]: