First, import all the modules needed for analysing the structure, such as healpix_util, astropy, scipy, scikit-learn's BallTree, and the custom AP-metric helpers.
In [1]:
import healpix_util as hu
import astropy as ap
import numpy as np
from astropy.io import fits
from astropy.table import Table
import astropy.io.ascii as ascii
from astropy.constants import c
import matplotlib.pyplot as plt
import math as m
from math import pi
#from scipy.constants import c
import scipy.special as sp
from astroML.decorators import pickle_results
from scipy import integrate
import warnings
from sklearn.neighbors import BallTree
import pickle
import multiprocessing as mp
import time
from aptestmetricdt import *
from aptestmetricdz import *
from scipy.spatial import distance as d
from apcat import *
from progressbar import *
from tqdm import *
from functools import partial
import pymangle
from apdz import *
from apdt import *
from scipy.optimize import curve_fit
#from astroML.datasets import fetch_sdss_specgals
#from astroML.correlation import bootstrap_two_point_angular
%matplotlib inline
In [2]:
# Getting back the objects:
with open('../output/datzAP.pkl') as f:  # Python 3: open(..., 'rb')
    dat = pickle.load(f)
dat
Out[2]:
Read the data file (taken from http://cosmo.nyu.edu/~eak306/SDSS-LRG.html ), which was converted to ASCII with comoving distances etc. in V01; here the pickled arrays are loaded instead for a faster read.
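For reference, a minimal sketch of how such a pickle could be produced from the ASCII catalogue, assuming it provides a redshift column 'z' and ra/dec in radians ('rar'/'decr', the radian-column naming used for the random catalogue later in this notebook); the file name here is illustrative:
import numpy as np
import pickle
import astropy.io.ascii as ascii
# hypothetical ASCII file produced in V01; adjust the path and column names as needed
datatab = ascii.read("../output/DR72LRGzrarf.dat")
# stack (z, ra_rad, dec_rad) into an (N, 3) array, the layout the AP metric functions expect
dat = np.array([datatab['z'], datatab['rar'], datatab['decr']]).transpose()
with open('../output/datzAP.pkl', 'wb') as f:
    pickle.dump(dat, f)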
In [3]:
# Getting back the objects:
with open('../output/rdatzAP.pkl') as f:  # Python 3: open(..., 'rb')
    datR = pickle.load(f)
datR
Out[3]:
In [4]:
rd2d=np.zeros((20,20))
In [5]:
rng = np.array([[0, 0.02], [0, 0.02]])
In [6]:
%%time
ddbt=BallTree(dat,metric='pyfunc',func=APzdth)
In [7]:
rng
Out[7]:
In [8]:
bins=np.arange(0.0,0.02001,0.001)
In [9]:
bins
Out[9]:
In [10]:
%%time
# accumulate the random-data (RD) pair counts on the (dz, z*dtheta) grid
for i in tqdm(xrange(len(datR))):
    ind=ddbt.query_radius(datR[i].reshape(1,-1),0.021)
    for j in ind:
        dist0=d.cdist([datR[i],],dat[j],APdz)[0]
        dist1=d.cdist([datR[i],],dat[j],APzdth)[0]
        rd2d+=np.histogram2d(dist0, dist1,range=rng,bins=(bins,bins))[0]
print rd2d
In [11]:
with open('rd2ddr72v06cdist200kopt2.pkl','w') as f:
    pickle.dump(rd2d,f)
rd2d
Out[11]:
In [12]:
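# reference 20x20 (dz, z*dtheta) pair-count grid, apparently saved from an earlier run, used below to compare against rd2d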
dr2d1=np.array([[ 2205., 6509., 10617., 14888., 19135., 22806., 27350.,
30627., 34655., 38549., 41577., 45369., 49577., 53261.,
57240., 60768., 63806., 67306., 71212., 74776.],
[ 2128., 6383., 10532., 14979., 18853., 23107., 26913.,
30642., 34333., 38065., 41954., 45513., 49499., 53067.,
57135., 60108., 63503., 67014., 70806., 74665.],
[ 2092., 6391., 10654., 14809., 18990., 22834., 26508.,
30568., 34650., 38387., 42084., 45143., 49488., 53330.,
56165., 60097., 63303., 67144., 70479., 74103.],
[ 2247., 6400., 10500., 14779., 18684., 22789., 26594.,
30704., 34519., 38178., 42047., 45556., 48968., 52392.,
56163., 60330., 63307., 66879., 70654., 74348.],
[ 2177., 6230., 10340., 14784., 18631., 22585., 26420.,
30845., 34098., 38171., 41496., 45234., 49094., 52691.,
56408., 59457., 63411., 66856., 70481., 74022.],
[ 2096., 6303., 10624., 14560., 18722., 22623., 26528.,
30239., 34154., 38140., 41459., 45541., 49034., 52193.,
55814., 59624., 63021., 66900., 70365., 73375.],
[ 2116., 6333., 10478., 14634., 18835., 22557., 26436.,
30340., 34052., 37778., 41286., 45360., 48667., 52378.,
56007., 59912., 62522., 66802., 69625., 73675.],
[ 2189., 6326., 10472., 14840., 18712., 22568., 26304.,
30083., 33981., 37972., 41651., 45039., 48446., 52628.,
55612., 59590., 62745., 66145., 70004., 72427.],
[ 2166., 6325., 10496., 14407., 18415., 22493., 26147.,
30209., 33923., 37973., 41162., 45098., 49033., 52131.,
55565., 59018., 62269., 65989., 69622., 72875.],
[ 2100., 6255., 10528., 14459., 18531., 22533., 26487.,
30160., 33683., 37591., 41224., 44711., 48625., 51873.,
55524., 59254., 62227., 65706., 69482., 72917.],
[ 2128., 6457., 10431., 14537., 18634., 22172., 26048.,
29986., 33730., 37514., 40935., 44329., 48276., 52141.,
55179., 58650., 62581., 66163., 69452., 72695.],
[ 2077., 6304., 10282., 14489., 18333., 22617., 25961.,
30042., 33060., 37106., 40536., 44903., 48005., 51465.,
54936., 58906., 62288., 65332., 69156., 72473.],
[ 2096., 6283., 10304., 14390., 18162., 22164., 26022.,
29688., 33532., 37415., 41027., 44497., 48171., 51561.,
55065., 58332., 62011., 65375., 68791., 72184.],
[ 1997., 6202., 10161., 14317., 18073., 22174., 25932.,
29731., 33118., 37008., 40333., 44390., 48149., 50994.,
54619., 57655., 61769., 64973., 68538., 72164.],
[ 2177., 6144., 10177., 14274., 18243., 21932., 25646.,
29484., 32853., 37053., 40587., 44570., 47524., 51278.,
54741., 57904., 60909., 65417., 68491., 72223.],
[ 2144., 6248., 10135., 14215., 18261., 22048., 25676.,
29599., 33066., 36720., 40301., 43999., 47269., 51008.,
54608., 57814., 61531., 65050., 68119., 71587.],
[ 2056., 6089., 10218., 14305., 18246., 22008., 25379.,
29295., 33099., 36613., 40526., 43903., 47068., 50177.,
54459., 57496., 61353., 64993., 67824., 71765.],
[ 2182., 6286., 10273., 14212., 18125., 21717., 25602.,
29543., 32993., 36432., 39977., 43196., 47120., 50479.,
53572., 57702., 60687., 64071., 67382., 71241.],
[ 2065., 6144., 9957., 13940., 17881., 21754., 25439.,
29237., 32840., 36541., 39511., 43342., 46823., 50636.,
53757., 57248., 60289., 64381., 67125., 71214.],
[ 2040., 5963., 10038., 14009., 17905., 21711., 25475.,
29043., 32682., 36300., 39542., 42924., 46708., 50511.,
54318., 57356., 60479., 63775., 67051., 70686.]])
In [13]:
rd2d-dr2d1
Out[13]:
In [ ]:
In [ ]:
In [ ]:
In [37]:
%%time
ind=ddbt.query_radius(dat[0].reshape(1,-1),bins[0])
ind1=ind[0]   # indices of the neighbours of dat[0]; query_radius returns an object array of index arrays
In [35]:
ind1
Out[35]:
In [38]:
for i in ind1:
    print dat[i]
In [45]:
%%time
dd2d=np.zeros((20,20))   # DD pair-count grid, same shape as rd2d above
for i in ind:
    dist0=d.cdist([dat[0],],dat[i],APdz)[0]
    dist1=d.cdist([dat[0],],dat[i],APzdth)[0]
    print np.histogram2d(dist0, dist1,range=rng)
    dd2d+=np.histogram2d(dist0, dist1,range=rng,bins=(bins,bins))[0]
print dd2d
In [40]:
help(np.histogram2d)
In [ ]:
In [24]:
ind
Out[24]:
In [19]:
np.random.seed(0)
X = np.random.random((10, 3)) # 10 points in 3 dimensions
tree = BallTree(X, leaf_size=2) # doctest: +SKIP
#print(tree.query_radius(X[0], r=0.3, count_only=True))
ind = tree.query_radius(X[0].reshape(1,-1), r=0.3) # doctest: +SKIP
print(ind) # indices of neighbors within distance 0.3
In [11]:
help(ddbt.query_radius)
In [ ]:
dr2d=np.zeros((20,20))
In [ ]:
rng = np.array([[0, 0.02], [0, 0.02]])
In [ ]:
%%time
dist0=d.cdist([dat[0],],datR,APdz)[0]
dist1=d.cdist([dat[0],],datR,APzdth)[0]
print np.histogram2d(dist0, dist1,range=rng)
In [ ]:
%%time
for i in tqdm(xrange(len(dat))):
    dist0=d.cdist([dat[i],],datR,APdz)[0]
    dist1=d.cdist([dat[i],],datR,APzdth)[0]
    dr2d+=np.histogram2d(dist0, dist1,range=rng,bins=(bins,bins))[0]   # bins must match the 20x20 dr2d grid
print dr2d
In [ ]:
with open('dr2ddr72v06cdist200k.pkl','w') as f:
    pickle.dump(dr2d,f)
dr2d
In [ ]:
In [ ]:
dist0
In [ ]:
len(dist0)
In [ ]:
dist0.size
In [ ]:
dist0=dist0.flatten()
print dist0
In [ ]:
len(dist0)
In [ ]:
%%time
dist0=d.cdist([dat[0],],dat,APdz)[0]
dist1=d.cdist([dat[0],],dat,APzdth)[0]
print np.histogram2d(dist0, dist1,range=rng)
#print dd2d
In [ ]:
In [ ]:
dist0
In [ ]:
In [ ]:
dist0=dist0[0]
In [ ]:
dist0[1]
In [ ]:
dist0[1:len(dist0)]
In [ ]:
len(dist0)
In [ ]:
dist0.size
In [ ]:
help(dist0.flatten)
In [ ]:
dist0[0]
In [ ]:
dist0.flatten()
In [ ]:
len(dist0)
In [ ]:
In [ ]:
In [ ]:
%%time
while len(dat)>0:
    i=len(dat)-1
    dist=np.zeros((len(dat),2))   # (dz, z*dtheta) separations from dat[0] to every remaining point
    while i>0:
        dist[i]=APcat(dat[0],dat[i])
        i-=1
    dd2d+=np.histogram2d(dist[:,0], dist[:,1],range=rng)[0]
    dat=np.delete(dat,0,axis=0)
In [ ]:
dd2d
In [ ]:
In [ ]:
In [ ]:
%%time
while len(dat)>0:
    i=len(dat)-1
    while i>0:
        dist=500*APcat(dat[0],dat[i])
        dd2d+=np.histogram2d(dist[0], dist[1], bins = 10, range = rng)[0]
        i-=1
    dat=np.delete(dat,0,axis=0)
In [ ]:
def binDists2d(dat,
               dz = lambda u, v: np.abs(u[:, 0] - v[:, 0]),
               zdth = lambda u, v: 0.5*(u[:, 0] + v[:, 0])*np.arccos(np.sin(u[:, 2])*np.sin(v[:, 2]) + np.cos(u[:, 2])*np.cos(v[:, 2])*np.cos(u[:, 1] - v[:, 1]))):
    # vectorised separations over all unique pairs (upper triangle of the index grid)
    i, j = np.triu_indices(dat.shape[0], 1)
    dist0 = dz(dat[i], dat[j])
    dist1 = zdth(dat[i], dat[j])
    rng = np.array([[0, 0.02], [0, 0.02]])
    return np.histogram2d(dist0, dist1, bins = 10, range = rng)
In [ ]:
td=np.random.rand(1000,3)
In [ ]:
binDists2d(td)
In [ ]:
%%time
binDists2d(dat)
In [ ]:
In [ ]:
In [ ]:
def binDists2d(dat, f1 = 'euclidean', f2 = 'cosine'):
    dist0 = APcat(dat, f1)
    dist1 = d.pdist(dat, f2)
    return np.histogram2d(dist0, dist1, bins = 10)
In [ ]:
dd2d
In [ ]:
plt.contour(dd2d)
In [ ]:
with open('DR72DD2DMI.pkl', 'w') as f:
    pickle.dump(dd2d,f)
In [ ]:
with open('DR72DD2DMI.pkl') as f:
    DD2D = pickle.load(f)
DD2D
In [ ]:
dzbin=zdthbin=np.arange(0.002,0.022,0.002)
In [ ]:
plt.contour(dzbin,zdthbin,dd2d)
In [ ]:
dzbin
In [ ]:
plt.contour(dzbin,zdthbin,dd2d,levels=[ 5041., 13955., 23161., 31557., 38796., 46402., 53552.,
60708., 67437., 74549.])
In [ ]:
In [ ]:
for i in range(len(dat)-1):
    for j in range(len(dat)-1):
        dist=APcat(dat[0],dat[i])
        ind0=int(dist[0]/0.002)
        ind1=int(dist[1]/0.002)
        if ind0>9 or ind1>9:
            pass
        else:
            dd2d[ind0,ind1]+=1
    dat=np.delete(dat,0,0)
    print len(dat)
print dd2d
In [ ]:
In [ ]:
bins=np.arange(0,0.022,0.002)
print bins
In [ ]:
%%time
from apmetric6 import *
BTdat6 = BallTree(dat,metric='pyfunc',func=APmetric6,leaf_size=5)
In [ ]:
BTdat6
In [ ]:
%%time
per6=BTdat6.two_point_correlation(dat,bins)
In [ ]:
print per6
One has to change the if condition in the metric definition from dz <= 0.002 to 0.002 < dz <= 0.004, and so on for each successive radial slice.
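For illustration, a minimal sketch of what such a binned metric could look like (this is an assumption about how the apmetric* helpers are structured, not their actual code): the pair is assigned its transverse separation z*dtheta only when its radial separation dz falls in the chosen slice, and a value beyond the largest bin edge otherwise, so two_point_correlation effectively counts pairs in that dz slice.
def APmetric_slice(p1, p2, dz_lo=0.002, dz_hi=0.004):
    # p = (z, ra_rad, dec_rad); dz_lo/dz_hi select one radial slice
    dz = abs(p1[0] - p2[0])
    costheta = (m.sin(p1[2])*m.sin(p2[2]) +
                m.cos(p1[2])*m.cos(p2[2])*m.cos(p1[1] - p2[1]))
    zdth = 0.5*(p1[0] + p2[0])*m.acos(min(1.0, max(-1.0, costheta)))
    if dz_lo < dz <= dz_hi:
        return zdth     # counted against the transverse bins
    return 1.0          # larger than the last bin edge (0.02), so never counted
It would be used the same way as apmetric4/apmetric6 above: BallTree(dat, metric='pyfunc', func=APmetric_slice, leaf_size=5) followed by two_point_correlation(dat, bins).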
In [ ]:
%%time
from apmetric4 import *
BTdat4 = BallTree(dat,metric='pyfunc',func=APmetric4,leaf_size=5)
In [ ]:
BTdat4
In [ ]:
%%time
per4=BTdat4.two_point_correlation(dat,bins)
In [ ]:
print per4
In [ ]:
In [ ]:
%%time
from apmetric3 import *
BTdat3 = BallTree(dat,metric='pyfunc',func=APmetric3,leaf_size=5)
In [ ]:
BTdat3
In [ ]:
%%time
per3=BTdat3.two_point_correlation(dat,bins)
In [ ]:
print per3
In [ ]:
print bins
In [ ]:
Nbins=len(bins)
In [ ]:
Nbins
In [ ]:
LCfmetric=LCDMmetric
In [ ]:
LCfmetric(dat[0],dat[1])
In [ ]:
%%time
start_time=time.time()
# build the BallTree for the data catalogue with the LCDM metric (mirrors BT_RLC for the randoms below)
BTDLC = BallTree(dat,metric='pyfunc',func=LCfmetric,leaf_size=5)
counts_DD=BTDLC.two_point_correlation(dat,bins)
print counts_DD
end_time=time.time()
tottime=end_time-start_time
print "Total run time:"
print tottime
with open('BTDcDDLCf.pkl', 'w') as f:
    pickle.dump(counts_DD,f)
In [ ]:
with open('BTDcDDLCf.pkl') as f:
    counts_DD = pickle.load(f)
counts_DD
In [ ]:
DD=np.diff(counts_DD)
In [ ]:
DD
In [ ]:
plt.plot(bins[1:len(bins)],DD,'ro-')
BallTree.two_point_correlation runs almost 10 times faster with leaf_size=5! Going with it for the random catalog.
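A quick way to check the effect of leaf_size (a sketch only; the 10x figure above is the measurement from this notebook, and the subset size here is arbitrary):
sub = dat[:2000]            # small subset so the comparison finishes quickly
for ls in (40, 5):          # 40 is scikit-learn's default leaf_size
    bt = BallTree(sub, metric='pyfunc', func=LCfmetric, leaf_size=ls)
    t0 = time.time()
    bt.two_point_correlation(sub, bins)
    print 'leaf_size = %d: %.2f s' % (ls, time.time() - t0)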
In [ ]:
dataR=ascii.read("./output/rand200kdr72.dat")
In [ ]:
dataR
In [ ]:
len(dataR)
In [ ]:
dataR=ascii.read("./output/rDR7200kLCsrarf.dat")
In [ ]:
dataR
In [ ]:
dataR.remove_column('z')
dataR.remove_column('ra')
dataR.remove_column('dec')
In [ ]:
dataR
In [ ]:
rs=np.array(dataR['s'])
rrar=np.array(dataR['rar'])
rdecr=np.array(dataR['decr'])
In [ ]:
datR=np.array([rs,rrar,rdecr])
In [ ]:
datR
In [ ]:
datR.reshape(3,len(dataR))
In [ ]:
datR=datR.transpose()
In [ ]:
datR
In [ ]:
# Saving the objects:
with open('rDR7200kLCsrarf.pkl', 'w') as f: # Python 3: open(..., 'wb')
    pickle.dump(datR, f)
In [ ]:
# Getting back the objects:
with open('rDR7200kLCsrarf.pkl') as f: # Python 3: open(..., 'rb')
    datR = pickle.load(f)
datR
In [ ]:
%%time
BT_RLC = BallTree(datR,metric='pyfunc',func=LCfmetric,leaf_size=5)
with open('BTR200kdatsLCf.pkl', 'w') as f:
    pickle.dump(BT_RLC,f)
In [ ]:
with open('BTR200kdatsLCf.pkl') as f:
    BTRLC = pickle.load(f)
BTRLC
In [ ]:
%%time
start_time=time.time()
counts_RR=BTRLC.two_point_correlation(datR,bins)
print counts_RR
end_time=time.time()
tottime=end_time-start_time
print "Total run time:"
print tottime
with open('BTR200kcRRLCf.pkl', 'w') as f:
    pickle.dump(counts_RR,f)
with open('BTR200kcRRLCf.pkl') as f:
    counts_RR = pickle.load(f)
counts_RR
In [ ]:
counts_RR
In [ ]:
RR=np.diff(counts_RR)
In [ ]:
RR
In [ ]:
plt.plot(bins[1:len(bins)],RR,'bo-')
In [ ]:
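# guard against empty RR bins so the estimator below does not divide by zero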
RR_zero = (RR == 0)
RR[RR_zero] = 1
In [ ]:
%%time
start_time=time.time()
counts_DR=BTRLC.two_point_correlation(dat,bins)
print counts_DR
end_time=time.time()
tottime=end_time-start_time
print "Total run time:"
print tottime
with open('BTR200kcDRLCf.pkl', 'w') as f:
    pickle.dump(counts_DR,f)
In [ ]:
with open('BTR200kcDRLCf.pkl') as f:
    counts_DR = pickle.load(f)
counts_DR
In [ ]:
DR=np.diff(counts_DR)
In [ ]:
DR
In [ ]:
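# Landy-Szalay estimator (factor**2*DD - 2*factor*DR + RR)/RR with factor = N_R/N_D;
# the factor of 2 here assumes the ~200k random catalogue is twice the size of the data catalogue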
corrells=(4.0 * DD - 4.0 * DR + RR) / RR
In [ ]:
corrells
In [ ]:
plt.plot(bins[1:len(bins)],corrells,'go-')
In [ ]:
plt.plot(bins[1:len(bins)],bins[1:len(bins)]*bins[1:len(bins)]*corrells*(c.value*1e-5)**2,'go-')
In [ ]:
plt.plot(bins[2:len(bins)],bins[2:len(bins)]*bins[2:len(bins)]*corrells[1:len(bins)]*(c.value*1e-5)**2,'go-')
In [ ]:
plt.plot(bins[2:len(bins)],corrells[1:len(bins)],'go-')
In [ ]:
plt.plot(bins[2:len(bins)],corrells[1:len(bins)],'go-')
plt.savefig("correl2xlsLCf.pdf")
In [ ]:
plt.plot(bins[2:len(bins)]*c.value/1e5,corrells[1:len(bins)],'bo-')
plt.savefig("correl2x1lsLCf.pdf")
In [ ]:
plt.yscale('log')
plt.plot(bins[1:len(bins)]*c.value/1e5,corrells,'bo-')
plt.savefig("correllsfiglogLCf.pdf")
In [ ]:
plt.yscale('log')
plt.plot(bins[2:len(bins)]*c.value/1e5,corrells[1:len(bins)],'ro-')
plt.savefig("correllslog2xLCf.pdf")
In [ ]:
plt.yscale('log')
plt.xscale('log')
plt.plot(bins[1:len(bins)]*c.value/1e5,corrells,'bo-')
plt.savefig("correllsloglogLCf.pdf")
In [ ]:
In [ ]:
In [ ]:
from functools import partial

# template for parallelising query_radius calls; RAW_DATASET, BTD and volume_ids are placeholders
def harvester(text, case):
    X = case[0]
    return text + str(X)

partial_harvester = partial(harvester, case=RAW_DATASET)
partial_qr = partial(BTD.query_radius, count_only=True)

if __name__ == '__main__':
    pool = mp.Pool(processes=6)
    case_data = RAW_DATASET
    pool.map(partial_harvester, case_data, 1)
    pool.close()
    pool.join()

mapfunc = partial(BTD.query_radius, count_only=True)
map(mapfunc, volume_ids)
In [ ]:
In [ ]:
#ascii.write("DR72DDbinned.dat",(bins[1:len(bins)],DDresult))
start_time=time.time()

@pickle_results("DR72DDmp1.pkl")
def ddcal(BTD,dat,bins,Nbins):
    counts_DD=np.zeros(Nbins)
    for i in tqdm(range(Nbins)):
        counts_DD[i]=np.sum(BTD.query_radius(dat, bins[i],count_only=True))
    DD = np.diff(counts_DD)
    print counts_DD
    print DD
    return DD

def mf_wrap(args):
    return ddcal(*args)

pool=mp.Pool(8)
arg=[(BTD,dat,bins,Nbins)]
%timeit DDresult=pool.map(mf_wrap,arg)
#DDresult = ddcal(BTD,dat,bins,Nbins)
end_time=time.time()
tottime=end_time-start_time
print "Total run time:"
print tottime
In [ ]:
%timeit dat
In [ ]:
DDresult[0]
In [ ]:
DDresult[1]
In [ ]:
plt.plot(bins[1:len(bins)],DDresult[0],'ro')
In [ ]:
In [ ]:
In [ ]:
def myfun(a,b):
    print a + b
    return a+b

def mf_wrap(args):
    return myfun(*args)

p = mp.Pool(4)
fl = [(a,b) for a in range(3) for b in range(2)]
p.map(mf_wrap, fl)
In [ ]:
In [ ]:
In [ ]:
In [ ]:
counts_DD=np.zeros(Nbins)
for i in range(Nbins):
    counts_DD[i]=np.sum(BTD.query_radius(dat, bins[i],count_only=True))
DD = np.diff(counts_DD)
In [ ]:
print counts_DD
print DD
In [ ]:
plt.plot(bins[1:len(bins)],DD,'ro')
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
dataR=fits.open("/Users/rohin/Downloads/random-DR7-Full.fits")
In [ ]:
dataR=dataR[1].data
In [ ]:
len(dataR)
In [ ]:
In [ ]:
tdata=np.array(data)
In [ ]:
type(tdata[4])
In [ ]:
tdata.shape
In [ ]:
In [ ]:
tdata.shape
In [ ]:
tdata=np.atleast_2d(tdata)
In [ ]:
tdata.shape
In [ ]:
tdata.reshape(len(tdata),3)
In [ ]:
tdata=np.asarray(data)
tdata=tdata.transpose()
In [ ]:
In [ ]:
In [ ]:
tdata
In [ ]:
len(tdata)
In [ ]:
In [ ]:
# time BTD.two_point_correlation (single tree here, dualtree=True in the next cell)
stime=time.time()
tpcf=BTD.two_point_correlation(dat,bins)
print time.time()-stime
print tpcf
plt.plot(bins,tpcf)
In [ ]:
stime=time.time()
tpcfd=BTD.two_point_correlation(dat,bins,dualtree=True)
print time.time()-stime
print tpcfd
plt.plot(bins,tpcfd)
In [ ]:
In [ ]:
In [ ]:
In [ ]:
X
In [ ]:
In [ ]:
np.random.seed(0)
X = np.random.random((30,3))
r = np.linspace(0, 1, 10)
tree = BallTree(X,metric='pyfunc',func=LCDMmetric)
s = pickle.dumps(tree)
treedump = pickle.loads(s)
treedump.two_point_correlation(X,r)
In [ ]:
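# Reference fragment for the pair-count estimators (it appears to follow astroML's correlation module);
# data, data_R, Nbins, factor, method, KDT_R and sklearn_has_two_point are not defined in this notebook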
BT_D = BallTree(data)
BT_R = BallTree(data_R)
counts_DD = np.zeros(Nbins + 1)
counts_RR = np.zeros(Nbins + 1)
for i in range(Nbins + 1):
    counts_DD[i] = np.sum(BT_D.query_radius(data, bins[i],
                                            count_only=True))
    counts_RR[i] = np.sum(BT_R.query_radius(data_R, bins[i],
                                            count_only=True))
DD = np.diff(counts_DD)
RR = np.diff(counts_RR)
# check for zero in the denominator
RR_zero = (RR == 0)
RR[RR_zero] = 1
if method == 'standard':
    corr = factor ** 2 * DD / RR - 1
elif method == 'landy-szalay':
    if sklearn_has_two_point:
        counts_DR = KDT_R.two_point_correlation(data, bins)
    else:
        counts_DR = np.zeros(Nbins + 1)
        for i in range(Nbins + 1):
            counts_DR[i] = np.sum(BT_R.query_radius(data, bins[i],
                                                    count_only=True))
    DR = np.diff(counts_DR)
    corr = (factor ** 2 * DD - 2 * factor * DR + RR) / RR
    corr[RR_zero] = np.nan
return corr
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
dr7fdat=np.array([data['s'][0:300], data['rar'][0:300], data['decr'][0:300]])
dr7fdat
In [ ]:
dr7fdat[2]
In [ ]:
In [ ]:
def LCDMmetric(p1,p2):
    # assumes each point is (z, ra_rad, dec_rad); DC_LCDM converts redshift to comoving distance
    z1,ra1,dec1=p1
    z2,ra2,dec2=p2
    costheta=m.sin(dec1)*m.sin(dec2)+m.cos(dec1)*m.cos(dec2)*m.cos(ra1-ra2)
    s1=DC_LCDM(z1)
    s2=DC_LCDM(z2)
    return np.sqrt(s1**2+s2**2-2.0*s1*s2*costheta)