First import all the modules such as healpy and astropy needed for analyzing the structure
In [1]:
import healpix_util as hu
import astropy as ap
import numpy as np
from astropy.io import fits
from astropy.table import Table
import astropy.io.ascii as ascii
from astropy.io import fits
from astropy.constants import c
import matplotlib.pyplot as plt
import math as m
from math import pi
#from scipy.constants import c
import scipy.special as sp
from astroML.decorators import pickle_results
from scipy import integrate
import warnings
from sklearn.neighbors import BallTree
import pickle
import multiprocessing as mp
import time
from aptestmetricdt import *
from aptestmetricdz import *
from scipy.spatial import distance as d
from apcat import *
from progressbar import *
from tqdm import *
from functools import partial
import pymangle
from apdz import *
from apdt import *
from scipy.optimize import curve_fit
#from astroML.datasets import fetch_sdss_specgals
#from astroML.correlation import bootstrap_two_point_angular
%matplotlib inline
In [2]:
# Getting back the objects:
with open('../output/datzAP.pkl') as f: # Python 3: open(..., 'rb')
dat = pickle.load(f)
dat
Out[2]:
Read the data file (taken from http://cosmo.nyu.edu/~eak306/SDSS-LRG.html ) converted to ascii with comoving distance etc. in V01 reading from pkl files for faster read
In [3]:
# Getting back the objects:
with open('../output/rdatzAP.pkl') as f: # Python 3: open(..., 'rb')
datR = pickle.load(f)
datR
Out[3]:
In [46]:
dd2d=np.zeros((20,20))
In [5]:
rng = np.array([[0, 0.02], [0, 0.02]])
In [6]:
ddbt=BallTree(dat,metric='pyfunc',func=APdz)
In [7]:
rng
Out[7]:
In [42]:
bins=np.arange(0.0,0.02001,0.001)
In [43]:
bins
Out[43]:
In [47]:
%%time
for i in tqdm(xrange(len(dat))):
ind=ddbt.query_radius(dat[i].reshape(1,-1),0.021)
for j in ind:
dist0=d.cdist([dat[i],],dat[j],APdz)[0]
dist1=d.cdist([dat[i],],dat[j],APzdth)[0]
dd2d+=np.histogram2d(dist0, dist1,range=rng,bins=(bins,bins))[0]
print dd2d
In [48]:
with open('dd2ddr72v06cdist200kopt.pkl','w') as f:
pickle.dump(dd2d,f)
dd2d
Out[48]:
In [52]:
dd2d=np.zeros((20,20))
In [49]:
ddbtzdth=BallTree(dat,metric='pyfunc',func=APzdth)
In [55]:
%%time
for i in tqdm(xrange(len(dat))):
ind=ddbtzdth.query_radius(dat[i].reshape(1,-1),0.021)
for j in ind:
dist0=d.cdist([dat[i],],dat[j],APdz)[0]
dist1=d.cdist([dat[i],],dat[j],APzdth)[0]
dd2d+=np.histogram2d(dist0, dist1,range=rng,bins=(bins,bins))[0]
print dd2d
In [51]:
dd2d
Out[51]:
In [ ]:
with open('dd2ddr72v06cdist200kopt2.pkl','w') as f:
pickle.dump(dd2d,f)
dd2d
In [ ]:
In [ ]:
In [37]:
%%time
ind=ddbt.query_radius(dat[0].reshape(1,-1),bins[0])
ind1=ind.reshape(1,0)
In [35]:
ind1
Out[35]:
In [38]:
for i in ind1:
print dat[i]
In [45]:
%%time
for i in ind:
dist0=d.cdist([dat[0],],dat[i],APdz)[0]
dist1=d.cdist([dat[0],],dat[i],APzdth)[0]
print np.histogram2d(dist0, dist1,range=rng)
dd2d+=np.histogram2d(dist0, dist1,range=rng,bins=(bins,bins))[0]
print dd2d
In [40]:
help(np.histogram2d)
In [ ]:
d.
In [24]:
ind
Out[24]:
In [19]:
np.random.seed(0)
X = np.random.random((10, 3)) # 10 points in 3 dimensions
tree = BallTree(X, leaf_size=2) # doctest: +SKIP
#print(tree.query_radius(X[0], r=0.3, count_only=True))
ind = tree.query_radius(X[0].reshape(1,-1), r=0.3) # doctest: +SKIP
print(ind) # indices of neighbors within distance 0.3
In [11]:
help(ddbt.query_radius)
In [ ]:
dr2d=np.zeros((20,20))
In [ ]:
rng = np.array([[0, 0.02], [0, 0.02]])
In [ ]:
%%time
dist0=d.cdist([dat[0],],datR,APdz)[0]
dist1=d.cdist([dat[0],],datR,APzdth)[0]
print np.histogram2d(dist0, dist1,range=rng)
In [ ]:
%%time
for i in tqdm(xrange(len(dat))):
dist0=d.cdist([dat[i],],datR,APdz)[0]
dist1=d.cdist([dat[i],],datR,APzdth)[0]
dr2d+=np.histogram2d(dist0, dist1,range=rng)[0]
print dr2d
In [ ]:
with open('dr2ddr72v06cdist200k.pkl','w') as f:
pickle.dump(dr2d,f)
dr2d
In [ ]:
In [ ]:
dist0
In [ ]:
len(dist0)
In [ ]:
dist0.size
In [ ]:
dist0.flatten
print dist0
In [ ]:
len(dist0)
In [ ]:
%%time
dist0=d.cdist([dat[0],],dat,APdz)[0]
dist1=d.cdist([dat[0],],dat,APzdtheta)[0]
print np.histogram2d(dist0, dist1,range=rng)
#print dd2d
In [ ]:
In [ ]:
dist0
In [ ]:
In [ ]:
dist0=dist0[0]
In [ ]:
dist0[1]
In [ ]:
dist0[1:len(dist0)]
In [ ]:
len(dist0)
In [ ]:
dist0.size
In [ ]:
help(dist0.flatten)
In [ ]:
dist0[0]
In [ ]:
np.array.flatten(dist0)
In [ ]:
len(dist0)
In [ ]:
In [ ]:
In [ ]:
%%time
while len(dat)>0:
i=len(dat)-1
while i>0:
dist[i]=APcat(dat[0],dat[i])
i-=1
dd2d+=np.histogram2d(dist[:,0], dist[:,1],range=rng)[0]
dat=np.delete(dat,0,axis=0)
In [ ]:
dd2d
In [ ]:
In [ ]:
In [ ]:
%%time
while len(dat)>0:
i=len(dat)-1
while i>0:
dist=500*APcat(dat[0],dat[i])
dd2d+=np.histogram2d(dist[0], dist[1], bins = 10, range = rng)
i-=1
dat=np.delete(dat,0,axis=0)
In [ ]:
def binDists2d(dat, dz = lambda u, v: abs(u[0]-v[0]), zdth = lambda u, v: 0.5*(u[0]+v[0])*np.arccos(np.sin(u[2])*np.sin(v[2])+np.cos(u[2])*np.cos(v[2])*np.cos(u[1]-v[1]))):
i, j = np.triu_indices(dat.shape[0], 1)
dist0 = dz(dat[i], dat[j])
dist1 = zdth(dat[i], dat[j])
rng = np.array([[0, 0.02], [0, 0.02]])
return np.histogram2d(dist0, dist1, bins = 10, range = rng)
In [ ]:
td=np.random.rand(1000,3)
In [ ]:
binDists2d(td)
In [ ]:
%%time
binDists2d(dat)
In [ ]:
In [ ]:
In [ ]:
def binDists2d(dat, f1 = 'euclidean', f2 = 'cosine'):
dist0 = APcat(dat, f1)
dist1 = d.pdist(dat, f2)
return np.histogram2d(dist0, dist1, bins = 10)
In [ ]:
dd2d
In [ ]:
plt.contour(dd2d)
In [ ]:
with open('DR72DD2DMI.pkl', 'w') as f:
pickle.dump(dd2d,f)
In [ ]:
with open('DR72DD2DMI.pkl') as f:
DD2D = pickle.load(f)
DD2D
In [ ]:
dzbin=zdthbin=np.arange(0.002,0.022,0.002)
In [ ]:
plt.contour(dzbin,zdthbin,dd2d)
In [ ]:
dzbin
In [ ]:
plt.contour(dzbin,zdthbin,dd2d,levels=[ 5041., 13955., 23161., 31557., 38796., 46402., 53552.,
60708., 67437., 74549.])
In [ ]:
In [ ]:
for i in range(len(dat)-1):
for j in range(len(dat)-1):
dist=APcat(dat[0],dat[i])
ind0=int(dist[0]/0.002)
ind1=int(dist[1]/0.002)
if ind0>9 or ind1>9:
pass
else:
dd2d[ind0,ind1]+=1
dat=np.delete(dat,0,0)
print len(dat)
print dd2d
In [ ]:
for i in range(len(dat)):
In [ ]:
bins=np.arange(0,0.022,0.002)
print bins
In [ ]:
%%time
from apmetric6 import *
BTdat6 = BallTree(dat,metric='pyfunc',func=APmetric6,leaf_size=5)
In [ ]:
BTdat6
In [ ]:
%%time
per6=BTdat6.two_point_correlation(dat,bins)
In [ ]:
print per6
One has to change if condition in the metric definition of dz<=0.002 to 0.002<dz<=0.004 and so on
In [ ]:
%%time
from apmetric4 import *
BTdat4 = BallTree(dat,metric='pyfunc',func=APmetric4,leaf_size=5)
In [ ]:
BTdat4
In [ ]:
%%time
per4=BTdat4.two_point_correlation(dat,bins)
In [ ]:
print per4
In [ ]:
In [ ]:
%%time
from apmetric3 import *
BTdat3 = BallTree(dat,metric='pyfunc',func=APmetric3,leaf_size=5)
BTdat3
%%time
per3=BTdat3.two_point_correlation(dat,bins)
print per3print bins
In [ ]:
Nbins=len(bins)
In [ ]:
Nbins
In [ ]:
LCfmetric=LCDMmetric
In [ ]:
LCfmetric(dat[0],dat[1])
In [ ]:
%%time
start_time=time.time()
counts_DD=BTDLC.two_point_correlation(dat,bins)
print counts_DD
end_time=time.time()
tottime=end_time-start_time
print "Total run time:"
print tottime
with open('BTDcDDLCf.pkl', 'w') as f:
pickle.dump(counts_DD,f)
In [ ]:
with open('BTDcDDLCf.pkl') as f:
counts_DD = pickle.load(f)
counts_DD
In [ ]:
DD=np.diff(counts_DD)
In [ ]:
DD
In [ ]:
plt.plot(bins[1:len(bins)],DD,'ro-')
BallTree.two_point_correlation works almost 10 times faster! with leaf_size=5 Going with it to the random catalog
In [ ]:
dataR=ascii.read("./output/rand200kdr72.dat")
In [ ]:
dataR
In [ ]:
len(dataR)
In [ ]:
dataR=ascii.read("./output/rDR7200kLCsrarf.dat")
In [ ]:
dataR
In [ ]:
dataR.remove_column('z')
dataR.remove_column('ra')
dataR.remove_column('dec')
In [ ]:
dataR
In [ ]:
rs=np.array(dataR['s'])
rrar=np.array(dataR['rar'])
rdecr=np.array(dataR['decr'])
In [ ]:
datR=np.array([rs,rrar,rdecr])
In [ ]:
datR
In [ ]:
datR.reshape(3,len(dataR))
In [ ]:
datR=datR.transpose()
In [ ]:
datR
In [ ]:
# Saving the objects:
with open('rDR7200kLCsrarf.pkl', 'w') as f: # Python 3: open(..., 'wb')
pickle.dump(datR, f)
In [ ]:
# Getting back the objects:
with open('rDR7200kLCsrarf.pkl') as f: # Python 3: open(..., 'rb')
datR = pickle.load(f)
datR
In [ ]:
%%time
BT_RLC = BallTree(datR,metric='pyfunc',func=LCfmetric,leaf_size=5)
with open('BTR200kdatsLCf.pkl', 'w') as f:
pickle.dump(BT_RLC,f)
In [ ]:
with open('BTR200kdatsLCf.pkl') as f:
BTRLC = pickle.load(f)
BTRLC
In [ ]:
%%time
start_time=time.time()
counts_RR=BTRLC.two_point_correlation(datR,bins)
print counts_RR
end_time=time.time()
tottime=end_time-start_time
print "Total run time:"
print tottime
with open('BTR200kcRRLCf.pkl', 'w') as f:
pickle.dump(counts_RR,f)
with open('BTR200kcRRLCf.pkl') as f:
counts_RR = pickle.load(f)
counts_RR
In [ ]:
counts_RR
In [ ]:
RR=np.diff(counts_RR)
In [ ]:
RR
In [ ]:
plt.plot(bins[1:len(bins)],RR,'bo-')
In [ ]:
RR_zero = (RR == 0)
RR[RR_zero] = 1
In [ ]:
%%time
start_time=time.time()
counts_DR=BTRLC.two_point_correlation(dat,bins)
print counts_DR
end_time=time.time()
tottime=end_time-start_time
print "Total run time:"
print tottime
with open('BTR200kcDRLCf.pkl', 'w') as f:
pickle.dump(counts_DR,f)
In [ ]:
with open('BTR200kcDRLCf.pkl') as f:
counts_DR = pickle.load(f)
counts_DR
In [ ]:
DR=np.diff(counts_DR)
In [ ]:
DR
In [ ]:
corrells=(4.0 * DD - 4.0 * DR + RR) / RR
In [ ]:
corrells
In [ ]:
plt.plot(bins[1:len(bins)],corrells,'go-')
In [ ]:
plt.plot(bins[1:len(bins)],bins[1:len(bins)]*bins[1:len(bins)]*corrells*(c*1e-5)**2,'go-')
In [ ]:
plt.plot(bins[2:len(bins)],bins[2:len(bins)]*bins[2:len(bins)]*corrells[1:len(bins)]*(c*1e-5)**2,'go-')
In [ ]:
plt.plot(bins[2:len(bins)],corrells[1:len(bins)],'go-')
In [ ]:
plt.plot(bins[2:len(bins)],corrells[1:len(bins)],'go-')
plt.savefig("correl2xlsLCf.pdf")
In [ ]:
plt.plot(bins[2:len(bins)]*c/1e5,corrells[1:len(bins)],'bo-')
plt.savefig("correl2x1lsLCf.pdf")
In [ ]:
plt.yscale('log')
plt.plot(bins[1:len(bins)]*c/1e5,corrells,'bo-')
plt.savefig("correllsfiglogLCf.pdf")
In [ ]:
plt.yscale('log')
plt.plot(bins[2:len(bins)]*c/1e5,corrells[1:len(bins)],'ro-')
plt.savefig("correllslog2xLCf.pdf")
In [ ]:
plt.yscale('log')
plt.xscale('log')
plt.plot(bins[1:len(bins)]*c/1e5,corrells,'bo-')
plt.savefig("correllsloglogLCf.pdf")
In [ ]:
In [ ]:
In [ ]:
from functools import partial
def harvester(text, case):
X = case[0]
return text + str(X)
partial_harvester = partial(harvester, case=RAW_DATASET)
partial_qr=partial(BTD.query_radius,count_only=True)
if __name__ == '__main__':
pool = multiprocessing.Pool(processes=6)
case_data = RAW_DATASET
pool.map(partial_harvester, case_data, 1)
pool.close()
pool.join()
mapfunc = partial(BTD.query_radius, count_only=True)
map(mapfunc, volume_ids)
In [ ]:
In [ ]:
#ascii.write("DR72DDbinned.dat",(bins[1:len(bins)],DDresult))
start_time=time.time()
@pickle_results("DR72DDmp1.pkl")
def ddcal(BTD,dat,bins,Nbins):
counts_DD=np.zeros(Nbins)
for i in tqdm(range(Nbins)):
counts_DD[i]=np.sum(BTD.query_radius(dat, bins[i],count_only=True))
DD = np.diff(counts_DD)
print counts_DD
print DD
return DD
def mf_wrap(args):
return ddcal(*args)
pool=mp.Pool(8)
arg=[(BTD,dat,bins,Nbins)]
%timeit DDresult=pool.map(mf_wrap,arg)
#DDresult = ddcal(BTD,dat,bins,Nbins)
end_time=time.time()
tottime=end_time-start_time
print "Total run time:"
print tottime
In [ ]:
%timeit dat
In [ ]:
DDresult[0]
In [ ]:
DDresult[1]
In [ ]:
plt.plot(bins[1:len(bins)],DDresult[0],'ro')
In [ ]:
In [ ]:
In [ ]:
def myfun(a,b):
print a + b
return a+b
def mf_wrap(args):
return myfun(*args)
p = mp.Pool(4)
fl = [(a,b) for a in range(3) for b in range(2)]
p.map(mf_wrap, fl)
In [ ]:
In [ ]:
In [ ]:
In [ ]:
counts_DD=np.zeros(Nbins)
for i in range(Nbins):
counts_DD[i]=np.sum(BTD.query_radius(dat, bins[i],count_only=True))
DD = np.diff(counts_DD)
In [ ]:
print counts_DD
print DD
In [ ]:
plt.plot(bins[1:len(bins)],DD,'ro')
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
dataR=fits.open("/Users/rohin/Downloads/random-DR7-Full.fits")
In [ ]:
dataR=dataR[1].data
In [ ]:
len(dataR)
In [ ]:
In [ ]:
tdata=np.array(data)
In [ ]:
type(tdata[4])
In [ ]:
tdata.shape
In [ ]:
In [ ]:
tdata.shape
In [ ]:
tdata=np.atleast_d(tdata)
In [ ]:
tdata.shape
In [ ]:
tdata.reshape(len(tdata),3)
In [ ]:
tdata=np.asarray(data)
tdata=tdata.transpose()
In [ ]:
In [ ]:
In [ ]:
tdata
In [ ]:
len(tdata)
In [ ]:
In [ ]:
BTD.two_point_correlationpoint_correlationpoint_correlationpoint_correlationtime
stime=time.time()
tpcf=BTD.two_point_correlation(dat,bins)
print time.time()-stime
print tpcf
plt.plot(bins,tpcf)
In [ ]:
stime=time.time()
tpcfd=BTD.two_point_correlation(dat,bins,dualtree=True)
print time.time()-stime
print tpcfd
plt.plot(bins,tpcfd)
In [ ]:
In [ ]:
In [ ]:
In [ ]:
X
In [ ]:
In [ ]:
np.random.seed(0)
X = np.random.random((30,3))
r = np.linspace(0, 1, 10)
tree = BallTree(X,metric='pyfunc',func=LCDMmetric)
s = pickle.dumps(tree)
treedump = pickle.loads(s)
treedump.two_point_correlation(X,r)
In [ ]:
BT_D = BallTree(data)
BT_R = BallTree(data_R)
counts_DD = np.zeros(Nbins + 1)
counts_RR = np.zeros(Nbins + 1)
for i in range(Nbins + 1):
counts_DD[i] = np.sum(BT_D.query_radius(data, bins[i],
count_only=True))
counts_RR[i] = np.sum(BT_R.query_radius(data_R, bins[i],
count_only=True))
DD = np.diff(counts_DD)
RR = np.diff(counts_RR)
# check for zero in the denominator
RR_zero = (RR == 0)
RR[RR_zero] = 1
if method == 'standard':
corr = factor ** 2 * DD / RR - 1
elif method == 'landy-szalay':
if sklearn_has_two_point:
counts_DR = KDT_R.two_point_correlation(data, bins)
else:
counts_DR = np.zeros(Nbins + 1)
for i in range(Nbins + 1):
counts_DR[i] = np.sum(BT_R.query_radius(data, bins[i],
count_only=True))
DR = np.diff(counts_DR)
corr = (factor ** 2 * DD - 2 * factor * DR + RR) / RR
corr[RR_zero] = np.nan
return corr
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
dr7fdat=np.array([data['s'][0:300] data['rar'][0:300] data['decr'][0:300]])
dr7fdat
In [ ]:
dr7fdat[2]
In [ ]:
In [ ]:
def LCDMmetric(p1,p2):
costheta=m.sin(dec1)*m.sin(dec2)+m.cos(dec1)*m.cos(dec2)*m.cos(ra1-ra2)
s1=DC_LCDM(z1)
s2=DC_LCDM(z2)
return np.sqrt(s1**2+s2**2-2.0*s1*s2*costheta)