Playing with Arka's VPF code. Trying to understand it and ultimately put it into halotools.

``````

In [1]:

import numpy as np
from scipy import spatial, interpolate
from scipy.stats import percentileofscore
from pearce.mocks.kittens import TrainingBox
from time import time

``````
``````

In [2]:

%matplotlib inline
from matplotlib import pyplot as plt

``````
import numpy as np import sys import scipy.spatial import matplotlib.pylab as plt from scipy import interpolate from os import urandom import readfof from numba import jit import subprocess import os
``````

In [3]:

def CDFVolkNN(vol):
N = vol.shape[0]
gof = ((np.arange(0, N) + 1) / (N*1.0))

ind = np.argsort(vol)
sVol= vol[ind]
print sVol.shape, gof.shape, N
# return array of interpolating functions
CDF = interpolate.interp1d(sVol,gof, kind='linear', \
bounds_error=False)
return CDF

``````
``````

In [4]:

def my_CDFVolkNN(vol, binc):

sVol= np.sort(vol)
CDF =  np.searchsorted(sVol, binc)

return CDF/float(vol.shape[0])

``````
``````

In [116]:

def VDF(halo_table, nrandoms,cat, bins):
t0=time()
halo_pos = np.stack(halo_table['halo_%s'%coord] for coord in ['x', 'y', 'z']).T
#print halo_pos.shape

#Build Tree with halo positions
periodic=0 # TODO whats this do?

#Generate  nrandoms randoms on the same volume
random_pos = np.random.rand(nrandoms,3)*cat.Lbox
# Leaf size from 16-. 256 slowed down queery time a lot
# flipping the defintion the other way doesnt work
#xtree = spatial.cKDTree(halo_pos, boxsize=periodic, leafsize = 256)
xtree = spatial.cKDTree(halo_pos, boxsize=periodic, leafsize = 16)

#Get nearest neighbor distance
dis, disi = xtree.query(random_pos, k=1, eps = 1.0, n_jobs=-1)
vol = dis

#Define the bins
binw = bins[1:] - bins[:-1]
binc = (bins[1:] + bins[:-1]) / 2
#Now get the VPF
#CDFs = CDFVolkNN(vol)
dummyvpf = my_CDFVolkNN(vol, binc)

#Remove NaN from outside interpolation region
#dummyvpf[np.isnan(dummyvpf)] = 1.0
#return CDFs, dummyvpf
return dummyvpf

``````
``````

In [73]:

cat = TrainingBox(0)

``````
``````

In [74]:

``````
``````

In [75]:

len(cat.halocat.halo_table)

``````
``````

Out[75]:

10055159

``````
``````

In [102]:

nrandoms = 100**3
Boxsize = cat.Lbox

n1 = 3000 #Number of data points to subsample down to.
#For analysis on 10Mpc-50Mpc scales, a range of 30000<n1<90000
#seems optimal. For pushing to lower scales, you can try increasing

Ncut = 100000 #Rank on halo mass list to go down to.

``````
``````

In [103]:

halo_masses = cat.halocat.halo_table['halo_mvir']

``````
``````

In [104]:

mass_idxs = np.argsort(halo_masses)

``````
``````

In [105]:

cut_idxs = mass_idxs[-Ncut:]

``````
``````

In [106]:

halo_table = cat.halocat.halo_table[cut_idxs]

``````
``````

In [107]:

bins = np.logspace(-1, 1.8, 1000)

``````
``````

In [108]:

sim_vpf_n1 = VDF(halo_table, Ncut*10,cat, bins)

``````
``````

A 0.00191688537598
B 0.111558914185
C 0.577996969223
D 0.671555042267

/afs/slac.stanford.edu/u/ki/swmclau2/.local/lib/python2.7/site-packages/ipykernel/__main__.py:3: FutureWarning: arrays to stack must be passed as a "sequence" type such as list or tuple. Support for non-sequence iterables such as generators is deprecated as of NumPy 1.16 and will raise an error in the future.
app.launch_new_instance()

``````
``````

In [109]:

#interpolator, vpf = sim_vpf_n1
vpf = sim_vpf_n1

``````
``````

In [110]:

binc = (bins[1:] + bins[:-1]) / 2

``````
``````

In [111]:

plt.plot(binc, vpf);
#plt.xscale('log')

``````
``````

Out[111]:

[<matplotlib.lines.Line2D at 0x7f9c3b00f5d0>]

``````
``````

In [112]:

len(mass_idxs)

``````
``````

Out[112]:

10055159

``````
``````

In [124]:

for mass_cut in [1e14, 1e13,1e12,1e11]:
print mass_cut
cut_idxs = np.where(halo_masses>mass_cut)[0]
halo_table = cat.halocat.halo_table[cut_idxs]
print len(halo_table)
vpf = VDF(halo_table, Ncut*10,cat, bins)
plt.plot(binc, 1-vpf, label = r'\$Mass > 10^{%0.1f} M_{\odot}\$'%np.log10(mass_cut));
plt.legend(loc='best')
#plt.xscale('log')
plt.loglog()
plt.show();

``````
``````

1e+14
55532
1e+13
946302
1e+12
6453631
1e+11
10019345

/afs/slac.stanford.edu/u/ki/swmclau2/.local/lib/python2.7/site-packages/ipykernel/__main__.py:3: FutureWarning: arrays to stack must be passed as a "sequence" type such as list or tuple. Support for non-sequence iterables such as generators is deprecated as of NumPy 1.16 and will raise an error in the future.
app.launch_new_instance()

``````
``````

In [ ]:

``````