In [1]:
import tempfile, os
import numpy
# seed numpy's global random generator so the random arrays drawn
# later in the notebook are reproducible
numpy.random.seed(42)

# remember the launch directory, then switch into a fresh temporary
# directory so relative paths written later end up in scratch space
startdir = os.path.abspath(os.curdir)
tmpdir = tempfile.mkdtemp()
os.chdir(tmpdir)

In [2]:
# the wildcard import supplies the catalog classes and the
# `transform` / `cosmology` names used by the cells below
from nbodykit.lab import *

# the example catalog: a UniformCatalog with nbar=100 in a box of
# size 1.0, built with a fixed seed
cat = UniformCatalog(
    nbar=100,
    BoxSize=1.0,
    seed=42,
)


/home/yfeng1/anaconda3/install/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters

In [3]:
# look up two columns by name; nothing is evaluated here, and the
# printed reprs show each column is a dask array
position, velocity = cat['Position'], cat['Velocity']

for column in (position, velocity):
    print(column)


dask.array<array, shape=(96, 3), dtype=float64, chunksize=(96, 3)> first: [0.45470105 0.83263203 0.06905134] last: [0.62474599 0.15388738 0.84302209]
dask.array<array, shape=(96, 3), dtype=float64, chunksize=(96, 3)> first: [0.0006346  0.00675438 0.00704942] last: [0.00375581 0.00046149 0.00819726]

In [4]:
# normalize the position by the box size stored in the catalog's attrs
box_size = cat.attrs['BoxSize']
normed_position = position / box_size

# the result is a new (still unevaluated) dask array
print(normed_position)


dask.array<truediv, shape=(96, 3), dtype=float64, chunksize=(96, 3)>

In [5]:
# evaluate both columns with a single compute() call
# note: this rebinds `position` and `velocity` from the dask arrays
# defined earlier to the computed numpy arrays
computed = cat.compute(cat['Position'], cat['Velocity'])
position, velocity = computed

for result in (position, velocity):
    print(type(result))


<class 'numpy.ndarray'>
<class 'numpy.ndarray'>

In [6]:
# maximum of the normalized position along axis 0 (over rows);
# this is another dask array, not yet a set of numbers
maxpos = normed_position.max(axis=0)
print(maxpos)

# compute() turns the expression into actual values
maxpos_values = cat.compute(maxpos)
print(maxpos_values)


dask.array<amax-aggregate, shape=(3,), dtype=float64, chunksize=(3,)>
[0.9927406  0.99610592 0.99925086]

In [7]:
# the catalog starts out without a "Mass" column
print("contains 'Mass'? :", 'Mass' in cat)

# draw one random number per object and store it as the "Mass" column
random_mass = numpy.random.random(size=len(cat))
cat['Mass'] = random_mass

# the membership test now succeeds
print("contains 'Mass'? :", 'Mass' in cat)

# a scalar can be assigned too -- it is converted to the correct length
cat['Type'] = b"central"

# show both new columns
for new_column in ('Mass', 'Type'):
    print(cat[new_column])


contains 'Mass'? : False
contains 'Mass'? : True
dask.array<array, shape=(96,), dtype=float64, chunksize=(96,)> first: 0.3745401188473625 last: 0.49379559636439074
dask.array<array, shape=(96,), dtype=|S7, chunksize=(96,)> first: b'central' last: b'central'

In [8]:
# some fake data: 5 rows of ones, with two 3-vector float32 fields
vector_f4 = ('f4', 3)
data = numpy.ones(5, dtype=[('Position', vector_f4), ('Velocity', vector_f4)])

# initialize a catalog directly from the structured array
src = ArrayCatalog(data)

# overwrite the Velocity column first ...
new_velocity = src['Position'] + src['Velocity'] # 1 + 1 = 2
src['Velocity'] = new_velocity

# ... then the Position column, which picks up the updated Velocity
new_position = src['Position'] + src['Velocity'] # 1 + 2 = 3
src['Position'] = new_position

print("Velocity = ", src.compute(src['Velocity'])) # all equal to 2
print("Position = ", src.compute(src['Position'])) # all equal to 3


Velocity =  [[2. 2. 2.]
 [2. 2. 2.]
 [2. 2. 2.]
 [2. 2. 2.]
 [2. 2. 2.]]
Position =  [[3. 3. 3.]
 [3. 3. 3.]
 [3. 3. 3.]
 [3. 3. 3.]
 [3. 3. 3.]]

In [9]:
# apply RSD along the z axis
line_of_sight = [0, 0, 1]

# redshift of the catalog
redshift = 0.55

# cosmology: h=0.7, then matched to Omega0_m=0.31
cosmo = cosmology.Cosmology(h=0.7).match(Omega0_m=0.31)

# the RSD normalization factor
rsd_factor = (1 + redshift) / (100 * cosmo.efunc(redshift))

# the shift is the scaled velocity, masked to the line-of-sight axis
rsd_shift = rsd_factor * src['Velocity'] * line_of_sight

# update Position, applying RSD
# note: Position is read and overwritten here, so re-running this cell
# applies the shift a second time
src['Position'] = src['Position'] + rsd_shift

In [10]:
# boolean selection array
select = cat['Mass'] < 0.5
print("number of True entries = ", cat.compute(select.sum()))

# three ways of indexing rows, applied one after the other:
#   - the boolean array: keep only entries where select = True
#   - a slice, equivalent to cat[:10]: keep the first ten rows
#   - a list of integers: keep the first and last row
row_selectors = (select, slice(None, 10), [0, -1])
for selector in row_selectors:
    subcat = cat[selector]
    print("size of subcat = ", subcat.size)


number of True entries =  50
size of subcat =  50
size of subcat =  10
size of subcat =  2

In [11]:
# every column currently available on the full catalog
print("columns in catalog = ", cat.columns)

# indexing with a list of names selects Position + Mass
wanted_columns = ['Position', 'Mass']
subcat = cat[wanted_columns]

# the subset holds the selected columns + the default columns
print("columns in subset = ", subcat.columns)


columns in catalog =  ['Mass', 'Position', 'Selection', 'Type', 'Value', 'Velocity', 'Weight']
columns in subset =  ['Mass', 'Position', 'Selection', 'Value', 'Weight']

In [12]:
# two uniform catalogs in the same box that differ only in nbar
cat1, cat2 = (
    UniformCatalog(nbar=nbar, BoxSize=1.0, seed=42)
    for nbar in (50, 150)
)

# join them into a single catalog
combined = transform.ConcatenateSources(cat1, cat2)

# report the two input sizes and the size of the combined catalog
sizes = (cat1.size, cat2.size, combined.size)
print("total size = %d + %d = %d" % sizes)


total size = 47 + 145 = 192

In [13]:
# fake position data: 5 rows of 3 random numbers
data = numpy.random.random(size=(5, 3))

# save to a plaintext file (relative path, so it is written to the
# current working directory)
csv_path = 'csv-example.dat'
numpy.savetxt(csv_path, data, fmt='%.7e')

# the cartesian coordinates, one name per column of the file
names = ['x', 'y', 'z']

# read the data back as a catalog
f = CSVCatalog(csv_path, names)

# make the "Position" column by stacking the x, y, z columns
coordinate_columns = [f[name] for name in names]
f['Position'] = transform.StackColumns(*coordinate_columns)

# first the dask array, then its evaluated values
print(f['Position'])
print(f.compute(f['Position']))


dask.array<transpose, shape=(5, 3), dtype=float64, chunksize=(5, 1)> first: [0.52273283 0.42754102 0.02541913] last: [0.22879817 0.07697991 0.28975145]
[[0.52273283 0.42754102 0.02541913]
 [0.10789143 0.03142919 0.63641041]
 [0.31435598 0.50857069 0.90756647]
 [0.24929223 0.41038292 0.75555114]
 [0.22879817 0.07697991 0.28975145]]

In [14]:
# a random catalog of 100 objects with a fixed seed
src = RandomCatalog(100, seed=42)

# add random (ra, dec, z) coordinates; the angles are in degrees
src['z'] = src.rng.normal(loc=0.5, scale=0.1)
src['ra'] = src.rng.uniform(low=0, high=360)
# declination is a latitude-like angle and is only defined on
# [-90, 90] degrees; drawing from [-180, 180] produced invalid
# sky coordinates
src['dec'] = src.rng.uniform(low=-90, high=90.)

# initialize a set of cosmology parameters
cosmo = cosmology.Cosmology(h=0.7)

# add the position, converting (ra, dec, z) to Cartesian coordinates
src['Position'] = transform.SkyToCartesian(src['ra'], src['dec'], src['z'], degrees=True, cosmo=cosmo)

In [15]:
# `transform.da` provides the array functions used on the columns here
da = transform.da

ra = da.deg2rad(src['ra']) # from degrees to radians
sin_ra = da.sin(ra) # compute the sine

# evaluate the two reductions, then report them
sin_ra_min = src.compute(sin_ra.min())
sin_ra_max = src.compute(sin_ra.max())

print("min(sin(ra)) = ", sin_ra_min)
print("max(sin(ra)) = ", sin_ra_max)


min(sin(ra)) =  -0.999907640154132
max(sin(ra)) =  0.9988053754673173

In [16]:
# clean up the scratch space created in the first cell
import shutil
# return to the directory the notebook was started from before deleting,
# so the working directory is not inside the tree being removed
os.chdir(startdir)
# remove the temporary directory and everything written into it
shutil.rmtree(tmpdir)