In [1]:
# hidden setup cell: run all the examples inside a scratch directory
import os
import tempfile

# remember where we started, so the final cell can restore it
startdir = os.path.abspath('.')

# create a throw-away directory and work from there
tmpdir = tempfile.mkdtemp()
os.chdir(tmpdir)

In [2]:
import numpy
from nbodykit.source.catalog import CSVCatalog

# create fake ASCII data: 100 rows, 5 columns of random floats
data = numpy.random.random(size=(100, 5))

# write it out as plain text
numpy.savetxt('csv-example.txt', data, fmt='%.7e')

# the names to assign to the 5 columns in the file
names = ['x', 'y', 'z', 'w', 'v']

# load the plaintext file into a catalog
f = CSVCatalog('csv-example.txt', names)

# build an (N,3) Position column out of x, y, z, and record the box size
f['Position'] = f['x'][:, None] * [1, 0, 0] + f['y'][:, None] * [0, 1, 0] + f['z'][:, None] * [0, 0, 1]
f.attrs['BoxSize'] = 1.0

print(f)
print("columns = ", f.columns) # default Weight,Selection also present
print("total size = ", f.csize)


CSVCatalog(size=100, FileStack(CSVFile(path=/tmp/tmpedljiijj/csv-example.txt, dataset=*, ncolumns=5, shape=(100,)>, ... 1 files))
columns =  ['Position', 'Selection', 'Value', 'Weight', 'v', 'w', 'x', 'y', 'z']
total size =  100

In [3]:
from nbodykit.source.catalog import BinaryCatalog

# generate fake Position and Velocity data
pos = numpy.random.random(size=(1024, 3)) # fake Position column
vel = numpy.random.random(size=(1024, 3)) # fake Velocity column

# dump the raw bytes of both arrays, back to back, into one file
with open('binary-example.dat', 'wb') as ff:
    pos.tofile(ff)
    vel.tofile(ff)
    ff.seek(0)

# create the binary catalog, declaring the dtype and shape of each column
f = BinaryCatalog(ff.name, [('Position', ('f8', 3)), ('Velocity', ('f8', 3))], size=1024)

print(f)
print("columns = ", f.columns) # default Weight,Selection also present
print("total size = ", f.csize)


BinaryCatalog(size=1024, FileStack(BinaryFile(path=/tmp/tmpedljiijj/binary-example.dat, dataset=*, ncolumns=2, shape=(1024,)>, ... 1 files))
columns =  ['Position', 'Selection', 'Value', 'Velocity', 'Weight']
total size =  1024

In [4]:
import h5py
from nbodykit.source.catalog import HDFCatalog

# fake structured data: an (N,3) Position and a scalar Mass per row
dset = numpy.empty(1024, dtype=[('Position', ('f8', 3)), ('Mass', 'f8')])
dset['Mass'] = numpy.random.random(size=1024)
dset['Position'] = numpy.random.random(size=(1024, 3))

# store the same data twice: once as a single structured dataset,
# and once as a group holding one dataset per column
with h5py.File('hdf-example.hdf5', 'w') as ff:
    ff.create_dataset('Data1', data=dset)
    grp = ff.create_group('Data2')
    grp.create_dataset('Position', data=dset['Position']) # column as dataset
    grp.create_dataset('Mass', data=dset['Mass']) # column as dataset

# initialize the catalog
f = HDFCatalog('hdf-example.hdf5')

print(f)
print("columns = ", f.columns) # default Weight,Selection also present
print("total size = ", f.csize)


HDFCatalog(size=1024, FileStack(HDFFile(path=/tmp/tmpedljiijj/hdf-example.hdf5, dataset=/, ncolumns=4, shape=(1024,)>, ... 1 files))
columns =  ['Data1/Mass', 'Data1/Position', 'Data2/Mass', 'Data2/Position', 'Selection', 'Value', 'Weight']
total size =  1024

In [5]:
import bigfile
from nbodykit.source.catalog import BigFileCatalog

# fake structured data with Position and Velocity columns
data = numpy.empty(512, dtype=[('Position', ('f8', 3)), ('Velocity', ('f8', 3))])
data['Position'] = numpy.random.random(size=(512, 3))
data['Velocity'] = numpy.random.random(size=(512, 3))

# save the fake data to a BigFile; each column is declared as 'f4'
# even though the in-memory arrays are 'f8'
with bigfile.BigFile('bigfile-example', create=True) as tmpff:
    with tmpff.create("Position", dtype=('f4', 3), size=512) as bb:
        bb.write(0, data['Position'])
    with tmpff.create("Velocity", dtype=('f4', 3), size=512) as bb:
        bb.write(0, data['Velocity'])
    with tmpff.create("Header") as bb:
        bb.attrs['Size'] = 512.

# initialize the catalog, reading attributes from the "Header" block
f = BigFileCatalog('bigfile-example', header='Header')

print(f)
print("columns = ", f.columns) # default Weight,Selection also present
print("total size = ", f.csize)


BigFileCatalog(size=512, FileStack(BigFile(path=/tmp/tmpedljiijj/bigfile-example, dataset=./, ncolumns=2, shape=(512,)>, ... 1 files))
columns =  ['Position', 'Selection', 'Value', 'Velocity', 'Weight']
total size =  512

In [6]:
import fitsio
from nbodykit.source.catalog import FITSCatalog

# fake structured data: an (N,3) Position and a scalar Mass per row
dset = numpy.empty(1024, dtype=[('Position', ('f8', 3)), ('Mass', 'f8')])
dset['Mass'] = numpy.random.random(size=1024)
dset['Position'] = numpy.random.random(size=(1024, 3))

# write the structured array to the 'Data' extension of a FITS file
fitsio.write('fits-example.fits', dset, extname='Data')

# initialize the catalog from that extension
f = FITSCatalog('fits-example.fits', ext='Data')

print(f)
print("columns = ", f.columns) # default Weight,Selection also present
print("total size = ", f.csize)


FITSCatalog(size=1024, FileStack(FITSFile(path=/tmp/tmpedljiijj/fits-example.fits, dataset=Data, ncolumns=2, shape=(1024,)>, ... 1 files))
columns =  ['Mass', 'Position', 'Selection', 'Value', 'Weight']
total size =  1024

In [7]:
# generate 100 rows of data and split them across two plaintext files
data = numpy.random.random(size=(100, 5))

# rows 0-39 go into the first file
numpy.savetxt('csv-example-1.txt', data[:40], fmt='%.7e')

# the remaining 60 rows go into the second
numpy.savetxt('csv-example-2.txt', data[40:], fmt='%.7e')

In [8]:
# both files share the same 5 column names
names = ['a', 'b', 'c', 'd', 'e']

# a glob pattern picks up both files at once
f = CSVCatalog('csv-example-*', names)

print(f)

# combined catalog size is 40+60=100
print("total size = ", f.csize)


CSVCatalog(size=100, FileStack(CSVFile(path=/tmp/tmpedljiijj/csv-example-1.txt, dataset=*, ncolumns=5, shape=(40,)>, ... 2 files))
total size =  100

In [9]:
# both files share the same 5 column names
names = ['a', 'b', 'c', 'd', 'e']

# alternatively, pass an explicit list of file names
f = CSVCatalog(['csv-example-1.txt', 'csv-example-2.txt'], names)

print(f)

# combined catalog size is 40+60=100
print("total size = ", f.csize)


CSVCatalog(size=100, FileStack(CSVFile(path=csv-example-1.txt, dataset=*, ncolumns=5, shape=(40,)>, ... 2 files))
total size =  100

In [10]:
from nbodykit.source.catalog import ArrayCatalog

# build a structured array with Position and Mass fields
data = numpy.empty(1024, dtype=[('Position', ('f8', 3)), ('Mass', 'f8')])
data['Position'] = numpy.random.random(size=(1024, 3))
data['Mass'] = numpy.random.random(size=1024)

# round-trip the array through a .npy file
numpy.save("npy-example.npy", data)
data = numpy.load("npy-example.npy")

# a structured array initializes the catalog directly
f = ArrayCatalog(data)

print(f)
print("columns = ", f.columns) # default Weight,Selection also present
print("total size = ", f.csize)


# a dict mapping column name to array works just as well
f = ArrayCatalog({'Position' : data['Position'], 'Mass' : data['Mass'] })

print(f)
print("columns = ", f.columns) # default Weight,Selection also present
print("total size = ", f.csize)


ArrayCatalog(size=1024)
columns =  ['Mass', 'Position', 'Selection', 'Value', 'Weight']
total size =  1024
ArrayCatalog(size=1024)
columns =  ['Mass', 'Position', 'Selection', 'Value', 'Weight']
total size =  1024

In [11]:
from nbodykit.io.base import FileType

class NPYFile(FileType):
    """
    A file-like object to read numpy ``.npy`` files.

    The whole array is loaded into memory once at construction and
    ``read`` serves column/range requests from that cached copy.
    """
    def __init__(self, path):
        self.path = path
        self.attrs = {}
        # load the data once, and expose the size and dtype that the
        # FileType interface requires
        self._data = numpy.load(self.path)
        self.size = len(self._data) # total size
        self.dtype = self._data.dtype # data dtype

    def read(self, columns, start, stop, step=1):
        """
        Read the specified column(s) over the given range.

        Parameters
        ----------
        columns : list of str
            the names of the fields to read
        start, stop, step : int
            the slice of rows to return

        Returns
        -------
        numpy.ndarray
            a structured array holding only the requested columns
        """
        # select the requested fields before slicing the rows; the
        # previous implementation ignored ``columns`` entirely and
        # returned every field of the file
        return self._data[columns][start:stop:step]

In [12]:
from nbodykit.source.catalog.file import FileCatalogFactory

# generate a catalog class named 'NPYCatalog' whose on-disk reader
# is the NPYFile class defined above
NPYCatalog = FileCatalogFactory('NPYCatalog', NPYFile)

In [13]:
# build a structured array with Position and Mass fields
data = numpy.empty(1024, dtype=[('Position', ('f8', 3)), ('Mass', 'f8')])
data['Position'] = numpy.random.random(size=(1024, 3))
data['Mass'] = numpy.random.random(size=1024)

# write it to disk in .npy format
numpy.save("npy-example.npy", data)

# and read it back with the custom catalog class
f = NPYCatalog("npy-example.npy")

print(f)
print("columns = ", f.columns) # default Weight,Selection also present
print("total size = ", f.csize)


NPYCatalog(size=1024, FileStack(NPYFile(path=/tmp/tmpedljiijj/npy-example.npy, dataset=None, ncolumns=2, shape=(1024,)>, ... 1 files))
columns =  ['Mass', 'Position', 'Selection', 'Value', 'Weight']
total size =  1024

In [14]:
import shutil
# restore the original working directory BEFORE removing the scratch
# directory (the current working directory cannot be deleted everywhere)
os.chdir(startdir)
shutil.rmtree(tmpdir)