In [3]:
import numpy as np
import pandas as pd
import pyarrow as pa
import feather as fth
# Benchmark fixture: a frame of float64 columns sized to roughly DATA_SIZE bytes,
# with every fifth row blanked out so the files contain missing values.
type_ = np.dtype('float64')
DATA_SIZE = 2 ** 30  # target payload in bytes (1 GiB)
NCOLS = 100
# True division leaves NROWS as a float; it is truncated with int() wherever a
# row count is actually needed.
NROWS = DATA_SIZE / NCOLS / type_.itemsize

# One column of standard-normal samples per name: c0, c1, ..., c99.
data = dict(
    ('c' + str(i), np.random.randn(int(NROWS)))
    for i in range(int(NCOLS))
)
df = pd.DataFrame(data)

# Overwrite rows 0, 5, 10, ... with NaN across all columns.
df.iloc[::5] = np.nan
In [4]:
# Interactive help lookup: IPython's trailing `?` displays the docstring of
# fth.write_dataframe. Exploration leftover; it only shows documentation.
fth.write_dataframe?
In [5]:
# How many copies of the frame are written to disk for the read benchmark.
NFILES = 20

# Serialize `df` once per file, producing df0.feather, df1.feather, ... in the
# current working directory.
for i in range(NFILES):
    out_path = 'df{0}.feather'.format(i)
    fth.write_dataframe(df, out_path)
In [9]:
def read_all(nfiles=NFILES, nthreads=1):
    """Read the first ``nfiles`` feather files and discard the results.

    Files are looked up by name as ``df0.feather``, ``df1.feather``, ... in
    the current working directory. The loaded frames are not kept; the
    function exists only so the read can be timed.

    Parameters
    ----------
    nfiles : int
        Number of files to read, starting from index 0. The default is the
        value ``NFILES`` had when this function was defined.
    nthreads : int
        Forwarded unchanged to ``fth.read_dataframe`` as ``nthreads``.

    Returns
    -------
    None
    """
    file_names = ('df{0}.feather'.format(n) for n in range(nfiles))
    for file_name in file_names:
        fth.read_dataframe(file_name, nthreads=nthreads)
In [21]:
# Baseline timing: read the first 10 feather files with read_all's default
# nthreads=1.
%time read_all(10)
In [22]:
# Time reading the same 10 files, this time passing nthreads=4 through to the
# feather reader.
%time read_all(10, nthreads=4)
In [16]:
# Interactive source lookup: IPython's trailing `??` displays the source of
# fth.read_dataframe. Exploration leftover; it only shows documentation.
fth.read_dataframe??
In [ ]: