In [1]:
import modin.pandas as pd
import numpy as np
from sys import getsizeof


WARNING: Not updating worker name since `setproctitle` is not installed. Install this with `pip install setproctitle` (or ray[debug]) to enable monitoring of worker processes.
Process STDOUT and STDERR is being redirected to /tmp/ray/session_2018-12-27_20-44-34_6234/logs.
Waiting for redis server at 127.0.0.1:60333 to respond...
Waiting for redis server at 127.0.0.1:17171 to respond...
Starting the Plasma object store with 13.497912524 GB memory using /dev/shm.

In [2]:
arr = np.random.randint(0, 999, size=(2**10, 2**16))
print(getsizeof(arr)/1024**2 , 'MB')


512.0001068115234 MB

In [3]:
%%time

df = pd.DataFrame(arr)
aggrs = df.agg(['min', 'max', 'std', 'median'])
print(df.info(), aggrs.shape, df.shape)
print(getsizeof(df)/1024**2, 'Mb')


/home/dex/anaconda3/lib/python3.6/site-packages/modin/pandas/dataframe.py:759: UserWarning: User-defined function verification with DataFrame dtypes is still under development. Should be fully functional in a future release.
  UserWarning,
/home/dex/anaconda3/lib/python3.6/site-packages/modin/pandas/dataframe.py:5017: UserWarning: Defaulting to Pandas implementation
  warnings.warn("Defaulting to Pandas implementation", UserWarning)
<class 'modin.pandas.dataframe.DataFrame'>
RangeIndex: 1024 entries, 0 to 1023
Columns: 65536 entries, 0 to 65535
dtypes: int64(65536)
memory usage: 512.0 MB
None (4, 65536) (1024, 65536)
512.0000991821289 Mb
CPU times: user 33.2 s, sys: 1.73 s, total: 35 s
Wall time: 21.6 s

In [4]:
import pandas as pd
import numpy as np
from sys import getsizeof

In [5]:
%%time

df = pd.DataFrame(arr)
aggrs = df.agg(['min', 'max', 'std', 'median'])
print(df.info(), aggrs.shape, df.shape)
print(getsizeof(df)/1024**2, 'Mb')


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1024 entries, 0 to 1023
Columns: 65536 entries, 0 to 65535
dtypes: int64(65536)
memory usage: 512.0 MB
None (4, 65536) (1024, 65536)
512.0000991821289 Mb
CPU times: user 1min 20s, sys: 301 ms, total: 1min 20s
Wall time: 1min 20s

In [ ]: