In [1]:
pip show pandas dask
In [2]:
import numpy as np
import pandas as pd
from dask import dataframe as dd
In [3]:
# Build the benchmark input: a 9,000,000 x 26 pandas frame of random integers
# in [0, 100), one column per uppercase letter, then wrap it as a Dask
# DataFrame split into 10 partitions.
N_ROWS = 9_000_000
N_PARTITIONS = 10
SEED = 42  # fixed seed so the generated data is the same on every fresh run

column_names = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
rng = np.random.default_rng(SEED)

df = pd.DataFrame(
    rng.integers(0, 100, size=(N_ROWS, len(column_names))),
    columns=column_names,
)
ddf = dd.from_pandas(df, npartitions=N_PARTITIONS)
In [5]:
# Add two string columns to the Dask frame and reverse its column order.
# NOTE: this cell reassigns `ddf`; running it a second time without first
# re-creating `ddf` reverses the column order again.
letters = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')  # built once, not once per row

# One randomly chosen letter per row; the value taken from column A is ignored.
# `meta` declares the output name and dtype up front so Dask does not have to
# infer them by running the function on dummy data.
ddf['useless_letter'] = ddf['A'].apply(
    lambda _value: np.random.choice(letters),
    meta=('useless_letter', 'object'),
)

# The same constant string in every row.
ddf['alphabet'] = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

# Select the columns in reversed order (newly added columns come first).
out_columns = ddf.columns.tolist()[::-1]
ddf = ddf[out_columns]
In [6]:
# Peek at the first five rows to check the added columns and the column order.
ddf.head(n=5)
Out[6]:
In [8]:
%%time
ddf.to_csv('sample_data.csv', single_file=True)
Out[8]:
In [9]:
%%time
ddf.to_parquet('sample_data_parquet')
In [4]:
In [ ]:
In [ ]: