Download pre-prepared data from here:
These imports are needed to run the full set of examples.
In [1]:
# Complete set of Python 3.6 imports used for these examples
# Standard modules
import io
import logging
import lzma
import multiprocessing
import os
import ssl
import sys
import time
import urllib.request
import zipfile
# Third-party modules
import fastparquet # Needs python-snappy and llvmlite
import graphviz # To visualize Dask graphs
import numpy as np
import pandas as pd
import psutil # Memory stats
import dask
import dask.dataframe as dd
import bokeh.io # For Dask profile graphs
import seaborn as sns # For colormaps
# Support multiple lines of output in each cell: with "all", every expression
# statement in a cell is displayed, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
# Don't wrap tables: cap the displayed rows and columns at 20 each and
# raise the display width to 300.
pd.options.display.max_rows = 20
pd.options.display.max_columns = 20
pd.options.display.width = 300
# Show matplotlib and bokeh graphs inline in Jupyter notebook.
# NOTE: %matplotlib is an IPython magic, so this cell only runs under
# IPython/Jupyter, not as a plain Python script.
%matplotlib inline
bokeh.io.output_notebook()
# Record the interpreter and library versions used for this run.
print(sys.version)
np.__version__, pd.__version__, dask.__version__
Out[1]:
In [143]:
# Build a head() task on ddf with compute=False, so `task` holds the
# not-yet-evaluated result instead of the first rows themselves.
# NOTE(review): ddf is only assigned in a later cell (In [52]); the execution
# counter In [143] suggests this cell was run after it — confirm cell order.
task = ddf.head(n=2, npartitions=2, compute=False)
In [91]:
# Draw the task's graph (graphviz is imported above for this purpose).
task.visualize()
Out[91]:
In [92]:
# Display the graph object attached to the task.
task.dask
Out[92]:
In [142]:
# NOTE(review): _keys() is a private method (leading underscore) — presumably
# lists the task's output keys; verify it exists in the installed dask version.
task._keys()
Out[142]:
In [ ]:
In [ ]:
In [ ]:
In [65]:
# Print the class docstring of the pandas DataFrame ...
print(pd.DataFrame.__doc__)
In [66]:
# ... and of the Dask DataFrame, for comparison.
print(dd.DataFrame.__doc__)
In [ ]:
# NOTE(review): from_pandas is called with no arguments here, unlike the later
# call dd.from_pandas(df, chunksize=2); the empty In [ ] counter shows the cell
# was never executed. Looks like a scratch cell that would fail as written —
# confirm, then fill in the arguments or remove it.
dd.from_pandas()
In [ ]:
In [52]:
# Wrap the pandas object `df` as a Dask DataFrame, requesting chunksize=2.
# NOTE(review): df is not assigned in any visible cell — confirm where it is
# created before this cell runs.
ddf = dd.from_pandas(df, chunksize=2)
# Boolean-mask selection on ddf.a > 2 (presumably `a` is a column of df).
task = ddf[ddf.a>2]
In [53]:
# Evaluate the task and display the result.
task.compute()
Out[53]:
In [54]:
# Draw the graph of the filter task.
task.visualize()
Out[54]:
In [55]:
# NOTE(review): same print as cell In [66] above — duplicate, can likely go.
print(dd.DataFrame.__doc__)
In [56]:
# NOTE(review): _meta is a private attribute — presumably the empty
# metadata frame describing columns/dtypes; confirm against dask docs.
task._meta
Out[56]:
In [57]:
# Both expressions are displayed (hence two Out[57] lines) because
# ast_node_interactivity is set to "all" in the setup cell.
task.npartitions
task.divisions
Out[57]:
Out[57]:
In [58]:
# Private name of this collection; used below as the first element of graph keys.
task._name
Out[58]:
In [59]:
# Display the graph object attached to the task.
task.dask
Out[59]:
In [60]:
# Look up the graph entry stored under the key (task._name, 0) ...
task.dask[(task._name,0)]
Out[60]:
In [61]:
# ... and under (task._name, 1).
# presumably the second tuple element is the partition index — TODO confirm
task.dask[(task._name,1)]
Out[61]:
In [62]:
# IPython-only syntax: `??` shows the source of task.compute; not valid plain Python.
task.compute??
In [48]:
# NOTE(review): task2 is not assigned in any visible cell; its defining cell
# was presumably deleted or lies outside this excerpt. On a fresh
# "Restart & Run All" these cells would fail with NameError — confirm.
# The execution counters (48-51) precede those of the `task` cells above,
# so these cells were run earlier in the session.
task2.compute()
Out[48]:
In [49]:
# Draw the graph of task2.
task2.visualize()
Out[49]:
In [51]:
# Look up the graph entry stored under the key (task2._name, 0).
task2.dask[(task2._name,0)]
Out[51]:
In [ ]: