In [1]:
from dask.distributed import Client
# Address of the dask scheduler this notebook connects to. The port is whatever
# that scheduler was started on, so adjust it before re-running elsewhere.
SCHEDULER_ADDRESS = "tcp://127.0.0.1:33705"

client = Client(SCHEDULER_ADDRESS)
client  # last expression: display the client summary as the cell output
Out[1]:
In [2]:
import dask
import dask.dataframe as dd
from random import randint
from time import sleep
In [3]:
# NOTE: sleep() is only a stand-in for real work here; it is probably not the right tool for this demonstration.
def short_running_func(delay=0.1):
    """Simulate a quick task that completes normally.

    Args:
        delay: Seconds to sleep before returning. Defaults to 0.1, the value
            that was previously hard-coded.

    Returns:
        The string 'short'.
    """
    # When run through dask, this print likely shows up in the console
    # rather than in the notebook output.
    print('short')
    sleep(delay)
    return 'short'
def time_out_func(delay=0.5):
    """Simulate a slow task that times out and is handled locally.

    The function sleeps, then deliberately raises ``TimeoutError`` and catches
    it, so it always reports a timeout instead of propagating the exception.

    Args:
        delay: Seconds to sleep before the simulated timeout fires. Defaults
            to 0.5, the value that was previously hard-coded.

    Returns:
        The string 'timeout'.
    """
    # When run through dask, this print likely shows up in the console
    # rather than in the notebook output.
    print('long')
    try:
        sleep(delay)
        # Stand-in for a real operation exceeding its time budget. Nothing
        # after this raise can execute, so there is no success path.
        raise TimeoutError()
    except TimeoutError:
        return 'timeout'
def process_column(value):
    """Run either the short task or the timing-out task, chosen at random.

    Args:
        value: The element being processed. It is accepted so the function can
            be used with ``apply`` but is not consulted.

    Returns:
        Whatever the chosen helper returns.
    """
    # Coin flip to demonstrate both paths: 0 -> short task, 1 -> timing-out task.
    if randint(0, 1) == 0:
        return short_running_func()
    return time_out_func()
In [8]:
# Keep the demo frame small: build dask's sample timeseries, move its index
# into a regular column, then label-slice 0..100.
# NOTE(review): dask's reset_index() restarts the index at 0 in each partition,
# so this slice presumably keeps rows 0-100 of every partition rather than
# 101 rows in total. Confirm that is what is wanted.
df = dask.datasets.timeseries().reset_index().loc[0:100, :]
df.head()
Out[8]:
In [9]:
# Add a 'process' column by calling process_column on every value of 'name'.
# meta='O' is passed as the output metadata hint (presumably object dtype,
# since the helpers return strings; verify against the dask version in use).
df['process'] = df['name'].apply(process_column, meta='O')
# Time head(); dask is lazy, so this is presumably where the apply actually
# runs for the rows that head() needs.
%time df.head()
Out[9]:
In [ ]: