In [2]:
import dask.dataframe as dd

def get_max_label(series):
    """Return the most frequent value in ``series``.

    Frequencies come from ``series.value_counts()``, which orders values by
    descending count and ignores NaN by default; the first entry of that
    ordering is returned, so ties are resolved by whatever order
    ``value_counts`` produces.

    Parameters
    ----------
    series : pandas.Series-like
        Any object exposing ``value_counts()`` whose result has an
        ``index.values`` array.

    Returns
    -------
    The most common value, or ``None`` when ``series`` has no non-null
    values (empty or all-NaN), instead of failing with an opaque
    ``IndexError`` from indexing an empty array.
    """
    counts = series.value_counts()
    # An empty or all-NaN series produces an empty counts table; indexing
    # [0] on it would raise IndexError, so report "no label" explicitly.
    if len(counts) == 0:
        return None
    return counts.index.values[0]
    

# Most frequent label per image, computed over every CSV matching the glob.
# NOTE: with this dask version, read_csv raises "IndexError: list index out
# of range" when the pattern matches no files, so make sure the glob is
# resolved relative to the directory that actually contains the CSVs.
dd.read_csv('my_csvglob*.csv').groupby('imageid').agg({'label': get_max_label}).compute()


---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-2-323ff376d3ba> in <module>()
      5 
      6 
----> 7 dd.read_csv('my_csvglob*.csv').groupby('imageid').agg({'label': get_max_label}).compute()

/home/benjamin/anaconda3/lib/python3.5/site-packages/dask/dataframe/csv.py in read_csv(urlpath, blocksize, chunkbytes, collection, lineterminator, compression, sample, enforce, storage_options, **kwargs)
    220                                          sample=sample,
    221                                          compression=compression,
--> 222                                          **(storage_options or {}))
    223     if not isinstance(values[0], (tuple, list)):
    224         values = [values]

/home/benjamin/anaconda3/lib/python3.5/site-packages/dask/bytes/core.py in read_bytes(urlpath, delimiter, not_zero, blocksize, sample, compression, **kwargs)
    210     return read_bytes(storage_options.pop('path'), delimiter=delimiter,
    211             not_zero=not_zero, blocksize=blocksize, sample=sample,
--> 212             compression=compression, **storage_options)
    213 
    214 

/home/benjamin/anaconda3/lib/python3.5/site-packages/dask/bytes/local.py in read_bytes(path, delimiter, not_zero, blocksize, sample, compression)
     31     if '*' in path:
     32         filenames = list(map(os.path.abspath, sorted(glob(path))))
---> 33         sample, first = read_bytes(filenames[0], delimiter, not_zero,
     34                                    blocksize, sample=True,
     35                                    compression=compression)

IndexError: list index out of range

In [ ]: