In [2]:
import dask.dataframe as dd
def get_max_label(series):
    """Return the most frequent value in ``series``.

    Parameters
    ----------
    series : pandas.Series
        Values to tally (here, the ``label`` values of one group).

    Returns
    -------
    object or None
        The value with the highest count, or ``None`` when there is nothing
        to count (the series is empty or holds only missing values).

    Notes
    -----
    ``value_counts()`` orders its result by descending frequency and drops
    missing values by default, so the first index entry is the most common
    non-missing value. Which value wins a tie is not controlled here.
    """
    counts = series.value_counts()
    # An empty tally would make the positional lookup below raise IndexError
    # and abort the whole aggregation, so report "no label" instead.
    if counts.empty:
        return None
    return counts.index.values[0]
# Read every CSV matching the glob, group rows by 'imageid', and reduce each
# group's 'label' column with get_max_label.
# The glob must match at least one file: the recorded traceback shows dask's
# read_bytes failing with "IndexError: list index out of range" when the
# glob expands to an empty file list.
dd.read_csv('my_csvglob*.csv').groupby('imageid').agg({'label': get_max_label}).compute()
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-2-323ff376d3ba> in <module>()
5
6
----> 7 dd.read_csv('my_csvglob*.csv').groupby('imageid').agg({'label': get_max_label}).compute()
/home/benjamin/anaconda3/lib/python3.5/site-packages/dask/dataframe/csv.py in read_csv(urlpath, blocksize, chunkbytes, collection, lineterminator, compression, sample, enforce, storage_options, **kwargs)
220 sample=sample,
221 compression=compression,
--> 222 **(storage_options or {}))
223 if not isinstance(values[0], (tuple, list)):
224 values = [values]
/home/benjamin/anaconda3/lib/python3.5/site-packages/dask/bytes/core.py in read_bytes(urlpath, delimiter, not_zero, blocksize, sample, compression, **kwargs)
210 return read_bytes(storage_options.pop('path'), delimiter=delimiter,
211 not_zero=not_zero, blocksize=blocksize, sample=sample,
--> 212 compression=compression, **storage_options)
213
214
/home/benjamin/anaconda3/lib/python3.5/site-packages/dask/bytes/local.py in read_bytes(path, delimiter, not_zero, blocksize, sample, compression)
31 if '*' in path:
32 filenames = list(map(os.path.abspath, sorted(glob(path))))
---> 33 sample, first = read_bytes(filenames[0], delimiter, not_zero,
34 blocksize, sample=True,
35 compression=compression)
IndexError: list index out of range
In [ ]:
Content source: jaminsore/jaminsore.github.io
Similar notebooks: