In [3]:
%matplotlib inline
In [4]:
import glob
import os

import dask.dataframe as dd
import pandas as pd
In [5]:
# Collect the extension-less base names of every "2*.xlsx" workbook in
# ../input (e.g. "../input/2015_a_b.xlsx" -> "2015_a_b").
# glob.glob returns matches in arbitrary, filesystem-dependent order, so the
# stems are sorted to keep the list stable across runs and machines.
filenames = sorted(
    os.path.splitext(os.path.basename(wholePath))[0]
    for wholePath in glob.glob("../input/2*.xlsx")
)
In [6]:
# Build a one-column table holding the input file stems.
# dd.DataFrame is not a dict-accepting constructor (it expects a task graph,
# name, meta and divisions), so the data goes through a pandas frame that is
# then wrapped as a single-partition dask frame.
dataFiles = dd.from_pandas(pd.DataFrame({"filename": filenames}), npartitions=1)

# Split each stem on its first two underscores into three pieces.
# expand=True yields a frame with integer-labelled columns 0, 1, 2; dask needs
# an explicit n to know how many columns to expect.
# NOTE(review): assumes every stem contains at least two underscores
# (presumably <year>_<pollutant>_<resolution>) - confirm against the input files.
filenameParts = dataFiles["filename"].str.split("_", n=2, expand=True)
dataFiles["year"] = filenameParts[0]
dataFiles["pollutant"] = filenameParts[1]
dataFiles["resolution"] = filenameParts[2]
In [ ]: