In [2]:
import pandas as pd
import numpy as np
import tables
import frogress
# Record the versions of the libraries this notebook relies on, one per line,
# so that the environment that produced the outputs can be reconstructed.
print('\n'.join([
    'pandas {}'.format(pd.__version__),
    'numpy {}'.format(np.__version__),
    'tables {}'.format(tables.__version__),
    'frogress {}'.format(frogress.__version__),
]))
In [9]:
# Peek at the first three lines of the test search-stream file to see its layout.
!head -n 3 data/testSearchStream.tsv
In [22]:
# Peek at the first three lines of the train search-stream file to see its layout.
!head -n 3 data/trainSearchStream.tsv
In [3]:
filename = 'data/trainSearchStream.tsv'
n = 4000  # rows per chunk read from the TSV and appended to the store

# Count the data rows up front so the progress bar knows how many chunks to
# expect. A `! wc -l ...` shell escape cannot be embedded inside a Python
# expression (and would not interpolate `filename`), so count in Python.
with open(filename, 'rb') as tsv:
    n_rows = sum(1 for _ in tsv) - 1  # minus the header line consumed by read_csv
n_chunks = max(1, -(-n_rows // n))  # ceiling division: the last chunk may be partial

# Stream the TSV into an HDF5 table stored under the key 'df', one chunk at a
# time, so the whole file never has to be held in memory. mode='w' recreates
# the output file, which keeps this cell safe to re-run.
with pd.HDFStore(filename.replace('tsv', 'hd5'), mode='w') as store:
    for chunk in frogress.Bar(pd.read_csv(filename, chunksize=n, sep='\t'), steps=n_chunks):
        store.append('df', chunk)
In [4]:
# Reopen the HDF5 file written by the conversion cell (pandas' default open
# mode) and end the cell with the store so its summary is displayed.
s = pd.HDFStore(path='data/trainSearchStream.hd5')
s
Out[4]:
In [12]:
# Show the attributes recorded on the storer behind the 'df' key.
s.get_storer(key='df').attrs
Out[12]:
In [ ]: