In [1]:
from itertools import islice
import tables as tb
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
# Open the aggregated symbol-count store read-only: the cells that follow only
# inspect it, and mode "a" would grant write access and silently create an
# empty file if the path were wrong.
tbf = tb.open_file("/global/scratch/ryee/symbol_count/agg_count.h5", "r")
# Handle to the table node inside the file; `tbf` has to stay open while it is used.
table = tbf.root.count_table
In [2]:
# Number of rows stored in the table.
n_rows = len(table)
n_rows
Out[2]:
In [5]:
# Create a row iterator over the table; a later cell pulls items from it with islice.
rows = table.iterrows()
# Display the iterator object itself (no rows are consumed by this).
rows
Out[5]:
In [6]:
# Peek at the first five records as (symbol_root, count) pairs.
# Note: this advances the shared `rows` iterator by five items.
head_records = islice(rows, 5)
list((rec['symbol_root'], rec['count']) for rec in head_records)
Out[6]:
In [ ]:
# let's try reading the table (note left unfinished; presumably the whole table into memory)
In [7]:
# Close the file handle opened in the first cell.
# NOTE(review): `table` and `rows` were obtained from this handle, so they are
# presumably unusable after this point — confirm before reusing them.
tbf.close()
File size before indexing any columns:
ls -lt ~/gscratch/symbol_count/agg_count.h5
-rw-r--r-- 1 ryee ucb 139474786 Jul 26 10:32 /global/home/users/ryee/gscratch/symbol_count/agg_count.h5
In [ ]:
# Index the `date` column (disabled for now; the file must be open in a writable mode for this to work)
# index1 = table.cols.date.create_index()
In [ ]:
# read the first couple of rows (like df.head)
In [8]:
# Load the entire count table into memory in one read.
# The context manager closes the file as soon as the read finishes (the handle
# reopened here was previously never closed), and read-only mode is sufficient
# because nothing below writes back to the file.
# Note: `table` now holds the in-memory records, not the on-disk table node.
with tb.open_file("/global/scratch/ryee/symbol_count/agg_count.h5", "r") as tbf:
    table = tbf.root.count_table.read()
In [12]:
# Grand total of the `count` field over the in-memory records.
count_field = table['count']
count_field.sum()
Out[12]:
In [14]:
# Wrap the in-memory records in a DataFrame and preview the first five rows.
df = DataFrame(data=table)
df.head(n=5)
Out[14]:
In [16]:
# Total of the `count` column, computed through pandas.
count_column = df["count"]
count_column.sum()
Out[16]:
In [25]:
# Count how many rows carry each distinct `date` value.
# `to_frame(name=...)` names the resulting column explicitly. The previous
# `DataFrame(series, columns=['c'])` form instead *selects* a column labelled
# 'c', which yields no data whenever the Series returned by value_counts()
# carries a different name.
cdf = df["date"].value_counts().to_frame(name="c")
# Copy the index (the distinct date values) into a regular column so it can be
# passed as the x variable when plotting.
cdf['ts'] = cdf.index
cdf.head()
Out[25]:
In [22]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
In [26]:
# Scatter of per-date row counts (`c`) against the date value (`ts`).
cdf.plot.scatter(x='ts', y='c')
Out[26]:
In [ ]: