In [ ]:
# Load IPython's autoreload extension.
%load_ext autoreload

In [ ]:
# Mode 2: reload imported modules before each cell runs, so edits to library code are picked up.
%autoreload 2

In [ ]:
import os
import time
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import cellpy
from cellpy import cellreader

%matplotlib inline

In [ ]:
from cellpy import log

# Configure cellpy logging: default level "INFO", custom log directory set to
# the current working directory.
log.setup_logging(custom_log_dir=os.getcwd(), default_level="INFO")

In [ ]:
# Path to the cellpy (HDF5) file used throughout this notebook.
filename = Path("/Users/jepe/Arbeid/Data/celldata/20171120_nb034_11_cc.h5")
# File size in MB (bytes / 1_048_576), labelled the same way as the size
# check done after re-saving the file.
print(f"size: {filename.stat().st_size/1_048_576} MB")

# Load the full file into a CellpyData object and keep a handle to its dataset.
my_data = cellreader.CellpyData()
my_data.load(filename)
dataset = my_data.dataset
print(dataset.steps.columns)
# Kept as the last expression of the cell so the preview is actually rendered;
# in the middle of a cell its result would be silently discarded.
dataset.steps.head()

In [ ]:
# Column labels of the raw-data table.
dataset.raw.columns

In [ ]:
# First 10 rows of the raw-data table.
dataset.raw.head(10)

Some notes

  • should rename the tables consistently
    • e.g. dfsummary, dfdata, dfinfo, dfsteps, dffid
    • have to take care so that it also can read "old" cellpy-files
  • should make (or check if it is already made) an option for giving a "custom" config-file when starting the session

In [ ]:
# Ask cellpy to (re)build the step table for the loaded data.
# NOTE(review): presumably this is the table exposed as `dataset.steps` — confirm.
my_data.make_step_table()

In [ ]:
# Derive the output path from `filename` (same folder and stem, ".nh5" suffix)
# instead of repeating the absolute path, so the two paths cannot drift apart.
filename2 = filename.with_suffix(".nh5")
# Write the loaded data back to disk under the new name.
my_data.save(filename2)

In [ ]:
# Size of the re-saved file in MB (bytes / 1_048_576).
size_mb = filename2.stat().st_size / 1_048_576
print(f"size: {size_mb} MB")

In [ ]:
# Round-trip check: load the file that was just saved and list the columns of
# its step table.
my_data2 = cellreader.CellpyData()
my_data2.load(filename2)
dataset2 = my_data2.dataset
print(dataset2.steps.columns)

# Drop both names again; the rest of the notebook queries the file directly.
del my_data2, dataset2

In [ ]:
# next: don't load the full hdf5-file, only get datapoints for a cycle from step_table
# then: query the hdf5-file for the data (and time it)
# ex: store.select('/CellpyData/dfdata', "data_point>20130104 & data_point<20130104 & columns=['A', 'B']")

In [ ]:
# Keys of the tables inside the cellpy HDF5 file.
infoname = '/CellpyData/info'
dataname = '/CellpyData/dfdata'
summaryname = '/CellpyData/dfsummary'
fidname = '/CellpyData/fidtable'
stepname = '/CellpyData/step_table'

# The notebook only reads from the file, so open it read-only instead of the
# default append mode; this rules out accidental writes to the data file.
store = pd.HDFStore(filename2, mode="r")

# Two ways of writing the same row selection. The first result is passed to
# `display` explicitly because only the last expression of a cell is rendered
# automatically (`display` is provided by the IPython kernel).
display(store.select(dataname, where="index>21 and index<32"))
# Same rows, restricted to two columns.
store.select(dataname, "index>21 & index<32 & columns=['Test_Time', 'Step_Index']")

Querying cellpy file (hdf5)

  1. load steptable
  2. get the stepnumbers for given cycle
  3. create query and run it
  4. scale the charge (1_000_000/mass)

In [ ]:
# Read the whole step table from the open store (key held in `stepname`).
steptable = store.select(stepname)

In [ ]:
# Look up the charge-type steps using the step table read from the store.
# No cycle is selected here (cycle_number=None); the cycle is picked with a
# mask below instead.
s = my_data.get_step_numbers(
    steptype="charge",
    cycle_number=None,
    allctypes=True,
    pdtype=True,
    steptable=steptable,
)
# Boolean mask for cycle 2 (alternatively, pass cycle_number to
# get_step_numbers).
cycle_mask = s["cycle"] == 2

In [ ]:
# Preview the first rows of the result returned by get_step_numbers.
s.head()

In [ ]:
# First and last data point of the first matching step in the masked cycle.
cycle_points = s.loc[cycle_mask, ["point_first", "point_last"]]
a = cycle_points.values[0]

In [ ]:
# Column headers of the raw-data table used in the queries below
# (voltage, charge capacity, discharge capacity, current).
v_hdr, c_hdr, d_hdr, i_hdr = (
    "Voltage",
    "Charge_Capacity",
    "Discharge_Capacity",
    "Current",
)

In [ ]:
# Row part of the HDF5 query: every index from a[0] to a[1], both inclusive.
q = f"index>={a[0]} & index<={a[1]}"

In [ ]:
# Build the complete query (row range plus column restriction) in a single
# assignment rather than appending with `+=`, so re-running this cell cannot
# tack a second `columns` clause onto `q`.
q = f"index>={a[0]} & index<={a[1]} & columns = ['{c_hdr}', '{v_hdr}']"

In [ ]:
# Mass as stored on the dataset; the printed label reports it in mg.
mass = dataset.mass
print(f"mass from dataset.mass = {mass:5.4} mg")

In [ ]:
%%timeit
# Baseline timing: `get_ccap(2)` on the already-loaded `my_data` object.
my_data.get_ccap(2)

In [ ]:
%%timeit
# Timing of the alternative: select only the needed rows/columns from the
# HDF5 store and scale the capacity column by 1000000 / mass.
# NOTE(review): names assigned inside a %%timeit cell are not expected to
# persist in the notebook namespace, so `c2` will likely be undefined in later
# cells — confirm.
c2 = store.select('/CellpyData/dfdata', q)
c2[c_hdr] = c2[c_hdr] * 1000000 / mass

In [ ]:
# Ratio of the two measured timings in ms (HDF5 query / get_ccap).
5.03/3.05

Result

  • 65% penalty for using "hdf5" query lookup
  • 5.03 vs 3.05 ms

In [ ]:
# Names assigned inside a `%%timeit` cell are not kept in the notebook
# namespace, so the selection and scaling are repeated here before plotting.
c2 = store.select('/CellpyData/dfdata', q)
c2[c_hdr] = c2[c_hdr] * 1000000 / mass

# Voltage against the scaled capacity for the queried step, with labelled axes
# so the figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(c2[c_hdr], c2[v_hdr])
ax.set_xlabel(f"{c_hdr} * 1000000 / mass")
ax.set_ylabel(v_hdr)
ax.set_title("Cycle 2 charge step (HDF5 query)");

In [ ]:
# Release the HDF5 file handle opened with pd.HDFStore.
store.close()

In [ ]: