accelQC is intended to provide basic QC functionality for checking raw data produced by 3D accelerometry devices.
In [1]:
# analytics
import numpy as np
import pandas as pd
import scipy.io as scio
import scipy.signal as sp
import seaborn as sb
# utils
import os
In [2]:
# run-mode switches
TEST = True  # selects the `if TEST` branch wherever the notebook checks it
VERBOSE = True  # presumably enables extra output -- TODO confirm where it is read
In [3]:
# Directory the input files are read from:
#   - test mode  -> absolute path of the "test_data/" folder
#   - otherwise  -> the current working directory
# todo: flexible input
datapath = os.path.abspath("test_data/") if TEST else os.getcwd()
print(datapath)
In [44]:
# Load the raw accelerometry table into `accel`, indexed on (src, file).
if TEST:
    # Test fixture: tab-separated file whose first row is the header.
    # na_filter=False switches off pandas' missing-value detection on read.
    accel = pd.read_csv(
        os.path.join(datapath, 'test_data_full.tsv'),
        header=0,
        sep="\t",
        na_filter=False,
    )
    # NOTE: no bare `accel.reset_index()` here -- it returns a new frame and
    # leaves `accel` untouched, so calling it without assignment does nothing.
    accel.fillna('', inplace=True)
    # index on the source and input-file columns
    accel.set_index(['src', 'file'], inplace=True)
else:
    # TODO: load real (non-test) input.
    # Fail loudly here: continuing would leave `accel` undefined (or stale
    # from an earlier run) for the summary below.
    raise NotImplementedError("loading non-test input is not implemented yet")
accel.info()
In [45]:
## reduce size of test dataset for simplicity
## take first 2 files from each source (if more than two exist)
if TEST:
    # Group rows by the first index level; each group's labels are the
    # (src, file) index tuples of its rows. Keep the first two distinct
    # tuples per group, in order of appearance.
    keep = [
        key
        for labels in accel.groupby(level=[0]).groups.values()
        for key in pd.Series(labels).unique()[:2]
    ]
    # `.ix` was removed in pandas 1.0; `.loc` is the label-based replacement
    # and selects every row whose index matches one of the kept tuples.
    accel = accel.loc[keep]
accel.info()
In [46]:
## number of distinct files seen for each source
# move the "file" index level back into a column so it can be aggregated,
# then group the rows on the remaining "src" level
_rows_by_src = accel.reset_index(level="file").groupby(level="src")
_rows_by_src.aggregate({'file': lambda names: np.unique(names).size})
Out[46]:
In [47]:
## per-(src, file) overview: row count plus the value range of each axis
def _value_range(values):
    # largest value minus smallest value within the group
    return np.max(values) - np.min(values)

_per_file_stats = {'t': lambda rows: len(rows)}  # number of rows
for _axis in ('x', 'y', 'z'):
    _per_file_stats[_axis] = _value_range  # range
accel.groupby(level=['src', 'file']).aggregate(_per_file_stats)
Out[47]:
In [ ]:
# http://docs.scipy.org/doc/scipy-dev/reference/generated/scipy.signal.welch.html#scipy.signal.welch