In [2]:
%matplotlib inline
import glob
import matplotlib.pyplot as plt
import pandas as pd
import os
import glob
from enerpi.api import enerpi_log, enerpi_data_catalog
from enerpi.base import timeit
from enerpi.catalog import process_data
from prettyprinting import *
%load_ext memory_profiler
os.chdir('/Users/uge/Desktop/bkp_enerpidata/')  # NOTE(review): hardcoded local path; later relative reads depend on this cwd
raw = pd.read_hdf('enerpi_data.h5', 'rms')  # raw sampler data, HDF key 'rms'
print_red(raw.head())
print_magenta(raw.head())  # NOTE(review): same head printed twice, only the color differs — intentional?
today = pd.read_hdf('CURRENT_MONTH/TODAY.h5', 'rms')  # today's partial-day store
today.tail()
Out[2]:
In [2]:
log = enerpi_log()  # load the enerpi application log as a DataFrame
In [6]:
# Entries of 2016-10-01: drop all-NaN columns and the temp/exec columns,
# then filter out the periodic temperature messages ('Tªs -->').
log_last = log.loc['2016-10-01'].dropna(how='all', axis=1).drop(['temp', 'exec'], axis=1)
log_last = log_last[~log_last.msg.str.startswith('Tªs -->')]
log_last.tail(10)
Out[6]:
In [10]:
log_last[log_last.tipo == 'INFO'].msg.tolist()[0]  # first INFO message of the day
Out[10]:
In [16]:
log.loc['2016-10-01 1:48':'2016-10-01 3:55']  # inspect a specific time window of the log
Out[16]:
In [27]:
log[(log.tipo == 'DEBUG') & log.msg.str.startswith('TIMEIT')].tail(7)  # last timing reports
Out[27]:
In [3]:
# Catalog configuration for the archive experiments below.
base_path = '/Users/uge/Desktop/bkp_enerpidata/'  # NOTE(review): hardcoded absolute path
key_raw = 'rms'  # HDF key of the raw sampler table
CURRENT_MONTH = 'CURRENT_MONTH'  # subdirectory holding the per-day stores
STORE_EXT = '.h5'
@timeit('_load_current_month', verbose=True)
def _load_current_month():
    """Load and concatenate every daily HDF store of the current month.

    Returns
    -------
    (df, days_cm) : (pd.DataFrame, list of str)
        `df` is the concatenation of all per-day stores (key `key_raw`);
        `days_cm` lists the store paths relative to `base_path`, sorted.
    """
    # Absolute paths to the per-day stores, sorted (names are date-based).
    paths = sorted(glob.glob(os.path.join(base_path, CURRENT_MONTH, '*{}'.format(STORE_EXT))))
    # BUG FIX: the original stripped `base_path + os.path.sep`, which yields a
    # double separator ('…bkp_enerpidata//') that never occurs in the globbed
    # paths, so the replace was a no-op and absolute paths leaked out.
    # os.path.relpath produces the intended base-relative store names.
    days_cm = [os.path.relpath(p, base_path) for p in paths]
    # Read from the absolute paths so the result does not depend on the cwd.
    df = pd.DataFrame(pd.concat([pd.read_hdf(p, key=key_raw) for p in paths], axis=0))
    print_red('Current month data stats: {} rows, from {:%c} to {:%c}, index: unique={}, monotonic={}'
              .format(df.shape[0], df.index[0], df.index[-1], df.index.is_unique, df.index.is_monotonic_increasing))
    return df, days_cm
%memit df_month, days_month = _load_current_month()  # profile peak memory of the monthly load
df_month.info()
In [7]:
df_month.drop(['high_delta', 'execution'], axis=1).info()  # footprint without the two bool flag columns
In [4]:
raw.info()
In [ ]:
In [5]:
def _compress_data(data, verbose=False):
if data is not None:
if verbose:
data.info()
if not data.empty:
data = data.copy().astype('float32')
data['ref'] = data['ref'].astype('int16')
# data['ldr'] *= 1000.
data['ldr'] = pd.Series(1000. * data['ldr']).round(0).astype('int16')
if verbose:
data.info()
return data
%memit new_data = process_data(_compress_data(raw))  # compress then post-process; profile memory
new_data.info()
In [ ]:
In [48]:
@timeit('_archive_month', verbose=True)
def _archive_month(new_data):
    """Merge `new_data` with the stored current-month data, sorting by
    timestamp and keeping the first row for duplicated index values."""
    month_df, _old_stores = _load_current_month()
    print_info('MONTH DATA: {}'.format(month_df.shape))
    combined = pd.concat([month_df, new_data], axis=0)
    fusion = pd.DataFrame(combined).sort_index().groupby(level=0).first()
    print_ok('NEW FUSION DATA: {} --> all_dates={}, unique={}, monotonic_incr={}'
             .format(fusion.shape, fusion.index.is_all_dates, fusion.index.is_unique, fusion.index.is_monotonic_increasing))
    return fusion
%memit all_data = _archive_month(new_data)  # profile the sort + de-dup fusion variant
all_data.info()
In [6]:
@timeit('_archive_month2', verbose=True)
def _archive_month2(new_data):
    """Append `new_data` after the stored current-month data.

    Plain concatenation only — no sorting and no de-duplication — so it is
    cheaper than `_archive_month` but assumes the inputs do not overlap.
    """
    month_df, _old_stores = _load_current_month()
    print_info('MONTH DATA: {}'.format(month_df.shape))
    fused = pd.DataFrame(pd.concat([month_df, new_data], axis=0))
    print_ok('NEW FUSION DATA: {} --> all_dates={}, unique={}, monotonic_incr={}'
             .format(fused.shape, fused.index.is_all_dates, fused.index.is_unique, fused.index.is_monotonic_increasing))
    return fused
%memit all_data = _archive_month2(new_data)  # profile the plain-concat fusion variant
all_data.info()
In [27]:
%memit df_month.append(new_data)
In [21]:
DELTA_MAX_CALC_CONSUMO_SAMPLE_BFILL = 120  # cap (seconds) on per-sample duration when integrating energy
def _process_data(data, append_consumo=False):
    """Annotate raw power samples with sampling-gap flags and, optionally,
    build an hourly consumption summary.

    Parameters
    ----------
    data : pd.DataFrame or None
        Samples indexed by timestamp with a 'power' column
        (presumably watts — TODO confirm units upstream).
    append_consumo : bool
        If True, also build and return the hourly summary.

    Returns
    -------
    `data` alone, or ``(data, consumo)`` when `append_consumo` is True;
    `consumo` is None when `data` is None or empty.
    """
    consumo = None
    # Re-process only when needed: fresh data (no 'high_delta' column yet) or
    # when the caller explicitly asks for the hourly summary.
    if data is not None and not data.empty and (append_consumo or ('high_delta' not in data.columns)):
        data = data.copy()
        # Seconds elapsed since the previous sample (first row back-filled).
        data['delta'] = pd.Series(data.index).diff().fillna(method='bfill').dt.total_seconds().values
        data['high_delta'] = False
        data['execution'] = False
        data.loc[data['delta'] > 3, 'high_delta'] = True  # sampling gap > 3 s
        data.loc[data['delta'] > 60, 'execution'] = 1  # gap > 60 s (NOTE(review): assigns 1, not True — cast to bool below)
        # Cap very long gaps so they don't inflate the integrated energy.
        data['delta_consumo'] = data['delta'].apply(lambda x: min(x, DELTA_MAX_CALC_CONSUMO_SAMPLE_BFILL))
        data['Wh'] = data.power * data.delta_consumo / 3600
        if append_consumo:
            # Hourly aggregation with left-labelled bins.
            resampler = data[['power', 'Wh', 'delta_consumo', 'high_delta', 'execution']].resample('1h', label='left')
            consumo = pd.DataFrame(resampler['Wh'].sum().rename('kWh')).fillna(0.).astype('float32')
            consumo /= 1000.  # Wh -> kWh
            consumo['t_ref'] = pd.Series(resampler['delta_consumo'].sum() / 3600).astype('float32')  # hours actually sampled
            consumo['n_jump'] = resampler['high_delta'].sum().fillna(0).astype('int16')
            consumo['n_exec'] = resampler['execution'].sum().fillna(0).astype('int32')
            consumo['p_max'] = resampler['power'].max().round(0).astype('float16')
            consumo['p_mean'] = resampler['power'].mean().round(0).astype('float16')
            consumo['p_min'] = resampler['power'].min().round(0).astype('float16')
        # Normalize flag columns and drop the intermediate helper columns.
        data['high_delta'] = data['high_delta'].astype(bool)
        data['execution'] = data['execution'].astype(bool)
        data.drop(['delta', 'delta_consumo', 'Wh'], axis=1, inplace=True)
        if append_consumo:
            return data, consumo
        return data
    elif append_consumo:
        return data, None
    return data
def _process_data_consumo(data):
    """Build the hourly energy-consumption summary from raw power samples.

    Returns a DataFrame indexed by hour (left-labelled bins) with columns
    kWh, t_ref (hours of valid sampling), n_jump, n_exec, p_max, p_mean and
    p_min, or None when `data` is None or empty.
    """
    if data is None or data.empty:
        return None
    # Seconds between consecutive samples, back-filled for the first row and
    # capped so long sampling gaps don't inflate the integrated energy.
    secs = pd.Series(data.index, index=data.index, name='delta_consumo').diff()
    capped = secs.fillna(method='bfill').dt.total_seconds().apply(
        lambda x: min(x, DELTA_MAX_CALC_CONSUMO_SAMPLE_BFILL))
    energy = pd.DataFrame(capped)
    energy['Wh'] = data.power * energy.delta_consumo / 3600
    # Hourly resamplers over the raw flags/power and over the derived energy.
    hourly_raw = data[['power', 'high_delta', 'execution']].resample('1h', label='left')
    hourly_energy = energy[['Wh', 'delta_consumo']].resample('1h', label='left')
    consumo = pd.DataFrame(hourly_energy['Wh'].sum().rename('kWh')).fillna(0.).astype('float32')
    consumo /= 1000.  # Wh -> kWh
    consumo['t_ref'] = pd.Series(hourly_energy['delta_consumo'].sum() / 3600).astype('float32')
    consumo['n_jump'] = hourly_raw['high_delta'].sum().fillna(0).astype('int16')
    consumo['n_exec'] = hourly_raw['execution'].sum().fillna(0).astype('int32')
    consumo['p_max'] = hourly_raw['power'].max().round(0).astype('float16')
    consumo['p_mean'] = hourly_raw['power'].mean().round(0).astype('float16')
    consumo['p_min'] = hourly_raw['power'].min().round(0).astype('float16')
    return consumo
%memit consumo = _process_data_consumo(all_data)  # summary-only path
%memit all_data, consumo_1 = _process_data(all_data, append_consumo=True)  # full re-process path
In [22]:
@timeit('archive_month_total', verbose=True)
def archive_month_total():
    """End-to-end archive pass: read the raw store, compress, fuse with the
    current-month data and compute the hourly consumption summary."""
    raw_df = pd.read_hdf('enerpi_data.h5', 'rms')
    compacted = process_data(_compress_data(raw_df))
    compacted.info()
    fused = _archive_month2(compacted)
    fused.info()
    hourly = _process_data_consumo(fused)
    return fused, hourly
%memit d1, d2 = archive_month_total()  # profile the end-to-end archive pass
d2.info()
In [ ]:
In [8]:
# Debug stores kept for reference (the actual reads are commented out):
# pd.read_hdf('OLD_STORES/temp_debug_day_31.h5', 'rms').loc['2016-08-31']
'OLD_STORES/temp_debug_day_31.h5'
'OLD_STORES/temp_debug_month.h5'
#pd.read_hdf('OLD_STORES/temp_debug_month.h5', 'rms') #.loc['2016-08-31']
Out[8]:
In [21]:
from enerpi.base import CONFIG
#from enerpi.catalog import EnerpiCatalog
#from enerpi.pisampler import COL_TS, COLS_DATA
# Config: resolve the data paths from the enerpi INI configuration.
DATA_PATH = os.path.expanduser(CONFIG.get('ENERPI_DATA', 'DATA_PATH'))
HDF_STORE = os.path.join(DATA_PATH, CONFIG.get('ENERPI_DATA', 'HDF_STORE'))
HDF_STORE, DATA_PATH  # display both resolved paths
#catalog = init_catalog(base_path=DATA_PATH, raw_file=path_st)
#catalog = init_catalog(base_path=DATA_PATH, raw_file=path_st)
Out[21]:
In [ ]:
## OSError: [Errno 12] Cannot allocate memory
# IN archive_periodic(self, new_data=None, reload_index=False):
# month, old_stores = self._load_current_month(with_summary_data=False)
# new_data = pd.DataFrame(pd.concat([month, new_data], axis=0)).sort_index().groupby(level=0).first()
In [ ]:
In [ ]:
In [ ]:
In [ ]: