In [2]:
%matplotlib inline
import glob
import matplotlib.pyplot as plt
import pandas as pd
import os
from enerpi.api import enerpi_log, enerpi_data_catalog
from enerpi.base import timeit
from enerpi.catalog import process_data
from prettyprinting import *

%load_ext memory_profiler
os.chdir('/Users/uge/Desktop/bkp_enerpidata/')

raw = pd.read_hdf('enerpi_data.h5', 'rms')
print_red(raw.head())

today = pd.read_hdf('CURRENT_MONTH/TODAY.h5', 'rms')
today.tail()


The memory_profiler extension is already loaded. To reload it, use:
  %reload_ext memory_profiler
                                 power     noise   ref       ldr
ts                                                              
2016-09-30 23:47:08.822574  409.085131  0.007980  82.0  0.572637
2016-09-30 23:47:09.823459  409.990919  0.007897  83.0  0.572953
2016-09-30 23:47:10.834189  405.138188  0.007908  84.0  0.572625
2016-09-30 23:47:11.839873  398.097771  0.007941  83.0  0.571583
2016-09-30 23:47:12.839493  390.990458  0.007993  83.0  0.572979
Out[2]:
                                 power     noise  ref  ldr  high_delta  execution
ts                                                                               
2016-09-30 23:47:03.784795  415.364471  0.008150   84  574       False      False
2016-09-30 23:47:04.795868  411.964203  0.008085   84  574       False      False
2016-09-30 23:47:05.797008  419.902405  0.008139   83  574       False      False
2016-09-30 23:47:06.808354  407.850464  0.008171   84  574       False      False
2016-09-30 23:47:07.818927  398.742889  0.008107   84  574       False      False

In [2]:
log = enerpi_log()


                 msg    temp  debug_send  no_red
exec tipo                                       
0    INFO          7       0           0       0
     WARNING       1       0           0       1
1    DEBUG     66350   65800           0       0
     INFO         11       0           0       0
     WARNING       1       0           0       1
2    DEBUG     53834   53243           0       0
     INFO         11       0           0       0
     WARNING       1       0           0       1
3    DEBUG    700514  694649           0       0
     INFO         36       0           0       0
     WARNING      13       0           0      13
                     tipo                                                msg  \
ts                                                                             
2016-09-02 14:01:37  INFO                   ︎⚡⚡ ︎ENERPI AC CURRENT SENSOR ⚡⚡   
2016-09-02 14:01:37  INFO                                   Iniciando DAEMON   
2016-09-02 14:01:54  INFO                   ︎⚡⚡ ︎ENERPI AC CURRENT SENSOR ⚡⚡   
2016-09-02 14:01:54  INFO                                   Iniciando DAEMON   
2016-09-02 14:01:54  INFO                       ENERPI Logger daemon started   
2016-09-02 14:01:54  INFO                   ︎⚡⚡ ︎ENERPI AC CURRENT SENSOR ⚡⚡   
2016-09-02 14:01:54  INFO                   ︎⚡⚡ ︎ENERPI AC CURRENT SENSOR ⚡⚡   
2016-09-02 14:01:54  INFO              Init ENERPI logging & broadcasting...   
2016-09-02 15:02:24  INFO  ** ARCHIVE MONTH: ['CURRENT_MONTH/TODAY.h5', '...   
2016-09-03 00:06:37  INFO                                     ** ARCHIVE DAY   
2016-09-04 00:18:06  INFO                                     ** ARCHIVE DAY   
2016-09-04 21:18:20  INFO                   ︎⚡⚡ ︎ENERPI AC CURRENT SENSOR ⚡⚡   
2016-09-04 21:18:20  INFO                                   Iniciando DAEMON   
2016-09-04 21:20:30  INFO                   ︎⚡⚡ ︎ENERPI AC CURRENT SENSOR ⚡⚡   
2016-09-04 21:20:30  INFO                                   Iniciando DAEMON   
2016-09-04 21:20:30  INFO                       ENERPI Logger daemon started   
2016-09-04 21:20:30  INFO                   ︎⚡⚡ ︎ENERPI AC CURRENT SENSOR ⚡⚡   
2016-09-04 21:20:30  INFO                   ︎⚡⚡ ︎ENERPI AC CURRENT SENSOR ⚡⚡   
2016-09-04 21:20:30  INFO              Init ENERPI logging & broadcasting...   
2016-09-05 00:21:57  INFO                                     ** ARCHIVE DAY   
2016-09-06 00:33:40  INFO                                     ** ARCHIVE DAY   
2016-09-06 18:04:39  INFO                   ︎⚡⚡ ︎ENERPI AC CURRENT SENSOR ⚡⚡   
2016-09-06 18:04:39  INFO                                   Iniciando DAEMON   
2016-09-06 18:06:07  INFO                   ︎⚡⚡ ︎ENERPI AC CURRENT SENSOR ⚡⚡   
2016-09-06 18:07:57  INFO                   ︎⚡⚡ ︎ENERPI AC CURRENT SENSOR ⚡⚡   
2016-09-06 18:07:57  INFO                                   Iniciando DAEMON   
2016-09-06 18:07:57  INFO                       ENERPI Logger daemon started   
2016-09-06 18:07:57  INFO                   ︎⚡⚡ ︎ENERPI AC CURRENT SENSOR ⚡⚡   
2016-09-06 18:07:57  INFO                   ︎⚡⚡ ︎ENERPI AC CURRENT SENSOR ⚡⚡   
2016-09-06 18:07:57  INFO              Init ENERPI logging & broadcasting...   
...                   ...                                                ...   
2016-09-12 00:57:09  INFO                                     ** ARCHIVE DAY   
2016-09-13 00:05:51  INFO                                     ** ARCHIVE DAY   
2016-09-14 00:14:57  INFO                                     ** ARCHIVE DAY   
2016-09-15 00:23:55  INFO                                     ** ARCHIVE DAY   
2016-09-16 00:33:02  INFO                                     ** ARCHIVE DAY   
2016-09-17 00:41:59  INFO                                     ** ARCHIVE DAY   
2016-09-18 00:51:03  INFO                                     ** ARCHIVE DAY   
2016-09-19 01:00:08  INFO                                     ** ARCHIVE DAY   
2016-09-20 00:08:47  INFO                                     ** ARCHIVE DAY   
2016-09-21 00:17:52  INFO                                     ** ARCHIVE DAY   
2016-09-22 00:26:53  INFO                                     ** ARCHIVE DAY   
2016-09-23 00:35:51  INFO                                     ** ARCHIVE DAY   
2016-09-24 00:44:51  INFO                                     ** ARCHIVE DAY   
2016-09-25 00:53:46  INFO                                     ** ARCHIVE DAY   
2016-09-26 00:02:25  INFO                                     ** ARCHIVE DAY   
2016-09-27 00:11:28  INFO                                     ** ARCHIVE DAY   
2016-09-28 00:20:28  INFO                                     ** ARCHIVE DAY   
2016-09-29 00:29:26  INFO                                     ** ARCHIVE DAY   
2016-09-30 00:38:27  INFO                                     ** ARCHIVE DAY   
2016-10-01 00:47:40  INFO  ** ARCHIVE MONTH: ['CURRENT_MONTH/DATA_2016_09...   
2016-10-01 01:48:14  INFO  ** ARCHIVE MONTH: ['CURRENT_MONTH/DATA_2016_09...   
2016-10-01 02:48:50  INFO  ** ARCHIVE MONTH: ['CURRENT_MONTH/DATA_2016_09...   
2016-10-01 03:49:28  INFO  ** ARCHIVE MONTH: ['CURRENT_MONTH/DATA_2016_09...   
2016-10-01 04:50:03  INFO  ** ARCHIVE MONTH: ['CURRENT_MONTH/DATA_2016_09...   
2016-10-01 05:50:38  INFO  ** ARCHIVE MONTH: ['CURRENT_MONTH/DATA_2016_09...   
2016-10-01 06:51:13  INFO  ** ARCHIVE MONTH: ['CURRENT_MONTH/DATA_2016_09...   
2016-10-01 07:51:51  INFO  ** ARCHIVE MONTH: ['CURRENT_MONTH/DATA_2016_09...   
2016-10-01 08:52:24  INFO  ** ARCHIVE MONTH: ['CURRENT_MONTH/DATA_2016_09...   
2016-10-01 09:52:57  INFO  ** ARCHIVE MONTH: ['CURRENT_MONTH/DATA_2016_09...   
2016-10-01 10:53:31  INFO  ** ARCHIVE MONTH: ['CURRENT_MONTH/DATA_2016_09...   

                     exec  
ts                         
2016-09-02 14:01:37     0  
2016-09-02 14:01:37     0  
2016-09-02 14:01:54     0  
2016-09-02 14:01:54     0  
2016-09-02 14:01:54     0  
2016-09-02 14:01:54     0  
2016-09-02 14:01:54     0  
2016-09-02 14:01:54     1  
2016-09-02 15:02:24     1  
2016-09-03 00:06:37     1  
2016-09-04 00:18:06     1  
2016-09-04 21:18:20     1  
2016-09-04 21:18:20     1  
2016-09-04 21:20:30     1  
2016-09-04 21:20:30     1  
2016-09-04 21:20:30     1  
2016-09-04 21:20:30     1  
2016-09-04 21:20:30     1  
2016-09-04 21:20:30     2  
2016-09-05 00:21:57     2  
2016-09-06 00:33:40     2  
2016-09-06 18:04:39     2  
2016-09-06 18:04:39     2  
2016-09-06 18:06:07     2  
2016-09-06 18:07:57     2  
2016-09-06 18:07:57     2  
2016-09-06 18:07:57     2  
2016-09-06 18:07:57     2  
2016-09-06 18:07:57     2  
2016-09-06 18:07:57     3  
...                   ...  
2016-09-12 00:57:09     3  
2016-09-13 00:05:51     3  
2016-09-14 00:14:57     3  
2016-09-15 00:23:55     3  
2016-09-16 00:33:02     3  
2016-09-17 00:41:59     3  
2016-09-18 00:51:03     3  
2016-09-19 01:00:08     3  
2016-09-20 00:08:47     3  
2016-09-21 00:17:52     3  
2016-09-22 00:26:53     3  
2016-09-23 00:35:51     3  
2016-09-24 00:44:51     3  
2016-09-25 00:53:46     3  
2016-09-26 00:02:25     3  
2016-09-27 00:11:28     3  
2016-09-28 00:20:28     3  
2016-09-29 00:29:26     3  
2016-09-30 00:38:27     3  
2016-10-01 00:47:40     3  
2016-10-01 01:48:14     3  
2016-10-01 02:48:50     3  
2016-10-01 03:49:28     3  
2016-10-01 04:50:03     3  
2016-10-01 05:50:38     3  
2016-10-01 06:51:13     3  
2016-10-01 07:51:51     3  
2016-10-01 08:52:24     3  
2016-10-01 09:52:57     3  
2016-10-01 10:53:31     3  

[65 rows x 3 columns]
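
The summary printed above can presumably be reproduced from the returned frame with a plain groupby; a minimal sketch, assuming the columns shown in the later outputs (tipo, msg, temp, debug_send, no_red, exec):

In [ ]:
# Non-null counts per execution run ('exec') and per log level ('tipo'),
# matching the layout of the table printed by enerpi_log() above.
print(log.groupby(['exec', 'tipo']).count())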

In [6]:
log_last = log.loc['2016-10-01'].dropna(how='all', axis=1).drop(['temp', 'exec'], axis=1)
log_last = log_last[~log_last.msg.str.startswith('Tªs -->')]
log_last.tail(10)


Out[6]:
                      tipo                                                msg
ts                                                                           
2016-10-01 00:47:30  DEBUG                    Size Store: 782.2 KB, 3600 rows
2016-10-01 00:47:30  DEBUG                    ARCHIVE NEW RAW DATA: (3600, 4)
2016-10-01 00:47:40  DEBUG                          Current month data stats:
2016-10-01 00:47:40   INFO  ** ARCHIVE MONTH: ['CURRENT_MONTH/DATA_2016_09...
2016-10-01 01:48:04  DEBUG                   Size Store: 1561.7 KB, 7200 rows
2016-10-01 01:48:04  DEBUG                    ARCHIVE NEW RAW DATA: (7200, 4)
2016-10-01 01:48:14  DEBUG                          Current month data stats:
2016-10-01 01:48:14   INFO  ** ARCHIVE MONTH: ['CURRENT_MONTH/DATA_2016_09...
2016-10-01 02:48:39  DEBUG                  Size Store: 2345.6 KB, 10800 rows
2016-10-01 02:48:39  DEBUG                   ARCHIVE NEW RAW DATA: (10800, 4)
2016-10-01 02:48:50  DEBUG                          Current month data stats:
2016-10-01 02:48:50   INFO  ** ARCHIVE MONTH: ['CURRENT_MONTH/DATA_2016_09...
2016-10-01 03:49:17  DEBUG                  Size Store: 3169.9 KB, 14400 rows
2016-10-01 03:49:18  DEBUG                   ARCHIVE NEW RAW DATA: (14400, 4)
2016-10-01 03:49:28  DEBUG                          Current month data stats:
2016-10-01 03:49:28   INFO  ** ARCHIVE MONTH: ['CURRENT_MONTH/DATA_2016_09...
2016-10-01 04:49:52  DEBUG                  Size Store: 3969.6 KB, 18000 rows
2016-10-01 04:49:52  DEBUG                   ARCHIVE NEW RAW DATA: (18000, 4)
2016-10-01 04:50:03  DEBUG                          Current month data stats:
2016-10-01 04:50:03   INFO  ** ARCHIVE MONTH: ['CURRENT_MONTH/DATA_2016_09...
2016-10-01 05:50:27  DEBUG                  Size Store: 4733.1 KB, 21600 rows
2016-10-01 05:50:27  DEBUG                   ARCHIVE NEW RAW DATA: (21600, 4)
2016-10-01 05:50:38  DEBUG                          Current month data stats:
2016-10-01 05:50:38   INFO  ** ARCHIVE MONTH: ['CURRENT_MONTH/DATA_2016_09...
2016-10-01 06:51:02  DEBUG                  Size Store: 5481.5 KB, 25200 rows
2016-10-01 06:51:02  DEBUG                   ARCHIVE NEW RAW DATA: (25200, 4)
2016-10-01 06:51:13  DEBUG                          Current month data stats:
2016-10-01 06:51:13   INFO  ** ARCHIVE MONTH: ['CURRENT_MONTH/DATA_2016_09...
2016-10-01 07:51:40  DEBUG                  Size Store: 6256.9 KB, 28800 rows
2016-10-01 07:51:40  DEBUG                   ARCHIVE NEW RAW DATA: (28800, 4)
2016-10-01 07:51:51  DEBUG                          Current month data stats:
2016-10-01 07:51:51   INFO  ** ARCHIVE MONTH: ['CURRENT_MONTH/DATA_2016_09...
2016-10-01 08:52:13  DEBUG                  Size Store: 7067.5 KB, 32400 rows
2016-10-01 08:52:13  DEBUG                   ARCHIVE NEW RAW DATA: (32400, 4)
2016-10-01 08:52:24  DEBUG                          Current month data stats:
2016-10-01 08:52:24   INFO  ** ARCHIVE MONTH: ['CURRENT_MONTH/DATA_2016_09...
2016-10-01 09:52:46  DEBUG                  Size Store: 7872.8 KB, 36000 rows
2016-10-01 09:52:46  DEBUG                   ARCHIVE NEW RAW DATA: (36000, 4)
2016-10-01 09:52:57  DEBUG                          Current month data stats:
2016-10-01 09:52:57   INFO  ** ARCHIVE MONTH: ['CURRENT_MONTH/DATA_2016_09...
2016-10-01 10:53:20  DEBUG                  Size Store: 8617.9 KB, 39600 rows
2016-10-01 10:53:20  DEBUG                   ARCHIVE NEW RAW DATA: (39600, 4)
2016-10-01 10:53:31  DEBUG                          Current month data stats:
2016-10-01 10:53:31   INFO  ** ARCHIVE MONTH: ['CURRENT_MONTH/DATA_2016_09...

In [10]:
log_last[log_last.tipo == 'INFO'].msg.tolist()[0]


Out[10]:
"** ARCHIVE MONTH: ['CURRENT_MONTH/DATA_2016_09_DAY_26.h5', 'CURRENT_MONTH/DATA_2016_09_DAY_12.h5', 'CURRENT_MONTH/TODAY.h5', 'CURRENT_MONTH/DATA_2016_09_DAY_16.h5', 'CURRENT_MONTH/DATA_2016_09_DAY_06.h5', 'CURRENT_MONTH/DATA_2016_09_DAY_07.h5', 'CURRENT_MONTH/DATA_2016_09_DAY_17.h5', 'CURRENT_MONTH/DATA_2016_09_DAY_04.h5', 'CURRENT_MONTH/DATA_2016_09_DAY_18.h5', 'CURRENT_MONTH/DATA_2016_09_DAY_25.h5', 'CURRENT_MONTH/DATA_2016_09_DAY_21.h5', 'CURRENT_MONTH/DATA_2016_09_DAY_29.h5', 'CURRENT_MONTH/DATA_2016_09_DAY_24.h5', 'CURRENT_MONTH/DATA_2016_09_DAY_20.h5', 'CURRENT_MONTH/DATA_2016_09_DAY_01.h5', 'CURRENT_MONTH/DATA_2016_09_DAY_02.h5', 'CURRENT_MONTH/DATA_2016_09_DAY_19.h5', 'CURRENT_MONTH/DATA_2016_09_DAY_05.h5', 'CURRENT_MONTH/DATA_2016_09_DAY_03.h5', 'CURRENT_MONTH/DATA_2016_09_DAY_11.h5', 'CURRENT_MONTH/DATA_2016_09_DAY_14.h5', 'CURRENT_MONTH/DATA_2016_09_DAY_13.h5', 'CURRENT_MONTH/DATA_2016_09_DAY_15.h5', 'CURRENT_MONTH/DATA_2016_09_DAY_10.h5', 'CURRENT_MONTH/DATA_2016_09_DAY_09.h5', 'CURRENT_MONTH/DATA_2016_09_DAY_28.h5', 'CURRENT_MONTH/DATA_2016_09_DAY_27.h5', 'CURRENT_MONTH/DATA_2016_09_DAY_23.h5', 'CURRENT_MONTH/DATA_2016_09_DAY_22.h5', 'CURRENT_MONTH/DATA_2016_09_DAY_08.h5']"

In [16]:
log.loc['2016-10-01 1:48':'2016-10-01 3:55']


Out[16]:
                      tipo                                                 msg  temp debug_send no_red  exec
ts                                                                                                          
2016-10-01 01:48:00  DEBUG                              Tªs --> 62.8 / 62.8 ºC  True        NaN    NaN     3
2016-10-01 01:48:03  DEBUG                              Tªs --> 63.4 / 63.4 ºC  True        NaN    NaN     3
2016-10-01 01:48:04  DEBUG                    Size Store: 1561.7 KB, 7200 rows   NaN        NaN    NaN     3
2016-10-01 01:48:04  DEBUG                     ARCHIVE NEW RAW DATA: (7200, 4)   NaN        NaN    NaN     3
2016-10-01 01:48:06  DEBUG                              Tªs --> 64.5 / 64.5 ºC  True        NaN    NaN     3
2016-10-01 01:48:09  DEBUG                              Tªs --> 66.6 / 66.1 ºC  True        NaN    NaN     3
2016-10-01 01:48:12  DEBUG                              Tªs --> 66.6 / 66.6 ºC  True        NaN    NaN     3
2016-10-01 01:48:14  DEBUG                           Current month data stats:   NaN        NaN    NaN     3
2016-10-01 01:48:14   INFO  ** ARCHIVE MONTH: ['CURRENT_MONTH/DATA_2016_09...    NaN        NaN    NaN     3
2016-10-01 01:48:15  DEBUG                              Tªs --> 67.1 / 67.1 ºC  True        NaN    NaN     3
2016-10-01 01:48:18  DEBUG                              Tªs --> 67.1 / 67.7 ºC  True        NaN    NaN     3
2016-10-01 02:48:39  DEBUG                   Size Store: 2345.6 KB, 10800 rows   NaN        NaN    NaN     3
2016-10-01 02:48:39  DEBUG                    ARCHIVE NEW RAW DATA: (10800, 4)   NaN        NaN    NaN     3
2016-10-01 02:48:50  DEBUG                           Current month data stats:   NaN        NaN    NaN     3
2016-10-01 02:48:50   INFO  ** ARCHIVE MONTH: ['CURRENT_MONTH/DATA_2016_09...    NaN        NaN    NaN     3
2016-10-01 03:49:17  DEBUG                   Size Store: 3169.9 KB, 14400 rows   NaN        NaN    NaN     3
2016-10-01 03:49:18  DEBUG                    ARCHIVE NEW RAW DATA: (14400, 4)   NaN        NaN    NaN     3
2016-10-01 03:49:28  DEBUG                           Current month data stats:   NaN        NaN    NaN     3
2016-10-01 03:49:28   INFO  ** ARCHIVE MONTH: ['CURRENT_MONTH/DATA_2016_09...    NaN        NaN    NaN     3

In [27]:
log[(log.tipo == 'DEBUG') & log.msg.str.startswith('TIMEIT')].tail(7)


Out[27]:
                      tipo                                        msg  temp debug_send no_red  exec
ts                                                                                                 
2016-09-30 22:46:46  DEBUG           TIMEIT _make_index TOOK: 0.710 s   NaN        NaN    NaN     3
2016-09-30 22:46:46  DEBUG      TIMEIT archive_periodic TOOK: 1.540 s   NaN        NaN    NaN     3
2016-09-30 22:46:46  DEBUG        TIMEIT update_catalog TOOK: 1.547 s   NaN        NaN    NaN     3
2016-09-30 23:47:08  DEBUG      TIMEIT _distribute_data TOOK: 0.670 s   NaN        NaN    NaN     3
2016-09-30 23:47:09  DEBUG           TIMEIT _make_index TOOK: 0.728 s   NaN        NaN    NaN     3
2016-09-30 23:47:09  DEBUG      TIMEIT archive_periodic TOOK: 1.584 s   NaN        NaN    NaN     3
2016-09-30 23:47:09  DEBUG        TIMEIT update_catalog TOOK: 1.591 s   NaN        NaN    NaN     3

In [3]:
base_path = '/Users/uge/Desktop/bkp_enerpidata/'
key_raw = 'rms'
CURRENT_MONTH = 'CURRENT_MONTH'
STORE_EXT = '.h5'


@timeit('_load_current_month', verbose=True)
def _load_current_month():
    # base_path already ends with a separator, so use relpath to get 'CURRENT_MONTH/...' names
    days_cm = sorted([os.path.relpath(p, base_path)
                      for p in glob.glob(os.path.join(base_path, CURRENT_MONTH, '*{}'.format(STORE_EXT)))])
    df = pd.DataFrame(pd.concat([pd.read_hdf(p, key=key_raw) for p in days_cm], axis=0)) #.sort_index()
    #dfs = [pd.read_hdf(p, key=key_raw) for p in days_cm]
    #df = pd.read_hdf(days_cm[0], key=key_raw)
    #for p in days_cm[1:]:
        #df = df.append(pd.read_hdf(p, key=key_raw))
    #    df = pd.concat([df, pd.read_hdf(p, key=key_raw)], axis=0)
    print_red('Current month data stats: {} rows, from {:%c} to {:%c}, index: unique={}, monotonic={}'
              .format(df.shape[0], df.index[0], df.index[-1], df.index.is_unique, df.index.is_monotonic_increasing))
    return df, days_cm


%memit df_month, days_month = _load_current_month()
df_month.info()


Current month data stats: 2566399 rows, from Thu Sep  1 00:00:00 2016 to Fri Sep 30 23:47:07 2016, index: unique=True, monotonic=True
_load_current_month TOOK: 0.889 s
peak memory: 333.04 MiB, increment: 221.25 MiB
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2566399 entries, 2016-09-01 00:00:00.977816 to 2016-09-30 23:47:07.818927
Data columns (total 6 columns):
power         float32
noise         float32
ref           int16
ldr           int16
high_delta    bool
execution     bool
dtypes: bool(2), float32(2), int16(2)
memory usage: 53.8 MB

In [7]:
df_month.drop(['high_delta', 'execution'], axis=1).info()


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2566399 entries, 2016-09-01 00:00:00.977816 to 2016-09-30 23:47:07.818927
Data columns (total 4 columns):
power    float32
noise    float32
ref      int16
ldr      int16
dtypes: float32(2), int16(2)
memory usage: 49.0 MB

In [4]:
raw.info()


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 46020 entries, 2016-09-30 23:47:08.822574 to 2016-10-01 12:41:15.155833
Data columns (total 4 columns):
power    46020 non-null float64
noise    46020 non-null float64
ref      46020 non-null float64
ldr      46020 non-null float64
dtypes: float64(4)
memory usage: 1.8 MB

In [5]:
def _compress_data(data, verbose=False):
    """Downcast the raw frame: power/noise to float32, ref to int16, ldr scaled x1000 and rounded to int16."""
    if data is not None:
        if verbose:
            data.info()
        if not data.empty:
            data = data.copy().astype('float32')
            data['ref'] = data['ref'].astype('int16')
            # data['ldr'] *= 1000.
            data['ldr'] = pd.Series(1000. * data['ldr']).round(0).astype('int16')
            if verbose:
                data.info()
    return data


%memit new_data = process_data(_compress_data(raw))
new_data.info()


peak memory: 237.83 MiB, increment: -5.59 MiB
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 46020 entries, 2016-09-30 23:47:08.822574 to 2016-10-01 12:41:15.155833
Data columns (total 6 columns):
power         46020 non-null float32
noise         46020 non-null float32
ref           46020 non-null int16
ldr           46020 non-null int16
high_delta    46020 non-null bool
execution     46020 non-null bool
dtypes: bool(2), float32(2), int16(2)
memory usage: 988.7 KB

In [48]:
@timeit('_archive_month', verbose=True)
def _archive_month(new_data):
    month, _old_stores = _load_current_month()
    print_info('MONTH DATA: {}'.format(month.shape))
    new_data = pd.DataFrame(pd.concat([month, new_data], axis=0)).sort_index().groupby(level=0).first()
    print_ok('NEW FUSION DATA: {} --> all_dates={}, unique={}, monotonic_incr={}'
             .format(new_data.shape, new_data.index.is_all_dates, new_data.index.is_unique, new_data.index.is_monotonic_increasing))
    return new_data


%memit all_data = _archive_month(new_data)
all_data.info()


Current month data stats: 2566399 rows, from Thu Sep  1 00:00:00 2016 to Fri Sep 30 23:47:07 2016, index: unique=True, monotonic=True
_load_current_month TOOK: 0.883 s
MONTH DATA: (2566399, 6)
NEW FUSION DATA: (2612419, 6) --> all_dates=True, unique=True, monotonic_incr=True
_archive_month TOOK: 2.342 s
peak memory: 1616.98 MiB, increment: 754.07 MiB
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2612419 entries, 2016-09-01 00:00:00.977816 to 2016-10-01 12:41:15.155833
Data columns (total 6 columns):
power         float32
noise         float32
ref           int16
ldr           int16
high_delta    bool
execution     bool
dtypes: bool(2), float32(2), int16(2)
memory usage: 54.8 MB

In [6]:
@timeit('_archive_month2', verbose=True)
def _archive_month2(new_data):
    month, _old_stores = _load_current_month()
    print_info('MONTH DATA: {}'.format(month.shape))
    #new_data = month.append(new_data)
    #new_data = pd.DataFrame(pd.concat([month, new_data], axis=0)).sort_index().groupby(level=0).first()
    new_data = pd.DataFrame(pd.concat([month, new_data], axis=0))
    print_ok('NEW FUSION DATA: {} --> all_dates={}, unique={}, monotonic_incr={}'
             .format(new_data.shape, new_data.index.is_all_dates, new_data.index.is_unique, new_data.index.is_monotonic_increasing))
    return new_data


%memit all_data = _archive_month2(new_data)
all_data.info()


Current month data stats: 2566399 rows, from Thu Sep  1 00:00:00 2016 to Fri Sep 30 23:47:07 2016, index: unique=True, monotonic=True
_load_current_month TOOK: 0.881 s
MONTH DATA: (2566399, 6)
NEW FUSION DATA: (2612419, 6) --> all_dates=True, unique=True, monotonic_incr=True
_archive_month2 TOOK: 1.038 s
peak memory: 573.82 MiB, increment: 337.59 MiB
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2612419 entries, 2016-09-01 00:00:00.977816 to 2016-10-01 12:41:15.155833
Data columns (total 6 columns):
power         float32
noise         float32
ref           int16
ldr           int16
high_delta    bool
execution     bool
dtypes: bool(2), float32(2), int16(2)
memory usage: 54.8 MB

In [27]:
%memit df_month.append(new_data)


peak memory: 451.83 MiB, increment: 33.58 MiB

In [21]:
DELTA_MAX_CALC_CONSUMO_SAMPLE_BFILL = 120

def _process_data(data, append_consumo=False):
    consumo = None
    if data is not None and not data.empty and (append_consumo or ('high_delta' not in data.columns)):
        data = data.copy()
        data['delta'] = pd.Series(data.index).diff().fillna(method='bfill').dt.total_seconds().values
        data['high_delta'] = False
        data['execution'] = False
        data.loc[data['delta'] > 3, 'high_delta'] = True
        data.loc[data['delta'] > 60, 'execution'] = 1
        data['delta_consumo'] = data['delta'].apply(lambda x: min(x, DELTA_MAX_CALC_CONSUMO_SAMPLE_BFILL))
        data['Wh'] = data.power * data.delta_consumo / 3600
        if append_consumo:
            resampler = data[['power', 'Wh', 'delta_consumo', 'high_delta', 'execution']].resample('1h', label='left')
            consumo = pd.DataFrame(resampler['Wh'].sum().rename('kWh')).fillna(0.).astype('float32')
            consumo /= 1000.
            consumo['t_ref'] = pd.Series(resampler['delta_consumo'].sum() / 3600).astype('float32')
            consumo['n_jump'] = resampler['high_delta'].sum().fillna(0).astype('int16')
            consumo['n_exec'] = resampler['execution'].sum().fillna(0).astype('int32')
            consumo['p_max'] = resampler['power'].max().round(0).astype('float16')
            consumo['p_mean'] = resampler['power'].mean().round(0).astype('float16')
            consumo['p_min'] = resampler['power'].min().round(0).astype('float16')
        data['high_delta'] = data['high_delta'].astype(bool)
        data['execution'] = data['execution'].astype(bool)
        data.drop(['delta', 'delta_consumo', 'Wh'], axis=1, inplace=True)
        if append_consumo:
            return data, consumo
        return data
    elif append_consumo:
        return data, None
    return data


def _process_data_consumo(data):
    consumo = None
    if data is not None and not data.empty:
        delta = pd.DataFrame(pd.Series(data.index, index=data.index, name='delta_consumo'
                                      ).diff().fillna(method='bfill').dt.total_seconds().apply(
                lambda x: min(x, DELTA_MAX_CALC_CONSUMO_SAMPLE_BFILL)))
        delta['Wh'] = data.power * delta.delta_consumo / 3600
        
        resampler_data = data[['power', 'high_delta', 'execution']].resample('1h', label='left')
        resampler_delta = delta[['Wh', 'delta_consumo']].resample('1h', label='left')
        
        consumo = pd.DataFrame(resampler_delta['Wh'].sum().rename('kWh')).fillna(0.).astype('float32')
        consumo /= 1000.
        consumo['t_ref'] = pd.Series(resampler_delta['delta_consumo'].sum() / 3600).astype('float32')
        consumo['n_jump'] = resampler_data['high_delta'].sum().fillna(0).astype('int16')
        consumo['n_exec'] = resampler_data['execution'].sum().fillna(0).astype('int32')
        consumo['p_max'] = resampler_data['power'].max().round(0).astype('float16')
        consumo['p_mean'] = resampler_data['power'].mean().round(0).astype('float16')
        consumo['p_min'] = resampler_data['power'].min().round(0).astype('float16')
        return consumo
    return None


%memit consumo = _process_data_consumo(all_data)
%memit all_data, consumo_1 = _process_data(all_data, append_consumo=True)


peak memory: 735.18 MiB, increment: 63.73 MiB
peak memory: 829.14 MiB, increment: 157.69 MiB
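
As a quick sanity check of the Wh/kWh computation used above: on a toy frame (not notebook data) with one hour of 1-second samples at a constant 360 W, the hourly integration should give 360 Wh = 0.36 kWh.

In [ ]:
import numpy as np
import pandas as pd

# One hour of 1-second samples at a constant 360 W
idx = pd.date_range('2016-10-01', periods=3600, freq='1s')
toy = pd.DataFrame({'power': np.full(3600, 360.)}, index=idx)

# Same scheme as _process_data: Wh per sample = power * delta_seconds / 3600
# (the first delta is NaN and gets backfilled)
delta = pd.Series(toy.index).diff().fillna(method='bfill').dt.total_seconds().values
toy['Wh'] = toy.power * delta / 3600

# 3600 samples * 0.1 Wh = 360 Wh = 0.36 kWh
print(toy['Wh'].resample('1h', label='left').sum() / 1000.)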

In [22]:
@timeit('archive_month_total', verbose=True)
def archive_month_total():
    raw = pd.read_hdf('enerpi_data.h5', 'rms')
    new_data = process_data(_compress_data(raw))
    new_data.info()
    all_data = _archive_month2(new_data)
    all_data.info()
    all_consumo = _process_data_consumo(all_data)
    return all_data, all_consumo


%memit d1, d2 = archive_month_total()
d2.info()


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 46020 entries, 2016-09-30 23:47:08.822574 to 2016-10-01 12:41:15.155833
Data columns (total 6 columns):
power         46020 non-null float32
noise         46020 non-null float32
ref           46020 non-null int16
ldr           46020 non-null int16
high_delta    46020 non-null bool
execution     46020 non-null bool
dtypes: bool(2), float32(2), int16(2)
memory usage: 988.7 KB
Current month data stats: 2566399 rows, from Thu Sep  1 00:00:00 2016 to Fri Sep 30 23:47:07 2016, index: unique=True, monotonic=True
_load_current_month TOOK: 1.188 s
MONTH DATA: (2566399, 6)
NEW FUSION DATA: (2612419, 6) --> all_dates=True, unique=True, monotonic_incr=True
_archive_month2 TOOK: 1.363 s
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2612419 entries, 2016-09-01 00:00:00.977816 to 2016-10-01 12:41:15.155833
Data columns (total 6 columns):
power         float32
noise         float32
ref           int16
ldr           int16
high_delta    bool
execution     bool
dtypes: bool(2), float32(2), int16(2)
memory usage: 54.8 MB
archive_month_total TOOK: 3.350 s
peak memory: 741.16 MiB, increment: 523.66 MiB
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 733 entries, 2016-09-01 00:00:00 to 2016-10-01 12:00:00
Freq: H
Data columns (total 7 columns):
kWh       733 non-null float32
t_ref     732 non-null float32
n_jump    733 non-null int16
n_exec    733 non-null int32
p_max     732 non-null float16
p_mean    732 non-null float16
p_min     732 non-null float16
dtypes: float16(3), float32(2), int16(1), int32(1)
memory usage: 20.0 KB

In [8]:
# pd.read_hdf('OLD_STORES/temp_debug_day_31.h5', 'rms').loc['2016-08-31']

'OLD_STORES/temp_debug_day_31.h5'
'OLD_STORES/temp_debug_month.h5'
#pd.read_hdf('OLD_STORES/temp_debug_month.h5', 'rms') #.loc['2016-08-31']


Out[8]:
'OLD_STORES/temp_debug_month.h5'

In [21]:
from enerpi.base import CONFIG
#from enerpi.catalog import EnerpiCatalog
#from enerpi.pisampler import COL_TS, COLS_DATA

# Config:
DATA_PATH = os.path.expanduser(CONFIG.get('ENERPI_DATA', 'DATA_PATH'))
HDF_STORE = os.path.join(DATA_PATH, CONFIG.get('ENERPI_DATA', 'HDF_STORE'))

HDF_STORE, DATA_PATH
#catalog = init_catalog(base_path=DATA_PATH, raw_file=path_st)


Out[21]:
('/Users/uge/ENERPIDATA/enerpi_data.h5', '/Users/uge/ENERPIDATA')

In [ ]:
## OSError: [Errno 12] Cannot allocate memory

# IN archive_periodic(self, new_data=None, reload_index=False):

# month, old_stores = self._load_current_month(with_summary_data=False)
# new_data = pd.DataFrame(pd.concat([month, new_data], axis=0)).sort_index().groupby(level=0).first()
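
A possible mitigation suggested by the measurements above (a sketch only; _merge_new_block is a hypothetical helper, not part of enerpi): pay for the sort_index().groupby(level=0).first() deduplication only when the incoming block actually overlaps the stored month, and fall back to the cheap plain concat otherwise.

In [ ]:
import pandas as pd


def _merge_new_block(month, new_data):
    """Sketch: pick the cheap concat path when the blocks do not overlap."""
    if month is None or month.empty:
        return new_data
    if new_data.index[0] > month.index[-1]:
        # Non-overlapping, already-sorted blocks: plain concat suffices
        # (the _archive_month2 case measured above, ~338 MiB increment).
        return pd.concat([month, new_data], axis=0)
    # Overlapping blocks: full dedup + sort
    # (the _archive_month case, ~754 MiB increment).
    return (pd.concat([month, new_data], axis=0)
            .sort_index().groupby(level=0).first())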
