In [183]:
import json


SENSORES = ['DHT22', 'DHT11']
PIN_DATA_DHT11 = 17
# TODO Leer de disco o identificar configuración:
PIN_DATA_DHT22 = 22

pinout = dict(zip(SENSORES, [22, 17]))
pinout


Out[183]:
{'DHT11': 17, 'DHT22': 22}

In [2]:
json.dumps(pinout)


Out[2]:
'{"DHT11": 17, "DHT22": 22}'

In [184]:
import os
import pandas as pd
import glob


os.chdir('/Users/uge/Dropbox/PYTHON/PYPROJECTS/enerpi/enerpi/DATA/')
glob.glob('*')


Out[184]:
['CURRENT_MONTH',
 'D1_potencia_consumo_ldr_rm60.png',
 'D1_potencia_consumo_ldr_rs10s.pdf',
 'D1_potencia_consumo_ldr_rs1m.png',
 'D2_potencia_consumo_ldr_rm60.png',
 'D3_potencia_consumo_ldr.png',
 'data_2016_08.h5_bad',
 'data_juan_iborra_3d.h5',
 'data_juan_iborra_3d_bzip2.h5',
 'data_juan_iborra_3d_default.h5',
 'data_juan_iborra_3d_default_big.h5',
 'data_juan_iborra_3d_zlib.h5',
 'debug_rpitemps_.csv',
 'enerpi.log',
 'enerpi_2.log',
 'enerpi_data.h5',
 'enerpi_log_D3.log',
 'enerpi_other.log',
 'enerpi_potencia_consumo_ldr_20160802_2312_20160803_1925.png',
 'enerpi_potencia_consumo_ldr_20160803_2032_20160804_2359.pdf',
 'enerpi_potencia_consumo_ldr_20160804_2032_20160805_1323.png',
 'juan_iborra_1.h5',
 'juan_iborra_2.h5',
 'juan_iborra_3.h5',
 'log_debug.csv',
 'new.h5',
 'YEAR_2016']

In [7]:
pd.read_hdf('new.h5', 'rms', stop=1000)


---------------------------------------------------------------------------
HDF5ExtError                              Traceback (most recent call last)
<ipython-input-7-7c4e05a0a1b0> in <module>()
----> 1 pd.read_hdf('new.h5', 'rms', stop=1000)

/Users/uge/anaconda/envs/py35/lib/python3.5/site-packages/pandas/io/pytables.py in read_hdf(path_or_buf, key, **kwargs)
    328                                  'multiple datasets.')
    329             key = keys[0]
--> 330         return store.select(key, auto_close=auto_close, **kwargs)
    331     except:
    332         # if there is an error, close the store

/Users/uge/anaconda/envs/py35/lib/python3.5/site-packages/pandas/io/pytables.py in select(self, key, where, start, stop, columns, iterator, chunksize, auto_close, **kwargs)
    678                            chunksize=chunksize, auto_close=auto_close)
    679 
--> 680         return it.get_result()
    681 
    682     def select_as_coordinates(

/Users/uge/anaconda/envs/py35/lib/python3.5/site-packages/pandas/io/pytables.py in get_result(self, coordinates)
   1363         # directly return the result
   1364         results = self.func(self.start, self.stop, where)
-> 1365         self.close()
   1366         return results
   1367 

/Users/uge/anaconda/envs/py35/lib/python3.5/site-packages/pandas/io/pytables.py in close(self)
   1341     def close(self):
   1342         if self.auto_close:
-> 1343             self.store.close()
   1344 
   1345     def get_result(self, coordinates=False):

/Users/uge/anaconda/envs/py35/lib/python3.5/site-packages/pandas/io/pytables.py in close(self)
    581         """
    582         if self._handle is not None:
--> 583             self._handle.close()
    584         self._handle = None
    585 

/Users/uge/anaconda/envs/py35/lib/python3.5/site-packages/tables/file.py in close(self)
   2723 
   2724         # Close all loaded nodes.
-> 2725         self.root._f_close()
   2726 
   2727         self._node_manager.shutdown()

/Users/uge/anaconda/envs/py35/lib/python3.5/site-packages/tables/group.py in _f_close(self)
    937         # this is not an explicit close issued by the user.
    938         if not (self._v__deleting or self._v_objectid is None):
--> 939             self._g_close_descendents()
    940 
    941         # When all the descendents have been closed, close this group.

/Users/uge/anaconda/envs/py35/lib/python3.5/site-packages/tables/group.py in _g_close_descendents(self)
    900 
    901         node_manager = self._v_file._node_manager
--> 902         node_manager.close_subtree(self._v_pathname)
    903 
    904     _g_closeDescendents = previous_api(_g_close_descendents)

/Users/uge/anaconda/envs/py35/lib/python3.5/site-packages/tables/file.py in close_subtree(self, prefix)
    540             if path.startswith(prefix) and '/_i_' not in path
    541         ]
--> 542         self._close_nodes(paths, cache.pop)
    543 
    544         # Close everything else (i.e. indices)

/Users/uge/anaconda/envs/py35/lib/python3.5/site-packages/tables/file.py in _close_nodes(nodepaths, get_node)
    515                         node._g_close()
    516                     else:
--> 517                         node._f_close()
    518                     del node
    519                 except ClosedNodeError:

/Users/uge/anaconda/envs/py35/lib/python3.5/site-packages/tables/table.py in _f_close(self, flush)
   3074         # Flush right now so the row object does not get in the middle.
   3075         if flush:
-> 3076             self.flush()
   3077 
   3078         # Some warnings can be issued after calling `self._g_set_location()`

/Users/uge/anaconda/envs/py35/lib/python3.5/site-packages/tables/table.py in flush(self)
   3009         if self.indexed and self.autoindex:
   3010             # Flush any unindexed row
-> 3011             rowsadded = self.flush_rows_to_index(_lastrow=True)
   3012             assert rowsadded <= 0 or self._indexedrows == self.nrows, \
   3013                 ("internal error: the number of indexed rows (%d) "

/Users/uge/anaconda/envs/py35/lib/python3.5/site-packages/tables/table.py in flush_rows_to_index(self, _lastrow)
   2615                     if nrows > 0 and not col.index.dirty:
   2616                         rowsadded = self._add_rows_to_index(
-> 2617                             colname, start, nrows, _lastrow, update=True)
   2618             self._unsaved_indexedrows -= rowsadded
   2619             self._indexedrows += rowsadded

/Users/uge/anaconda/envs/py35/lib/python3.5/site-packages/tables/table.py in _add_rows_to_index(self, colname, start, nrows, lastrow, update)
   2646         if lastrow and startLR < self.nrows:
   2647             index.append_last_row(
-> 2648                 [self._read(startLR, self.nrows, 1, colname)],
   2649                 update=update)
   2650             indexedrows += self.nrows - startLR

/Users/uge/anaconda/envs/py35/lib/python3.5/site-packages/tables/table.py in _read(self, start, stop, step, field, out)
   1928             self._read_field_name(result, start, stop, step, field)
   1929         else:
-> 1930             self.row._fill_col(result, start, stop, step, field)
   1931 
   1932         if select_field:

tables/tableextension.pyx in tables.tableextension.Row._fill_col (tables/tableextension.c:14485)()

tables/tableextension.pyx in tables.tableextension.Table._read_records (tables/tableextension.c:7491)()

HDF5ExtError: HDF5 error back trace

  File "H5Dio.c", line 173, in H5Dread
    can't read data
  File "H5Dio.c", line 551, in H5D__read
    can't read data
  File "H5Dchunk.c", line 1875, in H5D__chunk_read
    unable to read raw data chunk
  File "H5Dchunk.c", line 2905, in H5D__chunk_lock
    data pipeline read failed
  File "H5Z.c", line 1372, in H5Z_pipeline
    filter returned failure during read
  File "H5Zdeflate.c", line 125, in H5Z_filter_deflate
    inflate() failed

End of HDF5 error back trace

Problems reading records.

In [24]:
glob.glob('*/*.h5', recursive=True), glob.glob('*.h5', recursive=True)


Out[24]:
(['CURRENT_MONTH/data_20160808_1.h5', 'CURRENT_MONTH/data_20160808_2.h5'],
 ['data_2016_08.h5',
  'enerpi_data.h5',
  'juan_iborra_1.h5',
  'juan_iborra_2.h5',
  'juan_iborra_3.h5',
  'new.h5'])

In [36]:
import re

YEAR_MASK = '{}_YEAR_{:%Y}'
RG_YEAR_MASK = re.compile('(?P<name>\.*)_YEAR_(?P<year>\d{4})')
MONTH_MASK = '{}_{:%Y_MONTH_%m}'
RG_MONTH_MASK = re.compile('(?P<name>\.*)_(?P<year>\d{4})_MONTH_(?P<month>\d{2})')
DAY_MASK = '{}_{:%Y_%m_DAY_%d}'
RG_DAY_MASK = re.compile('(?P<name>\.*)_(?P<year>\d{4})_(?P<month>\d{2})_DAY_(?P<day>\d{2})')

files = [f for f in glob.glob(os.path.join(os.getcwd(), '**'), recursive=True) if f.endswith('.h5')]


Out[36]:
['/Users/uge/Dropbox/PYTHON/PYPROJECTS/enerpi/enerpi/DATA/CURRENT_MONTH/data_20160808_1.h5',
 '/Users/uge/Dropbox/PYTHON/PYPROJECTS/enerpi/enerpi/DATA/CURRENT_MONTH/data_20160808_2.h5',
 '/Users/uge/Dropbox/PYTHON/PYPROJECTS/enerpi/enerpi/DATA/data_2016_08.h5',
 '/Users/uge/Dropbox/PYTHON/PYPROJECTS/enerpi/enerpi/DATA/enerpi_data.h5',
 '/Users/uge/Dropbox/PYTHON/PYPROJECTS/enerpi/enerpi/DATA/juan_iborra_1.h5',
 '/Users/uge/Dropbox/PYTHON/PYPROJECTS/enerpi/enerpi/DATA/juan_iborra_2.h5',
 '/Users/uge/Dropbox/PYTHON/PYPROJECTS/enerpi/enerpi/DATA/juan_iborra_3.h5',
 '/Users/uge/Dropbox/PYTHON/PYPROJECTS/enerpi/enerpi/DATA/new.h5']

In [38]:
class HDFCatalog(object):

    def __init__(self, base_path=DATA_PATH, preffix='DATA'):
        self.base_path = os.path.abspath(base_path)
        self.name = preffix
        # self.intervalos = None
        self.min_ts = None
        self.tree = self._init_catalog_tree()

    def __repr__(self):
        print('HDFCatalog[{}->{}]'.format(self.name, self.base_path))

    def _get_paths_interval(self, ts_ini, ts_fin=None):
        # TODO glob del árbol desde DATA_PATH identificando archivos y tiempos.
        files = [f.replace(self.base_path, '') for f in glob.glob(os.path.join(self.base_path, '**'), recursive=True)
                 if f.endswith(STORE_EXT)]
        print(files)
        return files

    def _init_catalog_tree(self):
        # TODO glob del árbol desde DATA_PATH identificando archivos y tiempos.
        files = [f.replace(self.base_path, '') for f in glob.glob(os.path.join(self.base_path, '**'), recursive=True)
                 if f.endswith(STORE_EXT)]
        print(files)
        return files

    def distribute_data(self, data):
        # TODO separa en meses completos + días + resto. Crea directorios
        pass

    def info_catalog(self):
        # TODO Tabla de información del catálogo: ruta, archivo, n_rows, ts_ini, ts_fin, medidas de completitud
        pass

    def get(self):
        # TODO Load concat data de dt_ini, dt_fin. Con parser de textos? Devuelve data
        pass

    def append(self):
        # TODO Append new data
        pass

    def put(self):
        # TODO Put new data
        pass

    def map(self):
        # TODO aplicar función a datos por store?
        pass

    def archive(self):
        # TODO Compactado de datos MENSUAL / DIARIO / HORARIO?
        # De now a Today por horas
        # De Today a DAY_XX
        # De All DAY_XX a MONTH_XX
        pass

    def backup(self, path_backup, compact_data=None):
        # TODO backup a ruta alternativa, con compresión y opción de replicado de tree o compactado (x años, o total)
        pass

    def export(self, export_to='csv'):
        # TODO Exportación a CSV / append to mysql?
        pass



def _extract_info_store(file):
    with pd.HDFStore(os.path.join(base_path, file), 'r')

def _get_paths_interval(self, ts_ini, ts_fin=None):
    # TODO glob del árbol desde DATA_PATH identificando archivos y tiempos.
    files = [f.replace(self.base_path, '') for f in glob.glob(os.path.join(self.base_path, '**'), recursive=True)
             if f.endswith(STORE_EXT)]
    print(files)


    return files


Out[38]:
['/Users/uge/Dropbox/PYTHON/PYPROJECTS/enerpi/enerpi/DATA/data_2016_08.h5',
 '/Users/uge/Dropbox/PYTHON/PYPROJECTS/enerpi/enerpi/DATA/enerpi_data.h5',
 '/Users/uge/Dropbox/PYTHON/PYPROJECTS/enerpi/enerpi/DATA/juan_iborra_1.h5',
 '/Users/uge/Dropbox/PYTHON/PYPROJECTS/enerpi/enerpi/DATA/juan_iborra_2.h5',
 '/Users/uge/Dropbox/PYTHON/PYPROJECTS/enerpi/enerpi/DATA/juan_iborra_3.h5',
 '/Users/uge/Dropbox/PYTHON/PYPROJECTS/enerpi/enerpi/DATA/new.h5']

In [41]:
dfs = [pd.read_hdf(p, 'rms') for p in paths[1:]]
dfs[0].head()


Out[41]:
power noise ref ldr
ts
2016-08-02 23:12:46.305646 592.416811 0.004734 84.0 0.186719
2016-08-02 23:12:47.316356 599.286578 0.004795 84.0 0.184285
2016-08-02 23:12:48.327245 601.662106 0.004747 84.0 0.183630
2016-08-02 23:12:49.337898 598.317571 0.004795 84.0 0.174957
2016-08-02 23:12:50.343299 595.473281 0.004817 82.0 0.168752

In [46]:
df_raw = pd.read_hdf(paths[0], 'raw')
print(df_raw.head())

df_raw.columns = dfs[0].columns
df_raw.index


                                   c1        c2     c3        c4
ts                                                              
2016-08-01 19:51:56.200602  42.551210  0.003470  120.0  0.844840
2016-08-01 19:51:57.209504  45.622881  0.003169  113.0  0.844919
2016-08-01 19:51:58.211276  46.696186  0.003200  113.0  0.845863
2016-08-01 19:51:59.219036  47.338830  0.003334  117.0  0.846282
2016-08-01 19:52:00.224722  47.181710  0.003260  121.0  0.845733
Out[46]:
DatetimeIndex(['2016-08-01 19:51:56.200602', '2016-08-01 19:51:57.209504',
               '2016-08-01 19:51:58.211276', '2016-08-01 19:51:59.219036',
               '2016-08-01 19:52:00.224722', '2016-08-01 19:52:01.229116',
               '2016-08-01 19:52:02.235039', '2016-08-01 19:52:03.239766',
               '2016-08-01 19:52:04.245154', '2016-08-01 19:52:05.245117',
               ...
               '2016-08-02 20:06:51.132818', '2016-08-02 20:06:52.135769',
               '2016-08-02 20:06:53.137310', '2016-08-02 20:06:54.148574',
               '2016-08-02 20:06:55.160009', '2016-08-02 20:06:56.171088',
               '2016-08-02 20:06:57.179790', '2016-08-02 20:06:58.179914',
               '2016-08-02 20:06:59.180827', '2016-08-02 20:07:00.191887'],
              dtype='datetime64[ns]', name='ts', length=84510, freq=None)

In [45]:
%matplotlib inline
df_raw.plot()


Out[45]:
<matplotlib.axes._subplots.AxesSubplot at 0x10c90d0f0>

In [133]:
from prettyprinting import *

#from enerpi.database import append_delta_y_consumo
def _compress_data(data, verbose=False):
    if verbose:
        data.info()
    if not data.empty:
        data = data.copy().astype('float32')
        data['ref'] = data['ref'].astype('int16')
        data['ldr'] = (data['ldr'] * 1000).round(0).astype('int16')
        if verbose:
            data.info()
        return data
    return data


#df1, con1 = process_data(df_raw)
#con1
_compress_data(df_raw, verbose=True).head()


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 84510 entries, 2016-08-01 19:51:56.200602 to 2016-08-02 20:07:00.191887
Data columns (total 4 columns):
power    84510 non-null float64
noise    84510 non-null float64
ref      84510 non-null int64
ldr      84510 non-null float64
dtypes: float64(3), int64(1)
memory usage: 3.2 MB
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 84510 entries, 2016-08-01 19:51:56.200602 to 2016-08-02 20:07:00.191887
Data columns (total 4 columns):
power    84510 non-null float32
noise    84510 non-null float32
ref      84510 non-null int16
ldr      84510 non-null int16
dtypes: float32(2), int16(2)
memory usage: 1.6 MB
Out[133]:
power noise ref ldr
ts
2016-08-01 19:51:56.200602 42.551208 0.003470 120 845
2016-08-01 19:51:57.209504 45.622879 0.003169 113 845
2016-08-01 19:51:58.211276 46.696186 0.003200 113 846
2016-08-01 19:51:59.219036 47.338829 0.003334 117 846
2016-08-01 19:52:00.224722 47.181709 0.003260 121 846

In [130]:
#print_info(df1.dtypes)
#df1.info()
df_tot = pd.concat([df_raw] + dfs)
df_tot_c = _compress_data(df_tot, verbose=True)


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 301560 entries, 2016-08-01 19:51:56.200602 to 2016-08-05 13:23:11.682163
Data columns (total 4 columns):
power    301560 non-null float64
noise    301560 non-null float64
ref      301560 non-null float64
ldr      301560 non-null float64
dtypes: float64(4)
memory usage: 11.5 MB
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 301560 entries, 2016-08-01 19:51:56.200602 to 2016-08-05 13:23:11.682163
Data columns (total 4 columns):
power    301560 non-null float64
noise    301560 non-null float64
ref      301560 non-null float64
ldr      301560 non-null float64
dtypes: float64(4)
memory usage: 11.5 MB
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 301560 entries, 2016-08-01 19:51:56.200602 to 2016-08-05 13:23:11.682163
Data columns (total 4 columns):
power    301560 non-null float32
noise    301560 non-null float32
ref      301560 non-null int16
ldr      301560 non-null int16
dtypes: float32(2), int16(2)
memory usage: 5.8 MB

In [185]:
from enerpi.base import timeit

use_fletcher32 = True

@timeit('_save_hdf')
def _save_hdf(data, path, **kwargs):
    data.to_hdf(path, 'rms', **kwargs)
    print_cyan('STORE "{}"\t->\t{:.1f} KB'.format(path, os.path.getsize(path) / 1000))

_save_hdf(df_tot, 'data_juan_iborra_3d_default_big.h5')
_save_hdf(df_tot_c, 'data_juan_iborra_3d_default.h5')
_save_hdf(df_tot_c, 'data_juan_iborra_3d_zlib.h5', complevel=9, complib='zlib', fletcher32=use_fletcher32)
_save_hdf(df_tot_c, 'data_juan_iborra_3d_bzip2.h5', complevel=9, complib='bzip2', fletcher32=use_fletcher32)
_save_hdf(df_tot_c, 'data_juan_iborra_3d.h5', complevel=9, complib='blosc', fletcher32=use_fletcher32)


STORE "data_juan_iborra_3d_default_big.h5"	->	12069.6 KB
_save_hdf TOOK: 0.034 s
STORE "data_juan_iborra_3d_default.h5"	->	6040.6 KB
_save_hdf TOOK: 0.027 s
STORE "data_juan_iborra_3d_zlib.h5"	->	2844.0 KB
_save_hdf TOOK: 0.244 s
STORE "data_juan_iborra_3d_bzip2.h5"	->	2879.3 KB
_save_hdf TOOK: 1.046 s
STORE "data_juan_iborra_3d.h5"	->	3253.0 KB
_save_hdf TOOK: 0.067 s

In [186]:
@timeit('_load_hdf')
def _load_hdf(path, **kwargs):
    data = pd.read_hdf(path, 'rms', **kwargs)
    print_cyan('STORE "{}"\t->\t{:.1f} KB'.format(path, os.path.getsize(path) / 1000))
    return data

df = _load_hdf('data_juan_iborra_3d_default_big.h5')
df = _load_hdf('data_juan_iborra_3d_default.h5')
df = _load_hdf('data_juan_iborra_3d_zlib.h5')
df = _load_hdf('data_juan_iborra_3d_bzip2.h5')
df = _load_hdf('data_juan_iborra_3d.h5')


STORE "data_juan_iborra_3d_default_big.h5"	->	12069.6 KB
_load_hdf TOOK: 0.009 s
STORE "data_juan_iborra_3d_default.h5"	->	6040.6 KB
_load_hdf TOOK: 0.007 s
STORE "data_juan_iborra_3d_zlib.h5"	->	2844.0 KB
_load_hdf TOOK: 0.025 s
STORE "data_juan_iborra_3d_bzip2.h5"	->	2879.3 KB
_load_hdf TOOK: 0.214 s
STORE "data_juan_iborra_3d.h5"	->	3253.0 KB
_load_hdf TOOK: 0.014 s

In [231]:
@timeit('_process_data')
def _process_data(data, verbose=False):
    if not data.empty:
        if verbose:
            data.info()
        data = data.copy()
        deltas = pd.Series(data.index).diff().fillna(method='bfill')
        frac_hora = deltas / pd.Timedelta(hours=1)
        data['Wh'] = data.power * frac_hora.values
        data['delta'] = deltas.values
        data.loc[data['delta'] > '3s', 'high_delta'] = True
        data.loc[data['delta'] > '1min', 'execution'] = 1
        resampler = data[['power', 'Wh', 'delta', 'high_delta', 'execution']].resample('1h', label='left')
        consumo = (pd.DataFrame(resampler['Wh'].sum().rename('kWh')).fillna(0.) / 1000.).astype('float32')
        consumo['t_ref'] = (resampler['delta'].sum() / pd.Timedelta(hours=1)).astype('float32')
        consumo['n_jump'] = resampler['high_delta'].sum().fillna(0).astype('int16')
        consumo['n_exec'] = resampler['execution'].sum().fillna(0).astype('int32')
        consumo['p_max'] = resampler['power'].max().round(0).astype('float16')
        consumo['p_mean'] = resampler['power'].mean().round(0).astype('float16')
        consumo['p_min'] = resampler['power'].min().round(0).astype('float16')
        print_info(consumo.head())
        data['high_delta'] = data['high_delta'].astype(bool)
        data['execution'] = data['execution'].astype(bool)
        data.drop(['delta', 'Wh'], axis=1, inplace=True)
        if verbose:
            data.info()
            consumo.info()
        return data, consumo
    return data, None


df_p, consumo = _process_data(df, verbose=True)
consumo.to_hdf('data_juan_iborra_3d.h5', 'horas')


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 301560 entries, 2016-08-01 19:51:56.200602 to 2016-08-05 13:23:11.682163
Data columns (total 4 columns):
power    301560 non-null float32
noise    301560 non-null float32
ref      301560 non-null int16
ldr      301560 non-null int16
dtypes: float32(2), int16(2)
memory usage: 5.8 MB
                          kWh     t_ref  n_jump  n_exec   p_max  p_mean   p_min
ts                                                                             
2016-08-01 19:00:00  0.001968  0.033459       0       0    75.0    59.0    40.0
2016-08-01 20:00:00  1.769897  1.100953       1       1  2384.0  1605.0  1429.0
2016-08-01 21:00:00  0.715201  1.000046       0       0  1635.0   715.0   231.0
2016-08-01 22:00:00  1.716943  1.000071       0       0  6568.0  1717.0   401.0
2016-08-01 23:00:00  0.515473  1.000022       0       0   606.0   515.0   474.0
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 301560 entries, 2016-08-01 19:51:56.200602 to 2016-08-05 13:23:11.682163
Data columns (total 6 columns):
power         301560 non-null float32
noise         301560 non-null float32
ref           301560 non-null int16
ldr           301560 non-null int16
high_delta    301560 non-null bool
execution     301560 non-null bool
dtypes: bool(2), float32(2), int16(2)
memory usage: 6.3 MB
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 91 entries, 2016-08-01 19:00:00 to 2016-08-05 13:00:00
Freq: H
Data columns (total 7 columns):
kWh       91 non-null float32
t_ref     91 non-null float32
n_jump    91 non-null int16
n_exec    91 non-null int32
p_max     89 non-null float16
p_mean    89 non-null float16
p_min     89 non-null float16
dtypes: float16(3), float32(2), int16(1), int32(1)
memory usage: 2.5 KB
_process_data TOOK: 0.215 s

In [188]:
path_st = 'data_juan_iborra_3d.h5'
with pd.HDFStore(path_st, 'r') as st:
    print_info(st)
    data = st['rms']
    dh = st['horas']
    print_red('{:.1f} KB'.format(os.path.getsize(path_st) / 1000))


<class 'pandas.io.pytables.HDFStore'>
File path: data_juan_iborra_3d.h5
/horas            frame        (shape->[91,7])    
/rms              frame        (shape->[301560,4])
3255.1 KB

In [232]:
# Separa y clasifica:
ahora = pd.Timestamp.now()
current_year = ahora.year
current_month = ahora.month
current_day = ahora.day
ahora, current_year, current_month, current_day


'''
YEAR_MASK = 'YEAR_{:%Y}'
RG_YEAR_MASK = re.compile('(?P<name>\.*)_YEAR_(?P<year>\d{4})')
MONTH_MASK = '{}_{:%Y_MONTH_%m}'
RG_MONTH_MASK = re.compile('(?P<name>\.*)_(?P<year>\d{4})_MONTH_(?P<month>\d{2})')
DAY_MASK = '{}_{:%Y_%m_DAY_%d}'
RG_DAY_MASK = re.compile('(?P<name>\.*)_(?P<year>\d{4})_(?P<month>\d{2})_DAY_(?P<day>\d{2})')
'''
DIR_CURRENT_MONTH = 'CURRENT_MONTH'
preffix = 'ENERPI_DATA'
DAY_MASK = '{}_{:%Y_%m_DAY_%d}'

def _make_index_path(ts, w_day=False):
    if w_day:
        p = os.path.join(DIR_CURRENT_MONTH, DAY_MASK.format(preffix, ts))
    else:
        p = os.path.join(YEAR_MASK.format(ts), MONTH_MASK.format(preffix, ts))
    return p


def _clasifica_data(df):
    ahora = pd.Timestamp.now()
    ts_ini, ts_fin = df.index[0], df.index[-1]
    gb_años = data.groupby(pd.TimeGrouper(freq='A'))
    for ts_year, d_year in gb_años:
        gb_meses = d_year.groupby(pd.TimeGrouper(freq='M'))
        for ts_month, d_month in gb_meses:
            if (ts_year.year == ahora.year) and (ts_month.month == ahora.month):
                # CURRENT MONTH
                print_red('# CURRENT MONTH')
                gb_dias = d_month.groupby(pd.TimeGrouper(freq='D', closed='left', label='left'))
                for ts_day, d_day in gb_dias:
                    if ts_day.day == ahora.day:
                        # TODAY
                        print_magenta('# TODAY')
                    else:
                        # ARCHIVE DAY
                        print_red('# ARCHIVE DAY {:%Y-%m-%d}'.format(ts_day))
                        p = _make_index_path(ts_day, w_day=True)
                        d_day_p, c_day = _process_data(d_day)
                        print_info(p)
                        print_cyan(d_day.head(2))
                        print_cyan(d_day_p.tail(2))
                        print_red(c_day.head(2))
            else:
                # ARCHIVE MONTH
                print_red('# ARCHIVE MONTH')
                p = _make_index_path(ts_month, w_day=False)
                d_month_p, c_month = _process_data(d_month)
        print('DONE!')


_clasifica_data(data)


# CURRENT MONTH
# ARCHIVE DAY 2016-08-01
CURRENT_MONTH/ENERPI_DATA_2016_08_DAY_01
                          kWh     t_ref  n_jump  n_exec   p_max  p_mean   p_min
ts                                                                             
2016-08-01 19:00:00  0.001968  0.033459       0       0    75.0    59.0    40.0
2016-08-01 20:00:00  1.769897  1.100953       1       1  2384.0  1605.0  1429.0
2016-08-01 21:00:00  0.715201  1.000046       0       0  1635.0   715.0   231.0
2016-08-01 22:00:00  1.716943  1.000071       0       0  6568.0  1717.0   401.0
2016-08-01 23:00:00  0.515473  1.000022       0       0   606.0   515.0   474.0
_process_data TOOK: 0.027 s
                                power     noise  ref  ldr
ts                                                       
2016-08-01 19:51:56.200602  42.551208  0.003470  120  845
2016-08-01 19:51:57.209504  45.622879  0.003169  113  845
                                 power     noise  ref  ldr high_delta execution
ts                                                                             
2016-08-01 23:59:58.576031  501.746338  0.004296   84   37       True      True
2016-08-01 23:59:59.576685  507.307678  0.004146   83   38       True      True
                          kWh     t_ref  n_jump  n_exec   p_max  p_mean   p_min
ts                                                                             
2016-08-01 19:00:00  0.001968  0.033459       0       0    75.0    59.0    40.0
2016-08-01 20:00:00  1.769897  1.100953       1       1  2384.0  1605.0  1429.0
# ARCHIVE DAY 2016-08-02
CURRENT_MONTH/ENERPI_DATA_2016_08_DAY_02
                          kWh     t_ref  n_jump  n_exec   p_max  p_mean  p_min
ts                                                                            
2016-08-02 00:00:00  0.412912  1.000113       0       0  1284.0   413.0  268.0
2016-08-02 01:00:00  0.385426  0.999798       0       0   459.0   386.0  367.0
2016-08-02 02:00:00  0.376195  1.000069       0       0   394.0   376.0  363.0
2016-08-02 03:00:00  0.340966  0.999930       0       0  1157.0   341.0  259.0
2016-08-02 04:00:00  0.373691  1.000031       0       0   863.0   374.0  259.0
_process_data TOOK: 0.055 s
                                 power     noise  ref  ldr
ts                                                        
2016-08-02 00:00:00.582596  509.000580  0.004193   83   38
2016-08-02 00:00:01.585475  507.942261  0.004380   82   39
                                 power     noise  ref  ldr high_delta execution
ts                                                                             
2016-08-02 23:59:58.388958  566.336304  0.005081   84   41       True      True
2016-08-02 23:59:59.400328  557.777283  0.005302   84   41       True      True
                          kWh     t_ref  n_jump  n_exec   p_max  p_mean  p_min
ts                                                                            
2016-08-02 00:00:00  0.412912  1.000113       0       0  1284.0   413.0  268.0
2016-08-02 01:00:00  0.385426  0.999798       0       0   459.0   386.0  367.0
# ARCHIVE DAY 2016-08-03
CURRENT_MONTH/ENERPI_DATA_2016_08_DAY_03
                          kWh     t_ref  n_jump  n_exec   p_max  p_mean  p_min
ts                                                                            
2016-08-03 00:00:00  0.408402  1.000027       0       0   611.0   408.0  343.0
2016-08-03 01:00:00  0.339209  0.999881       0       0  1211.0   339.0  237.0
2016-08-03 02:00:00  0.406422  1.000069       0       0   523.0   406.0  328.0
2016-08-03 03:00:00  0.455213  1.000000       0       0  1384.0   455.0  328.0
2016-08-03 04:00:00  0.455398  1.000085       0       0   931.0   455.0  425.0
_process_data TOOK: 0.055 s
                                 power     noise  ref  ldr
ts                                                        
2016-08-03 00:00:00.410995  558.100769  0.005192   84   41
2016-08-03 00:00:01.415797  564.341614  0.004958   83   41
                                 power     noise  ref  ldr high_delta execution
ts                                                                             
2016-08-03 23:59:58.789394  469.284180  0.003086   82   45       True      True
2016-08-03 23:59:59.800320  468.676483  0.003092   84   43       True      True
                          kWh     t_ref  n_jump  n_exec   p_max  p_mean  p_min
ts                                                                            
2016-08-03 00:00:00  0.408402  1.000027       0       0   611.0   408.0  343.0
2016-08-03 01:00:00  0.339209  0.999881       0       0  1211.0   339.0  237.0
# ARCHIVE DAY 2016-08-04
CURRENT_MONTH/ENERPI_DATA_2016_08_DAY_04
                          kWh     t_ref  n_jump  n_exec   p_max  p_mean  p_min
ts                                                                            
2016-08-04 00:00:00  0.440713  0.999972       0       0   493.0   441.0  406.0
2016-08-04 01:00:00  0.436760  0.999854       0       0   894.0   437.0  400.0
2016-08-04 02:00:00  0.413150  1.000050       0       0   447.0   413.0  314.0
2016-08-04 03:00:00  0.393626  1.000155       0       0  1262.0   394.0  307.0
2016-08-04 04:00:00  0.383875  0.999857       0       0  1280.0   384.0  301.0
_process_data TOOK: 0.047 s
                                 power     noise  ref  ldr
ts                                                        
2016-08-04 00:00:00.803527  474.537323  0.003100   82   43
2016-08-04 00:00:01.814652  487.356079  0.003152   84   45
                                 power     noise  ref  ldr high_delta execution
ts                                                                             
2016-08-04 23:59:58.386661  427.701477  0.003021   84   14       True      True
2016-08-04 23:59:59.397089  428.555573  0.003061   84   18       True      True
                          kWh     t_ref  n_jump  n_exec  p_max  p_mean  p_min
ts                                                                           
2016-08-04 00:00:00  0.440713  0.999972       0       0  493.0   441.0  406.0
2016-08-04 01:00:00  0.436760  0.999854       0       0  894.0   437.0  400.0
# ARCHIVE DAY 2016-08-05
CURRENT_MONTH/ENERPI_DATA_2016_08_DAY_05
                          kWh     t_ref  n_jump  n_exec   p_max  p_mean  p_min
ts                                                                            
2016-08-05 00:00:00  0.396525  0.999969       0       0   462.0   397.0  367.0
2016-08-05 01:00:00  0.387132  0.999947       0       0   438.0   387.0  364.0
2016-08-05 02:00:00  0.373793  1.000224       0       0   444.0   374.0  262.0
2016-08-05 03:00:00  0.386348  0.999880       0       0  1507.0   386.0  266.0
2016-08-05 04:00:00  0.393390  0.999931       0       0   471.0   393.0  372.0
_process_data TOOK: 0.042 s
                                 power     noise  ref  ldr
ts                                                        
2016-08-05 00:00:00.399319  426.071991  0.003022   83   23
2016-08-05 00:00:01.409454  429.027069  0.003114   83   25
                                  power     noise  ref  ldr high_delta execution
ts                                                                              
2016-08-05 13:23:10.671605  1791.119873  0.005336   83  492       True      True
2016-08-05 13:23:11.682163  1788.043457  0.005193   84  492       True      True
                          kWh     t_ref  n_jump  n_exec  p_max  p_mean  p_min
ts                                                                           
2016-08-05 00:00:00  0.396525  0.999969       0       0  462.0   397.0  367.0
2016-08-05 01:00:00  0.387132  0.999947       0       0  438.0   387.0  364.0
DONE!

In [254]:
pd.Timestamp.fromtimestamp(os.path.getmtime(path_st))


Out[254]:
Timestamp('2016-08-11 15:03:36')

In [250]:
def _get_paths_interval(ts_ini, ts_fin=None):
    ts_ini = pd.Timestamp(ts_ini)
    ts_fin = pd.Timestamp(ts_fin) if ts_fin else pd.Timestamp.now()
    periods = (ts_fin.year * 12 + ts_fin.month) - (ts_ini.year * 12 + ts_ini.month)
    index = pd.DatetimeIndex(freq='M', start=ts_ini.date(), periods=periods + 1)
    paths = [for i in index]
    
    return pd.DatetimeIndex(freq='M', start=ts_ini.date(), periods=periods + 1)
        
    
_get_paths_interval('2016-02-03', ts_fin=None)
_get_paths_interval('2016-02-01', ts_fin=None)
_get_paths_interval('2016-01-31', ts_fin=None)
_get_paths_interval('2016-01-31', ts_fin='2016-02-01')
_get_paths_interval('2016-08-01', ts_fin='2016-08-11')


6 2016-02-03 2016-08-11
DatetimeIndex(['2016-02-29', '2016-03-31', '2016-04-30', '2016-05-31', '2016-06-30', '2016-07-31', '2016-08-31'], dtype='datetime64[ns]', freq='M')
6 2016-02-01 2016-08-11
DatetimeIndex(['2016-02-29', '2016-03-31', '2016-04-30', '2016-05-31', '2016-06-30', '2016-07-31', '2016-08-31'], dtype='datetime64[ns]', freq='M')
7 2016-01-31 2016-08-11
DatetimeIndex(['2016-01-31', '2016-02-29', '2016-03-31', '2016-04-30', '2016-05-31', '2016-06-30', '2016-07-31', '2016-08-31'], dtype='datetime64[ns]', freq='M')
1 2016-01-31 2016-02-01
DatetimeIndex(['2016-01-31', '2016-02-29'], dtype='datetime64[ns]', freq='M')
0 2016-08-01 2016-08-11
DatetimeIndex(['2016-08-31'], dtype='datetime64[ns]', freq='M')
Out[250]:
DatetimeIndex(['2016-08-31'], dtype='datetime64[ns]', freq='M')

In [279]:
#last = pd.read_hdf('2016_08_10.h5', 'rms')
p1 = last.copy()
p1.index = p1.index - pd.Timedelta('60D')
p2 = last.copy()
p2.index = p2.index - pd.Timedelta('180D')
p3 = last.copy()
p3.index = p2.index - pd.Timedelta('360D')

p1.to_hdf('pru1_jun.h5', 'rms')
p2.to_hdf('pru2.h5', 'rms')
p3.to_hdf('pru3_15.h5', 'rms')
p3.info()


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 7500 entries, 2015-02-17 21:26:49.378310 to 2015-02-18 11:27:40.109109
Data columns (total 4 columns):
power    7500 non-null float64
noise    7500 non-null float64
ref      7500 non-null float64
ldr      7500 non-null float64
dtypes: float64(4)
memory usage: 293.0 KB

In [270]:
pd.read_hdf('DATA_YEAR_2015/DATA_2015_MONTH_02.h5', 'rms').info()


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 7500 entries, 2015-02-17 21:26:49.378310 to 2015-02-18 11:27:40.109109
Data columns (total 4 columns):
power    7500 non-null float64
noise    7500 non-null float64
ref      7500 non-null float64
ldr      7500 non-null float64
dtypes: float64(4)
memory usage: 293.0 KB

In [274]:
last.plot()


Out[274]:
<matplotlib.axes._subplots.AxesSubplot at 0x11b6ee588>

In [277]:
last.loc['2016-08-11 00:00':'2016-08-11 11:00']


Out[277]:
power noise ref ldr
ts
2016-08-11 01:26:02.433825 238.806735 0.005109 77.0 0.030926
2016-08-11 01:26:03.444509 239.324132 0.005226 84.0 0.030401
2016-08-11 01:26:04.455247 242.482888 0.005160 84.0 0.028026
2016-08-11 01:26:05.456690 248.397511 0.005048 74.0 0.017416
2016-08-11 01:26:06.462287 262.170834 0.005323 80.0 0.023375
2016-08-11 01:26:07.468395 273.267472 0.005509 76.0 0.029831
2016-08-11 01:26:08.478759 257.037725 0.005502 84.0 0.034076
2016-08-11 01:26:09.489191 234.616330 0.005362 84.0 0.034304
2016-08-11 01:26:10.499698 237.029302 0.005269 84.0 0.034230
2016-08-11 01:26:11.510536 236.412935 0.005318 84.0 0.034391
2016-08-11 01:26:12.521394 242.026644 0.005191 84.0 0.034395
2016-08-11 01:26:13.532528 243.917164 0.005169 84.0 0.034688
2016-08-11 01:26:14.543072 242.546287 0.005182 84.0 0.034542
2016-08-11 01:26:15.553925 241.926762 0.005124 84.0 0.034720
2016-08-11 01:26:16.564473 239.893189 0.005082 84.0 0.034554
2016-08-11 01:26:17.575007 243.255812 0.005147 84.0 0.034744
2016-08-11 01:26:18.585491 241.230207 0.005296 84.0 0.034503
2016-08-11 01:26:19.596091 239.354427 0.005370 84.0 0.034657
2016-08-11 01:26:20.606689 240.995105 0.005417 84.0 0.034437
2016-08-11 01:26:21.618044 232.667743 0.005368 84.0 0.034483
2016-08-11 01:26:22.628418 229.690563 0.005232 84.0 0.034355
2016-08-11 01:26:23.638782 240.995800 0.005105 84.0 0.034591
2016-08-11 01:26:24.649457 246.563609 0.005088 84.0 0.034344
2016-08-11 01:26:25.659810 250.649174 0.005113 84.0 0.034451
2016-08-11 01:26:26.670170 249.001101 0.005100 84.0 0.034779
2016-08-11 01:26:27.680432 244.345836 0.005135 84.0 0.035368
2016-08-11 01:26:28.683845 241.500400 0.005137 83.0 0.034711
2016-08-11 01:26:29.694593 245.043626 0.005021 84.0 0.034405
2016-08-11 01:26:30.702495 254.983657 0.005207 83.0 0.034542
2016-08-11 01:26:31.712885 246.102692 0.005324 84.0 0.034634
... ... ... ... ...
2016-08-11 09:27:03.526290 241.352947 0.005214 83.0 0.321375
2016-08-11 09:27:04.536827 240.622669 0.005194 84.0 0.328970
2016-08-11 09:27:05.547488 235.639104 0.005255 84.0 0.337575
2016-08-11 09:27:06.559049 230.682551 0.005357 84.0 0.347266
2016-08-11 09:27:07.569249 234.068520 0.005331 79.0 0.350572
2016-08-11 09:27:08.581797 236.510306 0.005303 79.0 0.350951
2016-08-11 09:27:09.590008 233.716989 0.005385 83.0 0.352251
2016-08-11 09:27:10.600660 233.160961 0.005393 84.0 0.351967
2016-08-11 09:27:11.611439 232.353099 0.005326 84.0 0.352076
2016-08-11 09:27:12.621998 235.104432 0.005270 84.0 0.352778
2016-08-11 09:27:13.632562 238.378951 0.005220 84.0 0.353237
2016-08-11 09:27:14.638178 241.097775 0.005256 82.0 0.353008
2016-08-11 09:27:15.648547 244.224751 0.005257 84.0 0.352646
2016-08-11 09:27:16.658920 239.442338 0.005224 84.0 0.352523
2016-08-11 09:27:17.669643 234.438600 0.005249 84.0 0.352630
2016-08-11 09:27:18.680343 234.775237 0.005229 84.0 0.352926
2016-08-11 09:27:19.687709 234.058463 0.005349 82.0 0.352738
2016-08-11 09:27:20.698220 234.691836 0.005603 84.0 0.352679
2016-08-11 09:27:21.707097 239.095276 0.005599 82.0 0.352928
2016-08-11 09:27:22.717503 240.799077 0.005515 84.0 0.352860
2016-08-11 09:27:23.727880 238.393301 0.005412 84.0 0.352766
2016-08-11 09:27:24.738594 235.672239 0.005249 84.0 0.352742
2016-08-11 09:27:25.749229 238.188652 0.005255 84.0 0.352858
2016-08-11 09:27:26.759608 237.930543 0.005382 84.0 0.352958
2016-08-11 09:27:27.770141 235.819319 0.005386 84.0 0.352743
2016-08-11 09:27:28.780705 239.199495 0.005248 84.0 0.352809
2016-08-11 09:27:29.791245 237.397900 0.005246 84.0 0.352854
2016-08-11 09:27:30.791042 233.523339 0.005281 82.0 0.352955
2016-08-11 09:27:31.790907 233.399660 0.005356 83.0 0.353105
2016-08-11 09:27:32.801972 236.070286 0.005470 84.0 0.353225

300 rows × 4 columns


In [221]:
for day, g in gb_años:
    print_red(day.year)
    print_info(g.head())
    
for day, g in gb_meses:
    print_red(day)
    print_red(day.month)
    print_info(g.head())
    print_cyan(g.tail())


2016
                                power     noise  ref  ldr
ts                                                       
2016-08-01 19:51:56.200602  42.551208  0.003470  120  845
2016-08-01 19:51:57.209504  45.622879  0.003169  113  845
2016-08-01 19:51:58.211276  46.696186  0.003200  113  846
2016-08-01 19:51:59.219036  47.338829  0.003334  117  846
2016-08-01 19:52:00.224722  47.181709  0.003260  121  846
2016-08-31 00:00:00
8
                                power     noise  ref  ldr
ts                                                       
2016-08-01 19:51:56.200602  42.551208  0.003470  120  845
2016-08-01 19:51:57.209504  45.622879  0.003169  113  845
2016-08-01 19:51:58.211276  46.696186  0.003200  113  846
2016-08-01 19:51:59.219036  47.338829  0.003334  117  846
2016-08-01 19:52:00.224722  47.181709  0.003260  121  846
                                  power     noise  ref  ldr
ts                                                         
2016-08-05 13:23:07.643264  1787.606689  0.005353   84  491
2016-08-05 13:23:08.653733  1787.239136  0.005393   84  492
2016-08-05 13:23:09.664590  1784.219849  0.005446   84  492
2016-08-05 13:23:10.671605  1791.119873  0.005336   83  492
2016-08-05 13:23:11.682163  1788.043457  0.005193   84  492

In [294]:
horas = pd.read_hdf('CURRENT_MONTH/DATA_2016_08_DAY_11.h5', 'hours')
rms = pd.read_hdf('CURRENT_MONTH/DATA_2016_08_DAY_11.h5', 'rms')
rms.info()


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 360 entries, 2016-08-11 01:26:02.433825 to 2016-08-11 11:27:40.109109
Data columns (total 6 columns):
power         360 non-null float32
noise         360 non-null float32
ref           360 non-null int16
ldr           360 non-null int16
high_delta    360 non-null bool
execution     360 non-null bool
dtypes: bool(2), float32(2), int16(2)
memory usage: 7.7 KB

In [326]:
l = list(filter(lambda x: x[0] is not None, zip([data, None, None], 
                                       ['key_raw', 'key_summary', 'key_summary_extra'])))
list(zip(*l))[1], list(zip(*l))[1]


Out[326]:
(('key_raw',), ('key_raw',))

In [300]:
deltas = pd.Series(data.index).diff().fillna(method='bfill')
frac_hora = deltas / pd.Timedelta(hours=1)
data['Wh'] = data.power * frac_hora.values
data['delta'] = deltas.values
data.loc[data['delta'] > '3s', 'high_delta'] = True
data.dtypes


Out[300]:
power                 float32
noise                 float32
ref                     int16
ldr                     int16
Wh                    float64
delta         timedelta64[ns]
high_delta             object
dtype: object

In [306]:
data['delta'] = pd.Series(data.index).diff().fillna(method='bfill').dt.total_seconds().values
data['Wh'] = data.power * data.delta / 3600
data.loc[data['delta'] > 3, 'high_delta2'] = True
data.loc[data['delta'] > 60, 'execution'] = 1
print_cyan(data.head())
data[data['delta'] > 3]


                                power     noise  ref  ldr        Wh     delta high_delta  execution high_delta2
ts                                                                                                             
2016-08-01 19:51:56.200602  42.551208  0.003470  120  845  0.011925  1.008902       True        NaN         NaN
2016-08-01 19:51:57.209504  45.622879  0.003169  113  845  0.012786  1.008902       True        NaN         NaN
2016-08-01 19:51:58.211276  46.696186  0.003200  113  846  0.012994  1.001772       True        NaN         NaN
2016-08-01 19:51:59.219036  47.338829  0.003334  117  846  0.013252  1.007760       True        NaN         NaN
2016-08-01 19:52:00.224722  47.181709  0.003260  121  846  0.013181  1.005686       True        NaN         NaN
Out[306]:
power noise ref ldr Wh delta high_delta execution high_delta2
ts
2016-08-01 20:11:30.908344 1613.394287 0.005505 84 586 472.932092 1055.263146 True 1.0 True
2016-08-02 05:14:44.966842 380.206360 0.004163 1 32 0.731895 6.929980 True NaN True
2016-08-02 05:15:35.748159 389.775543 0.004163 1 28 1.030848 9.520998 True NaN True
2016-08-02 05:16:03.063972 386.484192 0.003931 1 31 0.990961 9.230541 True NaN True
2016-08-02 05:16:23.650407 383.864990 0.004151 1 29 0.475307 4.457568 True NaN True
2016-08-02 05:17:42.897016 385.994965 0.004288 1 32 1.107244 10.326766 True NaN True
2016-08-02 05:18:40.977913 383.189026 0.004139 1 40 1.097856 10.314178 True NaN True
2016-08-02 05:19:51.548235 386.791168 0.004283 1 30 0.772349 7.188525 True NaN True
2016-08-02 05:20:56.579287 378.563934 0.004120 1 29 0.705038 6.704641 True NaN True
2016-08-02 05:22:01.918368 376.972351 0.004179 1 21 0.835166 7.975646 True NaN True
2016-08-02 05:23:11.957643 378.864960 0.004094 1 36 0.703208 6.681930 True NaN True
2016-08-02 05:24:18.865122 376.868744 0.004272 1 36 0.799707 7.639120 True NaN True
2016-08-02 05:25:24.575200 374.203094 0.003980 1 35 0.983419 9.460925 True NaN True
2016-08-02 05:26:31.325873 378.728912 0.004158 1 36 0.679268 6.456768 True NaN True
2016-08-02 05:27:36.569250 380.846588 0.004193 1 43 1.134675 10.725661 True NaN True
2016-08-02 05:28:24.290665 380.875305 0.004028 1 31 0.878127 8.299980 True NaN True
2016-08-02 05:29:24.525239 373.317291 0.003943 1 44 1.131106 10.907562 True NaN True
2016-08-02 05:30:34.768987 374.986877 0.004175 1 31 1.131554 10.863294 True NaN True
2016-08-02 05:31:04.377658 382.258698 0.004149 1 31 1.116748 10.517204 True NaN True
2016-08-02 05:31:57.324614 380.021484 0.004203 1 36 0.919625 8.711745 True NaN True
2016-08-02 05:33:02.469097 382.805756 0.004050 1 34 0.694664 6.532790 True NaN True
2016-08-02 05:33:37.302844 386.980896 0.004030 1 47 0.926005 8.614428 True NaN True
2016-08-02 05:34:08.772439 373.183411 0.004153 1 32 0.761532 7.346299 True NaN True
2016-08-02 05:34:41.055895 375.140930 0.004030 1 31 1.117838 10.727212 True NaN True
2016-08-02 05:36:35.663824 380.760468 0.004114 1 21 1.058292 10.005903 True NaN True
2016-08-02 05:37:45.814564 376.354553 0.004119 1 36 0.714224 6.831876 True NaN True
2016-08-02 05:38:36.806697 380.259766 0.004087 1 37 0.927301 8.778958 True NaN True
2016-08-02 05:39:31.451253 378.596924 0.004078 1 30 1.087520 10.341002 True NaN True
2016-08-02 05:40:38.009234 376.449982 0.004153 1 30 0.757871 7.247540 True NaN True
2016-08-02 05:41:46.846242 373.944275 0.004081 1 32 1.299930 12.514559 True NaN True
... ... ... ... ... ... ... ... ... ...
2016-08-03 16:37:50.693608 1500.706299 0.005538 1 467 1.775720 4.259722 True NaN True
2016-08-03 16:38:57.139209 1499.090454 0.005450 1 467 5.709170 13.710321 True NaN True
2016-08-03 16:40:07.216332 1519.652344 0.005524 1 466 6.172338 14.622039 True NaN True
2016-08-03 16:41:21.647390 1519.909180 0.005336 1 466 7.199544 17.052570 True NaN True
2016-08-03 16:42:41.317489 1516.662964 0.005430 1 467 6.491517 15.408473 True NaN True
2016-08-03 16:43:27.492493 1501.612427 0.005534 1 464 7.852993 18.826945 True NaN True
2016-08-03 16:44:44.217861 1514.328979 0.005425 1 466 7.663761 18.218987 True NaN True
2016-08-03 16:45:50.223035 1526.801392 0.005596 1 467 7.464806 17.601047 True NaN True
2016-08-03 16:46:49.366303 1513.147705 0.005464 1 468 6.612710 15.732605 True NaN True
2016-08-03 16:48:03.908372 1508.879150 0.005410 1 468 5.463435 13.035083 True NaN True
2016-08-03 16:48:48.141531 1518.900879 0.005471 1 468 6.322653 14.985540 True NaN True
2016-08-03 20:32:52.778935 1519.579224 0.004569 84 277 1694.249571 4013.807481 True 1.0 True
2016-08-05 11:42:56.556930 1578.594849 0.005089 1 545 1.369106 3.122258 True NaN True
2016-08-05 11:49:33.106537 1540.533936 0.005123 1 549 1.284561 3.001830 True NaN True
2016-08-05 12:00:48.822309 1430.834473 0.004995 1 549 1.557957 3.919842 True NaN True
2016-08-05 12:04:54.988074 1433.462524 0.005125 1 537 1.796772 4.512417 True NaN True
2016-08-05 12:06:01.538601 1431.651733 0.004963 1 537 1.981105 4.981643 True NaN True
2016-08-05 12:07:06.301161 1442.705078 0.004946 1 521 1.997959 4.985531 True NaN True
2016-08-05 12:07:36.268273 1440.231689 0.004947 1 534 1.923441 4.807829 True NaN True
2016-08-05 12:08:49.251557 1418.183105 0.005067 1 534 1.358591 3.448728 True NaN True
2016-08-05 12:10:16.113987 1801.176636 0.005092 1 535 1.508352 3.014734 True NaN True
2016-08-05 12:10:47.840741 1807.173462 0.004638 1 533 1.786458 3.558733 True NaN True
2016-08-05 12:15:27.584276 1580.948608 0.004310 1 530 1.529699 3.483299 True NaN True
2016-08-05 12:16:27.816408 1584.972778 0.004391 1 529 1.719703 3.906018 True NaN True
2016-08-05 12:19:34.895411 1580.395386 0.004731 1 530 1.846153 4.205372 True NaN True
2016-08-05 12:26:18.143988 1715.105469 0.004820 1 536 1.734169 3.640014 True NaN True
2016-08-05 12:35:28.937925 1717.756714 0.005126 1 535 1.436389 3.010321 True NaN True
2016-08-05 12:38:43.646272 1705.006836 0.005146 1 538 1.734406 3.662075 True NaN True
2016-08-05 12:41:13.948676 1710.486328 0.005116 1 538 2.619719 5.513630 True NaN True
2016-08-05 12:44:03.318854 1689.601562 0.005130 1 536 3.161704 6.736580 True NaN True

210 rows × 9 columns


In [117]:
df_s = df1.copy()
df_s.noise = df_s.noise.round(5).astype('float32')
df_s.power = df_s.power.round(2).astype('float32')
df_s.Wh = df_s.Wh.astype('float32')
df_s.ref = df_s.ref.astype('int16')
df_s.ldr = (df_s.ldr * 1000).astype('int16')
df_s.delta = df_s.delta.dt.total_seconds().round(3).astype('float16')
df_s.high_delta = df_s.high_delta.astype(bool)
df_s.info()


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 84510 entries, 2016-08-01 19:51:56.200602 to 2016-08-02 20:07:00.191887
Data columns (total 8 columns):
power         84510 non-null float32
noise         84510 non-null float32
ref           84510 non-null int16
ldr           84510 non-null int16
Wh            84510 non-null float32
delta         84510 non-null float16
high_delta    84510 non-null bool
execution     1 non-null float64
dtypes: bool(1), float16(1), float32(3), float64(1), int16(2)
memory usage: 2.8 MB

In [122]:
#ts = df1[df1.delta > '1min'].index.values[0]
#df1.loc[ts  - pd.Timedelta('18min'):ts + pd.Timedelta('1min')]
#df_s.high_delta.describe()
df1.T.drop(['Wh', 'delta', 'high_delta', 'execution']).T.astype('float32').info()


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 84510 entries, 2016-08-01 19:51:56.200602 to 2016-08-02 20:07:00.191887
Data columns (total 4 columns):
power    84510 non-null float32
noise    84510 non-null float32
ref      84510 non-null float32
ldr      84510 non-null float32
dtypes: float32(4)
memory usage: 1.9 MB

In [123]:
df1.T.drop(['Wh', 'delta', 'high_delta', 'execution']).T.astype('float32').plot()


Out[123]:
<matplotlib.axes._subplots.AxesSubplot at 0x11b003c88>

In [ ]: