In [9]:
%matplotlib inline
import os
import glob
import pandas as pd
import numpy as np
import datetime as dt
from enerpi.base import timeit
from prettyprinting import *
import re
INIT_LOG_MARK = "Init ENERPI logging & broadcasting..."


@timeit('extract_log_file')
def extract_log_file(log_file, extract_temps=True, verbose=True):
    """Parse a log file into a DataFrame indexed by message timestamp.

    Every line shaped like ``<LEVEL> [<func>] - <d/m/Y H:M:S>: <msg>`` becomes one row.

    Columns of the result:
      * ``tipo``: level (INFO / WARNING / DEBUG / ERROR), stored as a categorical.
      * ``msg``: message text.
      * ``temp``: True where the message starts with 'Tªs --> ', NaN elsewhere.
      * ``debug_send``: True where the message starts with 'SENDED: ', NaN elsewhere.
      * ``no_red``: for WARNING rows, whether the message starts with the
        'OSError: [Errno 101] ...' text; NaN for every other level.
      * ``exec``: running count of messages containing INIT_LOG_MARK
        (0 for rows that precede the first occurrence).
      * ``CPU`` / ``GPU``: floats parsed from the temperature messages
        (only present when ``extract_temps`` is true).

    :param log_file: path of the log file to read.
    :param extract_temps: when true, add the ``CPU`` and ``GPU`` columns.
    :param verbose: when true, print per-run / per-level counts plus the ERROR and INFO rows
        through the ``print_ok`` / ``print_err`` / ``print_info`` helpers.
    :return: the parsed DataFrame, indexed by ``ts``.
    """
    # Raw strings: '\[' and '\d' are regex escapes, not Python string escapes.
    rg_log_msg = re.compile(
        r'(?P<tipo>INFO|WARNING|DEBUG|ERROR) \[(?P<func>.+?)\] '
        r'- (?P<ts>\d{1,2}/\d\d/\d\d\d\d \d\d:\d\d:\d\d): (?P<msg>.*?)\n', re.DOTALL)
    with open(log_file, 'r') as log_f:
        df_log = pd.DataFrame(rg_log_msg.findall(log_f.read()),
                              columns=['tipo', 'func', 'ts', 'msg'])
    df_log = df_log.drop('func', axis=1)
    df_log['tipo'] = df_log['tipo'].astype('category')
    # Vectorised parse; also yields a datetime64 column when the log has no matching lines.
    df_log['ts'] = pd.to_datetime(df_log['ts'], format='%d/%m/%Y %H:%M:%S')
    df_log.loc[df_log.msg.str.startswith('Tªs --> '), 'temp'] = True
    df_log.loc[df_log.msg.str.startswith('SENDED: '), 'debug_send'] = True
    b_warn = df_log.tipo == 'WARNING'
    df_log.loc[b_warn, 'no_red'] = df_log[b_warn].msg.str.startswith('OSError: [Errno 101] La red es inaccesible')
    # The mark is literal text: with regex matching its trailing '...' would match any 3 characters.
    df_log['exec'] = df_log['msg'].str.contains(INIT_LOG_MARK, regex=False).cumsum().astype(int)
    if extract_temps:
        rg_temps = r'Tªs --> (?P<CPU>\d{1,2}\.\d) / (?P<GPU>\d{1,2}\.\d) ºC'
        temps = df_log.loc[df_log['temp'].notnull(), 'msg'].str.extract(rg_temps, expand=True).astype(float)
        # Join while the index is still the unique positional one: joining on 'ts' would
        # multiply rows (and leak temperatures onto other messages) whenever two log
        # lines share the same second.
        df_log = df_log.join(temps)
    df_log = df_log.set_index('ts')
    if verbose:
        clasific = df_log.groupby(['exec', 'tipo']).count().dropna(how='all').astype(int)
        print_ok(clasific)
        conteo_tipos = df_log.groupby('tipo').count()
        if 'ERROR' in conteo_tipos.index:
            print_err(df_log[df_log.tipo == 'ERROR'].dropna(how='all', axis=1))
        if 'INFO' in conteo_tipos.index:
            print_info(df_log[df_log.tipo == 'INFO'].dropna(how='all', axis=1))
    return df_log
# Switch to the ENERPI data directory so the relative file names used below resolve.
# NOTE(review): absolute, machine-specific path; the notebook will not run elsewhere as-is.
os.chdir('/Users/uge/Dropbox/PYTHON/PYPROJECTS/enerpi/enerpi/DATA/')
log_file = 'enerpi.log'
# Parse the log (with temperature extraction and the printed summaries) and preview the first rows.
df_log = extract_log_file(log_file, extract_temps=True, verbose=True)
df_log.head()
Out[9]:
In [11]:
# Keep only the rows whose execution counter is 4, then show the final ten of them.
last = df_log.loc[df_log['exec'].eq(4)]
last.tail(n=10)
Out[11]:
In [14]:
# Rows of `last` that are flagged neither as "network unreachable" warnings nor as
# temperature readings. Both flag columns hold True/False/NaN as objects, so they are
# compared against True explicitly (NaN != True is True) instead of negating the result
# of fillna(False), which only works while pandas silently downcasts it to bool.
last[last['no_red'].ne(True) & last['temp'].ne(True)]
Out[14]:
In [20]:
# For the temperature rows of `last`: average CPU and GPU per row, then plot 1-minute means.
(
    last.loc[last.temp.fillna(False), ['CPU', 'GPU']]
    .mean(axis=1)
    .resample('1min')
    .mean()
    .plot(figsize=(18, 10))
)
Out[20]:
In [21]:
# Load the 'rms' table from temp_data.h5 and plot the 1-minute means of its 'power' and 'ldr' columns.
data = pd.read_hdf('temp_data.h5', 'rms')
data.resample('1min').mean()[['power', 'ldr']].plot(figsize=(18, 10))
Out[21]:
In [68]:
# From here on `extract_log_file` is the packaged version: this import rebinds the name.
from enerpi.api import extract_log_file

log = extract_log_file('enerpi.log')
data = pd.read_hdf('TODAY.h5', key='rms')
# Keep the rows with execution counter 2 and discard the columns that are entirely empty for them.
log = log.loc[log['exec'].eq(2)].dropna(axis='columns', how='all')
print(log.head())
log.tail()
Out[68]:
In [80]:
# Number of rows reported in the message, divided by 100. Two message formats carry it:
# '... KB, <n> rows' and 'ROWS_NOW=<n>'; the second fills the gaps left by the first.
# Patterns are raw strings so '\d' reaches the regex engine as written.
log['n_rows'] = log['msg'].str.extract(r'KB, (\d{1,10}) rows', expand=False).astype(float) / 100
log.loc[log['n_rows'].isnull(), 'n_rows'] = log.loc[log['n_rows'].isnull(), 'msg'].str.extract(
    r'ROWS_NOW=(\d{1,10})', expand=False).astype(float) / 100
# Store size in KB, taken from 'Size Store: <x> KB,' or, failing that, 'New Size: <x> KB'.
log['size_kb'] = log['msg'].str.extract(r'Size Store: (\d{1,10}\.\d) KB,', expand=False).astype(float)
log.loc[log['size_kb'].isnull(), 'size_kb'] = log.loc[log['size_kb'].isnull(), 'msg'].str.extract(
    r'New Size: (\d{1,10}\.\d) KB', expand=False).astype(float)
#log['size_kb_c'] = log['msg'].str.extract(r'New Size: (\d{1,10}\.\d) KB', expand=False).astype(float)
# Plot everything except the run counter (dropped by column rather than via a double transpose,
# which would turn every column into object dtype).
log.drop(columns='exec').plot(figsize=(18, 8))
Out[80]:
In [85]:
# Same plot as the full-range one, restricted to 12:00-13:00 and to rows without any missing value.
(
    log.drop(columns='exec')
    .between_time('12:00', '13:00')
    .dropna()
    .plot(figsize=(18, 8))
)
Out[85]:
In [86]:
# Plot `data` with the default pandas plot settings.
data.plot()
Out[86]:
In [88]:
# Plot only the 'power' column of `data`.
data.power.plot()
Out[88]:
In [94]:
# Write `data` to a scratch HDF5 file and print the file size (bytes / 1000) before and
# after opening it as a store.
data.to_hdf('pruebas.hf5', 'data')
print_yellow(os.path.getsize('pruebas.hf5') / 1000)
# NOTE(review): per the pandas docs, mode 'w' creates a new file and replaces an existing
# one, so the store printed here no longer holds the table written above. Confirm that is
# intended; 'r' or 'a' would keep the existing contents.
with pd.HDFStore('pruebas.hf5', 'w') as st:
    print_infob(st)
    #print_infob(st.remove('/data'))
print_magenta(os.path.getsize('pruebas.hf5') / 1000)
In [96]:
# Naming scheme for the stores: file extension, directory names, and the name of today's store.
STORE_EXT = '.h5'
DIR_CURRENT_MONTH, DIR_BACKUP = 'CURRENT_MONTH', 'OLD_STORES'
ST_TODAY = '{}{}'.format('TODAY', STORE_EXT)
# Relative path of today's store inside the current-month directory.
os.path.join(DIR_CURRENT_MONTH, ST_TODAY)
Out[96]:
In [135]:
# `json` is needed below; elsewhere in this notebook it is only imported by a later cell,
# so without this line a top-to-bottom run fails here with NameError.
import json

# Scratch check: concatenating a frame with a copy of itself and de-duplicating on 'ts'.
pru = data.head()
pru2 = pru.copy()
#pru2.index = pru2.index + pd.Timedelta('2D')
print(pru)
pru2
pru3 = pd.DataFrame(pd.concat([pru, pru2], axis=0)).reset_index().drop_duplicates(subset='ts').set_index('ts')
pru3.index.is_monotonic_increasing
# Empty frame with the catalog's column layout.
df = pd.DataFrame([], columns=['st', 'key', 'ts_ini', 'ts_fin', 'ts_st', 'n_rows', 'is_raw', 'cols']
                  ).sort_values(by='ts_ini')
pru2.ref += 100
pru2
# Load the catalog CSV, parsing its three timestamp columns.
index = pd.read_csv('data_catalog.csv', index_col=0, parse_dates=['ts_ini', 'ts_fin', 'ts_st'])
# 'cols' is stored as text with single quotes; swap them for double quotes so it parses as JSON.
index.cols = index.cols.map(lambda x: json.loads(x.replace("'", '"')))
index = index.drop_duplicates(subset=['st', 'key', 'ts_st', 'ts_ini', 'ts_fin'])
index.tail()
Out[135]:
In [143]:
# Display the 'hours' table stored in the August 2016 monthly file.
pd.read_hdf('DATA_YEAR_2016/DATA_2016_MONTH_08.h5', 'hours')
#pd.read_hdf('debug_buffer_disk.h5', 'rms')
Out[143]:
In [34]:
#pd.read_hdf('TODAY.h5', 'rms')
# NOTE(review): `json` is also used by a cell that appears earlier in the notebook; this
# import would be better placed with the other imports at the top.
import json
# Load the catalog CSV, parsing 'ts_ini' and 'ts_fin' as timestamps.
# NOTE(review): absolute, machine-specific path.
index = pd.read_csv('/Users/uge/Dropbox/PYTHON/PYPROJECTS/enerpi/enerpi/DATA/data_catalog.csv',
index_col=0, parse_dates=['ts_ini', 'ts_fin'])
# 'cols' is stored as text with single quotes; swap them for double quotes so it parses as JSON.
index.cols = index.cols.map(lambda x: json.loads(x.replace("'", '"')))
print_info(index.dtypes)
index
Out[34]:
In [46]:
Out[46]:
In [49]:
# Show the decoded 'cols' value of catalog entry 22. The cell that loads the catalog
# already runs every 'cols' value through json.loads, so the entry is normally decoded
# by now and has no .replace(); only decode it when it is still raw text.
cols_22 = index.cols[22]
json.loads(cols_22.replace("'", '"')) if isinstance(cols_22, str) else cols_22
Out[49]:
In [144]:
#'enerpi.log'
# Re-parse the log with whichever `extract_log_file` is currently bound (an earlier cell
# rebinds the name to the enerpi.api version) and display the whole frame.
log = extract_log_file('enerpi.log')
log
Out[144]:
In [153]:
# Start of a look-back window: the current time with minutes, seconds and microseconds
# zeroed, minus `last_hours` hours.
last_hours = 24
pd.Timestamp.now().replace(minute=0, second=0, microsecond=0) - pd.Timedelta(hours=last_hours)
Out[153]:
In [152]:
# Current time with minutes, seconds and microseconds zeroed.
pd.Timestamp.now().replace(minute=0, second=0, microsecond=0)
Out[152]:
In [154]:
# Load the 'rms' table of the debug store, print its summary and show per-column non-null counts.
last_data = pd.read_hdf('OLD_STORES/debug.h5', 'rms')
last_data.info()
last_data.count()
Out[154]:
In [159]:
# Load the 'rms' table of the backed-up temp_data store and print its summary.
raw_data = pd.read_hdf('OLD_STORES/temp_data.h5', 'rms')
raw_data.info()
In [161]:
# Stack the 'rms' tables of the day-12 and day-13 stores, keep everything up to
# 2016-08-13 21:01:02, and print a summary of the result.
p_data = pd.concat(
    [pd.read_hdf(store_path, 'rms')
     for store_path in ('CURRENT_MONTH/DATA_2016_08_DAY_12.h5',
                        'CURRENT_MONTH/DATA_2016_08_DAY_13.h5')]
).loc[:'2016-08-13 21:01:02']
p_data.info()
In [166]:
# Summary statistics of the index-aligned difference between the two 'ldr' series.
(raw_data.ldr - p_data.ldr).describe()
Out[166]:
In [182]:
# Put both 'ldr' series side by side, aligned on their index.
ldr = pd.concat([raw_data['ldr'], p_data['ldr']], axis=1)
ldr.columns = ['raw', 'process']
# Value expected from the raw reading: scaled by 1000, rounded, stored as int16.
ldr['calc'] = ldr['raw'].mul(1000.).round(0).astype('int16')
# Count the rows per (process, calc) pair where the two values disagree.
compara = (
    ldr.loc[ldr['calc'].ne(ldr['process'])]
    .groupby(['process', 'calc'])
    .count()
)
# Lookup from a 'process' value to its 'calc' value; for a repeated 'process' value the
# pair with the highest 'calc' is the one that remains.
d_ren = (
    compara.reset_index()
    .sort_values(by='calc')
    .set_index('process')['calc']
    .to_dict()
)
In [185]:
# Plot 'ldr' after translating it through d_ren (values missing from the dict become NaN).
last_data.ldr.map(d_ren).plot()
Out[185]:
In [186]:
# Plot the 'power' column of the debug store data.
last_data.power.plot()
Out[186]:
In [190]:
# Translate 'ldr' through d_ren, leaving values without an entry untouched, then plot.
last_data.ldr.map(lambda value: d_ren.get(value, value)).plot()
Out[190]:
In [191]:
# Load the 'rms' table of enerpi_data.h5 and print its summary.
raw_data_last = pd.read_hdf('enerpi_data.h5', 'rms')
raw_data_last.info()
In [193]:
# Print the summary of the 'rms' table of each daily store, followed by today's store.
for store_path in ('CURRENT_MONTH/DATA_2016_08_DAY_12.h5',
                   'CURRENT_MONTH/DATA_2016_08_DAY_13.h5',
                   'CURRENT_MONTH/DATA_2016_08_DAY_14.h5',
                   'CURRENT_MONTH/TODAY.h5'):
    pd.read_hdf(store_path, 'rms').info()
In [195]:
# Plot today's 'ldr' translated through d_ren (values missing from the dict become NaN).
pd.read_hdf('CURRENT_MONTH/TODAY.h5', 'rms').ldr.map(d_ren).plot()
Out[195]:
In [199]:
# Same plot for the day-13 store.
pd.read_hdf('CURRENT_MONTH/DATA_2016_08_DAY_13.h5', 'rms').ldr.map(d_ren).plot()
Out[199]:
In [202]:
# Same plot for the day-14 store.
pd.read_hdf('CURRENT_MONTH/DATA_2016_08_DAY_14.h5', 'rms').ldr.map(d_ren).plot()
Out[202]:
In [201]:
# Day-12 store: 'ldr' plotted as stored, without the d_ren translation.
pd.read_hdf('CURRENT_MONTH/DATA_2016_08_DAY_12.h5', 'rms').ldr.plot()
Out[201]:
In [215]:
import random
# Synthetic check of the scale-round-cast step: 1001 evenly spaced values in [0, 1] scaled
# by 1000, each with uniform noise in [0, 0.1) added, then rounded and cast to int16.
# NOTE(review): no seed is set, so the noise differs between runs.
(pd.Series(np.linspace(0, 1, 1001)) * 1000).apply(lambda x: x + random.random() / 10).round(0).astype('int16')
Out[215]:
In [221]:
log = extract_log_file('enerpi.log')
# Rows of the most recent run (highest execution counter), without the columns that are
# entirely empty for that run.
last_log = (
    log.loc[log['exec'].eq(log['exec'].max())]
    .dropna(axis='columns', how='all')
)
last_log.head()
Out[221]:
In [223]:
# INFO-level rows of the most recent run.
last_log[last_log.tipo == 'INFO']
Out[223]:
In [238]:
# Extract 'OSError: [Errno 101] La red es inaccesible; C_UNREACHABLE: [ 6 969]'
# The two numbers inside the brackets become the integer columns c_cont and c_tot.
# The pattern is a raw string so '\[', '\s' and '\d' reach the regex engine as written;
# '\s{0,7}' is the same "up to seven whitespace characters" as seven repeated '\s?'.
c_unreach = (
    last_log.loc[last_log['msg'].str.contains('Errno 101'), 'msg']
    .str.extract(r'C_UNREACHABLE: \[\s{0,7}(?P<c_cont>\d{1,10}) (?P<c_tot>\d{1,10})\]', expand=True)
    .dropna()  # an 'Errno 101' message without the counters would make astype(int) fail
    .astype(int)
)
c_unreach.plot()
Out[238]:
In [243]:
# Put the counters on a regular 1-second grid; seconds without an entry become NaN.
cu = c_unreach.resample('1s').first()
# Fill the gaps with the dedicated methods (fillna(method=...) is deprecated):
# c_cont takes the next observed value, c_tot carries the last observed value forward.
cu['c_cont'] = cu['c_cont'].bfill().astype(int)
cu['c_tot'] = cu['c_tot'].ffill().astype(int)
cu.plot()
Out[243]:
In [244]:
# Summary statistics of the gap-filled counters.
cu.describe()
Out[244]:
In [245]:
# Plot only the c_cont counter.
cu.c_cont.plot()
Out[245]:
In [247]:
# Hourly maximum of each counter.
cu.resample('1h').max()
Out[247]:
In [ ]: