In [6]:
%matplotlib inline
%config InlineBackend.figure_format='retina'
from IPython.display import Image, HTML
import random
import math
import pysolar
import datetime as dt
import pytz
import matplotlib.dates as mpd
import scipy
import scipy.signal as signal
import sklearn
import numexpr as ne
from datacharm import *
from datacharm.dataplots import tableau20, ch_color
from enerpi.base import timeit
from enerpi.api import enerpi_data_catalog
from enerpi.enerplot import plot_tile_last_24h, plot_power_consumption_hourly
from enerpi.sunposition import sun_position
LAT = 38.631463
LONG = -0.866402
ELEV_M = 500
TZ = pytz.timezone('Europe/Madrid')
FS = (16, 10)
sns.set_style('ticks')
pd.set_option('display.width', 120)
def _tslim(ax, h0=12, hf=22, m0=0, mf=0):
xlim = [mpd.num2date(x, tz=TZ) for x in ax.get_xlim()]
new = [mpd.date2num(d.replace(hour=h, minute=m)) for d, h, m in zip(xlim, [h0, hf], [m0, mf])]
ax.set_xlim(new)
return ax
# Catálogo y lectura de todos los datos.
cat = enerpi_data_catalog()
data, data_s = cat.get_all_data()
print_info(data_s.tail())
LDR = pd.DataFrame(data.ldr).tz_localize(TZ)
print_cyan(LDR.describe().T.astype(int))
LDR.hist(bins=(LDR.max() - LDR.min() + 1).values[0] // 5, log=True, figsize=(18, 6), lw=0, color=tableau20[2])
plt.plot()
POWER = pd.DataFrame(data.power).tz_localize(TZ)
print_cyan(POWER.describe().T.astype(int))
POWER.hist(bins=int((POWER.max() - POWER.min() + 1).values[0] // 5), log=True, figsize=(18, 6), lw=0, color=tableau20[4])
Out[6]:
In [1126]:
sns.kdeplot(POWER.power.values, shade=True, gridsize=6000)
Out[1126]:
In [5]:
sns.palplot(tableau20)
In [34]:
homog_power = POWER.resample('1s').mean().fillna(method='bfill', limit=3).fillna(-1) #.round().astype('int16')
homog_power.info()
In [1268]:
POWER.power.round().value_counts()
Out[1268]:
In [41]:
power_day13 = homog_power.loc['2016-08-13']
ax = power_day13.plot(lw=1, alpha=.8)
power_day14 = homog_power.loc['2016-08-14']
power_day14.plot(ax=ax, lw=1, alpha=.8)
Out[41]:
In [30]:
power_day13.resample('1s').mean().fillna(method='bfill', limit=3).fillna(-1).round().astype(int)
Out[30]:
In [42]:
power_standby = homog_power.loc['2016-08-29']
power_standby.plot(lw=1, alpha=.8)
Out[42]:
In [103]:
mf5 = pd.Series(signal.medfilt(power_standby.power, kernel_size=5), index=power_standby.index, name='mf_5')
mf11 = pd.Series(signal.wiener(power_standby.power, 15), index=power_standby.index, name='wiener_11')
t0, tf = '3:10', '3:20'
ax = power_standby.between_time(t0, tf).plot(lw=1)
mf5.between_time(t0, tf).plot(ax=ax)
mf11.between_time(t0, tf).plot(ax=ax)
plt.show()
t0, tf = '3:20', '3:25'
ax = power_standby.between_time(t0, tf).plot(lw=1)
mf11.between_time(t0, tf).plot(ax=ax)
mf5.between_time(t0, tf).plot(ax=ax)
Out[103]:
In [100]:
power_standby.hist(bins=200, lw=0, log=True, alpha=.5)
mf11.hist(bins=200, lw=0, log=True, alpha=.5)
Out[100]:
In [104]:
N = len(power_standby)
out_fft = np.fft.rfft((power_standby.power - power_standby.power.mean()).values)
out_fft_mf = np.fft.rfft((mf5 - mf5.mean()).values)
out_fft_mf2 = np.fft.rfft((mf11 - mf11.mean()).values)
plt.figure(figsize=(18, 15))
corte = 350 # seg
plt.subplot(3, 2, 1)
plt.plot(np.abs(out_fft[:corte + 1]), lw=1, alpha=.7)
plt.subplot(3, 2, 2)
plt.plot(np.abs(out_fft[corte:N//2]), lw=1, alpha=.7)
plt.subplot(3, 2, 3)
plt.plot(np.abs(out_fft_mf[:corte + 1]), lw=1, alpha=.7)
plt.subplot(3, 2, 4)
plt.plot(np.abs(out_fft_mf[corte:N//2]), lw=1, alpha=.7)
plt.subplot(3, 2, 5)
plt.plot(np.abs(out_fft_mf2[:corte + 1]), lw=1, alpha=.7)
plt.subplot(3, 2, 6)
plt.plot(np.abs(out_fft_mf2[corte:N//2]), lw=1, alpha=.7);
In [62]:
plt.plot(np.abs(out_fft_mf2[:60]), lw=1, alpha=.7);
In [ ]:
In [312]:
@timeit('process_power', verbose=True)
def process_power(df_homog_power,
kernel_size=15, roll_window_event=9, threshold_event=10,
margin_td=pd.Timedelta('120s'),
margin_ant=pd.Timedelta('3s')):
"""
Tomando una pd.DataFrame de 'POWER' homogéneo (con resample mean a 1s),
* aplica filtrado 'Wiener' a la señal raw,
* calcula ∆_wiener, deltas de power entre samples
* calcula ∆'_wiener con roll_window_event centrado, detectando máximos y mínimos locales
en los cambios de ∆_wiener, tomándolos como eventos de cambio.
* Agrupa los eventos de cambio en subconjuntos separados, al menos, un pd.Timedelta 'margin_td'.
(forma el evento desde inicio - pd.Timedelta 'margin_ant')
return:
df_power, list_index_events, df_feats_grupos
"""
def _is_event(x):
mid = len(x) // 2
xmin = np.min(x)
xmax = np.max(x)
if ((xmax - xmin) > threshold_event):
if (x[mid] == xmin):
return -1
elif (x[mid] == xmax):
return 1
return 0
df_power = df_homog_power.copy()
df_power['wiener'] = signal.wiener(df_power.power, kernel_size).astype('float32')
df_power['medfilt'] = signal.medfilt(df_power.power, kernel_size).astype('float32')
df_power['roll_std'] = df_power.wiener.rolling(11).std().astype('float32')
df_power['deltas'] = df_power.wiener.diff().fillna(0)
df_power['eventos'] = df_power['deltas'].diff().fillna(0).rolling(roll_window_event, center=True
).apply(_is_event).fillna(0).astype('int16')
list_index_events = []
last_event_init = None
events_in_interval = []
for t in df_power[df_power['eventos'] != 0].index:
if last_event_init is None:
events_in_interval.append(t)
elif t > last_event_init + margin_td:
list_index_events.append(events_in_interval)
events_in_interval = [t]
else:
events_in_interval.append(t)
last_event_init = t
print_info('Nº de grupos de eventos: {} (para un nº de eventos total de: {})'
.format(len(list_index_events), len(df_power[df_power['eventos'] != 0])))
sub_events = []
cols = ['power', 'medfilt', 'wiener', 'deltas', 'eventos']
cols_feat_grupos = ['index_ge', 't0', 'duracion', 'wiener_min', 'wiener_mean', 'wiener_max', 'wiener_std', 'wiener_median',
'eventos_sum', 'eventos_abs_sum', 'deltas_max', 'deltas_min', 'deltas_sum', 'deltas_abs_sum']
for i, ts in enumerate(list_index_events):
t0, tf = ts[0] - margin_ant, ts[-1] + margin_td
d_i = df_power.loc[t0:tf, cols]
resumen_i = (i, d_i.index[0], len(d_i),
d_i.wiener.min(), d_i.wiener.mean(), d_i.wiener.max(), d_i.wiener.std(), d_i.wiener.median(),
d_i.eventos.sum(), d_i.eventos.abs().sum(),
d_i.deltas.max(), d_i.deltas.min(), d_i.deltas.sum(), d_i.deltas.abs().sum())
sub_events.append(resumen_i)
df_feats_grupos = pd.DataFrame(sub_events, columns=cols_feat_grupos).set_index(cols_feat_grupos[0])
return df_power, list_index_events, df_feats_grupos
df_power, list_index_events, df_feats_grupos = process_power(power_standby)
df_feats_grupos.describe()
Out[312]:
In [510]:
# Análisis frecuencial de eventos / comparación de fft's
# Separación de eventos por duración. KMeans++ de k clusters
k = 9
km_dur_event = KMeans(n_clusters=k, n_jobs=-1).fit(df_feats_grupos_all_2.duracion.values.reshape(-1, 1))
print_info(sorted(km_dur_event.cluster_centers_))
amplitudes = []
grupo_durac = []
for i, ts in enumerate(lista_grupos_eventos_total_2):
t0, tf = ts[0] - margin_ant, ts[-1] + margin_td
d_i = POWER_FILT_2.loc[t0:tf, cols]
if len(d_i) > 5:
w = blackman(len(d_i))
amplitudes_i = np.sqrt(np.abs(scipy.fftpack.rfft(d_i.power * w)))[:(len(d_i) // 2)]
#amplitudes_i = np.sqrt(np.abs(np.fft.rfft(d_i.power)))[:(len(d_i) // 2)]
amplitudes.append(amplitudes_i)
grupo_durac.append(km_dur_event.labels_[i])
colors = tableau20[::2][:len(km_dur_event.cluster_centers_)]
plt.figure(figsize=(18, 12))
for a, g in zip(amplitudes, grupo_durac):
color = colors[g]
if len(a) > 200:
params = dict(lw=.75, alpha=.3, color=color)
else:
params = dict(lw=.1, alpha=.01, color=color)
plt.plot(a[2:], **params)
plt.xlim(0, 600);
In [511]:
f, axes = plt.subplots(1, 2, figsize=(20, 12))
ymax, ne = 0, 0
for a, g in zip(amplitudes, grupo_durac):
color = colors[g]
if len(a) > 300:
params = dict(lw=.75, alpha=.35, color=color)
axes[0].plot(a[2:], **params)
ne += 1
ymax = max(ymax, max(a[2:]))
axes[0].annotate('{} Eventos (de ∆T > {} s)'.format(ne, 300), (2000, ymax * .75), ha='left')
ymax, ne = 0, 0
for a, g in zip(amplitudes, grupo_durac):
color = colors[g]
if len(a) <= 300:
if len(a) <= 100:
params = dict(lw=.25, alpha=.03, color=color)
else:
params = dict(lw=.5, alpha=.1, color=color)
axes[1].plot(a[2:], **params)
ne += 1
ymax = max(ymax, max(a[2:]))
axes[1].annotate('{} Eventos (de ∆T ≤ {} s)'.format(ne, 300), (200, ymax * .75), ha='left')
axes[1].set_xlim(0, 300);