In [2]:
%matplotlib inline
%config InlineBackend.figure_format='retina'
from IPython.display import Image, HTML
import random
import math
import pysolar
import datetime as dt
import pytz
import matplotlib.dates as mpd
import scipy
from scipy import signal
from scipy.signal import medfilt
import numexpr as ne
from datacharm import *
from datacharm.dataplots import tableau20, ch_color
from datacharm.datasun import sun_position
from enerpi.base import timeit
from enerpi.api import enerpi_data_catalog
from enerpi.enerplot import plot_tile_last_24h, plot_power_consumption_hourly
from enerpi.process import separa_ldr_artificial_natural, get_solar_day
from enerpi.radsolar import ajuste_ldr_con_barrido_irrad_inc, local_maxima_minima
LAT = 38.631463
LONG = -0.866402
ELEV_M = 500
TZ = pytz.timezone('Europe/Madrid')
FS = (16, 10)
def _tslim(ax, h0=12, hf=22, m0=0, mf=0):
    """Clamp the x-axis of `ax` to the time span [h0:m0, hf:mf].

    The current x-limits are converted to tz-aware datetimes (TZ), their
    time-of-day replaced by the requested bounds, and written back.
    Returns the same Axes for chaining.
    """
    start, end = (mpd.num2date(x, tz=TZ) for x in ax.get_xlim())
    lower = mpd.date2num(start.replace(hour=h0, minute=m0))
    upper = mpd.date2num(end.replace(hour=hf, minute=mf))
    ax.set_xlim([lower, upper])
    return ax
def set_sun_times(df, tz=TZ, lat_deg=LAT, long_deg=LONG, offset='5min'):
    """Add a boolean 'sun' column marking daylight rows of a single-day frame.

    df: DataFrame on a DatetimeIndex covering one calendar day.
    tz / lat_deg / long_deg: location used to query pysolar for sunrise/sunset.
    offset: Timedelta string widening the daylight interval on both sides.

    Returns `df` with the new column (mutated in place).
    Raises NotImplementedError for multi-day frames (not supported yet).
    """
    if df.index[-1].date() == df.index[0].date():
        # Same day:
        ts_day = pd.Timestamp(df.index[0].date(), tz=tz)
        # Query pysolar at midday so the date is unambiguous.
        sunrise, sunset = pysolar.util.get_sunrise_sunset(lat_deg, long_deg, ts_day + pd.Timedelta('12h'))
        delta = pd.Timedelta(offset)
        sunrise, sunset = sunrise - delta, sunset + delta
        df['sun'] = False
        df.loc[sunrise:sunset, 'sun'] = True
        return df
    # BUG FIX: the original signalled this path with print_err + bare `assert()`,
    # which is stripped under `python -O` and gives an opaque AssertionError otherwise.
    raise NotImplementedError('set_sun_times: multi-day frames are not implemented')
sns.set_style('whitegrid')
# Catalog and read-in of all stored data.
cat = enerpi_data_catalog()
data, data_s = cat.get_all_data()
print_info(data_s.tail())
# Localize the raw LDR series to the configured timezone (Europe/Madrid).
LDR = pd.DataFrame(data.ldr).tz_localize(TZ)
print_cyan(LDR.describe().T.astype(int))
# Histogram with ~5-unit-wide bins spanning the full observed value range.
LDR.hist(bins=(LDR.max() - LDR.min() + 1).values[0] // 5, log=True, figsize=(18, 8))
# NOTE(review): hardcoded absolute local path — breaks on any other machine.
Image('/Users/uge/Desktop/LDR_analysis/LDR_análisis_día_2016_09_12.png')
Out[2]:
In [3]:
# NOTE(review): hardcoded absolute local path — consider a configurable DATA_DIR.
path_st = '/Users/uge/Desktop/LDR_analysis/POIS_ident_1s.h5'
# data_homog, data_rs, pois = genera_pois_de_all_data_LDR(path_st)
pois = pd.read_hdf(path_st, 'POIS')
data_homog = pd.read_hdf(path_st, 'data_homog')
data_rs = pd.read_hdf(path_st, 'data_rs')
print(len(pois), pois.columns)
# Attach the raw 1-s LDR signal to the resampled frame and drop the 'sun' flag.
data = data_rs.join(data_homog.ldr.rename('raw_ldr')).drop('sun', axis=1)
# Distance from the median-filtered signal to its rolling (10-sample) max/min,
# and their difference as a signed step indicator.
data['mf_dmax'] = data['median_filter'].rolling(10).max() - data['median_filter']
data['mf_dmin'] = data['median_filter'] - data['median_filter'].rolling(10).min()
data['mf_dif'] = data['mf_dmax'] - data['mf_dmin']
In [386]:
data['mf_dif'].values[:100]
Out[386]:
In [7]:
data.iloc[:10000].plot(figsize=FS)
Out[7]:
In [9]:
data['mf_dif'].hist(figsize=FS, lw=0, bins=600, log=True)
Out[9]:
In [12]:
ax = data['mf_dif'].resample('5s').max().hist(figsize=FS, lw=0, bins=600, log=True, alpha=.5)
ax = data['mf_dif'].resample('5s').min().hist(ax=ax, lw=0, bins=600, log=True, alpha=.5)
In [13]:
ax = data['mf_dif'].rolling(5, center=True).max().hist(figsize=FS, lw=0, bins=600, log=True, alpha=.5)
In [17]:
ax = data['mf_dif'].rolling(5, center=True).sum().iloc[2000:10000].plot(figsize=FS)
data['median_filter'].iloc[2000:10000].plot(ax=ax)
data['median_filter'].rolling(5, center=True).max().iloc[2000:10000].plot(ax=ax)
Out[17]:
In [62]:
# Single-day slice for event-detection experiments.
subdata = data.loc['2016-08-12': '2016-08-12'].copy()
#subdata['evento'] = 0
# Label rises (+1) and falls (-1) where the step indicator passes +-5;
# every other row keeps NaN in 'evento'.
subdata.loc[subdata['mf_dif'] > 5, 'evento'] = 1
subdata.loc[subdata['mf_dif'] < -5, 'evento'] = -1
# Change-of-state signal: diff of the cumulative event sum.
subdata['ch_evento'] = subdata.evento.fillna(0).cumsum().diff() #subdata.evento.fillna(method='ffill').diff().fillna(0)
print(subdata.evento.sum(), subdata.evento.abs().sum())
subdata.evento.fillna(0).cumsum().diff().plot()
Out[62]:
In [53]:
ch_evento = subdata.evento.diff().fillna(0)
ch_evento[ch_evento.abs() > 0]
Out[53]:
In [63]:
t0, tf = '13:30', '14:00'
ax = subdata.median_filter.between_time(t0, tf).plot(figsize=FS, lw=1.5, color='darkblue')
subdata.median_filter[subdata.ch_evento < 0].between_time(t0, tf).plot(lw=0, marker='o', markersize=5, alpha=.5, color='green', ax=ax)
subdata.median_filter[subdata.ch_evento > 0].between_time(t0, tf).plot(lw=0, marker='o', markersize=5, alpha=.5, color='red', ax=ax)
Out[63]:
In [55]:
ax = subdata.mf_dif.between_time(t0, tf).plot(figsize=FS, lw=1.5, color='darkblue')
subdata.mf_dif[subdata.ch_evento < 0].between_time(t0, tf).plot(lw=0, marker='o', markersize=5, alpha=.5, color='green', ax=ax)
subdata.mf_dif[subdata.ch_evento > 0].between_time(t0, tf).plot(lw=0, marker='o', markersize=5, alpha=.5, color='red', ax=ax)
Out[55]:
In [58]:
subdata.evento.fillna(0).cumsum().plot()
Out[58]:
In [297]:
from statsmodels.nonparametric.smoothers_lowess import lowess
lowess?
In [92]:
# LOWESS smoothing of the raw LDR over the selected window (frac=.005 → very local fit).
result = lowess(subdata.between_time(t0, tf).raw_ldr, subdata.between_time(t0, tf).index.values, frac=.005)
x_smooth = result[:,0]
y_smooth = result[:,1]
#tot_result = lowess(data.pressure, data.time.values, frac=0.1)
#x_tot_smooth = tot_result[:,0]
#y_tot_smooth = tot_result[:,1]
fig, ax = plt.subplots(figsize=FS)
ax.plot(subdata.between_time(t0, tf).index, subdata.between_time(t0, tf).raw_ldr, label="raw", lw=1)
ax.plot(subdata.between_time(t0, tf).index, subdata.between_time(t0, tf).median_filter, label="MF")
#ax.plot(x_tot_smooth, y_tot_smooth, label="lowess 1%", linewidth=3, color="g")
# Plot the smoothed values against the original datetime index (same length/order).
ax.plot(subdata.between_time(t0, tf).index, y_smooth, label="lowess", linewidth=3, color="r", alpha=.5)
plt.legend()
Out[92]:
In [ ]:
In [ ]:
In [111]:
import numpy
def smooth(x, window_len=11, window='hanning'):
    """Smooth 1-D data by convolving it with a scaled window.

    Reflected copies of the signal (window_len - 1 samples long) are
    prepended and appended so that transients are minimized at both ends
    of the output.

    input:
        x: 1-D array-like input signal
        window_len: dimension of the smoothing window; should be an odd integer
        window: one of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman';
            a flat window produces a moving average.
    output:
        the smoothed signal. NOTE: len(output) == len(x) + window_len - 1;
        trim with y[(window_len // 2 - 1):-(window_len // 2)] if the input
        length must be preserved.
    raises:
        ValueError: if x is not 1-D, shorter than window_len, or the window
            name is unknown.

    see also:
        numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman,
        numpy.convolve, scipy.signal.lfilter
    TODO: the window parameter could be the window itself (an array) instead
    of a string.
    """
    if x.ndim != 1:
        raise ValueError("smooth only accepts 1 dimension arrays.")
    if x.size < window_len:
        # BUG FIX: the original `raise (ValueError, "...")` raised a TypeError
        # in Python 3 (a tuple is not an exception).
        raise ValueError("Input vector needs to be bigger than window size.")
    if window_len < 3:
        return x
    if window not in ('flat', 'hanning', 'hamming', 'bartlett', 'blackman'):
        raise ValueError("Window is one of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")
    # Mirror-pad both ends with window_len - 1 reflected samples.
    s = numpy.r_[x[window_len - 1:0:-1], x, x[-1:-window_len:-1]]
    if window == 'flat':  # moving average
        w = numpy.ones(window_len, 'd')
    else:
        # getattr lookup replaces the original eval('numpy.' + window + '(window_len)').
        w = getattr(numpy, window)(window_len)
    y = numpy.convolve(w / w.sum(), s, mode='valid')
    return y
from numpy import *
from pylab import *
def smooth_demo(data=None):
    """Visual demo of `smooth`: window shapes (top) and a smoothed signal (bottom).

    data: optional pandas Series with a `.values`/`.index`; when None, a noisy
    sine wave is synthesized instead.
    """
    if data is None:
        t = linspace(-4, 4, 100)
        x = sin(t)
        xn = x + randn(len(t)) * 0.1
        y = smooth(x)
    else:
        x = data.values
        # NOTE(review): with real data `xn` is just the sample index, so the
        # bottom panel overlays the raw signal with smoothed index ramps.
        xn = np.array(range(len(data.index)))
        y = smooth(data)
    ws = 31
    figure(figsize=FS)
    subplot(211)
    plot(ones(ws))
    windows = ['flat', 'hanning', 'hamming', 'bartlett', 'blackman']
    # BUG FIX: dropped `hold(True)` — removed in matplotlib >= 3.0 (overplotting
    # on the same axes is the default behaviour anyway).
    for w in windows[1:]:
        # getattr lookup replaces the original eval('plot(' + w + '(ws) )').
        plot(getattr(numpy, w)(ws))
    axis([0, 30, 0, 1.1])
    legend(windows)
    title("The smoothing windows")
    subplot(212)
    plot(x)
    plot(xn)
    for w in windows:
        plot(smooth(xn, 10, w))
    l = ['original signal', 'signal with noise']
    l.extend(windows)
    legend(l)
    title("Smoothing a noisy signal")
    show()
smooth_demo()
smooth_demo(subdata.raw_ldr)
smooth_demo(subdata.median_filter)
In [121]:
# Compare the five window types of `smooth` on the raw signal.
# NOTE(review): smooth() output is longer than its input (window_len - 1 extra
# samples), so each Series gets a fresh RangeIndex rather than the time index.
ss = []
for w in ['flat', 'hanning', 'hamming', 'bartlett', 'blackman']:
    #subdata[w] = smooth(subdata.median_filter,window_len=11,window=w)
    ss.append(pd.Series(smooth(subdata.raw_ldr,window_len=100,window=w), name=w))
#subdata.plot(figsize=FS)
pd.concat(ss, axis=1).iloc[5000:10000].plot(figsize=FS, lw=1, alpha=.8)
Out[121]:
In [323]:
data.columns
Out[323]:
In [3]:
str_day = '2016-09-05'
day = data.loc[str_day:str_day].copy()
day.median_filter.iloc[::5].plot(figsize=FS)
Out[3]:
In [147]:
def plot_detalle_filtros(data_day, t0, tf):
    """Plot raw LDR, median-filtered signal and an EWM smoother between t0 and tf.

    data_day: frame with 'raw_ldr' and 'median_filter' columns on a DatetimeIndex.
    t0, tf: 'HH:MM' strings passed to between_time.
    """
    df = data_day.between_time(t0, tf)
    # BUG FIX: the original read the global `day` here, ignoring `data_day`.
    ewm = data_day.ewm(halflife=2).raw_ldr.mean()
    f, ax = plt.subplots(1, 1, figsize=FS)
    df.raw_ldr.plot(ax=ax, lw=1.5, alpha=.8, color=tableau20[0])
    df.median_filter.plot(ax=ax, lw=1.25, alpha=.8, color=tableau20[2])
    ewm.between_time(t0, tf).plot(ax=ax, lw=3, alpha=.5, color=tableau20[6])
    plt.show()
#t0, tf = '21:00', '22:30'
t0, tf = '11:00', '12:00'
plot_detalle_filtros(day, t0, tf)
t0, tf = '20:50', '22:30'
plot_detalle_filtros(day, t0, tf)
In [218]:
prueba = day.between_time('21:14', '21:25')
ax = prueba.raw_ldr.plot(figsize=FS, marker='o', alpha=.7)
prueba.median_filter.plot(ax=ax, marker='o', alpha=.6)
prueba.rms_err_m7.plot(ax=ax, marker='o', alpha=.6)
Out[218]:
In [ ]:
In [5]:
tic = time()
def plot_detalle_filtros(data_day, t0, tf):
    """Plot raw LDR, median filter and step signals between t0 and tf.

    NOTE(review): this redefinition silently shadows the earlier
    `plot_detalle_filtros`; consider renaming one of them.
    """
    df = data_day.between_time(t0, tf)
    # BUG FIX: removed the dead `ewm = day.ewm(...)` line — it read the global
    # `day` instead of `data_day`, and its plot call below is commented out.
    f, ax = plt.subplots(1, 1, figsize=FS)
    df.raw_ldr.plot(ax=ax, lw=1.5, alpha=.8, color=tableau20[0])
    df.median_filter.plot(ax=ax, lw=1.25, alpha=.8, color=tableau20[2])
    #ewm.between_time(t0, tf).plot(ax=ax, lw=3, alpha=.5, color=tableau20[6])
    df.steps.plot(ax=ax, lw=1, alpha=.7, color=tableau20[8])
    df.steps_max.plot(ax=ax, lw=1, alpha=.8, color=tableau20[10], marker='o')
    plt.show()
def _info_tiempos(cad):
    """Print elapsed seconds since the global `tic` checkpoint, then reset it."""
    global tic
    now = time()
    elapsed = now - tic
    print_ok('--> {:35}\t OK\t [TOOK {:.3f} seconds]'.format(cad, elapsed))
    tic = now
@timeit('extrae_eventos_LDR', verbose=True)
def extrae_eventos_LDR(data_raw, kernel_size=75, threshold_step=10, roll_number=7):
    """Detect step events (sharp rises/falls) in a raw LDR light series.

    data_raw: frame with an 'ldr' column on a DatetimeIndex.
    kernel_size: median-filter kernel length (samples).
    threshold_step: minimum |step| magnitude for an event to be kept.
    roll_number: rolling-window length used for step / extremum detection.

    Returns (eventos, data_homog): detected events with contextual features
    plus sun position, and the 1-s homogenized/filtered frame.
    """
    _info_tiempos('Iniciando desde...')
    print_info("IDENTIFICACIÓN POI's LDR.\n * {} raw points. De {:%d-%m-%y} a {:%d-%m-%y} ({:.1f} horas)"
               .format(len(data_raw), data_raw.index[0], data_raw.index[-1], len(data_raw) / 3600.))
    # NOTE(review): the next two values are never used in this function.
    delta_rs = pd.Timedelta('5s')
    DELTA_MIN_PARA_CONSIDERACION_MAX_LOCAL = 20
    # Resample to 1 s, back-fill short gaps (<= 5 s), mark longer gaps with -1.
    data_homog = pd.DataFrame(data_raw.ldr.resample('1s').mean().fillna(method='bfill', limit=5).fillna(-1))
    _info_tiempos('Resampling 1s')
    # Median filter to suppress impulsive noise.
    data_homog['median_filter'] = medfilt(data_homog.ldr, kernel_size=[kernel_size])
    _info_tiempos('MedFilt')
    # mf_resampled = set_sun_times(mf_resampled, delta_rs, tz=TZ, lat_deg=LAT, long_deg=LONG, offset='10min')
    # Step signal: mean of the window's last two samples minus mean of its first two.
    mf_roller = data_homog.median_filter.rolling(roll_number, center=True)
    ind_mid = roll_number // 2
    data_homog['steps'] = mf_roller.apply(lambda x: (x[roll_number-1] + x[roll_number-2]) / 2 - (x[1] + x[0]) / 2).fillna(0)
    _info_tiempos('Steps calc')
    # Keep a step as an event only when the window's extremum falls on the
    # center sample AND exceeds the threshold (local max for rises, min for falls).
    roll_steps = data_homog['steps'].rolling(roll_number, center=True)
    idx_up = data_homog[roll_steps.apply(lambda x: (np.argmax(x) == ind_mid) and (x[ind_mid] > threshold_step)).fillna(0) > 0].index
    idx_down = data_homog[roll_steps.apply(lambda x: (np.argmin(x) == ind_mid) and (x[ind_mid] < -threshold_step)).fillna(0) > 0].index
    _info_tiempos('Steps UP / DOWN Filter')
    #cols_eventos = ['steps', 'median_filter', 'rms_err_min301']
    cols_eventos = ['steps', 'median_filter']
    subidas = data_homog.loc[idx_up, cols_eventos].copy()    # rises
    bajadas = data_homog.loc[idx_down, cols_eventos].copy()  # falls
    _info_tiempos('Eventos')
    # Contextual features around each event: rolling slope and rolling median
    # of the filtered signal, sampled at shifted offsets (-11..+11, step 3).
    mf_slope_roll = data_homog.median_filter.diff().rolling(roll_number, center=True).mean()
    mf_roll = mf_roller.median()
    offsets_mf = range(-11, 12, 3)
    for d in offsets_mf:
        subidas['slope_roll_{}'.format(d)] = mf_slope_roll.shift(d).loc[idx_up]
        bajadas['slope_roll_{}'.format(d)] = mf_slope_roll.shift(d).loc[idx_down]
        subidas['mf_roll_{}'.format(d)] = mf_roll.shift(d).loc[idx_up]
        bajadas['mf_roll_{}'.format(d)] = mf_roll.shift(d).loc[idx_down]
    # A timestamp cannot be both a rise and a fall.
    assert(len(subidas.index.intersection(bajadas.index)) == 0)
    eventos = pd.concat([subidas, bajadas], axis=0).sort_index()
    eventos['es_subida'] = eventos['steps'] > 0
    _info_tiempos('Eventos slope & process')
    # Solar altitude / azimuth at each event timestamp.
    eventos = eventos.join(sun_position(eventos.index, latitude_deg=LAT, longitude_deg=LONG, elevation=ELEV_M,
                                        delta_n_calc=1, south_orient=True).round(1))
    _info_tiempos('Eventos SUN position')
    # INTERVALS (disabled draft):
    #day.loc[eventos.index, 'interv'] = 1
    #day.interv = day.interv.fillna(0).cumsum()
    #day.groupby('interv').median_filter.count()
    print(eventos.shape)
    return eventos, data_homog
#print_red(eventos.head(5))
str_day = '2016-09-01'
# NOTE(review): this rebinds `data` (previously the joined features frame) to
# the homogenized output of extrae_eventos_LDR — earlier cells' view goes stale.
eventos, data = extrae_eventos_LDR(LDR.loc[str_day:str_day], threshold_step=10, roll_number=7)
#print_cyan(data.head())
eventos.head()
Out[5]:
In [6]:
eventos_tot, data_tot = extrae_eventos_LDR(LDR, threshold_step=10, roll_number=7)
eventos_tot.count()
Out[6]:
In [32]:
sns.pairplot(eventos_tot, hue='es_subida')
Out[32]: