In [2]:
%matplotlib inline
%config InlineBackend.figure_format='retina'
#%load_ext memory_profiler
from IPython.display import Image, HTML

import datetime as dt
#from hdbscan import HDBSCAN
from itertools import cycle
import math
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mpd
from mpl_toolkits.mplot3d import Axes3D
import numexpr as ne
import numpy as np
import pandas as pd
import pysolar
import pytz
import random
import scipy
from scipy.signal import medfilt, wiener, firwin
import scipy.signal as signal
import seaborn as sns
import sklearn
import sklearn.metrics as metrics
from sklearn.cluster import KMeans, DBSCAN, AffinityPropagation, AgglomerativeClustering
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler

from enerpi.base import timeit
#from enerpi.api import enerpi_data_catalog
#from enerpi.process_ldr import pca_clustering, plot_clustering, plot_clustering_as_voronoi, plot_clustering_figure, tableau20
from enerpi.enerplot import tableau20
from enerpi.event_detection import load_data, process_data_for_event_detection, rect_smoothing, genera_df_intervalos, plot_steps_detection, get_subsets
from enerpi.sunposition import sun_position
from prettyprinting import *


# Global plotting / display configuration.
TZ = pytz.timezone('Europe/Madrid')  # local timezone for all time-axis formatting
FS = (16, 10)  # default figure size
sns.set_style('ticks')
pd.set_option('display.width', 120)
cm = cycle(tableau20[2::2])  # endless color cycle for per-event coloring

# NOTE(review): hardcoded absolute local path — breaks reproduction on any
# other machine; should come from a configurable DATA_DIR.
train = pd.read_hdf('/Users/uge/Dropbox/PYTHON/PYPROJECTS/enerpi/notebooks/train.h5', 'power')
# Adds the derived columns used below (wiener, delta_wiener, abs_ch, r_std, r_mean).
train_ev = process_data_for_event_detection(train)

# Subsets of train data:
df_1, df_2, df_3, df_4, df_5, df_6, df_7 = get_subsets(train_ev)
# The subsets roughly correspond to the ranges below (kept for reference):
#df_1 = train_ev.loc['2016-09-10'].between_time('8:30', '10:30')
#df_2 = train_ev.loc['2016-09-08'].between_time('8:00', '14:00')
#t0, tf = '2016-09-10 19:30', '2016-09-10 23:40'
#df_3 = train_ev.loc[t0:tf]
#t0, tf = '2016-09-10 20:30', '2016-09-10 22:50'
#df_4 = train_ev.loc[t0:tf]

#df_5 = train_ev.loc['2016-09-15']
#df_6 = train_ev.loc['2016-09-16']
#df_7 = train_ev.loc['2016-09-17']

# Quick sanity check of the processed frame (head printed, tail rich-displayed).
print_info(train_ev.head())
train_ev.tail()


process_data_for_event_detection TOOK: 0.360 s
                                power      wiener  delta_wiener abs_ch     r_std      r_mean
ts                                                                                          
2016-09-08 00:00:00+02:00  361.305145  342.040003      0.000000  False  3.107158  351.192982
2016-09-08 00:00:01+02:00  369.346436  351.099659      9.059656  False  3.975461  352.145975
2016-09-08 00:00:02+02:00  366.049744  349.689666     -1.409993  False  4.284531  353.284948
2016-09-08 00:00:03+02:00  365.854706  350.670636      0.980969  False  4.483639  354.383428
2016-09-08 00:00:04+02:00  365.262390  351.124023      0.453387  False  4.566204  355.560576
Out[2]:
power wiener delta_wiener abs_ch r_std r_mean
ts
2016-09-21 23:59:55+02:00 303.374023 287.207422 -7.863055 False 7.336845 283.642861
2016-09-21 23:59:56+02:00 293.039185 277.787957 -9.419465 False 7.336845 283.642861
2016-09-21 23:59:57+02:00 303.040344 284.064992 6.277035 False 7.336845 283.642861
2016-09-21 23:59:58+02:00 301.786438 281.173786 -2.891206 False 7.336845 283.642861
2016-09-21 23:59:59+02:00 294.609619 272.548244 -8.625542 False 7.336845 283.642861

In [2]:
# Columns of the processed frame that are not smoothing-filter outputs.
EXCLUDED_COLS = ['power', 'delta_wiener', 'abs_ch', 'r_std', 'r_mean']

# From http://stackoverflow.com/questions/5515720/python-smooth-time-series-data
# Using http://www.scipy.org/Cookbook/SignalSmooth:
# Window names accepted by _smooth ('flat' = moving average, rest are numpy windows).
FILTERS = ['flat', 'hanning', 'hamming', 'bartlett', 'blackman']
# Per-series plot styling (line width, alpha, color) keyed by column name.
D_FILTERS = {'power': dict(lw=.5, alpha=.9, color=tableau20[1]),
             'wiener': dict(lw=.75, alpha=.7, color=tableau20[0]),
             'medfilt': dict(lw=.75, alpha=.5, color=tableau20[2]),
             'flat': dict(lw=.75, alpha=.5, color=tableau20[6]),
             'hanning': dict(lw=.75, alpha=.8, color=tableau20[4]),
             #'hamming': dict(lw=.75, alpha=.8, color=tableau20[6]),
             #'bartlett': dict(lw=.75, alpha=.8, color=tableau20[8]),
             'blackman': dict(lw=.75, alpha=.8, color=tableau20[10]), 
             'savgol': dict(lw=1, alpha=.8, color=tableau20[12])}


def _smooth(x, window_len=11, window='hanning'):
        if x.ndim != 1:
                raise ValueError("smooth only accepts 1 dimension arrays.")
        if x.size < window_len:
                raise ValueError("Input vector needs to be bigger than window size.")
        if window_len < 3:
                return x
        if not window in FILTERS:
                raise ValueError("Window is on of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")
        s= np.r_[2*x[0]-x[window_len-1::-1],x,2*x[-1]-x[-1:-window_len:-1]]
        if window == 'flat': #moving average
            w=np.ones(window_len,'d')
        else:  
            w=eval('np.'+window+'(window_len)')
        y=np.convolve(w/w.sum(),s,mode='same')
        return y[window_len:-window_len+1]

    
@timeit('process_filters', verbose=True)
def process_filters(df, window_len=11, filters=FILTERS):
    """Return a copy of `df` with one extra column per smoothing filter.

    Each name in `filters` becomes a new column holding the raw `power`
    signal smoothed with that window (see `_smooth`).
    """
    result = df.copy()
    raw_power = df.power.values
    for filter_name in filters:
        result[filter_name] = _smooth(raw_power, window_len, filter_name)
    return result
    

def plot_filters(df, filters=D_FILTERS.keys()):
    """Plot the raw power signal overlaid with its smoothed variants.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with a 'power' column plus optional smoothed columns
        (keys of D_FILTERS); missing columns are silently skipped.
    filters : iterable of str
        Column names to overlay on the raw signal.

    Returns
    -------
    matplotlib Axes with all series drawn.
    """
    # pandas' `legend` parameter is a bool; the original passed the string
    # 'RAW' which only acted as truthy — use label= so 'RAW' actually shows.
    ax = df.power.plot(figsize=FS, label='RAW', legend=True, **D_FILTERS['power'])
    for k in filters:
        # Skip the raw column (already drawn) and any filter not computed on df.
        if k in df and k != 'power':
            df[k].plot(ax=ax, legend=True, **D_FILTERS[k])
    ax.xaxis.set_major_formatter(mpd.DateFormatter('%H:%M:%S', tz=TZ))
    plt.setp(ax.xaxis.get_majorticklabels(), rotation=0, ha='center')
    ax.xaxis.set_tick_params(labelsize=9, pad=3)
    return ax


# Apply every smoothing window to three subsets with a short (7-sample) window.
window_len = 7
df_1f = process_filters(df_1, window_len=window_len, filters=FILTERS)
df_2f = process_filters(df_2, window_len=window_len, filters=FILTERS)
df_3f = process_filters(df_3, window_len=window_len, filters=FILTERS)


# Zoom into a 2-minute span to compare the filters visually.
plot_filters(df_1f.between_time('8:55', '8:57'));


process_filters TOOK: 0.005 s
process_filters TOOK: 0.006 s
process_filters TOOK: 0.006 s

In [4]:
# Compare flat (moving-average) smoothing applied to the raw power vs. applied
# on top of the already wiener-filtered signal, over a 2-minute window.
df = df_1f.between_time('8:55', '8:57').copy()
df['flat_power'] = _smooth(df.power.values, 7, 'flat')
df['flat_wiener'] = _smooth(df.wiener.values, 7, 'flat')

#, **D_FILTERS['wiener']
ax = df.power.plot(figsize=FS, **D_FILTERS['power'], legend='RAW')
df.wiener.plot(ax=ax, **D_FILTERS['wiener'], legend='wiener')
df.flat_power.plot(ax=ax, **D_FILTERS['flat'], legend='flat power')
df.flat_wiener.plot(ax=ax, color='red', lw=1, legend='flat wiener');



In [105]:
def _roll(x):
    n = x.shape[0]
    mid = n // 2 + 1
    return np.mean(x[mid+1:]) - np.mean(x[:mid])
    

# Rolling step estimate and its first/second differences over the wiener signal.
# The expensive rolling .apply was computed three times in the original;
# compute it once and reuse.
step_estimate = df.wiener.rolling(7, center=True).apply(_roll)
ax = step_estimate.plot()
step_estimate.diff().plot(ax=ax)
step_estimate.diff().diff().plot(ax=ax)
df.wiener.plot(ax=ax)


Out[105]:
<matplotlib.axes._subplots.AxesSubplot at 0x125817cc0>

In [ ]:


In [ ]:


In [3]:
df = df_5 #.between_time('9:00', '13:00')
df_step = rect_smoothing(df)
df_interv = genera_df_intervalos(df, df_step)
print_ok(df_interv.tail())


rect_smoothing TOOK: 2.476 s
              n_ch     n  median_all  std_all  step_median_0   delta                        ts  \
interv_shift                                                                                     
0                5   547       337.0      4.2            337     0.0 2016-09-14 22:00:00+00:00   
1               16  1124       243.0      8.9            243   -94.0 2016-09-14 22:09:07+00:00   
2                9  3725       220.0     10.8            219   -24.0 2016-09-14 22:27:51+00:00   
3               10   101       249.0      6.8            248    29.0 2016-09-14 23:29:56+00:00   
4                9  1471       217.0      2.7            217   -31.0 2016-09-14 23:31:37+00:00   
5                6  2119       233.0     15.8            232    15.0 2016-09-14 23:56:08+00:00   
6               19    77       256.0      8.8            256    24.0 2016-09-15 00:31:27+00:00   
7                1  3585       213.0      5.5            213   -43.0 2016-09-15 00:32:44+00:00   
8                1    78       233.0      3.9            233    20.0 2016-09-15 01:32:29+00:00   
9                2  1859       211.0      2.2            210   -23.0 2016-09-15 01:33:47+00:00   
10              22  7745       212.0     12.6            211     1.0 2016-09-15 02:04:46+00:00   
11               8     8       224.0     64.3            223    12.0 2016-09-15 04:13:51+00:00   
12               7  1323       231.0     19.1            231     8.0 2016-09-15 04:13:59+00:00   
13               9    72       259.0      7.4            258    27.0 2016-09-15 04:36:02+00:00   
14               2  3042       213.0      7.6            213   -45.0 2016-09-15 04:37:14+00:00   
15               8     8       233.0    121.9            232    19.0 2016-09-15 05:27:56+00:00   
16              11   101      1098.0     15.1           1098   866.0 2016-09-15 05:28:04+00:00   
17              29   236      1177.0     13.6           1177    79.0 2016-09-15 05:29:45+00:00   
18              14    57      1102.0     12.5           1101   -76.0 2016-09-15 05:33:41+00:00   
19              27   182      1176.0     16.3           1175    74.0 2016-09-15 05:34:38+00:00   
20              53   140      2039.0     79.7           2038   863.0 2016-09-15 05:37:40+00:00   
21               5   427       223.0     81.0            223 -1815.0 2016-09-15 05:40:00+00:00   
22               5    87       184.0      5.7            183   -40.0 2016-09-15 05:47:07+00:00   
23              16  1928       224.0      4.1            224    41.0 2016-09-15 05:48:34+00:00   
24               9  1040       250.0     24.1            249    25.0 2016-09-15 06:20:42+00:00   
25               8     8       263.0     20.5            260    11.0 2016-09-15 06:38:02+00:00   
26              27    77      1182.0     60.0           1182   922.0 2016-09-15 06:38:10+00:00   
27              11    25       240.0     52.6            240  -942.0 2016-09-15 06:39:27+00:00   
28              22    24      1658.0    396.4           1657  1417.0 2016-09-15 06:39:52+00:00   
29               9    11      1211.0      9.2           1211  -446.0 2016-09-15 06:40:16+00:00   
...            ...   ...         ...      ...            ...     ...                       ...   
226             15  1399       285.0     19.1            284    23.0 2016-09-15 17:38:54+00:00   
227              1    32       316.0      5.3            316    32.0 2016-09-15 18:02:13+00:00   
228              7   827       281.0      7.9            280   -36.0 2016-09-15 18:02:45+00:00   
229             19    74       314.0      9.0            314    34.0 2016-09-15 18:16:32+00:00   
230              3   467       279.0      6.3            279   -35.0 2016-09-15 18:17:46+00:00   
231              6  1406       257.0      5.5            256   -23.0 2016-09-15 18:25:33+00:00   
232             29  1378       333.0      8.6            332    76.0 2016-09-15 18:48:59+00:00   
233              6    73       408.0      8.9            407    75.0 2016-09-15 19:11:57+00:00   
234             16   263       321.0     12.0            320   -87.0 2016-09-15 19:13:10+00:00   
235             17   138       366.0      8.4            366    46.0 2016-09-15 19:17:33+00:00   
236             17   642       322.0      5.5            321   -45.0 2016-09-15 19:19:51+00:00   
237             38  2728       367.0     15.8            367    46.0 2016-09-15 19:30:33+00:00   
238             37   230       425.0     14.2            424    57.0 2016-09-15 20:16:01+00:00   
239              7   125       463.0     10.8            462    38.0 2016-09-15 20:19:51+00:00   
240              6    86       399.0      6.6            398   -64.0 2016-09-15 20:21:56+00:00   
241              8     8       404.0     82.5            403     5.0 2016-09-15 20:23:22+00:00   
242             46   189      1646.0     41.1           1646  1243.0 2016-09-15 20:23:30+00:00   
243              8     8      1603.0    183.0           1606   -40.0 2016-09-15 20:26:39+00:00   
244              8    52      1050.0      4.1           1050  -556.0 2016-09-15 20:26:47+00:00   
245             19    94      1102.0     24.3           1102    52.0 2016-09-15 20:27:39+00:00   
246             19    49      1724.0     29.4           1725   623.0 2016-09-15 20:29:13+00:00   
247              7    76       401.0    114.0            401 -1324.0 2016-09-15 20:30:02+00:00   
248              8     8       402.0    181.0            400    -1.0 2016-09-15 20:31:18+00:00   
249             16    96      1165.0     33.3           1164   764.0 2016-09-15 20:31:26+00:00   
250             10  1484       365.0     23.6            365  -799.0 2016-09-15 20:33:02+00:00   
251             15  1148       397.0     17.6            396    31.0 2016-09-15 20:57:46+00:00   
252             12   264       344.0      7.2            343   -53.0 2016-09-15 21:16:54+00:00   
253             11    96       378.0      7.1            378    35.0 2016-09-15 21:21:18+00:00   
254             10  1672       341.0      4.9            341   -37.0 2016-09-15 21:22:54+00:00   
255              9   554       283.0      5.0            282   -59.0 2016-09-15 21:50:46+00:00   

                                ts_fin       std_0       std_c       std_f  
interv_shift                                                                
0            2016-09-14 22:09:06+00:00    0.706287    4.176203    4.165934  
1            2016-09-14 22:27:50+00:00    7.097810    7.897554    7.368539  
2            2016-09-14 23:29:55+00:00    4.276450   10.814481   10.791729  
3            2016-09-14 23:31:36+00:00    3.146998    5.383556    4.514254  
4            2016-09-14 23:56:07+00:00    2.592059    2.426960    2.381286  
5            2016-09-15 00:31:26+00:00  203.365335    4.393329    4.390782  
6            2016-09-15 00:32:43+00:00    3.307848    7.303943    6.715664  
7            2016-09-15 01:32:28+00:00    4.205049    5.441310    5.426110  
8            2016-09-15 01:33:46+00:00    2.805348    3.091609    2.833132  
9            2016-09-15 02:04:45+00:00    1.862908    2.026714    1.952001  
10           2016-09-15 04:13:50+00:00    2.660773   12.565506   12.565432  
11           2016-09-15 04:13:58+00:00    2.151976    0.051455   85.524018  
12           2016-09-15 04:36:01+00:00  229.528890    4.105225    4.053365  
13           2016-09-15 04:37:13+00:00    3.703907    5.726572    5.485067  
14           2016-09-15 05:27:55+00:00    2.691346    7.534776    7.517303  
15           2016-09-15 05:28:03+00:00    3.064236    1.735267  163.888090  
16           2016-09-15 05:29:44+00:00   56.055657    5.583217    5.700666  
17           2016-09-15 05:33:40+00:00    8.671286   11.775098   11.221835  
18           2016-09-15 05:34:37+00:00    4.793477    9.164601    6.647455  
19           2016-09-15 05:37:39+00:00    7.007700   14.367728   13.830930  
20           2016-09-15 05:39:59+00:00  342.457613   28.694651   27.199787  
21           2016-09-15 05:47:06+00:00  596.048238    5.098835    5.164384  
22           2016-09-15 05:48:33+00:00    3.834906    4.584619    4.122649  
23           2016-09-15 06:20:41+00:00    3.161122    3.832323    3.726838  
24           2016-09-15 06:38:01+00:00  212.600497    7.301379    7.298883  
25           2016-09-15 06:38:09+00:00   20.801913    5.557484   13.326559  
26           2016-09-15 06:39:26+00:00  181.758068   15.339108   31.458821  
27           2016-09-15 06:39:51+00:00  101.421159    6.677638    6.183641  
28           2016-09-15 06:40:15+00:00  348.250450  132.202269  178.426483  
29           2016-09-15 06:40:26+00:00    9.069944    4.929677    6.117995  
...                                ...         ...         ...         ...  
226          2016-09-15 18:02:12+00:00  206.657643   15.886246    6.795850  
227          2016-09-15 18:02:44+00:00    3.497331    3.355938    2.451590  
228          2016-09-15 18:16:31+00:00    2.310634    7.774642    7.713532  
229          2016-09-15 18:17:45+00:00    2.020525    7.751179    8.001621  
230          2016-09-15 18:25:32+00:00    3.496356    6.242171    6.242610  
231          2016-09-15 18:48:58+00:00    3.017379    5.519637    5.513974  
232          2016-09-15 19:11:56+00:00    7.618892    7.823677    7.751584  
233          2016-09-15 19:13:09+00:00    2.303126    7.813281    6.913084  
234          2016-09-15 19:17:32+00:00    6.751613    9.298458    7.597869  
235          2016-09-15 19:19:50+00:00    3.306966    6.666588    5.605100  
236          2016-09-15 19:30:32+00:00    4.969435    4.972173    4.901924  
237          2016-09-15 20:16:00+00:00  178.085261    9.232451    9.156970  
238          2016-09-15 20:19:50+00:00    6.687800   12.286746   11.691280  
239          2016-09-15 20:21:55+00:00    3.993377    9.826719   10.907886  
240          2016-09-15 20:23:21+00:00    4.343246    4.946308    4.465974  
241          2016-09-15 20:23:29+00:00    4.997341    3.357331  111.725667  
242          2016-09-15 20:26:38+00:00  172.074877   17.176910   16.405419  
243          2016-09-15 20:26:46+00:00    6.371531    3.938182  172.972812  
244          2016-09-15 20:27:38+00:00    9.088643    3.404285    3.064778  
245          2016-09-15 20:29:12+00:00    4.450022   15.455476   22.427578  
246          2016-09-15 20:30:01+00:00   61.151394   12.945055   12.880082  
247          2016-09-15 20:31:17+00:00  357.610677    9.470705    8.487663  
248          2016-09-15 20:31:25+00:00    6.340577    4.603621  239.586001  
249          2016-09-15 20:33:01+00:00  106.686519   10.080776   11.859893  
250          2016-09-15 20:57:45+00:00  260.637248   11.255937   11.173345  
251          2016-09-15 21:16:53+00:00    2.859008   17.515644   17.447744  
252          2016-09-15 21:21:17+00:00    3.655302    5.734583    4.779857  
253          2016-09-15 21:22:53+00:00    4.177089    4.962161    3.691324  
254          2016-09-15 21:50:45+00:00    3.355338    4.780551    4.738950  
255          2016-09-15 21:59:59+00:00    4.976116    4.074794    3.662475  

[256 rows x 11 columns]
              n_ch     n  median_all  std_all  step_median_0  delta                        ts  \
interv_shift                                                                                    
251             15  1148       397.0     17.6            396   31.0 2016-09-15 20:57:46+00:00   
252             12   264       344.0      7.2            343  -53.0 2016-09-15 21:16:54+00:00   
253             11    96       378.0      7.1            378   35.0 2016-09-15 21:21:18+00:00   
254             10  1672       341.0      4.9            341  -37.0 2016-09-15 21:22:54+00:00   
255              9   554       283.0      5.0            282  -59.0 2016-09-15 21:50:46+00:00   

                                ts_fin     std_0      std_c      std_f  
interv_shift                                                            
251          2016-09-15 21:16:53+00:00  2.859008  17.515644  17.447744  
252          2016-09-15 21:21:17+00:00  3.655302   5.734583   4.779857  
253          2016-09-15 21:22:53+00:00  4.177089   4.962161   3.691324  
254          2016-09-15 21:50:45+00:00  3.355338   4.780551   4.738950  
255          2016-09-15 21:59:59+00:00  4.976116   4.074794   3.662475  

In [4]:
# Visualize the detected steps over the raw/smoothed signal.
plot_steps_detection(df, df_step);



In [37]:
# Derive per-interval end time and the combined delta of consecutive steps
# ('permanent_delta' ~ net change if the next step partially undoes this one).
df_interv['ts_fin'] = df_interv.ts.shift(-1)
df_interv['next_delta'] = df_interv['delta'].shift(-1)
df_interv['permanent_delta'] = df_interv['delta'] + df_interv['next_delta']


df_interv.tail()


Out[37]:
n_ch abs_ch n median_all std_all delta step_v ts ch wiener std_c ts_fin next_delta permanent_delta
interv_shift
251 15 15 1148 397.0 17.6 31 396 2016-09-15 22:57:45 False 17.478665 -1.0 2016-09-15 23:16:53 -53.0 -22.0
252 12 12 264 344.0 7.8 -53 343 2016-09-15 23:16:53 True 5.243193 3.0 2016-09-15 23:21:17 35.0 -18.0
253 11 11 96 378.0 7.9 35 378 2016-09-15 23:21:17 False 4.252339 -1.0 2016-09-15 23:22:53 -37.0 -2.0
254 10 10 1672 341.0 5.0 -37 341 2016-09-15 23:22:53 False 4.754309 -1.0 2016-09-15 23:50:45 -59.0 -96.0
255 10 10 555 283.0 5.3 -59 282 2016-09-15 23:50:45 True 3.850973 3.2 NaT NaN NaN

In [5]:
def __plot_steps_detection(df_data, df_step, df_interv=None):
    """Plot the wiener-filtered signal with the detected step levels.

    When `df_interv` is given, restrict the plot to the span covered by
    its intervals (with a small time margin); otherwise draw everything
    and mark each detected step start with a faint vertical line.
    """
    _, ax = plt.subplots(figsize=FS)
    ax.xaxis.set_major_formatter(mpd.DateFormatter('%H:%M:%S', tz=TZ))
    if df_interv is not None:
        margin = pd.Timedelta('3s')
        t0, tf = df_interv.ts.iloc[0], df_interv.ts_fin.iloc[-1]
        sel_data = df_data.loc[t0 - margin:tf + margin]
        sel_step = df_step.loc[t0 - 3 * margin:tf + 3 * margin]
        if sel_data.empty:
            print_err(df_interv)
        else:
            sel_data.wiener.plot(ax=ax, lw=.5, alpha=.3, color=tableau20[0])
            sel_step.step_median.plot(ax=ax, lw=1, alpha=.9, color=tableau20[4])
    else:
        df_data.wiener.plot(ax=ax, lw=.5, alpha=.3, color=tableau20[0])
        # Faint vertical markers at every detected step start.
        ax.vlines(df_step[df_step.is_init].index, 0, 500, lw=.75, alpha=.3, color=tableau20[6])
        df_step.step_median.plot(ax=ax, lw=1, alpha=.9, color=tableau20[4])

    plt.setp(ax.xaxis.get_majorticklabels(), rotation=0, ha='center')
    ax.xaxis.set_tick_params(labelsize=9, pad=3)
    return ax

In [ ]:


In [ ]:


In [ ]:


In [79]:
# Pairwise scatter of the numeric interval features (drop times/bools first).
dp = df_interv.drop(['ts', 'ts_fin', 'ch', 'std_c', 'abs_ch', 'wiener'], axis=1).dropna(how='any', axis=0)
# [df_interv.n < 300]
sns.pairplot(dp) #, hue='permanent_delta')


Out[79]:
<seaborn.axisgrid.PairGrid at 0x16b714668>

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [8]:
@timeit('fusion_big_events', verbose=True)
def _fusion_big_events(df_interv):
    """Group consecutive intervals belonging to the same 'big' event.

    Walks the intervals in order, tracking the base power `level` before
    the current event and accumulating the step deltas in `d_acum`.
    An interval is considered part of a big event when the accumulated or
    instantaneous delta, the interval noise, or the jump over the base
    level exceed the thresholds; the event closes when the accumulated
    delta returns near zero or a non-big interval appears.

    Parameters
    ----------
    df_interv : pd.DataFrame
        Interval frame (columns: delta, median_all, std_all, ...).

    Returns
    -------
    (pd.DataFrame, list)
        The input frame with a new 'level' column, and the list of groups
        (each group is a list of interval indices).

    NOTE(review): the thresholds (500, 100, 200, 50) look hand-tuned by
    inspection — confirm before reusing elsewhere.
    """
    grupos = []      # closed groups of interval indices
    g_i = []         # group currently being built
    ant_vip = False  # whether the previous interval was part of a big event
    level = 0        # base power level before the current event
    levels = []      # per-interval base level (becomes the 'level' column)
    d_acum = df_interv.median_all[0]
    for i, row in df_interv.iterrows():
        if ((d_acum > 500) or
                (row.delta > 500) or
                ((row.std_all > 100) and (d_acum > 50)) or
                (row.median_all - level > 200)):
            if not ant_vip and (len(g_i) == 0):
                # Start of a new big event: base level is the pre-step median,
                # and the accumulator restarts at this step's delta.
                level = row.median_all - row.delta
                d_acum = row.delta
                g_i = [i]
            else:
                d_acum += row.delta
                if abs(d_acum) < 100:
                    # Accumulated delta returned near zero: event is over.
                    if len(g_i) > 0:
                        grupos.append(g_i)
                    g_i = []
                    level = row.median_all
                    d_acum = level
                else:
                    g_i.append(i)
            ant_vip = True
        else:
            # Normal interval: close any open group and reset the baseline.
            ant_vip = False
            if len(g_i) > 0:
                grupos.append(g_i)
            g_i = []
            level = row.median_all
            d_acum = level
        levels.append(level)
    df_interv['level'] = levels

    print_red(grupos[:5])
    return df_interv, grupos


@timeit('integrate_step_detection', verbose=True)
def _integrate_step_detection(df, df_step, df_interv):
    """Concatenate the step-detection (event) info and number the intervals.

    Joins the piecewise-constant step columns onto the raw data, merges
    consecutive intervals into 'big events' via `_fusion_big_events`, and
    numbers the resulting intervals with a cumulative counter.

    Parameters
    ----------
    df : pd.DataFrame
        Raw data with 'power' and 'wiener' columns.
    df_step : pd.DataFrame
        Step detection output ('step_median', 'step_mean', 'is_init').
    df_interv : pd.DataFrame
        Interval frame produced by genera_df_intervalos.

    Returns
    -------
    pd.DataFrame
        Per-sample frame with columns: power, wiener, step_median,
        step_mean, is_init, init_event, big_event, intervalo.
    """
    df_out = df[['power', 'wiener']].join(df_step[['step_median', 'step_mean', 'is_init']])
    df_out['init_event'] = df_out['is_init']
    df_out['big_event'] = False

    # Group consecutive intervals into big events and tag each with its id.
    df_interv, grupos = _fusion_big_events(df_interv)

    df_interv['ts_fin'] = df_interv['ts'].shift(-1)
    df_interv['big_event'] = -1
    for i, idx in enumerate(grupos):
        df_interv.loc[idx, 'big_event'] = i + 1

    # Summarize each big event: padded start/end, #samples, base level, stats.
    offset = pd.Timedelta('3s')
    gb_big = df_interv[df_interv.big_event > 0].groupby('big_event')
    big_events = pd.concat([gb_big.ts.first() - offset,
                            gb_big.ts_fin.last() + offset,
                            gb_big.n.sum(),
                            gb_big['level'].first(),
                            gb_big.median_all.max(),
                            gb_big.std_all.median()], axis=1).reset_index()

    # Within each big event, suppress the inner event starts (keeping only
    # the first one) and flag the whole span as a big event.
    offset_min = pd.Timedelta('1s')
    for ts, ts_fin in zip(big_events.ts, big_events.ts_fin):
        df_out.loc[ts + offset_min:ts_fin, 'init_event'] = False
        df_out.loc[ts:ts_fin, 'big_event'] = True

    # Interval id = cumulative count of the surviving event starts.
    df_out['intervalo'] = df_out['init_event'].cumsum()
    return df_out


# Integrate detection results for the current subset and inspect the head.
df_out = _integrate_step_detection(df, df_step, df_interv)
df_out.head()
#df_out.tz_localize(None).reset_index().groupby('intervalo').big_event.sum()


[[0], [31, 32, 33, 34, 35, 36, 37, 38, 39], [41, 42], [61, 62], [67]]
fusion_big_events TOOK: 0.024 s
integrate_step_detection TOOK: 0.060 s
Out[8]:
power wiener step_median step_mean is_init init_event big_event intervalo
ts
2016-09-17 00:00:00+02:00 222.060257 231.626156 218 220 False False False 0
2016-09-17 00:00:01+02:00 233.656845 231.442630 218 220 False False False 0
2016-09-17 00:00:02+02:00 236.062698 231.891288 218 220 False False False 0
2016-09-17 00:00:03+02:00 229.603241 232.717262 218 220 False False False 0
2016-09-17 00:00:04+02:00 239.614746 233.419937 218 220 False False False 0

In [6]:
# Sanity check: convert matplotlib date floats back to datetimes (UTC).
[mpd.num2date(x) for x in [736221.916667, 736222.915972]]


Out[6]:
[datetime.datetime(2016, 9, 14, 22, 0, 0, 28803, tzinfo=<matplotlib.dates._UTC object at 0x1081d40b8>),
 datetime.datetime(2016, 9, 15, 21, 58, 59, 980798, tzinfo=<matplotlib.dates._UTC object at 0x1081d40b8>)]

In [6]:
def _plot_intervals(df_out):
    """Plot the wiener signal per detected interval.

    Intervals belonging to a big event are drawn in a distinct color from
    the `cm` cycle and labeled 'Ev_<n>'; the rest are drawn in black.
    """
    by_interval = df_out.reset_index().set_index('intervalo')
    f, ax = plt.subplots(1, 1, figsize=(16, 8))
    for interval_id in set(by_interval.index):
        segment = by_interval.loc[interval_id].set_index('ts')
        if segment.empty:
            continue
        is_big = by_interval.loc[interval_id, 'big_event'].sum() > 0
        if is_big:
            color, alpha, legend = next(cm), .8, 'Ev_{}'.format(interval_id)
        else:
            color, alpha, legend = 'k', .5, ''
        # Faint vertical markers at every step start within the interval.
        starts = segment[segment.is_init]
        ax.vlines(starts.index, 0, starts.step_median * 2, lw=.25, alpha=.5, color='grey')
        segment.wiener.plot(ax=ax, lw=.75, alpha=alpha, color=color, label=legend)
    return ax


# Draw the interval/event plot for the current detection result.
_plot_intervals(df_out) # .between_time('13:00', '15:59')


Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x1151a8198>

In [7]:
# Run the full detection pipeline over every subset and collect the results.
lista_dfs = [df_1, df_2, df_3, df_4, df_5, df_6, df_7]
lista_dfs_out = []
for df in lista_dfs:
    df_step = rect_smoothing(df)
    df_interv = genera_df_intervalos(df, df_step)
    df_out = _integrate_step_detection(df, df_step, df_interv)
    lista_dfs_out.append((df_out, df_step, df_interv))


['n', 'n_planos', 'mean_t', 'std_t', 'mean_planos', 'std_planos', 'mean_final', 'std_final', 'mean_u', 'std_u', 'pnorm', 'pnorm_next', 'error_cum']
incr, hay_incr_considerar, hay_ch_abs, next_dif_mean_supera_std, es_outlier, es_outlier_safe, next_es_outlier, next_es_outlier_safe, hay_cambio_futuro
rect_smoothing TOOK: 0.991 s
[[0], [2, 3, 4, 5], [8], [10, 11, 12, 13, 14, 15, 16]]
fusion_big_events TOOK: 0.060 s
integrate_step_detection TOOK: 0.077 s
['n', 'n_planos', 'mean_t', 'std_t', 'mean_planos', 'std_planos', 'mean_final', 'std_final', 'mean_u', 'std_u', 'pnorm', 'pnorm_next', 'error_cum']
incr, hay_incr_considerar, hay_ch_abs, next_dif_mean_supera_std, es_outlier, es_outlier_safe, next_es_outlier, next_es_outlier_safe, hay_cambio_futuro
rect_smoothing TOOK: 1.964 s
[[0]]
fusion_big_events TOOK: 0.005 s
integrate_step_detection TOOK: 0.014 s
['n', 'n_planos', 'mean_t', 'std_t', 'mean_planos', 'std_planos', 'mean_final', 'std_final', 'mean_u', 'std_u', 'pnorm', 'pnorm_next', 'error_cum']
incr, hay_incr_considerar, hay_ch_abs, next_dif_mean_supera_std, es_outlier, es_outlier_safe, next_es_outlier, next_es_outlier_safe, hay_cambio_futuro
rect_smoothing TOOK: 2.589 s
[[0], [3], [5, 6], [8, 9], [11, 12, 13, 14, 15]]
fusion_big_events TOOK: 0.019 s
integrate_step_detection TOOK: 0.040 s
['n', 'n_planos', 'mean_t', 'std_t', 'mean_planos', 'std_planos', 'mean_final', 'std_final', 'mean_u', 'std_u', 'pnorm', 'pnorm_next', 'error_cum']
incr, hay_incr_considerar, hay_ch_abs, next_dif_mean_supera_std, es_outlier, es_outlier_safe, next_es_outlier, next_es_outlier_safe, hay_cambio_futuro
rect_smoothing TOOK: 0.491 s
[[0], [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29], [33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64]]
fusion_big_events TOOK: 0.012 s
integrate_step_detection TOOK: 0.027 s
['n', 'n_planos', 'mean_t', 'std_t', 'mean_planos', 'std_planos', 'mean_final', 'std_final', 'mean_u', 'std_u', 'pnorm', 'pnorm_next', 'error_cum']
incr, hay_incr_considerar, hay_ch_abs, next_dif_mean_supera_std, es_outlier, es_outlier_safe, next_es_outlier, next_es_outlier_safe, hay_cambio_futuro
rect_smoothing TOOK: 5.511 s
[[0], [16, 17, 18, 19, 20], [26], [28, 29, 30, 31, 32], [54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69]]
fusion_big_events TOOK: 0.030 s
integrate_step_detection TOOK: 0.075 s
['n', 'n_planos', 'mean_t', 'std_t', 'mean_planos', 'std_planos', 'mean_final', 'std_final', 'mean_u', 'std_u', 'pnorm', 'pnorm_next', 'error_cum']
incr, hay_incr_considerar, hay_ch_abs, next_dif_mean_supera_std, es_outlier, es_outlier_safe, next_es_outlier, next_es_outlier_safe, hay_cambio_futuro
rect_smoothing TOOK: 10.063 s
[[0], [29, 30, 31, 32, 33, 34, 35, 36, 37], [39, 40, 41, 42, 43, 44], [46], [57]]
fusion_big_events TOOK: 0.026 s
integrate_step_detection TOOK: 0.060 s
['n', 'n_planos', 'mean_t', 'std_t', 'mean_planos', 'std_planos', 'mean_final', 'std_final', 'mean_u', 'std_u', 'pnorm', 'pnorm_next', 'error_cum']
incr, hay_incr_considerar, hay_ch_abs, next_dif_mean_supera_std, es_outlier, es_outlier_safe, next_es_outlier, next_es_outlier_safe, hay_cambio_futuro
rect_smoothing TOOK: 8.568 s
[[0], [31, 32, 33, 34, 35, 36, 37, 38, 39], [41, 42], [61, 62], [67]]
fusion_big_events TOOK: 0.022 s
integrate_step_detection TOOK: 0.054 s

In [9]:
# One interval/event figure per subset, titled with the subset's first date.
for (df_out, df_step, df_interv) in lista_dfs_out:
    _plot_intervals(df_out)
    plt.title("{:%d-%B'%y}".format(df_out.index[0]))
    plt.show()



In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [142]:
# Candidate change points: samples whose 5-sample rolling delta sum (shifted
# to look ahead) exceeds 10 W, scattered over the steps plot.
dp = df[df.delta_wiener.rolling(5).sum().shift(-7).abs() > 10].reset_index()

ax = plot_steps_detection(df, df_step)
ax.scatter(dp.ts.values, dp.wiener)
plt.ylim(200, 400)
#plt.xlim(dp.ts[0], dp.ts[-1])
#.plot(kind='scatter', x='ts', y='wiener')


Out[142]:
(200, 400)

In [160]:
def _sum_pos(x):
    return np.sum(x[x > 0]) - np.sum(x[x < 0])

def _sum_neg(x):
    return -np.sum(x[x < 0])



# Overlay the wiener-filtered signal with the (scaled) rolling sum of its
# deltas for a 10-minute window; slice once and reuse it for both curves.
segment = df_5.between_time('9:35', '9:45')
ax = segment.wiener.plot()
(segment.delta_wiener.rolling(9).sum().shift(-7).abs() * 20).plot(ax=ax, lw=.5)
#((df_5.between_time('9:35', '9:45').delta_wiener.rolling(9).apply(_sum_pos).shift(-7).abs() - df_5.between_time('9:35', '9:45').delta_wiener.rolling(9).sum().shift(-7).abs() )*20).plot(ax=ax, lw=.5)
#(df_5.between_time('9:35', '9:45').delta_wiener.rolling(5).apply(_sum_neg).shift(-7).abs()*20).plot(ax=ax, lw=.5)


Out[160]:
<matplotlib.axes._subplots.AxesSubplot at 0x1418bf9e8>

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [98]:
# Kalman filter example demo in Python
#
# A Python implementation of the example given in pages 11-15 of "An
# Introduction to the Kalman Filter" by Greg Welch and Gary Bishop,
# University of North Carolina at Chapel Hill, Department of Computer
# Science, TR 95-041,
# http://www.cs.unc.edu/~welch/kalman/kalmanIntro.html
#
# by Andrew D. Straw
#
# NOTE(review): the RNG is unseeded, so the noisy observations (and hence the
# figures) differ on every run — seed numpy.random for reproducibility.

import numpy
import pylab

# Initial parameters.
n_iter = 50
sz = (n_iter,)   # shape of the state/estimate arrays
x = -0.37727     # true value (a typo in the example at top of p. 13 calls this z)
z = numpy.random.normal(x, 0.1, size=sz)  # noisy observations around x (sigma=0.1)

Q = 1e-5  # process variance

# Allocate the filter state arrays (all full-length for later plotting).
xhat = numpy.zeros(sz)       # a posteriori estimate of x
P = numpy.zeros(sz)          # a posteriori error estimate
xhatminus = numpy.zeros(sz)  # a priori estimate of x
Pminus = numpy.zeros(sz)     # a priori error estimate
K = numpy.zeros(sz)          # Kalman gain (blending factor)

R = 0.1 ** 2  # estimate of measurement variance; change to see its effect

# Initial guesses.
xhat[0] = 0.0
P[0] = 1.0

for k in range(1, n_iter):
    # Time update (predict): propagate the previous estimate forward.
    xhatminus[k] = xhat[k - 1]
    Pminus[k] = P[k - 1] + Q

    # Measurement update (correct): blend the prediction with observation z[k].
    K[k] = Pminus[k] / (Pminus[k] + R)
    xhat[k] = xhatminus[k] + K[k] * (z[k] - xhatminus[k])
    P[k] = (1 - K[k]) * Pminus[k]

# Figure 1: observations vs. filtered estimate vs. truth.
pylab.figure()
pylab.plot(z, 'k+', label='noisy measurements')
pylab.plot(xhat, 'b-', label='a posteri estimate')
pylab.axhline(x, color='g', label='truth value')
pylab.legend()
pylab.xlabel('Iteration')
pylab.ylabel('Voltage')

# Figure 2: evolution of the a priori error estimate.
pylab.figure()
valid_iter = range(1, n_iter)  # Pminus not valid at step 0
pylab.plot(valid_iter, Pminus[valid_iter], label='a priori error estimate')
pylab.xlabel('Iteration')
pylab.ylabel('$(Voltage)^2$')
pylab.setp(pylab.gca(), 'ylim', [0, .01])
pylab.show()

#df['firwin_hanning'] = firwin(df.power, kernel_size=15)
#df



In [97]:
# IPython help lookup for scipy.signal.firwin (dev aid — remove before sharing).
firwin?

In [ ]:


In [58]:
# scipy.signal.savgol_filter
from scipy.signal import savgol_filter


@timeit('process_savgol', verbose=True)
def process_savgol(df, window_len=11, polyorder=2):
    """Return a copy of `df` with a 'savgol' column added.

    The raw power signal is smoothed with a Savitzky-Golay filter and the
    result is passed through a median filter (odd kernel, 2*window_len+1)
    to flatten residual spikes.

    NOTE(review): `delta=3` has no effect while `deriv=0` — confirm intent.
    """
    smoothed = savgol_filter(df.power.values, window_len, polyorder,
                             deriv=0, delta=3, mode='nearest')
    df_out = df.copy()
    df_out['savgol'] = medfilt(smoothed, kernel_size=2 * window_len + 1)
    return df_out
    

def plot_savgol(df, window_len=11, polyorder=2):
    """Plot the raw power signal together with its 'savgol' smoothed version.

    NOTE(review): `window_len` and `polyorder` are unused here — presumably
    kept for signature symmetry with `process_savgol`; confirm.
    """
    ax = df.power.plot(figsize=FS, legend='RAW', **D_FILTERS['power'])
    df['savgol'].plot(ax=ax, legend='savgol', lw=1, color=tableau20[4], alpha=.8)
    # Local-time HH:MM:SS ticks, horizontal and compact.
    ax.xaxis.set_major_formatter(mpd.DateFormatter('%H:%M:%S', tz=TZ))
    plt.setp(ax.xaxis.get_majorticklabels(), rotation=0, ha='center')
    ax.xaxis.set_tick_params(labelsize=9, pad=3)
    return ax


# Smooth subset 1 with a short window / high polynomial order and inspect.
df_1sg = process_savgol(df_1, window_len=7, polyorder=4)

plot_savgol(df_1sg)  # .between_time('8:50', '9:05'))
df_1sg.head()


process_savgol TOOK: 0.008 s
Out[58]:
power wiener delta_wiener abs_ch r_std r_mean savgol
ts
2016-09-10 08:30:00+02:00 232.465179 248.667640 0.627884 False 0.899339 247.797532 234.070206
2016-09-10 08:30:01+02:00 245.376541 247.866430 -0.801211 False 1.532316 248.255107 238.917404
2016-09-10 08:30:02+02:00 255.756119 247.156047 -0.710383 False 1.886681 248.892855 241.762878
2016-09-10 08:30:03+02:00 252.692841 246.989616 -0.166431 False 1.976066 249.597289 242.076218
2016-09-10 08:30:04+02:00 238.602524 247.037969 0.048353 False 1.941258 250.426916 243.187134

In [70]:
window_len, polyorder = 7, 4

# Apply the same smoothing to subsets 2-5 (same parameters, same order).
df_2sg, df_3sg, df_4sg, df_5sg = [
    process_savgol(d, window_len, polyorder) for d in (df_2, df_3, df_4, df_5)
]

plot_savgol(df_2sg.between_time('9:00', '10:00'))
plt.show()
plot_savgol(df_3sg.between_time('19:33', '20:10'))
plt.show()
#plot_savgol(df_4sg) #.between_time('9:00', '10:00'))
#plt.show()
plot_savgol(df_5sg.between_time('8:17', '8:42:00'))
plt.show()


process_savgol TOOK: 0.021 s
process_savgol TOOK: 0.014 s
process_savgol TOOK: 0.010 s
process_savgol TOOK: 0.075 s

In [73]:
# Inspect the early-afternoon window of the smoothed df_5 subset.
plot_savgol(df_5sg.between_time('13:52', '14:52'))


Out[73]:
<matplotlib.axes._subplots.AxesSubplot at 0x126be5ac8>

In [78]:
# Late-morning window with the wiener-filtered signal overlaid on the same axes.
ax = plot_savgol(df_5sg.between_time('9:52', '12:22'))
df_5sg.between_time('9:52', '12:22').wiener.plot(ax=ax, lw=.5, alpha=.5, color=tableau20[6])


Out[78]:
<matplotlib.axes._subplots.AxesSubplot at 0x1134f34e0>

In [ ]:
# Detección de intervalos
#lista_dfs = [df_1.between_time('8:51', '9:02'), df_2.between_time('13:45', '15:30'), df_2, df_3, df_4]
# lista_dfs = [df_2.between_time('13:45', '15:30')]
# lista_dfs = [df_5, df_6, df_7]
# lista_dfs = [df_7.between_time('08:00', '23:00')]
# lista_dfs = [df_7.between_time('10:40', '12:30')]

In [11]:
from fitter import Fitter


#f = Fitter(train)
#f.fit()
# may take some time since by default, all distributions are tried
# but you can manually provide a smaller set of distributions
#f.summary()

In [ ]:


In [ ]:


In [7]:
# Work on a fresh copy of subset 2 so the original frame stays untouched.
df = df_2.copy()

# print_cyan(scipy.signal.get_window())
# Median-filter the raw power signal (kernel_size must be odd for medfilt).
df['medfilt'] = medfilt(df.power, kernel_size=15)
# df['firwin_hanning'] = firwin(df.power, kernel_size=15)



In [ ]: