In [1]:
import pandas as pd
import numpy as np

Resampling: changing the frequency of a time series. It is useful when the data

  • has no regular frequency and we want it to have one
  • has a frequency, but not the one we want

In [8]:
# 72 consecutive hourly timestamps starting at midnight on 2011-01-01.
rng = pd.date_range(start='1/1/2011', periods=72, freq='H')
# Peek at the second through fourth timestamps.
rng[1:4]


Out[8]:
DatetimeIndex(['2011-01-01 01:00:00', '2011-01-01 02:00:00',
               '2011-01-01 03:00:00'],
              dtype='datetime64[ns]', freq='H')

In [7]:
# Hourly series whose value at each timestamp is simply its row number (0, 1, 2, ...),
# which makes it easy to see which original rows survive later frequency changes.
ts = pd.Series(data=list(range(len(rng))), index=rng)
ts.head()


Out[7]:
2011-01-01 00:00:00    0
2011-01-01 01:00:00    1
2011-01-01 02:00:00    2
2011-01-01 03:00:00    3
2011-01-01 04:00:00    4
Freq: H, dtype: int64

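Convert the hourly series to a 45-minute frequency and fill the newly created timestamps

  • ffill --> forward fill --> reuse the most recent earlier observation
  • bfill --> backward fill --> use the next later observation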

In [10]:
# Re-index onto a 45-minute grid; timestamps that do not exist in the hourly
# data take the value of the latest earlier observation (forward fill).
converted = ts.asfreq(freq='45Min', method='ffill')
converted.head(10)


Out[10]:
2011-01-01 00:00:00    0
2011-01-01 00:45:00    0
2011-01-01 01:30:00    1
2011-01-01 02:15:00    2
2011-01-01 03:00:00    3
2011-01-01 03:45:00    3
2011-01-01 04:30:00    4
2011-01-01 05:15:00    5
2011-01-01 06:00:00    6
2011-01-01 06:45:00    6
Freq: 45T, dtype: int64

In [11]:
# Number of rows in the original hourly series (one per timestamp in `rng`).
ts.shape


Out[11]:
(72,)

In [12]:
# Number of rows after converting to the 45-minute grid, for comparison with ts.shape.
converted.shape


Out[12]:
(95,)

In [14]:
# Down-convert to a 3-hour grid: asfreq only picks the values sitting exactly on
# the new timestamps, so the observations in between are dropped.
converted2 = ts.asfreq(freq='3H')
converted2.head()


Out[14]:
2011-01-01 00:00:00     0
2011-01-01 03:00:00     3
2011-01-01 06:00:00     6
2011-01-01 09:00:00     9
2011-01-01 12:00:00    12
Freq: 3H, dtype: int64

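Resampling is the better option when we do not want to lose data: asfreq keeps only the values that fall exactly on the new timestamps, whereas resample aggregates every value inside each new interval.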


In [17]:
# Each 2-hour bin averages the two hourly values it contains:
# (0, 1) -> 0.5, (2, 3) -> 2.5, and so on. Show the first ten bins.
ts.resample('2H').mean().head(10)


Out[17]:
2011-01-01 00:00:00     0.5
2011-01-01 02:00:00     2.5
2011-01-01 04:00:00     4.5
2011-01-01 06:00:00     6.5
2011-01-01 08:00:00     8.5
2011-01-01 10:00:00    10.5
2011-01-01 12:00:00    12.5
2011-01-01 14:00:00    14.5
2011-01-01 16:00:00    16.5
2011-01-01 18:00:00    18.5
Freq: 2H, dtype: float64

In [30]:
#resampling events in irregular time series
# Build an irregular series by keeping 10 randomly chosen rows of `ts`.
# A seeded generator makes the sample identical on every Restart & Run All
# (the name `rng` is already used for the date index, hence `sampler`).
sampler = np.random.default_rng(seed=0)
picked_positions = sampler.choice(len(ts), size=10, replace=False)
# The drawn numbers are row positions, not timestamps, so select with .iloc;
# plain ts[...] with integers on a DatetimeIndex relies on a deprecated fallback.
irreq_ts = ts.iloc[picked_positions]
irreq_ts


Out[30]:
2011-01-03 02:00:00    50
2011-01-03 13:00:00    61
2011-01-03 01:00:00    49
2011-01-03 16:00:00    64
2011-01-01 04:00:00     4
2011-01-02 22:00:00    46
2011-01-01 09:00:00     9
2011-01-02 11:00:00    35
2011-01-02 20:00:00    44
2011-01-02 04:00:00    28
dtype: int64

In [34]:
# Put the randomly drawn observations back into chronological order.
irreq_ts = irreq_ts.sort_index(ascending=True)
irreq_ts


Out[34]:
2011-01-01 04:00:00     4
2011-01-01 09:00:00     9
2011-01-02 04:00:00    28
2011-01-02 11:00:00    35
2011-01-02 20:00:00    44
2011-01-02 22:00:00    46
2011-01-03 01:00:00    49
2011-01-03 02:00:00    50
2011-01-03 13:00:00    61
2011-01-03 16:00:00    64
dtype: int64

In [36]:
# Put the irregular observations onto a regular hourly grid and forward-fill the
# gaps. limit=5 carries each observation forward for at most five hours; anything
# further from the last known value stays NaN.
# Resampler.ffill replaces the deprecated Resampler.fillna(method='ffill').
irreq_ts.resample('H').ffill(limit=5)


Out[36]:
2011-01-01 04:00:00     4.0
2011-01-01 05:00:00     4.0
2011-01-01 06:00:00     4.0
2011-01-01 07:00:00     4.0
2011-01-01 08:00:00     4.0
2011-01-01 09:00:00     9.0
2011-01-01 10:00:00     9.0
2011-01-01 11:00:00     9.0
2011-01-01 12:00:00     9.0
2011-01-01 13:00:00     9.0
2011-01-01 14:00:00     9.0
2011-01-01 15:00:00     NaN
2011-01-01 16:00:00     NaN
2011-01-01 17:00:00     NaN
2011-01-01 18:00:00     NaN
2011-01-01 19:00:00     NaN
2011-01-01 20:00:00     NaN
2011-01-01 21:00:00     NaN
2011-01-01 22:00:00     NaN
2011-01-01 23:00:00     NaN
2011-01-02 00:00:00     NaN
2011-01-02 01:00:00     NaN
2011-01-02 02:00:00     NaN
2011-01-02 03:00:00     NaN
2011-01-02 04:00:00    28.0
2011-01-02 05:00:00    28.0
2011-01-02 06:00:00    28.0
2011-01-02 07:00:00    28.0
2011-01-02 08:00:00    28.0
2011-01-02 09:00:00    28.0
                       ... 
2011-01-02 11:00:00    35.0
2011-01-02 12:00:00    35.0
2011-01-02 13:00:00    35.0
2011-01-02 14:00:00    35.0
2011-01-02 15:00:00    35.0
2011-01-02 16:00:00    35.0
2011-01-02 17:00:00     NaN
2011-01-02 18:00:00     NaN
2011-01-02 19:00:00     NaN
2011-01-02 20:00:00    44.0
2011-01-02 21:00:00    44.0
2011-01-02 22:00:00    46.0
2011-01-02 23:00:00    46.0
2011-01-03 00:00:00    46.0
2011-01-03 01:00:00    49.0
2011-01-03 02:00:00    50.0
2011-01-03 03:00:00    50.0
2011-01-03 04:00:00    50.0
2011-01-03 05:00:00    50.0
2011-01-03 06:00:00    50.0
2011-01-03 07:00:00    50.0
2011-01-03 08:00:00     NaN
2011-01-03 09:00:00     NaN
2011-01-03 10:00:00     NaN
2011-01-03 11:00:00     NaN
2011-01-03 12:00:00     NaN
2011-01-03 13:00:00    61.0
2011-01-03 14:00:00    61.0
2011-01-03 15:00:00    61.0
2011-01-03 16:00:00    64.0
Freq: H, Length: 61, dtype: float64

In [38]:
# Count how many observations fall into each hourly bin: 1 where a sampled
# timestamp exists, 0 for the hours in between.
irreq_ts.resample(rule='H').count()


Out[38]:
2011-01-01 04:00:00    1
2011-01-01 05:00:00    0
2011-01-01 06:00:00    0
2011-01-01 07:00:00    0
2011-01-01 08:00:00    0
2011-01-01 09:00:00    1
2011-01-01 10:00:00    0
2011-01-01 11:00:00    0
2011-01-01 12:00:00    0
2011-01-01 13:00:00    0
2011-01-01 14:00:00    0
2011-01-01 15:00:00    0
2011-01-01 16:00:00    0
2011-01-01 17:00:00    0
2011-01-01 18:00:00    0
2011-01-01 19:00:00    0
2011-01-01 20:00:00    0
2011-01-01 21:00:00    0
2011-01-01 22:00:00    0
2011-01-01 23:00:00    0
2011-01-02 00:00:00    0
2011-01-02 01:00:00    0
2011-01-02 02:00:00    0
2011-01-02 03:00:00    0
2011-01-02 04:00:00    1
2011-01-02 05:00:00    0
2011-01-02 06:00:00    0
2011-01-02 07:00:00    0
2011-01-02 08:00:00    0
2011-01-02 09:00:00    0
                      ..
2011-01-02 11:00:00    1
2011-01-02 12:00:00    0
2011-01-02 13:00:00    0
2011-01-02 14:00:00    0
2011-01-02 15:00:00    0
2011-01-02 16:00:00    0
2011-01-02 17:00:00    0
2011-01-02 18:00:00    0
2011-01-02 19:00:00    0
2011-01-02 20:00:00    1
2011-01-02 21:00:00    0
2011-01-02 22:00:00    1
2011-01-02 23:00:00    0
2011-01-03 00:00:00    0
2011-01-03 01:00:00    1
2011-01-03 02:00:00    1
2011-01-03 03:00:00    0
2011-01-03 04:00:00    0
2011-01-03 05:00:00    0
2011-01-03 06:00:00    0
2011-01-03 07:00:00    0
2011-01-03 08:00:00    0
2011-01-03 09:00:00    0
2011-01-03 10:00:00    0
2011-01-03 11:00:00    0
2011-01-03 12:00:00    0
2011-01-03 13:00:00    1
2011-01-03 14:00:00    0
2011-01-03 15:00:00    0
2011-01-03 16:00:00    1
Freq: H, Length: 61, dtype: int64

In [ ]: