In [1]:
import datetime as dt

import numpy as np

# Append ../../main/python (made absolute relative to the current working
# directory) to the module search path -- presumably so that the thalesians
# import below resolves against the repository sources rather than an
# installed package. The append has to happen before that import.
import os, sys
sys.path.append(os.path.abspath('../../main/python'))
import thalesians.tsa.utils as utils

xbatch and batch


In [2]:
# Walk range(10) two items at a time; each batch printed is itself a range.
pairs = utils.xbatch(2, range(10))
for pair in pairs:
    print(pair)


range(0, 2)
range(2, 4)
range(4, 6)
range(6, 8)
range(8, 10)

In [3]:
# Batching a list: the twelve months come back three at a time, as lists.
months_list = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
for quarter in utils.xbatch(3, months_list):
    print(quarter)


['Jan', 'Feb', 'Mar']
['Apr', 'May', 'Jun']
['Jul', 'Aug', 'Sep']
['Oct', 'Nov', 'Dec']

In [4]:
# Same months as a tuple: the batches printed are tuples as well.
months_tuple = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
for quarter in utils.xbatch(3, months_tuple):
    print(quarter)


('Jan', 'Feb', 'Mar')
('Apr', 'May', 'Jun')
('Jul', 'Aug', 'Sep')
('Oct', 'Nov', 'Dec')

In [5]:
# NumPy arrays can be batched too: each batch printed is a two-element array.
digits = np.array(range(10))
for pair in utils.xbatch(2, digits):
    print(pair)


[0 1]
[2 3]
[4 5]
[6 7]
[8 9]

In [6]:
# Called without a loop, xbatch only hands back a generator object;
# nothing is batched until it is iterated.
lazy_pairs = utils.xbatch(2, range(10))
lazy_pairs


Out[6]:
<generator object xbatch at 0x0000024C49273138>

In [7]:
# batch takes the same arguments as xbatch but returns all batches in one list.
pairs_list = utils.batch(2, range(10))
pairs_list


Out[7]:
[range(0, 2), range(2, 4), range(4, 6), range(6, 8), range(8, 10)]

In [8]:
# Eight items in batches of three: the last batch holds the two left over.
numbers = [429, 5, 2, 14, 42, 132, 1, 1]
utils.batch(3, numbers)


Out[8]:
[[429, 5, 2], [14, 42, 132], [1, 1]]

In [9]:
# Ten items in batches of four: two full batches, then a short final one.
quads = utils.batch(4, range(10))
quads


Out[9]:
[range(0, 4), range(4, 8), range(8, 10)]

peek


In [10]:
# Peek at the first three batches, then drain the iterator returned by peek.
it = utils.xbatch(2, range(10))
first_three, new_it = utils.peek(it, 3)
print('First three:', first_three)

# new_it replays the peeked batches before continuing with the rest.
print('Iterating through new_it:')
for batch in new_it:
    print(batch)

# By now the original iterator is used up, so this loop prints nothing.
print('Iterating through it:')
for batch in it:
    print(batch)


First three: [range(0, 2), range(2, 4), range(4, 6)]
Iterating through new_it:
range(0, 2)
range(2, 4)
range(4, 6)
range(6, 8)
range(8, 10)
Iterating through it:

In [11]:
# Peek again, but this time keep using the original iterator afterwards.
it = utils.xbatch(2, range(10))
first_three, new_it = utils.peek(it, 3)
print('First three:', first_three)

# peek has already pulled three batches out of `it`, so only the remaining
# two are printed here; use new_it instead to see all five.
print('Iterating through it:')
for batch in it:
    print(batch)


First three: [range(0, 2), range(2, 4), range(4, 6)]
Iterating through it:
range(6, 8)
range(8, 10)

bracket


In [12]:
# Sample points to be bracketed (sorted ascending); they are marked with
# carets on the number line that follows.
data = [
    8, 11, 12, 13, 14,
    27, 29, 37,
    49, 50, 51,
    79, 85,
]
          1    1    2    2    3    3    4    4    5    5    6    6    7    7    8    8    9
0----5----0----5----0----5----0----5----0----5----0----5----0----5----0----5----0----5----0
   [0   [1   [2   [3   [4   [5   [6   [7   [8   [9   [10  [11  [12  [13  [14  [15  [16  [17
   ]0   ]1   ]2   ]3   ]4   ]5   ]6   ]7   ]8   ]9   ]10  ]11  ]12  ]13  ]14  ]15  ]16  ]17
        ^  ^^^^            ^ ^       ^           ^^^                           ^     ^
        |  ||||            | |       |           |||                           |     |

In [13]:
# Default bracketing: width-5 intervals laid out from 3, closed on the left.
# Judging by the output, only intervals that contain data are returned (each
# with its index), plus, for every point, the position of its interval in
# that returned list.
left_closed = utils.bracket(data, 3, 5)
left_closed


Out[13]:
([{[8, 13), 1},
  {[13, 18), 2},
  {[23, 28), 4},
  {[28, 33), 5},
  {[33, 38), 6},
  {[48, 53), 9},
  {[78, 83), 15},
  {[83, 88), 16}],
 [0, 0, 0, 1, 1, 2, 3, 4, 5, 5, 5, 6, 7])

In [14]:
# Same grid, but with intervals closed on the right: a point sitting on a
# boundary (8 or 13 here) now belongs to the interval ending at it.
right_closed = utils.bracket(
    data, 3, 5,
    intervals_right_closed=True,
)
right_closed


Out[14]:
([{(3, 8], 0},
  {(8, 13], 1},
  {(13, 18], 2},
  {(23, 28], 4},
  {(28, 33], 5},
  {(33, 38], 6},
  {(48, 53], 9},
  {(78, 83], 15},
  {(83, 88], 16}],
 [0, 1, 1, 1, 2, 3, 4, 5, 6, 6, 6, 7, 8])

In [15]:
# coalesce=True merges adjacent occupied intervals into a single wider one,
# e.g. [8, 13) and [13, 18) become [8, 18).
coalesced = utils.bracket(
    data, 3, 5,
    coalesce=True,
)
coalesced


Out[15]:
([{[8, 18), 1}, {[23, 38), 4}, {[48, 53), 9}, {[78, 88), 15}],
 [0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3])

In [16]:
# Both options together: right-closed intervals, with adjacent occupied
# intervals merged.
right_closed_coalesced = utils.bracket(
    data, 3, 5,
    intervals_right_closed=True,
    coalesce=True,
)
right_closed_coalesced


Out[16]:
([{(3, 18], 0}, {(23, 38], 4}, {(48, 53], 9}, {(78, 88], 15}],
 [0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3])

In [17]:
# The same offsets as the integer example, now read as days after
# 31 January 2017, so that bracket can be shown on calendar dates.
base_date = dt.date(2017, 1, 31)
day_offsets = [8, 11, 12, 13, 14, 27, 29, 37, 49, 50, 51, 79, 85]
data = [base_date + dt.timedelta(days=offset) for offset in day_offsets]
data


Out[17]:
[datetime.date(2017, 2, 8),
 datetime.date(2017, 2, 11),
 datetime.date(2017, 2, 12),
 datetime.date(2017, 2, 13),
 datetime.date(2017, 2, 14),
 datetime.date(2017, 2, 27),
 datetime.date(2017, 3, 1),
 datetime.date(2017, 3, 9),
 datetime.date(2017, 3, 21),
 datetime.date(2017, 3, 22),
 datetime.date(2017, 3, 23),
 datetime.date(2017, 4, 20),
 datetime.date(2017, 4, 26)]

In [18]:
# Five-day brackets laid out from 3 February 2017, closed on the left.
date_origin = dt.date(2017, 2, 3)
date_width = dt.timedelta(days=5)
utils.bracket(data, date_origin, date_width)


Out[18]:
([{[2017-02-08, 2017-02-13), 1},
  {[2017-02-13, 2017-02-18), 2},
  {[2017-02-23, 2017-02-28), 4},
  {[2017-02-28, 2017-03-05), 5},
  {[2017-03-05, 2017-03-10), 6},
  {[2017-03-20, 2017-03-25), 9},
  {[2017-04-19, 2017-04-24), 15},
  {[2017-04-24, 2017-04-29), 16}],
 [0, 0, 0, 1, 1, 2, 3, 4, 5, 5, 5, 6, 7])

In [19]:
# Five-day brackets from 3 February 2017, this time closed on the right.
date_origin = dt.date(2017, 2, 3)
date_width = dt.timedelta(days=5)
utils.bracket(data, date_origin, date_width,
              intervals_right_closed=True)


Out[19]:
([{(2017-02-03, 2017-02-08], 0},
  {(2017-02-08, 2017-02-13], 1},
  {(2017-02-13, 2017-02-18], 2},
  {(2017-02-23, 2017-02-28], 4},
  {(2017-02-28, 2017-03-05], 5},
  {(2017-03-05, 2017-03-10], 6},
  {(2017-03-20, 2017-03-25], 9},
  {(2017-04-19, 2017-04-24], 15},
  {(2017-04-24, 2017-04-29], 16}],
 [0, 1, 1, 1, 2, 3, 4, 5, 6, 6, 6, 7, 8])

In [20]:
# Five-day brackets from 3 February 2017, with adjacent occupied brackets
# merged into longer date ranges.
date_origin = dt.date(2017, 2, 3)
date_width = dt.timedelta(days=5)
utils.bracket(data, date_origin, date_width,
              coalesce=True)


Out[20]:
([{[2017-02-08, 2017-02-18), 1},
  {[2017-02-23, 2017-03-10), 4},
  {[2017-03-20, 2017-03-25), 9},
  {[2017-04-19, 2017-04-29), 15}],
 [0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3])

In [21]:
# Five-day brackets from 3 February 2017: right-closed and coalesced.
date_origin = dt.date(2017, 2, 3)
date_width = dt.timedelta(days=5)
utils.bracket(data, date_origin, date_width,
              intervals_right_closed=True,
              coalesce=True)


Out[21]:
([{(2017-02-03, 2017-02-18], 0},
  {(2017-02-23, 2017-03-10], 4},
  {(2017-03-20, 2017-03-25], 9},
  {(2017-04-19, 2017-04-29], 15}],
 [0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3])

In [22]:
# The same offsets once more, now read as minutes after midnight on
# 31 January 2017, so that bracket can be shown on timestamps.
midnight = dt.datetime(2017, 1, 31)
minute_offsets = [8, 11, 12, 13, 14, 27, 29, 37, 49, 50, 51, 79, 85]
data = [midnight + dt.timedelta(minutes=offset) for offset in minute_offsets]
data


Out[22]:
[datetime.datetime(2017, 1, 31, 0, 8),
 datetime.datetime(2017, 1, 31, 0, 11),
 datetime.datetime(2017, 1, 31, 0, 12),
 datetime.datetime(2017, 1, 31, 0, 13),
 datetime.datetime(2017, 1, 31, 0, 14),
 datetime.datetime(2017, 1, 31, 0, 27),
 datetime.datetime(2017, 1, 31, 0, 29),
 datetime.datetime(2017, 1, 31, 0, 37),
 datetime.datetime(2017, 1, 31, 0, 49),
 datetime.datetime(2017, 1, 31, 0, 50),
 datetime.datetime(2017, 1, 31, 0, 51),
 datetime.datetime(2017, 1, 31, 1, 19),
 datetime.datetime(2017, 1, 31, 1, 25)]

In [23]:
# Five-minute brackets laid out from 00:03 on 31 January 2017, closed on the left.
time_origin = dt.datetime(2017, 1, 31, 0, 3, 0)
time_width = dt.timedelta(minutes=5)
utils.bracket(data, time_origin, time_width)


Out[23]:
([{[2017-01-31 00:08:00, 2017-01-31 00:13:00), 1},
  {[2017-01-31 00:13:00, 2017-01-31 00:18:00), 2},
  {[2017-01-31 00:23:00, 2017-01-31 00:28:00), 4},
  {[2017-01-31 00:28:00, 2017-01-31 00:33:00), 5},
  {[2017-01-31 00:33:00, 2017-01-31 00:38:00), 6},
  {[2017-01-31 00:48:00, 2017-01-31 00:53:00), 9},
  {[2017-01-31 01:18:00, 2017-01-31 01:23:00), 15},
  {[2017-01-31 01:23:00, 2017-01-31 01:28:00), 16}],
 [0, 0, 0, 1, 1, 2, 3, 4, 5, 5, 5, 6, 7])

In [24]:
# Five-minute brackets from 00:03, this time closed on the right.
time_origin = dt.datetime(2017, 1, 31, 0, 3, 0)
time_width = dt.timedelta(minutes=5)
utils.bracket(data, time_origin, time_width,
              intervals_right_closed=True)


Out[24]:
([{(2017-01-31 00:03:00, 2017-01-31 00:08:00], 0},
  {(2017-01-31 00:08:00, 2017-01-31 00:13:00], 1},
  {(2017-01-31 00:13:00, 2017-01-31 00:18:00], 2},
  {(2017-01-31 00:23:00, 2017-01-31 00:28:00], 4},
  {(2017-01-31 00:28:00, 2017-01-31 00:33:00], 5},
  {(2017-01-31 00:33:00, 2017-01-31 00:38:00], 6},
  {(2017-01-31 00:48:00, 2017-01-31 00:53:00], 9},
  {(2017-01-31 01:18:00, 2017-01-31 01:23:00], 15},
  {(2017-01-31 01:23:00, 2017-01-31 01:28:00], 16}],
 [0, 1, 1, 1, 2, 3, 4, 5, 6, 6, 6, 7, 8])

In [25]:
# Five-minute brackets from 00:03, with adjacent occupied brackets merged
# into longer time spans.
time_origin = dt.datetime(2017, 1, 31, 0, 3, 0)
time_width = dt.timedelta(minutes=5)
utils.bracket(data, time_origin, time_width,
              coalesce=True)


Out[25]:
([{[2017-01-31 00:08:00, 2017-01-31 00:18:00), 1},
  {[2017-01-31 00:23:00, 2017-01-31 00:38:00), 4},
  {[2017-01-31 00:48:00, 2017-01-31 00:53:00), 9},
  {[2017-01-31 01:18:00, 2017-01-31 01:28:00), 15}],
 [0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3])

In [26]:
# Five-minute brackets from 00:03: right-closed and coalesced.
time_origin = dt.datetime(2017, 1, 31, 0, 3, 0)
time_width = dt.timedelta(minutes=5)
utils.bracket(data, time_origin, time_width,
              intervals_right_closed=True,
              coalesce=True)


Out[26]:
([{(2017-01-31 00:03:00, 2017-01-31 00:18:00], 0},
  {(2017-01-31 00:23:00, 2017-01-31 00:38:00], 4},
  {(2017-01-31 00:48:00, 2017-01-31 00:53:00], 9},
  {(2017-01-31 01:18:00, 2017-01-31 01:28:00], 15}],
 [0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3])