Merging sorted lists


In [73]:
def _merge_re(xs, ys):
    
    if not xs:
        return ys
    if not ys:
        return xs
    
    x, y = xs[0], ys[0]
    
    if x < y:
        ret = [x]
        xs  = xs[1:]
    else:
        ret = [y]
        ys  = ys[1:]
        
    ret.extend(merge(xs, ys))
    
    return ret

In [74]:
def _merge_it(xs, ys):
    
    if not xs:
        return ys
    if not ys:
        return xs
    
    i = j = 0
    ret = []
    
    while i < len(xs) and j < len(ys):
        if xs[i] < ys[j]:
            ret.append(xs[i])
            i += 1
        else:
            ret.append(ys[j])
            j += 1
            
    ret.extend(xs[i:])
    ret.extend(ys[j:])
    
    return ret

In [110]:
# Python 3
def _merge_it(xs, ys):
    
    if not xs:
        yield from ys
    if not ys:
        yield from xs
    
    i = j = 0
    
    while i < len(xs) and j < len(ys):
        if xs[i] < ys[j]:
            yield xs[i]
            i += 1
        else:
            yield ys[j]
            j += 1
            
    yield from xs[i:]
    yield from ys[j:]


  File "<ipython-input-110-2e1c38af8851>", line 4
    yield from ys
             ^
SyntaxError: invalid syntax

In [3]:
from random import randint
rand_sorted_list = lambda n, interval=10: [randint(interval*i, interval*(i+1)) for i in xrange(n)]

In [79]:
a = rand_sorted_list(15); a


Out[79]:
[8, 20, 21, 40, 45, 60, 68, 71, 85, 98, 104, 120, 128, 137, 142]

In [80]:
b = rand_sorted_list(12); b


Out[80]:
[4, 16, 28, 39, 46, 59, 70, 72, 84, 100, 104, 111]

In [81]:
_merge_it(a, b)


Out[81]:
[4,
 8,
 16,
 20,
 21,
 28,
 39,
 40,
 45,
 46,
 59,
 60,
 68,
 70,
 71,
 72,
 84,
 85,
 98,
 100,
 104,
 104,
 111,
 120,
 128,
 137,
 142]

In [83]:
_merge_it(a, b) == _merge_re(a, b)


Out[83]:
True

In [84]:
merge_all = lambda *xss: reduce(_merge_it, xss)

In [89]:
merge_all(*[rand_sorted_list(10) for _ in xrange(5)])


Out[89]:
[1,
 5,
 6,
 7,
 10,
 10,
 10,
 12,
 16,
 18,
 22,
 22,
 22,
 29,
 30,
 32,
 34,
 38,
 39,
 40,
 40,
 42,
 45,
 45,
 49,
 51,
 55,
 57,
 59,
 59,
 62,
 63,
 65,
 66,
 70,
 71,
 75,
 76,
 78,
 79,
 86,
 86,
 87,
 87,
 88,
 91,
 97,
 97,
 99,
 100]

In [12]:
from heapq import heapify, heappush, heappop

In [13]:
h = [randint(0, 100) for _ in xrange(10)]; h


Out[13]:
[100, 54, 25, 50, 69, 41, 60, 22, 78, 6]

In [54]:
def _merge_all(*lsts):
    
    h = map(lambda x: (x[1][0], x[0]), enumerate(lsts))
    indices = [1] * len(lsts)
    heapify(h)
    while any(map(lambda i, lst: i < len(lst), indices, lsts)):
        v, i = heappop(h)
        heappush(h, (lsts[i][indices[i]], i))
        indices[i] += 1
        yield v
    while h:
        v, _ = heappop(h)
        yield v

In [55]:
list(_merge_all([1, 6, 7], [4, 5, 8]))


Out[55]:
[1, 4, 5, 6, 7, 8]

In [57]:
list(_merge_all([1, 4, 7], [10, 23, 65, 66]))


---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-57-f6a2bc2605a6> in <module>()
----> 1 list(_merge_all([1, 4, 7], [10, 23, 65, 66]))

<ipython-input-54-4ba3306d2cc0> in _merge_all(*lsts)
      6     while any(map(lambda i, lst: i < len(lst), indices, lsts)):
      7         v, i = heappop(h)
----> 8         heappush(h, (lsts[i][indices[i]], i))
      9         indices[i] += 1
     10         yield v

IndexError: list index out of range

In [67]:
def _merge_all(*lsts):
    
    h = map(lambda x: (x[1][0], x[0]), enumerate(lsts))
    indices = [1] * len(lsts)
    heapify(h)
    while any(map(lambda i, lst: i < len(lst), indices, lsts)):
        v, i = heappop(h)
        lst = lsts[i]
        j = indices[i]
        if j < len(lst):
            heappush(h, (lst[j], i))
            indices[i] += 1
        yield v
    while h:
        v, _ = heappop(h)
        yield v

In [68]:
list(_merge_all([1, 4, 7], [10, 23, 65, 66]))


Out[68]:
[1, 4, 7, 10, 23, 65, 66]

In [70]:
list(_merge_all([1, 23, 66], [10, 25, 35, 65, 72], [2, 6, 8, 12, 14]))


Out[70]:
[1, 2, 6, 8, 10, 12, 14, 23, 25, 35, 65, 66, 72]

In [71]:
list(_merge_all([], [10, 25, 35, 65, 72], [2, 6, 8, 12, 14]))


---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-71-9e362c33f2b6> in <module>()
----> 1 list(_merge_all([], [10, 25, 35, 65, 72], [2, 6, 8, 12, 14]))

<ipython-input-67-02851221cc1d> in _merge_all(*lsts)
      1 def _merge_all(*lsts):
      2 
----> 3     h = map(lambda x: (x[1][0], x[0]), enumerate(lsts))
      4     indices = [1] * len(lsts)
      5     heapify(h)

<ipython-input-67-02851221cc1d> in <lambda>(x)
      1 def _merge_all(*lsts):
      2 
----> 3     h = map(lambda x: (x[1][0], x[0]), enumerate(lsts))
      4     indices = [1] * len(lsts)
      5     heapify(h)

IndexError: list index out of range

In [83]:
def _merge_all(*lsts):
    
    h = []
    indices = [0] * len(lsts)
    for i, lst in enumerate(lsts):
        if indices[i] < len(lst):
            h.append((lst[indices[i]], i))
            indices[i] += 1
    heapify(h)
    
    while h:
        v, i = heappop(h)
        yield v
        lst = lsts[i]
        if indices[i] < len(lst):
            heappush(h, (lst[indices[i]], i))
            indices[i] += 1

In [84]:
list(_merge_all([], [10, 25, 35, 65, 72], [2, 6, 8, 12, 14]))


Out[84]:
[2, 6, 8, 10, 12, 14, 25, 35, 65, 72]

In [108]:
def _merge_all(*iterables):
    
    h = []
    for it in map(iter, iterables):
        v = next(it, None)
        if v is not None:
            h.append((v, it))
    heapify(h)

    while h:
        v, it = heappop(h)
        yield v
        w = next(it, None)
        if w is not None:
            heappush(h, (w, it))

In [109]:
list(_merge_all([], [10, 25, 35, 65, 72], [2, 6, 8, 12, 14]))


Out[109]:
[2, 6, 8, 10, 12, 14, 25, 35, 65, 72]

In [90]:
it = iter([10, 25, 35, 65, 72]); it


Out[90]:
<listiterator at 0x10943dfd0>

In [93]:
next(it)


Out[93]:
10

In [95]:
next(it)


Out[95]:
25

In [96]:
list(it)


Out[96]:
[35, 65, 72]

In [101]:
it = iter([]); it


Out[101]:
<listiterator at 0x10943d710>

In [103]:
next(it, None)

In [ ]: