Getting the top K sorted elements

Time and space complexity


In [ ]:
import heapq

In [ ]:
%load_ext memory_profiler

In [36]:
# generator of this size
n = 20000000
k = 10

In [41]:
def get_it():
    return (-a for a in range(n))

In [18]:
%%time
sorted(get_it(), key=lambda x: x)[:k]


CPU times: user 2.24 s, sys: 267 ms, total: 2.5 s
Wall time: 2.5 s
Out[18]:
[-9999999,
 -9999998,
 -9999997,
 -9999996,
 -9999995,
 -9999994,
 -9999993,
 -9999992,
 -9999991,
 -9999990]

In [40]:
%%memit
sorted(get_it(), key=lambda x: x)[:k]


peak memory: 964.60 MiB, increment: 846.59 MiB

In [34]:
%%time
heapq.nsmallest(k, get_it())


CPU times: user 4.97 s, sys: 23.4 ms, total: 4.99 s
Wall time: 5.01 s
Out[34]:
[-9999999,
 -9999998,
 -9999997,
 -9999996,
 -9999995,
 -9999994,
 -9999993,
 -9999992,
 -9999991,
 -9999990]

In [39]:
%%memit
heapq.nsmallest(k, get_it())


peak memory: 118.01 MiB, increment: 0.00 MiB

Sorting tuples


In [ ]:
def get_it_tuple():
    return ((a, -a) for a in range(n))

In [46]:
%%timeit
sorted(get_it_tuple(), key=lambda x: x[1])[:10]


1 loop, best of 3: 8.32 s per loop

In [47]:
%%memit
sorted(get_it_tuple(), key=lambda x: x[1])[:10]


peak memory: 2927.98 MiB, increment: 2884.93 MiB

In [43]:
%%timeit
heapq.nsmallest(k, get_it_tuple(), key=lambda x: x[1])


1 loop, best of 3: 13.3 s per loop

In [44]:
%%memit
heapq.nsmallest(k, get_it_tuple(), key=lambda x: x[1])


peak memory: 42.05 MiB, increment: 0.00 MiB