In [1]:
def uniques_only(iterable):
    """
    Returns an iterable in the same order with duplicates removed
    """
    seen = set()
    for item in iterable:
        if item not in seen:
            yield item
            seen.add(item)

In [3]:
from timeit import default_timer
import unittest

class UniquesOnlyTests(unittest.TestCase):

    """Tests for uniques_only."""

    def assertIterableEqual(self, iterable1, iterable2):
        self.assertEqual(list(iterable1), list(iterable2))

    def test_no_duplicates(self):
        self.assertIterableEqual(uniques_only([1, 2, 3]), [1, 2, 3])

    def test_adjacent_duplicates(self):
        self.assertIterableEqual(uniques_only([1, 1, 2, 2, 3]), [1, 2, 3])

    def test_non_adjacent_duplicates(self):
        self.assertIterableEqual(uniques_only([1, 2, 3, 1, 2]), [1, 2, 3])

    def test_lots_of_duplicates(self):
        self.assertIterableEqual(uniques_only([1, 2, 2, 1, 1, 2, 1]), [1, 2])

    def test_accepts_iterator(self):
        nums = (n**2 for n in [1, 2, 3])
        self.assertIterableEqual(uniques_only(nums), [1, 4, 9])

    # To test the Bonus part of this exercise, comment out the following line
    # @unittest.expectedFailure
    def test_returns_iterator(self):
        nums = iter([1, 2, 3])
        output = uniques_only(nums)
        self.assertEqual(iter(output), iter(output))
        self.assertEqual(next(output), 1)
        self.assertEqual(next(nums), 2)

    # To test the Bonus part of this exercise, comment out the following line
    # @unittest.expectedFailure
    def test_accepts_nonhashable_types(self):
        output = uniques_only([[1, 2], [3], [1], [3]])
        self.assertIterableEqual(output, [[1, 2], [3], [1]])

    # To test the Bonus part of this exercise, comment out the following line
    # @unittest.expectedFailure
    def test_hashable_types_faster(self):
        hashables = [(n,) for n in range(4000)] + [0]
        unhashables = [[n] for n in range(4000)] + [0]
        with Timer() as hashable:
            for _ in uniques_only(hashables):
                pass
        with Timer() as unhashable:
            for _ in uniques_only(unhashables):
                pass
        self.assertLess(hashable.elapsed * 5, unhashable.elapsed)


class Timer:

    """Context manager to time a code block."""

    def __enter__(self):
        self.start = default_timer()
        return self

    def __exit__(self, *args):
        self.end = default_timer()
        self.elapsed = self.end - self.start


if __name__ == "__main__":
    unittest.main(argv=['ignore-first-argument'], exit=False)


.E.E....
======================================================================
ERROR: test_accepts_nonhashable_types (__main__.UniquesOnlyTests)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "<ipython-input-3-3095761db9d3>", line 40, in test_accepts_nonhashable_types
    self.assertIterableEqual(output, [[1, 2], [3], [1]])
  File "<ipython-input-3-3095761db9d3>", line 9, in assertIterableEqual
    self.assertEqual(list(iterable1), list(iterable2))
  File "<ipython-input-1-f582a8f3ce27>", line 7, in uniques_only
    if item not in seen:
TypeError: unhashable type: 'list'

======================================================================
ERROR: test_hashable_types_faster (__main__.UniquesOnlyTests)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "<ipython-input-3-3095761db9d3>", line 51, in test_hashable_types_faster
    for _ in uniques_only(unhashables):
  File "<ipython-input-1-f582a8f3ce27>", line 7, in uniques_only
    if item not in seen:
TypeError: unhashable type: 'list'

----------------------------------------------------------------------
Ran 8 tests in 0.026s

FAILED (errors=2)

Note1: sets and dicts cannot take unhashable types like lists


In [4]:
tmp = set()

In [5]:
1 in tmp


Out[5]:
False

In [6]:
[1] in tmp


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-6-4128a7ea547b> in <module>()
----> 1 [1] in tmp

TypeError: unhashable type: 'list'

In [8]:
tmp.add(1)
tmp


Out[8]:
{1}

In [9]:
tmp.add([1])


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-9-b2ea92368345> in <module>()
----> 1 tmp.add([1])

TypeError: unhashable type: 'list'

In [11]:
tmp = set([1])
tmp


Out[11]:
{1}

So we may use a list instead of set like so:


In [14]:
def uniques_only(iterable):
    """
    Returns an iterable in the same order with duplicates removed
    """
    seen = []
    for item in iterable:
        if item not in seen:
            yield item
            seen.append(item)

In [15]:
from timeit import default_timer
import unittest

class UniquesOnlyTests(unittest.TestCase):

    """Tests for uniques_only."""

    def assertIterableEqual(self, iterable1, iterable2):
        self.assertEqual(list(iterable1), list(iterable2))

    def test_no_duplicates(self):
        self.assertIterableEqual(uniques_only([1, 2, 3]), [1, 2, 3])

    def test_adjacent_duplicates(self):
        self.assertIterableEqual(uniques_only([1, 1, 2, 2, 3]), [1, 2, 3])

    def test_non_adjacent_duplicates(self):
        self.assertIterableEqual(uniques_only([1, 2, 3, 1, 2]), [1, 2, 3])

    def test_lots_of_duplicates(self):
        self.assertIterableEqual(uniques_only([1, 2, 2, 1, 1, 2, 1]), [1, 2])

    def test_accepts_iterator(self):
        nums = (n**2 for n in [1, 2, 3])
        self.assertIterableEqual(uniques_only(nums), [1, 4, 9])

    # To test the Bonus part of this exercise, comment out the following line
    # @unittest.expectedFailure
    def test_returns_iterator(self):
        nums = iter([1, 2, 3])
        output = uniques_only(nums)
        self.assertEqual(iter(output), iter(output))
        self.assertEqual(next(output), 1)
        self.assertEqual(next(nums), 2)

    # To test the Bonus part of this exercise, comment out the following line
    # @unittest.expectedFailure
    def test_accepts_nonhashable_types(self):
        output = uniques_only([[1, 2], [3], [1], [3]])
        self.assertIterableEqual(output, [[1, 2], [3], [1]])

    # To test the Bonus part of this exercise, comment out the following line
    # @unittest.expectedFailure
    def test_hashable_types_faster(self):
        hashables = [(n,) for n in range(4000)] + [0]
        unhashables = [[n] for n in range(4000)] + [0]
        with Timer() as hashable:
            for _ in uniques_only(hashables):
                pass
        with Timer() as unhashable:
            for _ in uniques_only(unhashables):
                pass
        self.assertLess(hashable.elapsed * 5, unhashable.elapsed)


class Timer:

    """Context manager to time a code block."""

    def __enter__(self):
        self.start = default_timer()
        return self

    def __exit__(self, *args):
        self.end = default_timer()
        self.elapsed = self.end - self.start


if __name__ == "__main__":
    unittest.main(argv=['ignore-first-argument'], exit=False)


...F....
======================================================================
FAIL: test_hashable_types_faster (__main__.UniquesOnlyTests)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "<ipython-input-15-3095761db9d3>", line 53, in test_hashable_types_faster
    self.assertLess(hashable.elapsed * 5, unhashable.elapsed)
AssertionError: 1.1137854112109835 not less than 0.19412242067392071

----------------------------------------------------------------------
Ran 8 tests in 0.494s

FAILED (failures=1)

Note2: Sets vs Lists. Checking for containment (X not in Y) in a list requires looping through the whole list. This is slow. Sets rely on hashes for lookups so containment checks won't slow down as our hash grows in size

So we can use a list for unhashable types and set for hashable types like so:


In [16]:
def uniques_only(iterable):
    """
    Returns an iterable in the same order with duplicates removed
    """
    hashable_seen = set()
    unhashable_seen = []
    for item in iterable:
        try:
            if item not in hashable_seen:
                yield item
                hashable_seen.add(item)
        except TypeError:
            if item not in unhashable_seen:
                yield item
                unhashable_seen.append(item)

In [17]:
from timeit import default_timer
import unittest

class UniquesOnlyTests(unittest.TestCase):

    """Tests for uniques_only."""

    def assertIterableEqual(self, iterable1, iterable2):
        self.assertEqual(list(iterable1), list(iterable2))

    def test_no_duplicates(self):
        self.assertIterableEqual(uniques_only([1, 2, 3]), [1, 2, 3])

    def test_adjacent_duplicates(self):
        self.assertIterableEqual(uniques_only([1, 1, 2, 2, 3]), [1, 2, 3])

    def test_non_adjacent_duplicates(self):
        self.assertIterableEqual(uniques_only([1, 2, 3, 1, 2]), [1, 2, 3])

    def test_lots_of_duplicates(self):
        self.assertIterableEqual(uniques_only([1, 2, 2, 1, 1, 2, 1]), [1, 2])

    def test_accepts_iterator(self):
        nums = (n**2 for n in [1, 2, 3])
        self.assertIterableEqual(uniques_only(nums), [1, 4, 9])

    # To test the Bonus part of this exercise, comment out the following line
    # @unittest.expectedFailure
    def test_returns_iterator(self):
        nums = iter([1, 2, 3])
        output = uniques_only(nums)
        self.assertEqual(iter(output), iter(output))
        self.assertEqual(next(output), 1)
        self.assertEqual(next(nums), 2)

    # To test the Bonus part of this exercise, comment out the following line
    # @unittest.expectedFailure
    def test_accepts_nonhashable_types(self):
        output = uniques_only([[1, 2], [3], [1], [3]])
        self.assertIterableEqual(output, [[1, 2], [3], [1]])

    # To test the Bonus part of this exercise, comment out the following line
    # @unittest.expectedFailure
    def test_hashable_types_faster(self):
        hashables = [(n,) for n in range(4000)] + [0]
        unhashables = [[n] for n in range(4000)] + [0]
        with Timer() as hashable:
            for _ in uniques_only(hashables):
                pass
        with Timer() as unhashable:
            for _ in uniques_only(unhashables):
                pass
        self.assertLess(hashable.elapsed * 5, unhashable.elapsed)


class Timer:

    """Context manager to time a code block."""

    def __enter__(self):
        self.start = default_timer()
        return self

    def __exit__(self, *args):
        self.end = default_timer()
        self.elapsed = self.end - self.start


if __name__ == "__main__":
    unittest.main(argv=['ignore-first-argument'], exit=False)


........
----------------------------------------------------------------------
Ran 8 tests in 0.223s

OK

Above we are using 'TypeError' to identify whether an item is hashable or not. Instead we could use ' 'isinstance' function to check if the item is hashable or not like so:


In [21]:
from collections.abc import Hashable

def uniques_only(iterable):
    """
    Returns an iterable in the same order with duplicates removed
    """
    hashable_seen = set()
    unhashable_seen = []
    for item in iterable:
        if isinstance(item, Hashable):
            if item not in hashable_seen:
                yield item
                hashable_seen.add(item)
        else:
            if item not in unhashable_seen:
                yield item
                unhashable_seen.append(item)

In [22]:
from timeit import default_timer
import unittest

class UniquesOnlyTests(unittest.TestCase):

    """Tests for uniques_only."""

    def assertIterableEqual(self, iterable1, iterable2):
        self.assertEqual(list(iterable1), list(iterable2))

    def test_no_duplicates(self):
        self.assertIterableEqual(uniques_only([1, 2, 3]), [1, 2, 3])

    def test_adjacent_duplicates(self):
        self.assertIterableEqual(uniques_only([1, 1, 2, 2, 3]), [1, 2, 3])

    def test_non_adjacent_duplicates(self):
        self.assertIterableEqual(uniques_only([1, 2, 3, 1, 2]), [1, 2, 3])

    def test_lots_of_duplicates(self):
        self.assertIterableEqual(uniques_only([1, 2, 2, 1, 1, 2, 1]), [1, 2])

    def test_accepts_iterator(self):
        nums = (n**2 for n in [1, 2, 3])
        self.assertIterableEqual(uniques_only(nums), [1, 4, 9])

    # To test the Bonus part of this exercise, comment out the following line
    # @unittest.expectedFailure
    def test_returns_iterator(self):
        nums = iter([1, 2, 3])
        output = uniques_only(nums)
        self.assertEqual(iter(output), iter(output))
        self.assertEqual(next(output), 1)
        self.assertEqual(next(nums), 2)

    # To test the Bonus part of this exercise, comment out the following line
    # @unittest.expectedFailure
    def test_accepts_nonhashable_types(self):
        output = uniques_only([[1, 2], [3], [1], [3]])
        self.assertIterableEqual(output, [[1, 2], [3], [1]])

    # To test the Bonus part of this exercise, comment out the following line
    # @unittest.expectedFailure
    def test_hashable_types_faster(self):
        hashables = [(n,) for n in range(4000)] + [0]
        unhashables = [[n] for n in range(4000)] + [0]
        with Timer() as hashable:
            for _ in uniques_only(hashables):
                pass
        with Timer() as unhashable:
            for _ in uniques_only(unhashables):
                pass
        self.assertLess(hashable.elapsed * 5, unhashable.elapsed)


class Timer:

    """Context manager to time a code block."""

    def __enter__(self):
        self.start = default_timer()
        return self

    def __exit__(self, *args):
        self.end = default_timer()
        self.elapsed = self.end - self.start


if __name__ == "__main__":
    unittest.main(argv=['ignore-first-argument'], exit=False)


........
----------------------------------------------------------------------
Ran 8 tests in 0.232s

OK

In [ ]: