In [1]:
from jupyterthemes import get_themes
from jupyterthemes.stylefx import set_nb_theme
themes = get_themes()
set_nb_theme(themes[1])


Out[1]:

In [2]:
%load_ext watermark
%watermark -a 'Ethen' -d -t -v -p jupyterthemes


Ethen 2016-12-26 22:23:12 

CPython 3.5.2
IPython 4.2.0

jupyterthemes 0.13.9

Searching, Hashing, Sorting

Following the online book, Problem Solving with Algorithms and Data Structures. Chapter 5 introduces methods for searching and sorting numbers.

Searching

We can take advantage of a ordered list by doing a binary search. We start by searching in the middle, if it is not the item that we're searching for, we can use the ordered nature of the list to eliminate half of the remaining items.


In [3]:
def binary_search(testlist, query):
    if not testlist:
        return False
    else:
        mid_idx = len(testlist) // 2
        mid = testlist[mid_idx]
        if mid == query:
            return True
        elif query < mid:
            return binary_search(testlist[:mid_idx], query)
        else:
            return binary_search(testlist[mid_idx + 1:], query)

In [4]:
testlist = [0, 1, 2, 8, 13, 17, 19, 32, 42]
query = 19
print(binary_search(testlist, query))
query = 3
print(binary_search(testlist, query))


True
False

Keep in mind that this approach requires sorting the list, which may not be ideal if we're simply going to search for 1 number on the very large list (since we have to first sort the list, which is not a cheap operation).

Hashing

Implementing hash table. Source Hashing.


In [5]:
class HashTable:
    """
    a.k.a python's dictionary
    the initial size of the table has been chosen to 
    be 11, although this number is arbitrary, it's important
    for it to be a prime number so that collision resolution 
    will be efficient; this implementation does not handle
    resizing the hashtable when it runs out of the original size
    """
    def __init__(self):
        # slot will hold the key and data will hold the value 
        self.size = 11
        self.slot = [None] * self.size
        self.data = [None] * self.size
    
    def _put(self, key, value):
        hash_value = self._hash(key)
        if self.slot[hash_value] == None:
            self.slot[hash_value] = key
            self.data[hash_value] = value
        elif self.slot[hash_value] == key:
            # replace the original key value
            self.data[hash_value] = value
        else:
            # rehash to get the next location possible
            # if a collision is to occurr
            next_slot = self._rehash(hash_value)
            slot_value = self.slot[next_slot]
            while slot_value != None and slot_value != key:
                next_slot = self._rehash(next_slot)
                slot_value = self.slot[next_slot]

            if self.slot[next_slot] == None:
                self.slot[next_slot] = key
                self.data[next_slot] = value
            else:
                self.data[next_slot] = value
        
    def _get(self, key):
        data = None
        stop = False
        found = False
        start_slot = self._hash(key)
        next_slot  = start_slot      
        while self.slot[next_slot] != None and not found and not stop:
            if self.slot[next_slot] == key:
                data = self.data[next_slot]
                found = True
            else:
                # if we rehash to the starting value
                # then it means the data is not here
                next_slot = self._rehash(next_slot)
                if next_slot == start_slot:
                    stop = True        
        return data
    
    def _hash(self, key):
        return key % self.size
    
    def _rehash(self, oldhash):
        """
        a simple plus 1 rehash, where we add 1 to
        the original value and hash it again to 
        see if the slot it empty (None)
        """
        return (oldhash + 1) % self.size
    
    def __getitem__(self, key):
        # allow access using``[]`` syntax
        return self._get(key)

    def __setitem__(self, key, value):
        self._put(key, value)

In [6]:
H = HashTable()
H[54] = 'cat'
H[26] = 'dog'
H[93] = 'lion'
H[17] = 'tiger'
H[77] = 'bird'
H[44] = 'goat'
H[55] = 'pig'
print(H.slot)
print(H.data)


[77, 44, 55, None, 26, 93, 17, None, None, None, 54]
['bird', 'goat', 'pig', None, 'dog', 'lion', 'tiger', None, None, None, 'cat']

In [7]:
print(H[55])
print(H[20])


pig
None

Sorting

Merge Sort


In [8]:
def merge_sort(alist):
    if len(alist) > 1:
        mid = len(alist) // 2
        left_half = alist[:mid]
        right_half = alist[mid:]
        merge_sort(left_half)
        merge_sort(right_half)
        
        # loop through the left and right half,
        # compare the value and fill them back
        # to the original list
        i, j, k = 0, 0, 0
        while i < len(left_half) and j < len(right_half):
            if left_half[i] < right_half[j]:
                alist[k] = left_half[i]
                i += 1
            else:
                alist[k] = right_half[j]
                j += 1
            k += 1
        
        # after filling in the sorted value,
        # there will be left-over values on
        # either the left or right half, simply
        # append all the left-over values back
        while i < len(left_half):
            alist[k] = left_half[i]
            i += 1
            k += 1

        while j < len(right_half):
            alist[k] = right_half[j]
            j += 1
            k += 1
            
    return alist

In [9]:
alist = [54, 26, 93, 17, 77]
merge_sort(alist)


Out[9]:
[17, 26, 54, 77, 93]

In [10]:
def quick_sort(alist):
    _sort(alist, 0, len(alist) - 1)
    
def _sort(alist, first, last):
    if first < last:
        split_point = _partition(alist, first, last)
        _sort(alist, first, split_point - 1)
        _sort(alist, split_point + 1, last)
        
def _partition(alist, first, last):
    right = last
    left = first + 1
    pivot_value = alist[first]
    
    # find the split point of the pivot and move all other
    # items to the appropriate side of the list (i.e. if
    # the item is greater than pivot, then it should be
    # on the right hand side and vice versa)
    done = False
    while not done: 
        while left <= right and alist[left] <= pivot_value:
            left += 1
        while alist[right] >= pivot_value and right >= left:
            right -= 1
        
        if right <= left:
            done = True
        else:
            alist[right], alist[left] = alist[left], alist[right]
    
    # swap pivot value to split point
    alist[first], alist[right] = alist[right], alist[first]
    return right

In [11]:
# list sorted in place
alist = [54, 26, 93, 17, 77, 50]
quick_sort(alist)
alist


Out[11]:
[17, 26, 50, 54, 77, 93]