In [1]:
from jupyterthemes import get_themes
from jupyterthemes.stylefx import set_nb_theme
themes = get_themes()
set_nb_theme(themes[1])
Out[1]:
In [2]:
%load_ext watermark
%watermark -a 'Ethen' -d -t -v -p jupyterthemes
Following the online book, Problem Solving with Algorithms and Data Structures. Chapter 5 introduces methods for searching and sorting numbers.
We can take advantage of a ordered list by doing a binary search. We start by searching in the middle, if it is not the item that we're searching for, we can use the ordered nature of the list to eliminate half of the remaining items.
In [3]:
def binary_search(testlist, query):
if not testlist:
return False
else:
mid_idx = len(testlist) // 2
mid = testlist[mid_idx]
if mid == query:
return True
elif query < mid:
return binary_search(testlist[:mid_idx], query)
else:
return binary_search(testlist[mid_idx + 1:], query)
In [4]:
testlist = [0, 1, 2, 8, 13, 17, 19, 32, 42]
query = 19
print(binary_search(testlist, query))
query = 3
print(binary_search(testlist, query))
Keep in mind that this approach requires sorting the list, which may not be ideal if we're simply going to search for 1 number on the very large list (since we have to first sort the list, which is not a cheap operation).
Implementing hash table. Source Hashing.
In [5]:
class HashTable:
"""
a.k.a python's dictionary
the initial size of the table has been chosen to
be 11, although this number is arbitrary, it's important
for it to be a prime number so that collision resolution
will be efficient; this implementation does not handle
resizing the hashtable when it runs out of the original size
"""
def __init__(self):
# slot will hold the key and data will hold the value
self.size = 11
self.slot = [None] * self.size
self.data = [None] * self.size
def _put(self, key, value):
hash_value = self._hash(key)
if self.slot[hash_value] == None:
self.slot[hash_value] = key
self.data[hash_value] = value
elif self.slot[hash_value] == key:
# replace the original key value
self.data[hash_value] = value
else:
# rehash to get the next location possible
# if a collision is to occurr
next_slot = self._rehash(hash_value)
slot_value = self.slot[next_slot]
while slot_value != None and slot_value != key:
next_slot = self._rehash(next_slot)
slot_value = self.slot[next_slot]
if self.slot[next_slot] == None:
self.slot[next_slot] = key
self.data[next_slot] = value
else:
self.data[next_slot] = value
def _get(self, key):
data = None
stop = False
found = False
start_slot = self._hash(key)
next_slot = start_slot
while self.slot[next_slot] != None and not found and not stop:
if self.slot[next_slot] == key:
data = self.data[next_slot]
found = True
else:
# if we rehash to the starting value
# then it means the data is not here
next_slot = self._rehash(next_slot)
if next_slot == start_slot:
stop = True
return data
def _hash(self, key):
return key % self.size
def _rehash(self, oldhash):
"""
a simple plus 1 rehash, where we add 1 to
the original value and hash it again to
see if the slot it empty (None)
"""
return (oldhash + 1) % self.size
def __getitem__(self, key):
# allow access using``[]`` syntax
return self._get(key)
def __setitem__(self, key, value):
self._put(key, value)
In [6]:
H = HashTable()
H[54] = 'cat'
H[26] = 'dog'
H[93] = 'lion'
H[17] = 'tiger'
H[77] = 'bird'
H[44] = 'goat'
H[55] = 'pig'
print(H.slot)
print(H.data)
In [7]:
print(H[55])
print(H[20])
In [8]:
def merge_sort(alist):
if len(alist) > 1:
mid = len(alist) // 2
left_half = alist[:mid]
right_half = alist[mid:]
merge_sort(left_half)
merge_sort(right_half)
# loop through the left and right half,
# compare the value and fill them back
# to the original list
i, j, k = 0, 0, 0
while i < len(left_half) and j < len(right_half):
if left_half[i] < right_half[j]:
alist[k] = left_half[i]
i += 1
else:
alist[k] = right_half[j]
j += 1
k += 1
# after filling in the sorted value,
# there will be left-over values on
# either the left or right half, simply
# append all the left-over values back
while i < len(left_half):
alist[k] = left_half[i]
i += 1
k += 1
while j < len(right_half):
alist[k] = right_half[j]
j += 1
k += 1
return alist
In [9]:
alist = [54, 26, 93, 17, 77]
merge_sort(alist)
Out[9]:
In [10]:
def quick_sort(alist):
_sort(alist, 0, len(alist) - 1)
def _sort(alist, first, last):
if first < last:
split_point = _partition(alist, first, last)
_sort(alist, first, split_point - 1)
_sort(alist, split_point + 1, last)
def _partition(alist, first, last):
right = last
left = first + 1
pivot_value = alist[first]
# find the split point of the pivot and move all other
# items to the appropriate side of the list (i.e. if
# the item is greater than pivot, then it should be
# on the right hand side and vice versa)
done = False
while not done:
while left <= right and alist[left] <= pivot_value:
left += 1
while alist[right] >= pivot_value and right >= left:
right -= 1
if right <= left:
done = True
else:
alist[right], alist[left] = alist[left], alist[right]
# swap pivot value to split point
alist[first], alist[right] = alist[right], alist[first]
return right
In [11]:
# list sorted in place
alist = [54, 26, 93, 17, 77, 50]
quick_sort(alist)
alist
Out[11]: