Search function

  • Input: a sorted list and a search term
  • Operation: find the element in the list
  • Output: the location of the item in the list or indicate if it's not there

In [1]:
# (Sort function, used before the search algorithm)

from numpy import random as rd

def sort_list(list_to_sort):
    if not isinstance(list_to_sort, list): # Not a list error
        raise NameError('Error: arg shouls be a list.')
    if len(list_to_sort) < 2: # empty or 1-element list
        return list_to_sort
    new_list = []
    new_list.append(list_to_sort[0])
    
    for i in list_to_sort[1:]:
        if i > new_list[-1]: # if biggest number, append it and continue
            new_list.append(i)
            continue
            
        elif len(new_list) < 10: # new list (still) small, doesn't need optimisation
            for j in range(0, len(new_list)):
                if i <= new_list[j]: # if smaller or equal than element at index j, insert before j
                    new_list.insert(j, i)
                    break
                    
        else: # search for the middle, ...
            left_boundary = 0
            right_boundary = len(new_list)-1
            positionFound = False
            while positionFound == False:
                # mid ~= median of new list
                mid = int(left_boundary + (right_boundary - left_boundary) / 2)

                if right_boundary - left_boundary < 2:
                    new_list.insert(right_boundary, i)
                    positionFound = True
                    break
                elif i == new_list[mid]:
                    new_list.insert(mid, i)
                    positionFound = True
                    break

                elif i > new_list[mid]: # => narrow to the right
                    left_boundary = mid
                elif i < new_list[mid]: # => narrow to the left
                    right_boundary = mid                    

    return new_list
    

sample_lists = []
ranges = [10, 100, 1000]

for r in ranges:
    sample_lists.append((rd.rand(1, r)*100)[0].tolist())

In [2]:
'''
Running test with a sorted list of 10 items. Needle:  93.8485227549
CPU times: user 8 µs, sys: 1 µs, total: 9 µs
Wall time: 11 µs

Running test with a sorted list of 100 items. Needle:  3.117958693
CPU times: user 9 µs, sys: 0 ns, total: 9 µs
Wall time: 12.2 µs

Running test with a sorted list of 1000 items. Needle:  60.6537886941
CPU times: user 11 µs, sys: 0 ns, total: 11 µs
Wall time: 12.9 µs
'''

def find_value_index(sorted_list, needle):
    found = False
    left_boundary = 0
    right_boundary = len(sorted_list)-1

    while found == False:
        mid = int(left_boundary + (right_boundary - left_boundary) / 2) # quadtree! (sort of)
        if needle > sorted_list[mid]:
            left_boundary = mid
        elif needle == sorted_list[mid]:
            return mid
            found = True
        else:
            right_boundary = mid
            
        if right_boundary - left_boundary < 2:
            if needle == sorted_list[right_boundary]: # because we never ceil to get the mid
                return mid
            else:
                found = False
                if mid - 1 > 0:
                    print("Not found. Closest values:", sorted_list[mid-1], sorted_list[mid])
                else:
                    print("Not found. Closest value:", sorted_list[mid])
                return False

        
sample_sorted_lists = []
sample_needles = []

for sample in sample_lists:
    sample_sorted_lists.append(sort_list(sample))

for sample in sample_sorted_lists:
    needle = rd.choice(sample)
    print("\nRunning test with a sorted list of", len(sample), "items. Needle: ", needle)
    
    %time find_value_index(sample, needle)


Running test with a sorted list of 10 items. Needle:  38.9287594465
CPU times: user 7 µs, sys: 1 µs, total: 8 µs
Wall time: 10 µs

Running test with a sorted list of 100 items. Needle:  72.4389192771
CPU times: user 7 µs, sys: 1e+03 ns, total: 8 µs
Wall time: 11 µs

Running test with a sorted list of 1000 items. Needle:  13.7151070323
CPU times: user 11 µs, sys: 0 ns, total: 11 µs
Wall time: 13.1 µs