In [3]:
import random
In [4]:
# Input: Number and a list of numbers that comes sorted
#(in case it would not, just make the sort algorithm part of the below function)
# Output: Index of the number that was put in
In [5]:
# pseudocode:
# iterate over the input list
# each time, compare the number for the searched one
# if they are identical
# check for the position of that element within the list
# if there's no match
# return 'not found'
In [44]:
def search_function6(chosen_number, input_list):
#not necessary in this case, because list already sorted, but in case it would not be:
input_list.sort()
if chosen_number >= input_list[0] and chosen_number <= input_list[int(len(input_list)-1)]:
end_number = input_list[int(len(input_list)-1)]
start_number = input_list[0]
sub = 1
while chosen_number < end_number:
sub = sub + 1
end_number = input_list[int(len(input_list)-sub)]
start_number = input_list[0+sub]
if end_number == chosen_number:
print(int(len(input_list))-(sub))
else:
print("Number not found in list")
In [ ]:
# In a previous code version for the search function, I tested only lowering the end_number, without increasing the start_number at the same time.
# However as the version that alters both variables at once performed better, timewise
#(I guess, because the range to search through decreases quicker than if you only alter the upper boundary)
# I chose the code with one more variable over that with one less.
In [51]:
# http://stackoverflow.com/questions/14608015/how-to-check-if-a-specific-integer-is-in-a-list
# To not only compare the runtime of the code with differently-sized datasets, but also to a code that has uses
# the index function I wrote this snippet to compare to as well
def search_function_with_index(chosen_number2, input_list2):
if chosen_number2 in input_list2:
print(input_list2.index(chosen_number2))
else:
print("Number not within the list")
In [52]:
test_list_10 = list(range(1,11))
test_list_100 = list(range(1,101))
test_list_1000 = list(range(1,1001))
In [54]:
%time search_function_with_index(2, test_list_10)
%time search_function6(2, test_list_10)
# I'm surprised that my self-written code runs faster (58 microseconds)
# than the one that uses the pre-set index function (95 microseconds)!!!
In [65]:
%time search_function_with_index(74, test_list_100)
%time search_function6(74, test_list_100)
# This time, I'm surprised that for both codes -- irrespective of being self-written or not -- the runtime is faster
# than in the previous test, although the dataset is bigger this time.
# I also tested by searching for 34, where my code was faster (mine:37 microseconds vs with_index: 48 microseconds)
# And for 2, where my code performs worse than the other one (105 microseconds vs 60)
# As well as for 91, where my code performs better (16 microseconds vs 33)
# I"m surprised about this inconsistency. I would have expected that having a number closer to the start (2)
# or end (91) would lead in both cases to one code being more efficient than the other
# rather than having one perform better on the lower and and one perform better on the upper end
In [68]:
%time search_function_with_index(931, test_list_1000)
%time search_function6(931, test_list_1000)
# Test searching for 2: the with_index one runs faster (30 vs 410 microseconds)
# Test searching for 501: the with_index one runs faster (49 vs 215 microseconds)
# Test searching for 931: the with_index one runs faster (90 vs 118 microseconds)
# It's makes sense that both codes need longer to run over a 1000 element dataset than
# over the previous 100 element dataset.
# Contrary to the runtime tests for the sorting function, it seems that for the my search function there's a
# possible correlation between dataset-size and runtime, given the fact that the runtime for the 1000-element
# dataset is roughly ten times longer than the runtime for the 100-element dataset.
# However now I'm even more confused why in previous cases my code was more efficient than the one using the
# preset index function
In [ ]: