In [1]:
import sys, math, random, logging, time
import numpy as np
import pandas as pd
exinp = """1324
3415436
1023422
03517
3555""".split()
debug = True
logging.basicConfig(level=logging.INFO)
log = logging.getLogger('sorttest')
In [2]:
class MinLoc(object):
def __init__(self):
self.num = 20
self.ind = -1
def count_sort(nums, reverse=False):
counts = [0]*10
minevn = MinLoc()
minodd = MinLoc()
for i, n in enumerate(nums):
counts[n] += 1
if n % 2 == 0 and n < minevn.num:
minevn.num = n
minevn.ind = i
elif n % 2 == 1 and n < minodd.num:
minodd.num = n
minodd.ind = i
minodd.num += 10
minind = min(minevn, minodd, key=lambda a: a.num)
counts[minind.num % 10] -= 1
nums2 = list(nums)
mn = nums2.pop(minind.ind)
sm = 0
if reverse:
for n in range(10):
i = 10 - (n+1)
sm += counts[i]
counts[i] = sm
else:
for n in range(10):
sm += counts[n]
counts[n] = sm
log.debug("counts: %s", counts)
log.debug("nums2: %s", nums2)
ret = [0]*len(nums2)
for n in nums2:
ret[counts[n] - 1] = n
counts[n] -= 1
if reverse:
ret.append(mn)
else:
ret.insert(0, mn)
log.debug('ret: %s', ret)
return ret
In [3]:
def count_sort_simpler(nums, dig=1):
counts = [0]*10
a = 10**(dig-1)
for n in nums:
d = (n // a)%10
counts[d] += 1
sm = 0
for n in range(9):
sm += counts[n]
counts[n] = sm
counts[9] = 0
ret = [0]*len(nums)
for n in nums:
d = (n // a)%10 - 1
ret[counts[d]] = n
counts[d] += 1
return ret
def radix_sort(nums, digs):
for i in range(digs):
nums = count_sort_simpler(nums, i+1)
return nums
In [4]:
for ex in exinp:
exar = [int(n) for n in ex]
numarr = count_sort_simpler(exar)
num = sum(c*10**i for i,c in enumerate(numarr))
print(num)
arr = [random.randrange(10000) for _ in range(100)]
sarr = radix_sort(arr, 4)
In [5]:
def quicksort(arr):
_qs(arr, 0, len(arr) - 1)
return arr
def _qs(arr, start, pivot):
if (start >= pivot):
return
# i is the position to switch pivot to at the end,
# anything before it is below pivot
i = start
for j in range(start, pivot):
if arr[j] < arr[pivot]:
arr[j], arr[i] = arr[i], arr[j]
i += 1
arr[i], arr[pivot] = arr[pivot], arr[i]
_qs(arr, start, i-1)
_qs(arr, i+1, pivot)
In [6]:
arr = np.random.randn(1000)
arr[0:5]
quicksort(arr)
print("Head:", arr[0:10], ", tail:", arr[-10:])
print(quicksort([2,1]))
print(quicksort([3,2,1]))
print(quicksort([0,1]))
print(quicksort([5,5,3,5,5]))
print(quicksort(np.random.randn(10)))
In [7]:
def mergesort(arr):
ret = _ms(arr, 0, len(arr) - 1)
return ret
def _ms(arr, start, end):
if end > start:
mid = (end + start)//2
arr1 = _ms(arr, start, mid)
arr2 = _ms(arr, mid + 1, end)
i,j = 0, 0
ret = []
while i <= (mid - start) and j <= (end - mid - 1):
if arr1[i] <= arr2[j]:
ret.append(arr1[i])
i += 1
else:
ret.append(arr2[j])
j += 1
if i <= (mid - start):
ret.extend(arr1[i:])
else:
ret.extend(arr2[j:])
return ret
else:
return [arr[start]]
In [8]:
def test_sort(arr):
lastel = arr[0]
for a in arr:
if lastel > a:
raise Exception("array is not actually sorted")
lastel = a
In [9]:
print(mergesort(np.random.randn(10)))
test_sort(mergesort(np.random.randn(100000)));
test_sort(quicksort(np.random.randn(100000)))
test_sort(sorted(np.random.randn(100000)))
In [10]:
def longest_proper_suffix(st):
lps = [0]*len(st)
l = 0
for i in range(1, len(st)):
if st[l] == st[i]:
l += 1
else:
while l > 0 and st[0:l] != st[i-l+1:i+1]:
l -= 1
lps[i] = l
return lps
print(longest_proper_suffix('abaaa'))
print(longest_proper_suffix('myprefixmypre'))
print(longest_proper_suffix('aaababaaabababababbbababaaaabb'))
print(longest_proper_suffix('aaaaa'))
print(longest_proper_suffix('ababababababa'))
In [11]:
st = 'aaabaabaaa'
pt = 'abaaa'
log.setLevel(logging.INFO)
def string_search_kmp(st, pt):
lps = longest_proper_suffix(pt)
log.debug("suffixes: %s", lps)
sti, pti, matches = 0, 0, []
pl, sl = len(pt), len(st)
while sti - pti <= sl - pl:
log.debug("str index, pattern index: [%d, %d]", sti, pti)
for i in range(pti, pl):
if st[sti+i] != pt[i]:
log.debug("index %d in string doesn't match %d in pattern, exiting",
sti+i, i)
log.debug("matching pref length: %d", lps[i-1])
pti = 0 if i == 0 else lps[i-1]
sti += 1 if i == 0 else (i-pti)
break
if i + 1 == pl:
log.debug("adding a match starting at place: %d", sti)
matches.append(sti)
pti = lps[i]
sti += (i+1-pti)
return matches
print(string_search_kmp(st, pt))
In [12]:
log.setLevel(logging.INFO)
print(string_search_kmp("AABAACAADAABAABA", "AABA"))
print(string_search_kmp("AAAAAAAAAAAAAAAAAB", "AAAAB"))
print(string_search_kmp("ABABABCABABABCABABABC", "ABABAC"))
# log.setLevel(logging.DEBUG)
print(string_search_kmp("AAAAABAAABA" , "AAAA"))
In [13]:
def fib_search(arr, x):
al = len(arr)
# avoid worst case
if arr[-1] == x:
return al - 1
elif arr[-1] < x or arr[0] > x:
return None
elif arr[0] == x:
return 0
fibs, lf, fib_i = [1,1], 1, 1
while lf < al:
lf = fibs[fib_i] + fibs[fib_i-1]
fibs.append(lf)
fib_i += 1
log.debug("Fibs: %s", fibs)
return _fibsearch(arr, al, 0, fib_i-2, fibs, x)
def _fibsearch(arr, len_arr, start, fib_i, fibs, x):
if fib_i < 0:
return start if arr[start] == x else None
arr_i_mid = min(len_arr - 1, fibs[fib_i] + start)
arr_v_mid = arr[arr_i_mid]
if x == arr_v_mid:
return arr_i_mid
elif x > arr_v_mid:
log.debug("Index value %d is less than x, reducing fib index to %d, setting start ind to %d",
arr_i_mid, fib_i - 1, arr_i_mid)
return _fibsearch(arr, len_arr, arr_i_mid, fib_i - 1, fibs, x)
else:
log.debug("Index value %d is greater than x, reducing fib index to %d, setting start ind to %d",
arr_i_mid, fib_i - 2, start)
return _fibsearch(arr, len_arr, start, fib_i - 2, fibs, x)
In [14]:
log.setLevel(logging.DEBUG)
ra = sorted(np.random.randint(100, size=101))
print(ra)
search_val = 20
ri = fib_search(ra, search_val)
print(ri)
try:
# assert ri == ra.index(10), f"found index does not match value from .index call: {ri}"
assert ra[ri] == search_val
except TypeError:
assert search_val not in ra
print(fib_search([1,3,4,5,7,18,88], 1))
In [15]:
def find_instances(arr, num):
la = len(arr)
l, r = -1, la
return [i for i in range(
_findlower(arr, num, l, r) + 1,
_findupper(arr, num, l, r))]
def _findlower(arr, num, l, r):
log.debug("find limit: l:%d, r:%d", l, r)
if l == r:
return l
m = math.ceil((l + r)/2)
if arr[m] < num:
return _findlower(arr, num, m, r)
else:
return _findlower(arr, num, l, m-1)
def _findupper(arr, num, l, r):
log.debug("find limit: l:%d, r:%d", l, r)
if l == r:
return l
m = math.floor((l + r)/2)
if arr[m] > num:
return _findupper(arr, num, l, m)
else:
return _findupper(arr, num, m+1, r)
In [16]:
log.setLevel(logging.DEBUG)
print(find_instances([3,3,5,5,5,5,7], 5))
print(find_instances([3,3,5,5,5,5,7], 4))
In [17]:
level = 0
def unsorted_search(arr, num):
global level
level = 0
return _us(arr, num, 0, len(arr)-1, 0)
def _us(arr, num, l, r, lev):
global level
level = max(lev, level)
if l > r:
return -1
m = (l+r)//2
if arr[m] == num:
return m
else:
step1 = _us(arr, num, l, m-1, lev+1)
if step1 == -1:
step2 = _us(arr, num, m+1, r, lev+1)
return max(step1, step2)
else:
return step1
def recSearch( arr, x):
global level
level = 0
return _recSearch(arr, 0, len(arr)-1, x, 0)
def _recSearch( arr, l, r, x, lev):
global level
level = max(level, lev)
if r < l:
return -1
if arr[l] == x:
return l
if arr[r] == x:
return r
return _recSearch(arr, l+1, r-1, x, lev+1)
In [20]:
la = np.random.randint(10000, size=990000)
# log.debug("array: %s", la)
times = [time.time()]
print(unsorted_search(la, 50))
print("max level:", level)
times.append(time.time())
try:
print(recSearch(la, 50))
except RecursionError:
print('Execution failed with recursion error')
print("max level:", level)
times.append(time.time())
df = pd.DataFrame({'start': times[0:2], 'end': times[1:3]}, index=['recursive', 'linear'])
df -= times[0]
df['diff'] = df['end'] - df['start']
df
Out[20]:
In [29]:
def contains(st, pat):
pl = len(pat)
end = len(st) - pl
if end < 0:
return -1
return _contains(st, pat, 0, end, pl)
def _contains(st, pat, l, r, pl):
if l > r:
return -1
m = (l+r)//2
if _test(st, pat, m, 0, pl):
return m
else:
return (_contains(st, pat, l, m-1, pl) or
_contains(st, pat, m+1, r, pl))
def _test(st, pat, sti, pti, lng):
log.debug("testing indices: string: %d, pattern: %d", sti, pti)
if pti == lng:
return True
elif st[sti] != pat[pti]:
return False
else:
return _test(st, pat, sti+1, pti+1, lng)`
In [31]:
s = "THIS IS A TEST TEXT"
p = "TEST"
print(contains(s, p))
s = "geeksforgeeks"
p = "quiz"
print(contains(s, p))
In [33]:
np.random.randint(50, size=5)
Out[33]: