In [7]:
from simple_vp import *
import time
import matplotlib.pyplot as plt
%matplotlib inline
In [26]:
def main():
rn = lambda: random.randint(0, 10000)
aset = [(rn(), rn()) for i in range(40000)]
q = (rn(), rn())
rad = 9990
distance = lambda a, b: math.sqrt(sum([((x-y)**2) for x, y in zip(a, b)]))
s = time.time()
print("creating...")
root = VpNode(aset, distance=distance)
print("done", time.time() - s)
s = time.time()
print("searching...")
se = VpSearch(root, q, rad, 30)
#out = se.search()
out = se.knn()
for k, v in sorted(se.stat.items()):
print(k, v)
print("out: %s" % len(out))
print("done", time.time() - s)
try:
projx = lambda x: map(lambda y: y[0], x)
projy = lambda x: map(lambda y: y[1], x)
fig, ax = plt.subplots()
ax.scatter(list(projx(aset)), list(projy(aset)), s = 20, alpha=0.1)
ax.scatter([q[0]], [q[1]], s = 40, color='g')
ax.scatter(list(projx(out)), list(projy(out)), s = 10, color='r')
ax.annotate("query", xy=q)
plt.show()
except:
pass
if __name__ == '__main__':
main()
In [56]:
def levenshtein(a,b):
"Calculates the Levenshtein distance between a and b."
n, m = len(a), len(b)
if n > m:
# Make sure n <= m, to use O(min(n,m)) space
a,b = b,a
n,m = m,n
current = range(n+1)
for i in range(1,m+1):
previous, current = current, [i]+[0]*n
for j in range(1,n+1):
add, delete = previous[j]+1, current[j-1]+1
change = previous[j-1]
if a[j-1] != b[i-1]:
change = change + 1
current[j] = min(add, delete, change)
return current[n]
def main(file_name):
f = open(file_name)
next(f)
aset = [w.strip() for w in f]
rad = 4.4
distance = levenshtein
s = time.time()
print("\ninput set %s points" % len(aset))
print("creating tree...")
root = VpNode(aset, distance=distance)
print("created: %s nodes" % VpNode.ids)
print("done in s: %s" % (time.time() - s))
print("searching...")
while True:
q = input(">>")
s = time.time()
se = VpSearch(root, q, rad, 10)
out = se.knn()
print(se.stat)
print("founded %s results:" % len(out))
count = 1
for i in out:
print('result {}: {}'.format(count, i))
count += 1
print("\n".join(out))
print("done in s: %s" % (time.time() - s))
In [ ]:
main('README.md')
In [ ]: