In [7]:
    
from simple_vp import *
import time
import matplotlib.pyplot as plt
%matplotlib inline
    
In [26]:
    
def main():
    rn = lambda: random.randint(0, 10000)
    aset = [(rn(), rn()) for i in range(40000)]
    q = (rn(), rn())
    rad = 9990
    distance = lambda a, b: math.sqrt(sum([((x-y)**2) for x, y in zip(a, b)]))
    s = time.time()
    print("creating...")
    root = VpNode(aset, distance=distance)
    print("done", time.time() - s)
    s = time.time()
    print("searching...")
    se = VpSearch(root, q, rad, 30)
    #out = se.search()
    out = se.knn()
    for k, v in sorted(se.stat.items()):
        print(k, v)
    print("out: %s" % len(out))
    print("done", time.time() - s)
    try:
        projx = lambda x: map(lambda y: y[0], x)
        projy = lambda x: map(lambda y: y[1], x)
        fig, ax = plt.subplots()
        ax.scatter(list(projx(aset)), list(projy(aset)), s = 20, alpha=0.1)
        ax.scatter([q[0]], [q[1]], s = 40, color='g')
        ax.scatter(list(projx(out)), list(projy(out)), s = 10, color='r')
        ax.annotate("query", xy=q)
        plt.show()
    except:
        pass
if __name__ == '__main__':
    main()
    
    
    
    
In [56]:
    
def levenshtein(a,b):
    "Calculates the Levenshtein distance between a and b."
    n, m = len(a), len(b)
    if n > m:
        # Make sure n <= m, to use O(min(n,m)) space
        a,b = b,a
        n,m = m,n
    current = range(n+1)
    for i in range(1,m+1):
        previous, current = current, [i]+[0]*n
        for j in range(1,n+1):
            add, delete = previous[j]+1, current[j-1]+1
            change = previous[j-1]
            if a[j-1] != b[i-1]:
                change = change + 1
            current[j] = min(add, delete, change)
    return current[n]
def main(file_name):
    f = open(file_name)
    next(f)
    aset = [w.strip() for w in f]
    rad = 4.4
    distance = levenshtein
    s = time.time()
    print("\ninput set %s points" % len(aset))
    print("creating tree...")
    root = VpNode(aset, distance=distance)
    print("created: %s nodes" % VpNode.ids)
    print("done in s: %s" % (time.time() - s))
    print("searching...")
    while True:
        q = input(">>")
        s = time.time()
        se = VpSearch(root, q, rad, 10)
        out = se.knn()
        print(se.stat)
        print("founded %s results:" % len(out))
        
        count = 1
        for i in out:
            print('result {}: {}'.format(count, i))
            count += 1
        print("\n".join(out))
        print("done in s: %s" % (time.time() - s))
    
In [ ]:
    
main('README.md')
    
    
In [ ]: