notebook.community

Edit and run



In [17]:

    
# %load main.py
import math
import xml.etree.ElementTree as ET
import pprint



In [18]:

    
CIRCLE_TAG_NAME = '{http://www.w3.org/2000/svg}circle'
GROUP_TAG_NAME = '{http://www.w3.org/2000/svg}g'


pp = pprint.PrettyPrinter(indent=4)



In [19]:

    
def circle_to_point(circle):
    return (float(circle.attrib['cx']),
            float(circle.attrib['cy']))



In [20]:

    
def read_svg_file(svg_file_name):
    return ET.parse(svg_file_name)



In [21]:

    
def get_all_points(tree):
    return [circle_to_point(circle)
            for circle in tree.iter(CIRCLE_TAG_NAME)]



In [22]:

    
def get_point_by_id(tree, point_id):
    return [circle_to_point(circle)
            for circle in tree.iter(CIRCLE_TAG_NAME)
            if 'id' in circle.attrib
            if circle.attrib['id'] == point_id]



In [23]:

    
def get_group_by_id(tree, group_id):
    return [circle
            for group in tree.iter(GROUP_TAG_NAME)
            if 'id' in group.attrib
            if group.attrib['id'] == group_id
            for circle in get_all_points(group)]



In [24]:

    
svg_tree = read_svg_file('./points2.svg')
[pivot] = get_point_by_id(svg_tree, 'pivot')
[closest] = get_point_by_id(svg_tree, 'closest')
points = get_group_by_id(svg_tree, 'points')



In [25]:

    
def distance(point1, point2):
    x1, y1 = point1
    x2, y2 = point2

    dx = x1 - x2
    dy = y1 - y2

    return math.sqrt(dx * dx + dy * dy)



In [26]:

    
def closest_point(all_points, new_point):
    best_point = None
    best_distance = None

    for current_point in all_points:
        current_distance = distance(new_point, current_point)

        if best_distance is None or current_distance < best_distance:
            best_distance = current_distance
            best_point = current_point

    return best_point



In [27]:

    
k = 2

构造平衡kd树算法： https://www.cnblogs.com/21207-iHome/p/6084670.html

输入：k维空间数据集T={x1,x2,...,xN}，其中xi=(x(1)i,x(2)i,...,x(k)i),i=1,2,...,N; 输出：kd树

（1）开始：构造根结点，根结点对应于包含T的k维空间的超矩形区域。选择x(1)为坐标轴，以T中所有实例的x(1)坐标的中位数为切分点，将根结点对应的超矩形区域切分为两个子区域。切分由通过切分点并与坐标轴x(1)垂直的超平面实现。由根结点生成深度为1的左、右子结点：左子结点对应坐标x(1)小于切分点的子区域，右子结点对应于坐标x(1)大于切分点的子区域。将落在切分超平面上的实例点保存在根结点。

（2）重复。对深度为j的结点，选择x(l)为切分的坐标轴，l=j%k+1，以该结点的区域中所有实例的x(l)坐标的中位数为切分点，将该结点对应的超矩形区域切分为两个子区域。切分由通过切分点并与坐标轴x(l)垂直的超平面实现。由该结点生成深度为j+1的左、右子结点：左子结点对应坐标x(l)小于切分点的子区域，右子结点对应坐标x(l)大于切分点的子区域。将落在切分超平面上的实例点保存在该结点。



In [28]:

    
def build_kdtree(points, depth=0):
    n = len(points)

    if n <= 0:
        return None

    axis = depth % k

    sorted_points = sorted(points, key=lambda point: point[axis])

    return {
        'point': sorted_points[n // 2],
        'left': build_kdtree(sorted_points[:n // 2], depth + 1),
        'right': build_kdtree(sorted_points[n//2 + 1:], depth + 1)
    }



In [29]:

    
kdtree = build_kdtree(points)

搜索kd树

　　利用kd树可以省去对大部分数据点的搜索，从而减少搜索的计算量。下面以搜索最近邻点为例加以叙述：给定一个目标点，搜索其最近邻，首先找到包含目标点的叶节点；然后从该叶结点出发，依次回退到父结点；不断查找与目标点最近邻的结点，当确定不可能存在更近的结点时终止。这样搜索就被限制在空间的局部区域上，效率大为提高。

　　用kd树的最近邻搜索：　　输入：已构造的kd树；目标点x；输出：x的最近邻。

（1）在kd树中找出包含目标点x的叶结点：从根结点出发，递归的向下访问kd树。若目标点当前维的坐标值小于切分点的坐标值，则移动到左子结点，否则移动到右子结点。直到子结点为叶结点为止；

（2）以此叶结点为“当前最近点”；

（3）递归的向上回退，在每个结点进行以下操作：

　　（a）如果该结点保存的实例点比当前最近点距目标点更近，则以该实例点为“当前最近点”；

　　（b）当前最近点一定存在于该结点一个子结点对应的区域。检查该子结点的父结点的另一个子结点对应的区域是否有更近的点。具体的，检查另一个子结点对应的区域是否与以目标点为球心、以目标点与“当前最近点”间的距离为半径的超球体相交。如果相交，可能在另一个子结点对应的区域内存在距离目标更近的点，移动到另一个子结点。接着，递归的进行最近邻搜索。如果不相交，向上回退。

（4）当回退到根结点时，搜索结束。最后的“当前最近点”即为x的最近邻点。



In [30]:

    
def kdtree_naive_closest_point(root, point, depth=0, best=None):
    if root is None:
        return best

    axis = depth % k

    next_best = None
    next_branch = None

    if best is None or distance(point, best) > distance(point, root['point']):
        next_best = root['point']
    else:
        next_best = best

    if point[axis] < root['point'][axis]:
        next_branch = root['left']
    else:
        next_branch = root['right']

    return kdtree_naive_closest_point(next_branch, point, depth + 1, next_best)



In [31]:

    
def closer_distance(pivot, p1, p2):
    if p1 is None:
        return p2

    if p2 is None:
        return p1

    d1 = distance(pivot, p1)
    d2 = distance(pivot, p2)

    if d1 < d2:
        return p1
    else:
        return p2



In [32]:

    
def kdtree_closest_point(root, point, depth=0):
    if root is None:
        return None

    axis = depth % k

    next_branch = None
    opposite_branch = None

    if point[axis] < root['point'][axis]:
        next_branch = root['left']
        opposite_branch = root['right']
    else:
        next_branch = root['right']
        opposite_branch = root['left']

    best = closer_distance(point,
                           kdtree_closest_point(next_branch,
                                                point,
                                                depth + 1),
                           root['point'])

    if distance(point, best) > abs(point[axis] - root['point'][axis]):
        best = closer_distance(point,
                               kdtree_closest_point(opposite_branch,
                                                    point,
                                                    depth + 1),
                               best)

    return best



In [ ]: