In [17]:
# %load main.py
import math
import xml.etree.ElementTree as ET
import pprint
In [18]:
CIRCLE_TAG_NAME = '{http://www.w3.org/2000/svg}circle'
GROUP_TAG_NAME = '{http://www.w3.org/2000/svg}g'
pp = pprint.PrettyPrinter(indent=4)
In [19]:
def circle_to_point(circle):
return (float(circle.attrib['cx']),
float(circle.attrib['cy']))
In [20]:
def read_svg_file(svg_file_name):
return ET.parse(svg_file_name)
In [21]:
def get_all_points(tree):
return [circle_to_point(circle)
for circle in tree.iter(CIRCLE_TAG_NAME)]
In [22]:
def get_point_by_id(tree, point_id):
return [circle_to_point(circle)
for circle in tree.iter(CIRCLE_TAG_NAME)
if 'id' in circle.attrib
if circle.attrib['id'] == point_id]
In [23]:
def get_group_by_id(tree, group_id):
return [circle
for group in tree.iter(GROUP_TAG_NAME)
if 'id' in group.attrib
if group.attrib['id'] == group_id
for circle in get_all_points(group)]
In [24]:
svg_tree = read_svg_file('./points2.svg')
[pivot] = get_point_by_id(svg_tree, 'pivot')
[closest] = get_point_by_id(svg_tree, 'closest')
points = get_group_by_id(svg_tree, 'points')
In [25]:
def distance(point1, point2):
x1, y1 = point1
x2, y2 = point2
dx = x1 - x2
dy = y1 - y2
return math.sqrt(dx * dx + dy * dy)
In [26]:
def closest_point(all_points, new_point):
best_point = None
best_distance = None
for current_point in all_points:
current_distance = distance(new_point, current_point)
if best_distance is None or current_distance < best_distance:
best_distance = current_distance
best_point = current_point
return best_point
In [27]:
k = 2
输入:k维空间数据集T={x1,x2,...,xN},其中xi=(x(1)i,x(2)i,...,x(k)i),i=1,2,...,N; 输出:kd树
(1)开始:构造根结点,根结点对应于包含T的k维空间的超矩形区域。选择x(1)为坐标轴,以T中所有实例的x(1)坐标的中位数为切分点,将根结点对应的超矩形区域切分为两个子区域。切分由通过切分点并与坐标轴x(1)垂直的超平面实现。由根结点生成深度为1的左、右子结点:左子结点对应坐标x(1)小于切分点的子区域,右子结点对应于坐标x(1)大于切分点的子区域。将落在切分超平面上的实例点保存在根结点。
(2)重复。对深度为j的结点,选择x(l)为切分的坐标轴,l=j%k+1,以该结点的区域中所有实例的x(l)坐标的中位数为切分点,将该结点对应的超矩形区域切分为两个子区域。切分由通过切分点并与坐标轴x(l)垂直的超平面实现。由该结点生成深度为j+1的左、右子结点:左子结点对应坐标x(l)小于切分点的子区域,右子结点对应坐标x(l)大于切分点的子区域。将落在切分超平面上的实例点保存在该结点。
In [28]:
def build_kdtree(points, depth=0):
n = len(points)
if n <= 0:
return None
axis = depth % k
sorted_points = sorted(points, key=lambda point: point[axis])
return {
'point': sorted_points[n // 2],
'left': build_kdtree(sorted_points[:n // 2], depth + 1),
'right': build_kdtree(sorted_points[n//2 + 1:], depth + 1)
}
In [29]:
kdtree = build_kdtree(points)
利用kd树可以省去对大部分数据点的搜索,从而减少搜索的计算量。下面以搜索最近邻点为例加以叙述:给定一个目标点,搜索其最近邻,首先找到包含目标点的叶节点;然后从该叶结点出发,依次回退到父结点;不断查找与目标点最近邻的结点,当确定不可能存在更近的结点时终止。这样搜索就被限制在空间的局部区域上,效率大为提高。
用kd树的最近邻搜索: 输入: 已构造的kd树;目标点x; 输出:x的最近邻。
(1) 在kd树中找出包含目标点x的叶结点:从根结点出发,递归的向下访问kd树。若目标点当前维的坐标值小于切分点的坐标值,则移动到左子结点,否则移动到右子结点。直到子结点为叶结点为止;
(2) 以此叶结点为“当前最近点”;
(3) 递归的向上回退,在每个结点进行以下操作:
(a) 如果该结点保存的实例点比当前最近点距目标点更近,则以该实例点为“当前最近点”;
(b) 当前最近点一定存在于该结点一个子结点对应的区域。检查该子结点的父结点的另一个子结点对应的区域是否有更近的点。具体的,检查另一个子结点对应的区域是否与以目标点为球心、以目标点与“当前最近点”间的距离为半径的超球体相交。如果相交,可能在另一个子结点对应的区域内存在距离目标更近的点,移动到另一个子结点。接着,递归的进行最近邻搜索。如果不相交,向上回退。
(4) 当回退到根结点时,搜索结束。最后的“当前最近点”即为x的最近邻点。
In [30]:
def kdtree_naive_closest_point(root, point, depth=0, best=None):
if root is None:
return best
axis = depth % k
next_best = None
next_branch = None
if best is None or distance(point, best) > distance(point, root['point']):
next_best = root['point']
else:
next_best = best
if point[axis] < root['point'][axis]:
next_branch = root['left']
else:
next_branch = root['right']
return kdtree_naive_closest_point(next_branch, point, depth + 1, next_best)
In [31]:
def closer_distance(pivot, p1, p2):
if p1 is None:
return p2
if p2 is None:
return p1
d1 = distance(pivot, p1)
d2 = distance(pivot, p2)
if d1 < d2:
return p1
else:
return p2
In [32]:
def kdtree_closest_point(root, point, depth=0):
if root is None:
return None
axis = depth % k
next_branch = None
opposite_branch = None
if point[axis] < root['point'][axis]:
next_branch = root['left']
opposite_branch = root['right']
else:
next_branch = root['right']
opposite_branch = root['left']
best = closer_distance(point,
kdtree_closest_point(next_branch,
point,
depth + 1),
root['point'])
if distance(point, best) > abs(point[axis] - root['point'][axis]):
best = closer_distance(point,
kdtree_closest_point(opposite_branch,
point,
depth + 1),
best)
return best
In [ ]: