In [ ]:
import itertools
import random
import numpy as np
from scipy.spatial import distance

from scipy.spatial.distance import cdist
from scipy.optimize import linear_sum_assignment

In [ ]:
def minimization_function(list1, list2):
    """Sum of Euclidean distances between corresponding points.

    Parameters
    ----------
    list1, list2 : sequences of points of equal length and dimension.

    Returns
    -------
    float
        sum over i of ||list1[i] - list2[i]||.
    """
    # Only the n paired distances are needed; the previous
    # np.diag(cdist(...)) built the full n x n distance matrix and then
    # discarded everything but the diagonal.
    a = np.asarray(list1, dtype=float)
    b = np.asarray(list2, dtype=float)
    return float(np.linalg.norm(a - b, axis=1).sum())

def find_minimal_mapping(p0, p1):
    """Brute-force search for the permutation of `p1` closest to `p0`.

    Tries every permutation of `p1` and keeps the one whose summed
    point-to-point distance to `p0` is smallest.

    Parameters
    ----------
    p0 : sequence of points (the reference ordering).
    p1 : sequence of points to be reordered.

    Returns
    -------
    tuple
        (min_score, min_score_idx, min_permutation): the smallest summed
        distance, the index of the winning permutation in
        itertools.permutations(p1) order, and the winning permutation.

    Notes
    -----
    O(n!) — only feasible for small n; scipy_find_minimal_mapping solves
    the same problem in polynomial time.
    """
    min_score = np.inf
    min_score_idx = -1
    min_permutation = p1
    # enumerate() replaces the manually maintained `idx` counter.
    for idx, perm in enumerate(itertools.permutations(p1)):
        score = minimization_function(perm, p0)
        if score < min_score:
            min_score, min_score_idx, min_permutation = score, idx, perm
            if score == 0:
                # Perfect match; no other permutation can do better.
                break
    return min_score, min_score_idx, min_permutation

In [ ]:
from math import factorial

def lexicographic_index(p):
    """
    Return the 0-based rank of the permutation `p` in lexicographic order
    among all permutations of its elements. `p` must be a sequence and all
    elements of `p` must be distinct.

    >>> lexicographic_index('dacb')
    19
    >>> from itertools import permutations
    >>> all(lexicographic_index(p) == i
    ...     for i, p in enumerate(permutations('abcde')))
    True
    """
    n = len(p)
    rank = 0
    for pos, element in enumerate(p):
        # Each later element smaller than `element` accounts for a full
        # block of (n - pos - 1)! permutations preceding `p`.
        smaller_after = sum(1 for later in p[pos + 1:] if later < element)
        rank += smaller_after * factorial(n - pos - 1)
    return rank

def scipy_find_minimal_mapping(p0, p1):
    """Optimal point matching via the Hungarian algorithm.

    Solves the same problem as find_minimal_mapping, but in polynomial
    time, by running scipy's linear_sum_assignment on the pairwise
    distance matrix.

    Returns (score, lexicographic index of the assignment, p1 reordered
    to match p0 element-by-element).
    """
    cost_matrix = cdist(p0, p1)

    # col_ind[i] is the index into p1 matched with p0[i]; the row indices
    # are not needed for a square cost matrix.
    _, col_ind = linear_sum_assignment(cost_matrix)

    p1_reordered = [p1[j] for j in col_ind]
    return minimization_function(p0, p1_reordered), lexicographic_index(col_ind), p1_reordered

In [ ]:
def generate_random_test_points(number_of_points=5, dimension=2, shuffle_points=True, noise=(1.0 / 20.0),
                                num_rerandomed_points=1):
    """Generate a random point set plus a perturbed/shuffled copy of it.

    Parameters
    ----------
    number_of_points : int
        How many points to generate.
    dimension : int
        Dimensionality of each point.
    shuffle_points : bool
        If True, randomly reorder the perturbed copy.
    noise : float
        Scale of the per-coordinate additive noise.
    num_rerandomed_points : int
        How many points in the perturbed copy are replaced entirely with
        fresh random coordinates (clamped to the number of points).

    Returns
    -------
    tuple of (list, list)
        (correct_points, input_points), both as plain nested lists.
    """
    correct_points = [[random.random() for _ in range(dimension)] for _ in range(number_of_points)]
    offsets = [[random.random() * noise for _ in range(dimension)] for _ in range(number_of_points)]
    input_points = np.array(correct_points) + np.array(offsets)

    if shuffle_points:
        # Shuffle the rows directly. The previous version materialized all
        # n! permutations with itertools.permutations just to pick one at
        # random — factorial time and memory for the same result.
        input_points = list(input_points)
        random.shuffle(input_points)

    # Bound the number of randomized points to the number of points.
    num_rerandomed_points = min(num_rerandomed_points, len(input_points))
    # Pick a random sample of point indices and re-randomize every
    # coordinate of each chosen point.
    indices = random.sample(range(len(input_points)), num_rerandomed_points)
    for index in indices:
        for coord in range(len(input_points[index])):
            input_points[index][coord] = random.random()

    return np.array(correct_points).tolist(), np.array(input_points).tolist()

In [ ]:
import time

# Time the Hungarian-algorithm solver over increasing problem sizes.
for n in range(2, 10):
    t0 = time.time()
    scipy_find_minimal_mapping(*generate_random_test_points(number_of_points=n))
    elapsed = time.time() - t0
    print('n={0} took {1} seconds'.format(n, elapsed))

In [ ]:
import time

# Run both solvers on the SAME random instance and report time + solution.
for n in range(2, 10):
    rand_input = generate_random_test_points(number_of_points=n)

    t0 = time.time()
    a, b, c = find_minimal_mapping(*rand_input)
    duration = time.time() - t0
    print('n={0} took {1} seconds with solution {2}'.format(n, duration, (a, b, c)))

    t0 = time.time()
    a, b, c = scipy_find_minimal_mapping(*rand_input)
    scipy_duration = time.time() - t0
    print('n={0} took {1} seconds with solution {2}'.format(n, scipy_duration, (a, b, c)))

In [ ]:
import sys

# noinspection PyUnusedLocal
def greedy_find_minimal_mapping(p0, p1, order):
    """Greedy point matching: p1 points, visited in the priority given by
    `order`, each claim their nearest still-unclaimed p0 point.

    Parameters
    ----------
    p0 : list of points (the reference set).
    p1 : list of points to be matched against p0.
    order : permutation of range(len(p1)); order[j] is the priority of
        p1[j] (lower value = matched earlier).

    Returns
    -------
    tuple
        (score, lexicographic index of the resulting assignment,
        p1 reordered so that element i is the p1 point matched to p0[i]).

    Notes
    -----
    Fixes two bugs in the earlier version: the inner loop stored the
    outer index (`min_idx = idx` instead of `idxx`), and it deleted from
    `sorted_p1` — the list being iterated over — instead of removing the
    claimed p0 point, which skipped elements and let one p0 point be
    claimed more than once.
    """
    assert len(p0) == len(p1) and len(p0) == len(order), "greedy_find_minimal_mapping requires all list to be equal " \
                                                         "in length "
    assert sorted(list(set(order))) == list(range(len(order))), "greedy_find_minimal_mapping order should contain " \
                                                                "unique values between 0 and n-1, inclusively, " \
                                                                "where n is the number of items "
    # Visit p1 points from highest priority (smallest order value) down.
    priority = sorted(range(len(order)), key=order.__getitem__)

    unclaimed = list(range(len(p0)))   # p0 indices still available
    assignment = [-1] * len(p0)        # assignment[i] = p1 index matched to p0[i]
    for j in priority:
        # Nearest unclaimed p0 point to p1[j]; claim it exactly once.
        nearest = min(unclaimed, key=lambda i: distance.euclidean(p1[j], p0[i]))
        assignment[nearest] = j
        unclaimed.remove(nearest)

    p1_reordered = [p1[j] for j in assignment]

    return minimization_function(p0, p1_reordered), lexicographic_index(assignment), p1_reordered

In [ ]:
# Demonstrate the greedy matcher on a single random instance.
n = 6
order = list(range(n))
random.shuffle(order)  # random processing priority for the greedy matcher
input_data = list(generate_random_test_points(number_of_points=n)) + [order]
print(input_data)
a, b, c = greedy_find_minimal_mapping(*input_data)
print('---------------')
print((a, b, c))

If we compare the greedy approach to the globally optimal one, we find that greedy generally and systematically finds a solution that reduces the overall error less, because the items matched later in the order are assigned non-ideally. This difference is likely to be smaller when the overall error is lower.


In [ ]:
import time

# Compare mean matching error of the optimal (Hungarian) and greedy
# solvers over many random instances.
iterations = 1000
for n in range(2, 9):
    scipy_scores = []
    greedy_scores = []
    for _ in range(iterations):
        rand_input = generate_random_test_points(number_of_points=n)
        order = list(range(n))
        random.shuffle(order)
        # Bug fix: both solvers must see the SAME instance. The previous
        # version generated a second, unrelated point set for greedy,
        # which made the comparison meaningless.
        input_data = list(rand_input) + [order]

        scipy_score, _, _ = scipy_find_minimal_mapping(*rand_input)
        greedy_score, _, _ = greedy_find_minimal_mapping(*input_data)

        scipy_scores.append(scipy_score)
        greedy_scores.append(greedy_score)

    print('n={0} scipy has mean dist {1} while greedy has mean dist {2}'.format(n, np.mean(scipy_scores), np.mean(greedy_scores)))

In [ ]: