In [2]:
from pyspark import SparkContext
sc =SparkContext()
import cv2
import numpy as np
import sys

def getDistance(pontoA,pontoB,sizeLine):
    if pontoA[1] == 1:
        return 0
    if pontoB[1] == 0:
        return np.iinfo(np.int32).max
    pontoAi = pontoA[0]/sizeLine
    pontoBi = pontoB[0]/sizeLine
    pontoAj = pontoA[0]%sizeLine
    pontoBj = pontoB[0]%sizeLine
        
    return ((pontoAi-pontoBi)**2 + (pontoAj-pontoBj)**2)
    

myImage = [0,0,0,0,0 , 0,0,0,0,0 , 0,0,1,0,0 , 0,0,0,0,0 , 0,0,0,0,0]
rddImage = sc.parallelize(myImage)
rddImageWithId = rddImage.zipWithIndex().map(lambda (x,y):(y,x))
rddImageObject = rddImageWithId.filter(lambda (x,y): y!=0)
rddImageBackG = rddImageWithId.filter(lambda(x,y):y==0)
rddImageBackAlgo = rddImageBackG.cartesian(rddImageObject)

print rddImageBackAlgo.map(lambda (x,y):(x[0],getDistance(x,y,5))).reduceByKey(lambda x,y: min(x,y)).union(rddImageObject).collect(),"\n"


[(0, 8), (1, 5), (2, 4), (3, 5), (4, 8), (5, 5), (6, 2), (7, 1), (8, 2), (9, 5), (10, 4), (11, 1), (13, 1), (14, 4), (15, 5), (16, 2), (17, 1), (18, 2), (19, 5), (20, 8), (21, 5), (22, 4), (23, 5), (24, 8), (12, 1)] 


In [ ]: