In [1]:
from collections import OrderedDict
from IPython.display import HTML, display
from matplotlib import pyplot as plt
from datetime import datetime
import folium
import io
# import simplejson
# import urllib
import numpy as np
import pandas as pd
import utm
# import webbrowser
% matplotlib inline
In [2]:
# Alias the built-in `sum` as Σ so the summations in the statistics helpers
# below read like the formulas quoted in their docstrings.
Σ = sum
In [3]:
def calc_euclidean(
    origin: tuple, destiny: tuple
) -> float:
    """Return the straight-line (Euclidean) distance between two points.

    Only the first two components of each point are read
    (index 0 and index 1); any further components are ignored.

    :param origin: starting point, indexable as origin[0], origin[1]
    :param destiny: end point, indexable as destiny[0], destiny[1]
    :return: sqrt(dx² + dy²)
    """
    delta_0 = destiny[0] - origin[0]
    delta_1 = destiny[1] - origin[1]
    return np.sqrt(delta_0**2 + delta_1**2)
In [4]:
def calc_rectangle(
    origin: tuple, destiny: tuple
) -> float:
    """Return the rectangular (Manhattan) distance between two points.

    Only the first two components of each point are read
    (index 0 and index 1); any further components are ignored.

    :param origin: starting point, indexable as origin[0], origin[1]
    :param destiny: end point, indexable as destiny[0], destiny[1]
    :return: |dx| + |dy|
    """
    span_0 = abs(destiny[0] - origin[0])
    span_1 = abs(destiny[1] - origin[1])
    return span_0 + span_1
In [5]:
def linear_regression(
    x: pd.DataFrame, y: pd.DataFrame
) -> dict:
    """
    Fit the ordinary least-squares line ŷ = a + b*x.

    where:
        b = Σ [ (xi - x̄)(yi - ȳ) ] / Σ [ (xi - x̄)² ]
        a = ȳ - b * x̄

    Observations are paired by POSITION (first x with first y, ...).
    The inputs are converted to flat float arrays first, so the result
    does not depend on the index labels of the objects passed in; the
    previous `x[i]` lookups were label-based and only worked through the
    deprecated positional fallback of `Series.__getitem__`.

    :param x: independent variable (Series, 1-column frame or sequence)
    :param y: dependent variable, same number of values as `x`
    :return: dict with the intercept under 'a' and the slope under 'b'
    :raises ValueError: if the inputs are empty or differ in length

    If every x value is identical the slope is undefined and the
    division yields nan (numpy emits a RuntimeWarning).
    """
    x_values = np.asarray(x, dtype=float).ravel()
    y_values = np.asarray(y, dtype=float).ravel()
    if x_values.size == 0 or x_values.size != y_values.size:
        raise ValueError(
            'x and y must be non-empty and have the same length '
            '(got %s and %s values)' % (x_values.size, y_values.size)
        )

    x_bar = x_values.mean()
    y_bar = y_values.mean()
    # Deviations from the mean, computed once and reused below.
    dx = x_values - x_bar
    dy = y_values - y_bar

    b = np.sum(dx * dy) / np.sum(dx**2)
    a = y_bar - b * x_bar
    return {'a': a, 'b': b}
In [6]:
def R2(x: pd.DataFrame, y: pd.DataFrame) -> float:
    """
    Coefficient of determination of the simple linear fit of y on x,
    computed as the squared Pearson correlation.

    R² = {(1/N) * Σ[(xi - x̄) * (yi - ȳ)] / (σx * σy)}²
    where:
        σx = sqrt[Σ (xi - x̄)² / N]
        σy = sqrt[Σ (yi - ȳ)² / N]

    Observations are paired by POSITION. The inputs are converted to flat
    float arrays first, so the result does not depend on index labels;
    the previous `x[i]` lookups were label-based and only worked through
    the deprecated positional fallback of `Series.__getitem__`.

    :param x: first variable (Series, 1-column frame or sequence)
    :param y: second variable, same number of values as `x`
    :return: R² as a float
    :raises ValueError: if the inputs are empty or differ in length

    If either variable is constant its standard deviation is zero and
    the division yields nan (numpy emits a RuntimeWarning).
    """
    x_values = np.asarray(x, dtype=float).ravel()
    y_values = np.asarray(y, dtype=float).ravel()
    if x_values.size == 0 or x_values.size != y_values.size:
        raise ValueError(
            'x and y must be non-empty and have the same length '
            '(got %s and %s values)' % (x_values.size, y_values.size)
        )

    n = x_values.size
    dx = x_values - x_values.mean()
    dy = y_values - y_values.mean()

    # Population (divide-by-N) standard deviations, as in the formula above.
    sigma_x = np.sqrt(np.sum(dx**2) / n)
    sigma_y = np.sqrt(np.sum(dy**2) / n)
    covariance = np.sum(dx * dy) / n

    return (covariance / (sigma_x * sigma_y))**2
In [7]:
# Human-readable names of the ten locations, used as marker popups on the
# map. The order must match the coordinates in `positions_latlon`, because
# the two lists are paired by index.
# NOTE(review): entry 5 is named 'Centro de Salud Valdepasillas' here, while
# the comment next to its coordinates says 'Fuente de Valdepasillas' --
# confirm which landmark is actually meant.
places = [
    'Badajoz, Teatro López de Ayala',
    'Badajoz, Puente de la Universidad',
    'Badajoz, Centro comercial El Faro',
    'Badajoz, Palacio de Congresos Manuel Rojas',
    'Badajoz, Centro de Salud Valdepasillas',
    'Badajoz, Puerta del Pilar',
    'Badajoz, La Alcazaba',
    'Badajoz, Plaza Alta',
    'Badajoz, Puerta Palma',
    'Badajoz, Puente Real'
]
In [8]:
# Location data from googlemaps: one (latitude, longitude) tuple per place,
# in the order given by the numbered comments.
positions_latlon = [
    (38.8762837, -6.9745847),   # 1. Teatro López de Ayala
    (38.882257, -6.9861811),    # 2. Puente de la Universidad
    (38.884088, -7.0242667),    # 3. Centro comercial "El Faro"
    (38.8758768, -6.9704163),   # 4. Palacio de Congresos "Manuel Rojas"
    (38.864476, -6.9780557),    # 5. Fuente de Valdepasillas
    (38.8750098, -6.9733397),   # 6. Puerta del Pilar
    (38.8834097, -6.9707085),   # 7. La Alcazaba
    (38.8813045, -6.9704468),   # 8. Plaza Alta
    (38.8805376, -6.9777905),   # 9. Puerta Palma
    (38.8781387, -6.9965576),   # 10. Puente Real
]
In [9]:
# Project every (lat, lon) pair with utm.from_latlon and keep only the first
# two components of its result (the planar easting/northing coordinates),
# so plain planar distance formulas can be applied to them.
# NOTE(review): planar distances are only meaningful if all points fall in
# the same UTM zone -- confirm for this data set.
positions_decimal = [
    utm.from_latlon(lat, lon)[:2]
    for lat, lon in positions_latlon
]
positions_decimal
Out[9]:
In [10]:
# Interactive map centred on the second location, with one marker per
# place; the popup text comes from `places` at the same index.
map_osm = folium.Map(location=positions_latlon[1], zoom_start=14)
for idx in range(len(positions_latlon)):
    marker = folium.Marker(positions_latlon[idx], popup=places[idx])
    marker.add_to(map_osm)
map_osm
Out[10]:
In [11]:
def _zero_distance_matrix(size: int = 10) -> pd.DataFrame:
    """Return a size x size frame of float zeros labelled 0..size-1.

    The frames are later filled with floating-point distances, so they are
    created as float64 from the start: writing floats into integer columns
    is an incompatible-dtype assignment, which pandas has deprecated.

    :param size: number of rows and columns (10 = number of places)
    """
    return pd.DataFrame(0.0, index=range(size), columns=range(size))


# Placeholders for the three pairwise distance matrices (place x place).
# Each call returns an independent frame.
matrix_real_distance = _zero_distance_matrix()
matrix_euclidean_distance = _zero_distance_matrix()
matrix_rectangle_distance = _zero_distance_matrix()
In [12]:
# Matrix of real distances between every pair of places, embedded as
# ';'-separated text: row i / column j holds the distance from place i
# to place j.
data_real_csv = """
0;1800;6600;1100;2000;350;1600;1800;900;2600
1800;0;4300;2800;2900;1900;1800;3500;1200;2100
6600;4300;0;7300;5900;6000;5600;7300;5400;4500
1100;2800;7300;0;2300;450;2200;1800;1500;3100
2000;2900;5900;2300;0;2200;3400;4200;2800;2400
350;1900;6000;450;2200;0;1800;1900;1100;2600
1600;1800;5600;2200;3400;1800;0;2200;650;2800
1800;3500;7300;1800;4200;1900;2200;0;1000;3100
900;1200;5400;1500;2800;1100;650;1000;0;2200
2600;2100;4500;3100;2400;2600;2800;3100;2200;0
"""
matrix_real_distance = pd.read_csv(
    io.StringIO(data_real_csv),
    sep=';',
    header=None,
    index_col=False,
)

# Sanity check on the hand-typed data: the matrix must be symmetric,
# i.e. the distance i -> j equals the distance j -> i for every pair.
_m = matrix_real_distance
_n = matrix_real_distance.shape[0]
assert all(
    _m.iloc[row, col] == _m.iloc[col, row]
    for row in range(_n)
    for col in range(_n)
)
In [13]:
# Build both pairwise distance matrices directly from the projected
# coordinates: row i / column j holds the distance from place i to place j.
# Constructing the frames in one go (instead of writing each cell through
# `.iloc` into integer-initialised frames) gives float64 columns from the
# start and avoids the incompatible-dtype assignment pandas has deprecated.

# euclidean distance
matrix_euclidean_distance = pd.DataFrame([
    [calc_euclidean(loc1, loc2) for loc2 in positions_decimal]
    for loc1 in positions_decimal
])

# rectangle distance
matrix_rectangle_distance = pd.DataFrame([
    [calc_rectangle(loc1, loc2) for loc2 in positions_decimal]
    for loc1 in positions_decimal
])
In [14]:
# Show the three distance matrices one after another, each under its title.
for title, matrix in (
    ('REAL DISTANCE', matrix_real_distance),
    ('EUCLIDEAN DISTANCE', matrix_euclidean_distance),
    ('RECTANGLE DISTANCE', matrix_rectangle_distance),
):
    print(title)
    display(matrix)
In [15]:
# Number of places = number of rows of the real-distance matrix.
n = len(matrix_real_distance.index)
In [16]:
# REAL: flatten the strict upper triangle (i < j) of the real-distance
# matrix into a Series labelled '<i>_<j>', one entry per unordered pair.
_real_by_pair = OrderedDict()
for i in range(n):
    for j in range(i + 1, n):
        _real_by_pair['%s_%s' % (i, j)] = matrix_real_distance.iloc[i, j]
series_real_distance = pd.Series(_real_by_pair)

print('Primeiras 5 linhas ...')
series_real_distance.head()
Out[16]:
In [17]:
# EUCLIDEAN: one row per unordered pair of places (label '<i>_<j>', i < j).
# Column X is the euclidean distance, column Y the real distance for the
# same pair (aligned on the pair label).
_euclidean_pairs = [(i, j) for i in range(n) for j in range(i + 1, n)]
euclidean_analysis = pd.DataFrame(
    {'X': [matrix_euclidean_distance.iloc[i, j] for i, j in _euclidean_pairs]},
    index=['%s_%s' % pair for pair in _euclidean_pairs],
)
euclidean_analysis['Y'] = series_real_distance

print('EUCLIDEAN - Primeiras 5 linhas ...')
display(euclidean_analysis.head())
In [18]:
# Fit real distance (Y) as a linear function of euclidean distance (X)
# and report the intercept `a` and slope `b`.
param_euclidean = linear_regression(
    euclidean_analysis['X'],
    euclidean_analysis['Y'],
)
_euclidean_report = 'Linear Regression:\n a: %s, b: %s' % (
    param_euclidean['a'],
    param_euclidean['b'],
)
print(_euclidean_report)
In [19]:
# R²: goodness of fit of the euclidean model, stored next to its
# regression coefficients.
_euclidean_r2 = R2(euclidean_analysis['X'], euclidean_analysis['Y'])
param_euclidean['r2'] = _euclidean_r2
print('Euclidean R²: %s' % param_euclidean['r2'])
In [20]:
# RECTANGLE: one row per unordered pair of places (label '<i>_<j>', i < j).
# Column X is the rectangular distance, column Y the real distance for the
# same pair (aligned on the pair label).
_retangle_pairs = [(i, j) for i in range(n) for j in range(i + 1, n)]
retangle_analysis = pd.DataFrame(
    {'X': [matrix_rectangle_distance.iloc[i, j] for i, j in _retangle_pairs]},
    index=['%s_%s' % pair for pair in _retangle_pairs],
)
retangle_analysis['Y'] = series_real_distance

print('RETANGLE - Primeiras 5 linhas ...')
display(retangle_analysis.head())
In [21]:
# Fit real distance (Y) as a linear function of rectangular distance (X)
# and report the intercept `a` and slope `b`.
param_retangle = linear_regression(
    retangle_analysis['X'],
    retangle_analysis['Y'],
)
_retangle_report = 'Retangle Linear Regression:\n a: %s, b: %s' % (
    param_retangle['a'],
    param_retangle['b'],
)
print(_retangle_report)
In [22]:
# R²: goodness of fit of the rectangular model, stored next to its
# regression coefficients.
_retangle_r2 = R2(retangle_analysis['X'], retangle_analysis['Y'])
param_retangle['r2'] = _retangle_r2
print('Retangle R²: %s' % param_retangle['r2'])
In [23]:
def f_euclidean(x):
    """Predict Y from x with the fitted euclidean line a + b*x.

    The coefficients are read from `param_euclidean` at call time.
    """
    return param_euclidean['a'] + param_euclidean['b'] * x


def f_retangle(x):
    """Predict Y from x with the fitted rectangular line a + b*x.

    The coefficients are read from `param_retangle` at call time.
    """
    return param_retangle['a'] + param_retangle['b'] * x
In [24]:
# Add the model prediction and its relative deviation from the real
# distance, in percent: (1 - predicted / real) * 100.
euclidean_analysis['Y_Adjusted'] = f_euclidean(euclidean_analysis['X'])
_euclidean_ratio = (
    euclidean_analysis['Y_Adjusted'] / euclidean_analysis['Y']
)
euclidean_analysis['Diff(%)'] = (1 - _euclidean_ratio) * 100

print('Quadro final da análise de distância euclideana')
display(euclidean_analysis)
In [25]:
# Add the model prediction and its relative deviation from the real
# distance, in percent: (1 - predicted / real) * 100.
retangle_analysis['Y_Adjusted'] = f_retangle(retangle_analysis['X'])
_retangle_ratio = (
    retangle_analysis['Y_Adjusted'] / retangle_analysis['Y']
)
retangle_analysis['Diff(%)'] = (1 - _retangle_ratio) * 100

print('Quadro final da análise de distância retangular')
display(retangle_analysis)
In [26]:
# 1000 evenly spaced x values on which both fitted lines are drawn.
# NOTE(review): the upper bound is the largest real distance (Y), although
# the grid is used as the X axis of the plots -- confirm this is intended
# rather than the largest X value.
_line = np.linspace(
    start=0,
    stop=euclidean_analysis['Y'].max(),
    num=1000,
)
In [27]:
# Scatter of (X, Y) for the euclidean model with its fitted line on top.
_data = euclidean_analysis.sort_values(by='Y')
print('Euclidean')
fig, ax = plt.subplots()
ax.plot(_data['X'], _data['Y'], 'o', label='X,Y')
ax.plot(_line, f_euclidean(_line), label='LR')
ax.legend()
ax.grid()
plt.show()
In [28]:
# Scatter of (X, Y) for the rectangular model with its fitted line on top.
_data = retangle_analysis.sort_values(by='Y')
print('Retangle')
fig, ax = plt.subplots()
ax.plot(_data['X'], _data['Y'], 'o', label='X,Y')
ax.plot(_line, f_retangle(_line), label='LR')
ax.legend()
ax.grid()
plt.show()