This notebook computes the average distance between the generated recording locations and the actual recording locations used in the analysis.
In [44]:
from geo.models import SampleLocation
from database.models import Site
from shapely.geometry import shape, MultiPoint
import geopandas
import pandas
import numpy
from django.db import connection
In [16]:
def get_geodataframe(queryset, modification=None, crs={'+init':'epsg:31254'}):
    """Load the rows selected by *queryset* into a GeoDataFrame.

    The SQL text and its bound parameters are taken from
    ``queryset.query.sql_with_params()`` and executed through the Django
    database ``connection`` with ``geopandas.read_postgis``.

    Args:
        queryset: Object exposing ``query.sql_with_params()`` (e.g. a Django
            QuerySet). The result must provide an ``id`` column (used as the
            index) and a ``geometry`` column (used as the geometry).
        modification: Optional SQL string that replaces the SQL generated by
            the queryset. The queryset's parameters are still passed along,
            so the replacement must use the same placeholders.
        crs: Coordinate reference system handed to ``read_postgis``.

    Returns:
        The GeoDataFrame produced by ``geopandas.read_postgis``, indexed by
        ``id``.
    """
    sql, params = queryset.query.sql_with_params()
    if modification:
        sql = modification
    # The default ``crs`` dict is created once at definition time. Hand
    # geopandas a per-call copy so that editing the CRS of one returned frame
    # can never alter the default or the CRS of frames loaded elsewhere.
    if isinstance(crs, dict):
        crs = dict(crs)
    return geopandas.read_postgis(
        sql,
        connection,
        geom_col='geometry',
        params=params,
        index_col='id',
        crs=crs,
    )
In [19]:
# Every generated sample location, indexed by id.
generated = get_geodataframe(queryset=SampleLocation.objects.all())
# The sites actually used; only ids up to and including 30 are loaded.
actual = get_geodataframe(queryset=Site.objects.filter(id__lte=30))
In [48]:
# Euclidean distance between each generated location and the actual site
# that shares its id, expressed in the units of the frames' CRS.


def _first_xy(geometries):
    """Return an (n, 2) float array with the first x/y coordinate of each geometry."""
    pairs = [geom.coords[0][:2] for geom in geometries]
    # reshape keeps the array two-dimensional even when there are no rows.
    return numpy.array(pairs, dtype=float).reshape(-1, 2)


# Work only with ids present in both frames, in ascending order, so that ids,
# names and distances always describe the same row regardless of the order in
# which the database returned the records (and regardless of how many there are).
shared_ids = generated.index.intersection(actual.index).sort_values()

generated_xy = _first_xy(generated.geometry.loc[shared_ids])
actual_xy = _first_xy(actual.geometry.loc[shared_ids])
offsets = actual_xy - generated_xy
distance_array = numpy.hypot(offsets[:, 0], offsets[:, 1])

distances = pandas.DataFrame(
    {
        'name': actual['name'].loc[shared_ids].values,
        'distance': distance_array,
    },
    index=pandas.Index(shared_ids, name='id'),
)
In [49]:
# Display the per-id table of site names and distances.
distances
Out[49]:
In [66]:
# Mean distance across all compared locations, rounded to a whole number.
numpy.round(distances['distance'].mean(), 0)
Out[66]:
In [67]:
# Standard deviation of the distances (pandas default), rounded to a whole number.
numpy.round(distances['distance'].std(), 0)
Out[67]: