The DARIAH app contains a visualization of the number of contributions per member country on a map.
We show the map using Leaflet, which loads files containing the boundaries. These files are in geojson format.
Here we bundle all the necessary information of all European countries in one file.
Per country that is:
We have obtained data from the github repo mledoze/countries. We use these files:
We have compiled manually a selection of European countries from
and transformed it to the file
The bundle we are producing will be a GeoJSON file with as little information as needed. We will also round the coordinates and weed out duplicate points, in order to reduce the file size.
For Kosovo we have made manual adjustments:
In [1]:
# Input files, output location and rounding precision used by the cells below.
# EU_FILE: ';'-separated text file, one selected country per line as name;iso2;iso3
# (lines starting with '#' are skipped when it is read).
EU_FILE = 'europe_countries.csv'
# GEO_DIR: directory holding one '<iso3>.geo.json' boundary file per country.
GEO_DIR = 'geojson'
# COUNTRIES: JSON list of country records; the 'cca2' and 'latlng' entries are used.
COUNTRIES = 'all_countries.json'
# OUTFILE: the generated JavaScript module that exports the bundled borders.
OUTFILE = '../../../client/src/js/helpers/europe.geo.js'
# CENTER_PRECISION: number of decimals kept for each country's lat/lng pair.
CENTER_PRECISION = 1
In [2]:
import sys, collections, json
In [3]:
# Read the hand-made selection of European countries into a dict keyed by
# the two-letter code; each value holds the three-letter code and the name.
eu_countries = {}
with open(EU_FILE) as f:
    for raw_line in f:
        # Lines starting with '#' are comments.
        if raw_line.startswith('#'):
            continue
        parts = raw_line.strip().split(';')
        # Only records of exactly three fields (name;iso2;iso3) are used.
        if len(parts) != 3:
            continue
        name, iso2, iso3 = parts
        eu_countries[iso2] = {'iso3': iso3, 'name': name}

# Numbered overview of the selection, ordered by two-letter code.
for (number, (iso2, info)) in enumerate(sorted(eu_countries.items()), start=1):
    print('{:>2} {} {} {}'.format(number, iso2, info['iso3'], info['name']))
In [4]:
# Load the country records and copy the rounded centre coordinates of every
# selected country into its eu_countries entry (keys 'lat' and 'lng').
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's default encoding.
with open(COUNTRIES, encoding='utf-8') as f:
    countries = json.load(f)
print('Total number of countries: {}'.format(len(countries)))

# Format of the report lines printed below; with CENTER_PRECISION = 1 this
# evaluates to '{:>2} {} ({:>5.1f}, {:>5.1f}) {}'.
coord_fmt = '{{:>{}.{}f}}'.format(4+CENTER_PRECISION, CENTER_PRECISION)
pair_fmt = '({}, {})'.format(coord_fmt, coord_fmt)
line_fmt = '{{:>2}} {{}} {} {{}}'.format(pair_fmt)

i = 0
for country in countries:
    iso2 = country['cca2']
    if iso2 in eu_countries:
        i += 1
        (lat, lng) = country['latlng']
        info = eu_countries[iso2]
        info['lat'] = round(lat, CENTER_PRECISION)
        info['lng'] = round(lng, CENTER_PRECISION)
print('Found info for {} European countries'.format(i))

# A selected country without a record in COUNTRIES would otherwise only show
# up as a bare KeyError('lat') halfway through the report; name the culprits.
missing = sorted(code for (code, entry) in eu_countries.items() if 'lat' not in entry)
if missing:
    raise KeyError('No centre coordinates found in {} for: {}'.format(
        COUNTRIES, ', '.join(missing),
    ))

for (i, (iso2, info)) in enumerate(sorted(eu_countries.items())):
    print(line_fmt.format(
        i+1, iso2,
        info['lat'], info['lng'],
        info['name'],
    ))
In [5]:
def n_points(tp, data):
    """Count the coordinates contained in a shape.

    tp is 'll' (a single linear ring), 'Polygon' (a sequence of rings) or
    'MultiPolygon' (a sequence of polygons); data is the matching nested
    coordinate sequence.  Any other tp yields -1.
    """
    if tp == 'll':
        return len(data)
    if tp == 'Polygon':
        return sum(map(len, data))
    if tp == 'MultiPolygon':
        return sum(len(ring) for polygon in data for ring in polygon)
    return -1
def n_ll(tp, data):
    """Count the linear rings contained in a shape.

    tp is 'll' (a single linear ring), 'Polygon' (a sequence of rings) or
    'MultiPolygon' (a sequence of polygons); data is the matching nested
    coordinate sequence.  Any other tp yields -1.

    The 'll' case is handled so that the same set of shape types is accepted
    here as by the point counter: a single ring counts as one ring.
    """
    if tp == 'll':
        return 1
    if tp == 'Polygon':
        return len(data)
    if tp == 'MultiPolygon':
        return sum(len(poly) for poly in data)
    return -1
In [6]:
# Attach the boundary geometry of every selected country to its record.
for info in eu_countries.values():
    geo_path = '{}/{}.geo.json'.format(GEO_DIR, info['iso3'])
    # JSON text is UTF-8 by specification; decode it explicitly instead of
    # relying on the platform's default encoding.
    with open(geo_path, encoding='utf-8') as f:
        geoinfo = json.load(f)
    # Only the first feature of each file is used.
    geometry = geoinfo['features'][0]['geometry']
    info['geometry'] = geometry

# Report the size of every shape before any reduction.  The totals are kept
# in total_ng / total_nl / total_np (polygons / linear rings / points).
total_ng = 0
total_nl = 0
total_np = 0
for (i, (iso2, info)) in enumerate(sorted(eu_countries.items())):
    geo = info['geometry']
    shape = geo['type']
    data = geo['coordinates']
    # A Polygon is one polygon; otherwise data is taken to be a list of polygons.
    ng = 1 if shape == 'Polygon' else len(data)
    # Named n_pt rather than np, which reads like the conventional numpy alias.
    n_pt = n_points(shape, data)
    nl = n_ll(shape, data)
    total_ng += ng
    total_nl += nl
    total_np += n_pt
    print('{:>2} {} {:<25} {:<15} {:>2} poly, {:>3} linear ring, {:>5} point'.format(
        i+1, iso2,
        info['name'],
        shape,
        ng, nl, n_pt,
    ))
print('{:<47}{:>2} poly, {:>3} linear ring, {:>5} point'.format(
    'TOTAL', total_ng, total_nl, total_np,
))
We are going to reduce the information in the boundaries in a number of ways. A shape is organized as follows:
- MultiPolygon: a set of Polygons
- Polygon: a set of linear rings
- Linear ring: a list of coordinates, of which the last is equal to the first
- Coordinate: a longitude and a latitude
For coordinates we use a resolution of GEO_PRECISION digits behind the decimal point. We round the coordinates. This may cause repetition of identical points in a shape. We weed those out. We must take care that we do not weed out the first and last points.
If a linear ring has too few points, we just ignore it. That is, a linear ring must have at least MIN_POINTS points in order to pass.
If a linear ring has too many points, we weed them out, until there are MAX_POINTS left.
If a multipolygon has too many polygons, we retain only MAX_POLY of them. We order the polygons by the number of points they contain, and we retain the richest ones.
In [7]:
# maximal
# Preset that keeps the most detail of the three preset cells in this notebook.
# NOTE: every preset cell assigns the same four names, so the values of
# whichever preset cell was executed last are the ones in effect.
GEO_PRECISION = 3 # number of digits in coordinates of shapes
MIN_POINTS = 1 # minimum number of points in a linear ring
MAX_POINTS = 500 # maximum number of points in a linear ring
MAX_POLY = 100 # maximum number of polygons in a multipolygon
In [8]:
# minimal
# Preset with the lowest limits on points per ring and polygons per
# multipolygon of the three preset cells in this notebook.
# NOTE: every preset cell assigns the same four names, so the values of
# whichever preset cell was executed last are the ones in effect.
GEO_PRECISION = 1 # number of digits in coordinates of shapes
MIN_POINTS = 10 # minimum number of points in a linear ring
MAX_POINTS = 12 # maximum number of points in a linear ring
MAX_POLY = 5 # maximum number of polygons in a multipolygon
In [9]:
# medium
# Preset between the other two; as the last preset cell in the notebook it is
# the one in effect when all cells are executed top to bottom.
# NOTE: every preset cell assigns the same four names, so the values of
# whichever preset cell was executed last are the ones in effect.
GEO_PRECISION = 1 # number of digits in coordinates of shapes
MIN_POINTS = 15 # minimum number of points in a linear ring
MAX_POINTS = 60 # maximum number of points in a linear ring
MAX_POLY = 7 # maximum number of polygons in a multipolygon
In [10]:
def weed_ll(ll):
    """Round, de-duplicate and thin out a single linear ring.

    Every coordinate is rounded to GEO_PRECISION decimals.  Points that become
    identical after rounding are kept once, at their first position; this also
    removes the closing point, which equals the first one.  If more than
    MAX_POINTS distinct points remain, they are sampled at a regular stride so
    that at most MAX_POINTS are left.  Finally the first point is appended
    again to close the ring.

    Args:
        ll: iterable of (lng, lat) pairs.

    Returns:
        A tuple of (lng, lat) tuples whose last element equals its first, or
        an empty tuple when ll is empty.
    """
    distinct = tuple(collections.OrderedDict.fromkeys(
        (round(lng, GEO_PRECISION), round(lat, GEO_PRECISION)) for (lng, lat) in ll
    ))
    if not distinct:
        # Nothing to close: distinct[0] below would raise IndexError.
        return ()
    if len(distinct) > MAX_POINTS:
        # Ceiling division gives the smallest stride that honours MAX_POINTS.
        # A floor-plus-one stride throws away more points than needed whenever
        # the length is an exact multiple of MAX_POINTS.
        stride = -(-len(distinct) // MAX_POINTS)
        distinct = distinct[::stride]
    return distinct + (distinct[0],)
def weed_poly(poly):
    """Weed every linear ring of a polygon and drop the rings that are too small.

    Each ring is reduced with weed_ll; a reduced ring is kept only if it has
    at least MIN_POINTS entries (the closing point is counted).  Returns a
    tuple of the surviving rings in their original order, possibly empty.

    NOTE(review): rings are filtered independently of their position, so the
    first ring (the exterior ring in GeoJSON) can be dropped while a later
    ring is kept -- confirm that this is acceptable for the data at hand.
    """
    survivors = []
    for ring in poly:
        reduced = weed_ll(ring)
        if len(reduced) >= MIN_POINTS:
            survivors.append(reduced)
    return tuple(survivors)
def weed_multi(multi):
    """Weed every polygon of a multipolygon and keep only the richest ones.

    Each polygon is reduced with weed_poly.  Polygons that lose all their
    rings are discarded, so that no empty polygon ends up in the result.
    The remaining polygons are ordered by decreasing number of points and
    at most MAX_POLY of them are returned.

    Args:
        multi: iterable of polygons, each an iterable of linear rings.

    Returns:
        A tuple of at most MAX_POLY non-empty weeded polygons, richest first.
    """
    weeded = (weed_poly(poly) for poly in multi)
    non_empty = [poly for poly in weeded if poly]
    # The sort is stable, also with reverse=True, so polygons with equal
    # point counts keep their original relative order.
    non_empty.sort(key=lambda poly: n_points('Polygon', poly), reverse=True)
    return tuple(non_empty[:MAX_POLY])
def weed(tp, data):
    """Reduce a shape with the weeding routine that matches its type.

    tp is 'll', 'Polygon' or 'MultiPolygon'; data is the matching nested
    coordinate sequence.  For any other tp the result is None.
    """
    if tp == 'll':
        reduced = weed_ll(data)
    elif tp == 'Polygon':
        reduced = weed_poly(data)
    elif tp == 'MultiPolygon':
        reduced = weed_multi(data)
    else:
        reduced = None
    return reduced
In [11]:
# Sample data for trying out the weeding functions by hand.
# ll: a closed ring of 12 coordinates (first == last) that all lie within a
# few hundredths of a degree of each other.
ll = [
[8.710255,47.696808],
[8.709721,47.70694],
[8.708332,47.710548],
[8.705,47.713051],
[8.698889,47.713608],
[8.675278,47.712494],
[8.670555,47.711105],
[8.670277,47.707497],
[8.673298,47.701771],
[8.675554,47.697495],
[8.678595,47.693344],
[8.710255,47.696808],
]
# ll2: a closed ring of 14 coordinates (first == last) whose longitudes are
# spread from about 8.7 to about 20.7.
ll2 = [
[8.710255,47.696808],
[9.709721,47.70694],
[10.708332,47.710548],
[11.705,47.713051],
[12.698889,47.713608],
[13.675278,47.712494],
[14.670555,47.711105],
[15.670277,47.707497],
[16.673298,47.701771],
[17.675554,47.697495],
[18.678595,47.693344],
[19.710255,47.696808],
[20.710255,47.696808],
[8.710255,47.696808],
]
# poly: both rings combined into one polygon-shaped input.
poly = [ll, ll2]
In [12]:
# Show what weeding does to the two sample rings and to the sample polygon,
# with a separator line between the results.
samples = ((weed_ll, ll), (weed_ll, ll2), (weed_poly, poly))
for (position, (weeder, sample)) in enumerate(samples):
    if position > 0:
        print('=====')
    print(weeder(sample))
In [13]:
# Weed the boundary of every country in place and report the sizes afterwards,
# followed by the totals after and before weeding and their difference.
row_fmt = '{:>2} {} {:<25} {:<15} {:>2} poly, {:>3} linear ring, {:>5} point'
summary_fmt = '{:<47}{:>2} poly, {:>3} linear ring, {:>5} point'

wtotal_ng = wtotal_nl = wtotal_np = 0
for (rank, (iso2, info)) in enumerate(sorted(eu_countries.items()), start=1):
    geo = info['geometry']
    shape = geo['type']
    weeded = weed(shape, geo['coordinates'])
    # The reduced coordinates replace the originals in the country record.
    geo['coordinates'] = weeded
    n_polys = 1 if shape == 'Polygon' else len(weeded)
    n_pts = n_points(shape, weeded)
    n_rings = n_ll(shape, weeded)
    wtotal_ng += n_polys
    wtotal_nl += n_rings
    wtotal_np += n_pts
    print(row_fmt.format(rank, iso2, info['name'], shape, n_polys, n_rings, n_pts))

summary_rows = (
    ('TOTAL after weeding', wtotal_ng, wtotal_nl, wtotal_np),
    ('TOTAL', total_ng, total_nl, total_np),
    ('IMPROVEMENT', total_ng - wtotal_ng, total_nl - wtotal_nl, total_np - wtotal_np),
)
for summary_row in summary_rows:
    print(summary_fmt.format(*summary_row))
In [16]:
# Assemble one GeoJSON FeatureCollection with a feature per selected country,
# ordered by two-letter code.  Each feature carries only the code, the centre
# coordinates and the (weeded) boundary geometry.
features = dict(
    type='FeatureCollection',
    features=[],
)
for (iso2, info) in sorted(eu_countries.items()):
    feature = dict(
        type='Feature',
        properties=dict(
            iso2=iso2,
            lng=info['lng'],
            lat=info['lat'],
        ),
        geometry=info['geometry'],
    )
    features['features'].append(feature)

# Write the bundle as a JavaScript module: a documentation header followed by
# a single exported constant holding the FeatureCollection.
with open(OUTFILE, 'w', encoding='utf-8') as f:
    f.write('''
/**
* European country borders
*
* @module europe_geo_js
*/
/**
* Contains low resolution geographical coordinates of borders of European countries.
* These coordinates can be drawn on a map, e.g. by [Leaflet](http://leafletjs.com).
*
* More information, and the computation itself is in
* [countries.ipynb](/api/file/tools/country_compose/countries.html)
* a Jupyter notebook that you can run for yourself, if you want to tweak the
* resolution and precision of the border coordinates.
*/
''')
    f.write('export const countryBorders = ')
    # Compact separators drop the blank after every ',' and ':'.  The file is
    # mostly coordinate lists, so this shrinks it noticeably without changing
    # the data.
    json.dump(features, f, separators=(',', ':'))
In [ ]: