In [80]:
import pandas as pd
import numpy as np
import collections
from salem import datasets
import salem
from cleo import Map
import geopandas as gpd
import shapely.geometry as shpg
import shapely.ops
import matplotlib.pyplot as plt
from glob import glob
import os
In [81]:
t_path = 'C:\\Users\\jlandman\\Desktop\\GlaThiDa_2014\\T.csv'
tt_path = 'C:\\Users\\jlandman\\Desktop\\GlaThiDa_2014\\TT.csv'
ttt_path = 'C:\\Users\\jlandman\\Desktop\\GlaThiDa_2014\\TTT.csv'
rgi_dir = 'C:\\Users\\jlandman\\Desktop\\rgi50\\'
In [82]:
pdrgi = gpd.read_file(rgi_path)
In [104]:
t = pd.read_csv(t_path, skiprows=[0,1,3], encoding='iso8859_15', sep=';',low_memory=False)
tt = pd.read_csv(tt_path, skiprows=[0,1,3], encoding='iso8859_15', sep=';',low_memory=False)
ttt = pd.read_csv(ttt_path, skiprows=[0,1,3], encoding='iso8859_15', sep=';',low_memory=False)
In [105]:
t = t.dropna( axis=0,how='all')
tt = tt.dropna( axis=0,how='all')
ttt = ttt.dropna( axis=0,how='all')
t.head(1)
Out[105]:
In [106]:
duplicates = [item for item, count in collections.Counter(t.GLACIER_NAME.values).items() if count > 1]
duplicates = [x for x in duplicates if str(x) != 'nan'] # remove nan entry as it also repeats
In [107]:
checkframe = pd.DataFrame([], columns =['name','lat','lon'])
ix=0
for dup in duplicates:
latitudes = t[t.GLACIER_NAME==dup].LAT.values
longitudes = t[t.GLACIER_NAME==dup].LON.values
ct=0
for i in range(1,len(latitudes)):
if latitudes[0] != latitudes[i] or longitudes[0]!=longitudes[i]:
checkframe.loc[ix, 'name'] = dup
checkframe.loc[ix, 'lat'] = latitudes[i]
checkframe.loc[ix, 'lon'] = longitudes[i]
ix+=1
if ct==0:
checkframe.loc[ix, 'name'] = dup
checkframe.loc[ix, 'lat'] = latitudes[0]
checkframe.loc[ix, 'lon'] = longitudes[0]
ix+=1
ct+=1
print(checkframe)
In [108]:
def haversine(lon1, lat1, lon2, lat2):
"""
Calculate the great circle distance between one point
on the earth and an array of points (specified in decimal degrees)
"""
# convert decimal degrees to radians
lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])
# haversine formula
dlon = lon2 - lon1
dlat = lat2 - lat1
a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
c = 2 * np.arcsin(np.sqrt(a))
r = 6371000 # Radius of earth in meters
return c * r
In [109]:
r_shp = rgi_dir + '00_rgi50_regions\\00_rgi50_O1Regions.shp'
rgi_r = salem.utils.read_shapefile(r_shp)
rgi_r[['Primary_ID','Secondary_']]
region_candidates = []
for k, row in checkframe.iterrows():
gp = shpg.Point(row.lon, row.lat)
rgi_reg = []
for i, r in rgi_r.iterrows():
if r.geometry.contains(gp):
rgi_reg.append(r['Secondary_'])
assert len(rgi_reg) == 1
region_candidates.append(rgi_reg[0])
checkframe['RGI_REG'] = region_candidates
In [110]:
from matplotlib.backends.backend_pdf import PdfPages
unique_names = set(checkframe.name.values)
curr = 1 # current plot
total = len(unique_names) # total number of plots ()
with PdfPages('checkcoord.pdf') as pdf:
for name in unique_names:
region = checkframe[checkframe.name==name].RGI_REG.iloc[0]
print(region)
rgi_shp = list(glob(os.path.join(rgi_dir, "*", region + '_rgi50_*.shp')))[0]
pdrgi = salem.utils.read_shapefile(rgi_shp, cached=True)
lon, lat = np.nanmean(checkframe[checkframe.name == name].lon.values), np.nanmean(checkframe[checkframe.name == name].lat.values)
pdrgi['DIST'] = haversine(lon, lat, pdrgi.CenLon.values, pdrgi.CenLat.values)
sortrgi = pdrgi.sort_values(by='DIST')
# For GoogleMap we need a lon lat range to generate the map
mmlon = [np.nanmin(checkframe[checkframe.name == name].lon.values), np.nanmax(checkframe[checkframe.name == name].lon.values)]
mmlat = [np.nanmin(checkframe[checkframe.name == name].lat.values), np.nanmax(checkframe[checkframe.name == name].lat.values)]
for i in np.arange(0,7):
rgig = sortrgi.iloc[i]
# In case the glacier is a MultiPolygon we account for this here:
if rgig.geometry.type == 'Polygon':
x, y = rgig.geometry.exterior.xy
elif rgig.geometry.type == 'MultiPolygon':
# buffer is necessary as some multi-polygons are self-intersecting
allparts = [p.buffer(0) for p in rgig.geometry]
rgig.geometry = shapely.ops.cascaded_union(allparts)
x, y = rgig.geometry.exterior.xy
mmlon = [np.min(np.append(mmlon, x)), np.max(np.append(mmlon, x))]
mmlat = [np.min(np.append(mmlat, y)), np.max(np.append(mmlat, y))]
# Prepare the figure
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
ax1.set_title(name)
local = datasets.GoogleVisibleMap(x=mmlon, y=mmlat)
local_map = Map(local.grid, countries=False, nx=640)
local_map.set_lonlat_countours()
# Plot glaciers
colors = ['red', 'orange', 'green', 'blue', 'purple', 'magenta','red', 'orange', 'green', 'blue', 'purple', 'magenta']
for i in np.arange(0,7):
rgig = sortrgi.iloc[i]
# In case the glacier is a MultiPolygon we (again) account for this here:
if rgig.geometry.type == 'Polygon':
x, y = rgig.geometry.exterior.xy
elif rgig.geometry.type == 'MultiPolygon':
# buffer is necessary as some multi-polygons are self-intersecting
allparts = [p.buffer(0) for p in rgig.geometry]
rgig.geometry = shapely.ops.cascaded_union(allparts)
x, y = rgig.geometry.exterior.xy
px, py = rgig.CenLon, rgig.CenLat
local_map.set_geometry(shpg.Point(px, py), markersize=6, linewidth=0, color='black')
local_map.set_geometry(rgig.geometry.exterior, color=colors[i], linewidth=3, label=rgig.RGIId)
# Plot the point
for j in range(len(checkframe[checkframe.name == name].lon.values)):
px, py = checkframe[checkframe.name == name].lon.values[j], checkframe[checkframe.name == name].lat.values[j] #local.transform(lon, lat)
local_map.set_geometry(shpg.Point(px, py), edgecolor='k', marker='x', linewidth=4, markersize=100, zorder=50,
color=colors[j], text='%s,%s' % (px,py))
local_map.set_rgb(local.get_vardata())
local_map.visualize(ax=ax1, addcbar=False)
local = datasets.GoogleVisibleMap(x=mmlon, y=mmlat, maptype='terrain')
local_map.set_rgb(local.get_vardata())
local_map.visualize(ax=ax2, addcbar=False)
plt.subplots_adjust(left=0.04, right=0.80, top=0.94, bottom=0.07)
plt.legend(bbox_to_anchor=(1.02, 1.), fontsize=18, loc=2, borderaxespad=0, frameon=False, numpoints=1,
scatterpoints=1)
pdf.savefig(fig)
plt.close()
if curr % 5 == 0:
print("%s / %s plots done." % (curr, total))
curr += 1
In [111]:
coord_correct = {'RHONE':(46.6200, 8.400000), # took the coordinates furthe upstream
'OBERSULZBACHKEES':(47.1110, 12.2931), # unclear if it makes sense that coordinates are separate; sources are different
'VERNAGTFERNER':(46.880, 10.820), # took the coordinates further upstream
'HALLSTAETTER':(47.482, 13.615), # took the more central coordinates
'VERMUNTGLETSCHER':(46.8533, 10.125), # took the more central and upstream coordinates
'MIAOERGOU':(), # no idea: sometimes "miaoergou" also refers to the glaciers of the whole mountain crest
'QIYI':(39.2375, 97.7564), # took the one within the glacier outlines
'FINDELEN':(46.0000, 7.8700), # took the one within the glacier outlines
'GEFRORENE WAND':(47.0721, 11.6831), # selected a totally new coordinate (two present coordinates are almost outside the polygon)
'UNTERSULZBACHKEES':(47.1300, 12.35)} # took the upstream coordinate
# '54':() was removed as two different glaciers are meant (both named "54")
# 'HAXILEGEN':(), was removed as probably different glaciers are meant (rename, add extra info!?)