Modules


In [80]:
import pandas as pd
import numpy as np
import collections
from salem import datasets
import salem
from cleo import Map
import geopandas as gpd
import shapely.geometry as shpg
import shapely.ops
import matplotlib.pyplot as plt
from glob import glob
import os

Paths


In [81]:
t_path = 'C:\\Users\\jlandman\\Desktop\\GlaThiDa_2014\\T.csv'
tt_path = 'C:\\Users\\jlandman\\Desktop\\GlaThiDa_2014\\TT.csv'
ttt_path = 'C:\\Users\\jlandman\\Desktop\\GlaThiDa_2014\\TTT.csv'
rgi_dir = 'C:\\Users\\jlandman\\Desktop\\rgi50\\'

Read files


In [82]:
pdrgi = gpd.read_file(rgi_path)

In [104]:
t = pd.read_csv(t_path, skiprows=[0,1,3], encoding='iso8859_15', sep=';',low_memory=False)
tt = pd.read_csv(tt_path, skiprows=[0,1,3], encoding='iso8859_15', sep=';',low_memory=False)
ttt = pd.read_csv(ttt_path, skiprows=[0,1,3], encoding='iso8859_15', sep=';',low_memory=False)

Remove NaN lines


In [105]:
t = t.dropna( axis=0,how='all')
tt = tt.dropna( axis=0,how='all')
ttt = ttt.dropna( axis=0,how='all')
t.head(1)


Out[105]:
GlaThiDa_ID POLITICAL_UNIT GLACIER_NAME SOURCE_ID ID LAT LON YEAR AREA MEAN_SLOPE ... SURVEY_DATE SURVEY_METHOD NUMBER_OF_SURVEY_POINTS NUMBER_OF_SURVEY_PROFILES TOTAL_LENGTH_OF_SURVEY_PROFILES INTERPOLATION_METHOD INVESTIGATOR SPONSORING_AGENCY REFERENCES REMARKS
0 1.0 SE ISFALLSGLAC WGI SE4B000E0006 67.915 18.5680 1979.0 1.3 NaN ... 19790399.0 GPRt NaN NaN NaN NaN Schytt V. and others University of Iceland Björnsson, H., (1981). Geogr. Ann. NaN
1 2.0 SE RABOTS GLACIAER WGI SE4B000E1016 67.894 18.5344 1979.0 4.1 NaN ... 19790399.0 GPRt NaN 10.0 NaN NaN Schytt V. and others University of Iceland Björnsson, H., (1981). Geogr. Ann. NaN
2 3.0 SE STORGLACIAEREN WGI SE4B000E0005 67.900 18.5700 1979.0 3.1 NaN ... 19790399.0 GPRt NaN NaN NaN NaN Schytt V. and others University of Iceland Björnsson, H., (1981). Geogr. Ann. NaN
3 4.0 US SOUTH CASCADE WGI US2M00264006 48.370 -121.0500 1975.0 2.0 NaN ... 19759999.0 DRIh NaN NaN NaN NaN NaN NaN Driedger, C.L., and Kennard, P.M., (1986a). An... NaN
4 5.0 CA ATHABASCA WGMS 7 52.200 -117.2500 9999.0 3.8 NaN ... 99999999.0 NaN NaN NaN NaN NaN NaN NaN Driedger, C.L., and Kennard, P.M., (1986a). An... NaN

5 rows × 24 columns

Find duplicate entries with differing coordinates


In [106]:
duplicates = [item for item, count in collections.Counter(t.GLACIER_NAME.values).items() if count > 1]
duplicates = [x for x in duplicates if str(x) != 'nan'] # remove nan entry as it also repeats

In [107]:
checkframe = pd.DataFrame([], columns =['name','lat','lon'])

ix=0
for dup in duplicates:
    latitudes = t[t.GLACIER_NAME==dup].LAT.values
    longitudes = t[t.GLACIER_NAME==dup].LON.values

    ct=0
    for i in range(1,len(latitudes)):
        if latitudes[0] != latitudes[i] or longitudes[0]!=longitudes[i]:
            checkframe.loc[ix, 'name'] = dup
            checkframe.loc[ix, 'lat'] = latitudes[i]
            checkframe.loc[ix, 'lon'] = longitudes[i]
            ix+=1
            
            if ct==0:
                checkframe.loc[ix, 'name'] = dup
                checkframe.loc[ix, 'lat'] = latitudes[0]
                checkframe.loc[ix, 'lon'] = longitudes[0]
                ix+=1
                ct+=1  
        
print(checkframe)


QIYI 39.2375 39.23 97.7564 97.75
UNTERSULZBACHKEES 47.1333 47.13 12.35 12.35
VERNAGTFERNER 46.88 46.877 10.82 10.817
HAXILEGEN 43.72 43.7 84.47 84.4
HAXILEGEN 43.72 43.72 84.47 84.38
HAXILEGEN 43.72 43.7 84.47 84.42
VERMUNTGLETSCHER 46.8533 46.85 10.125 10.13
GEFRORENE WAND 47.0653 47.0733 11.6792 11.685
GEFRORENE WAND 47.0653 47.067 11.6792 11.667
RHONE 46.62 46.5833 8.4 8.387138
FINDELEN 46.0 45.9979 7.87 7.8465
HALLSTAETTER 47.48 47.482 13.62 13.615
54 45.02 49.733 79.83 87.883
OBERSULZBACHKEES 47.1133 47.111 12.3017 12.2931
MIAOERGOU 43.05 43.03 94.33 94.33
                 name      lat      lon
0                QIYI    39.23    97.75
1                QIYI  39.2375  97.7564
2   UNTERSULZBACHKEES    47.13    12.35
3   UNTERSULZBACHKEES  47.1333    12.35
4       VERNAGTFERNER   46.877   10.817
5       VERNAGTFERNER    46.88    10.82
6           HAXILEGEN     43.7     84.4
7           HAXILEGEN    43.72    84.47
8           HAXILEGEN    43.72    84.38
9           HAXILEGEN     43.7    84.42
10   VERMUNTGLETSCHER    46.85    10.13
11   VERMUNTGLETSCHER  46.8533   10.125
12     GEFRORENE WAND  47.0733   11.685
13     GEFRORENE WAND  47.0653  11.6792
14     GEFRORENE WAND   47.067   11.667
15              RHONE  46.5833  8.38714
16              RHONE    46.62      8.4
17           FINDELEN  45.9979   7.8465
18           FINDELEN       46     7.87
19       HALLSTAETTER   47.482   13.615
20       HALLSTAETTER    47.48    13.62
21                 54   49.733   87.883
22                 54    45.02    79.83
23   OBERSULZBACHKEES   47.111  12.2931
24   OBERSULZBACHKEES  47.1133  12.3017
25          MIAOERGOU    43.03    94.33
26          MIAOERGOU    43.05    94.33

Decide manually which coordinates are "correct"


In [108]:
def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between one point 
    on the earth and an array of points (specified in decimal degrees)
    """
    
    # convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])

    # haversine formula 
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    c = 2 * np.arcsin(np.sqrt(a)) 
    r = 6371000 # Radius of earth in meters
    return c * r

In [109]:
r_shp = rgi_dir + '00_rgi50_regions\\00_rgi50_O1Regions.shp'
rgi_r = salem.utils.read_shapefile(r_shp)

rgi_r[['Primary_ID','Secondary_']]

region_candidates = []
for k, row in checkframe.iterrows():
    gp = shpg.Point(row.lon, row.lat)
    rgi_reg = []
    for i, r in rgi_r.iterrows():
        if r.geometry.contains(gp):
            rgi_reg.append(r['Secondary_'])
    assert len(rgi_reg) == 1
    region_candidates.append(rgi_reg[0])
    
checkframe['RGI_REG'] = region_candidates

In [110]:
from matplotlib.backends.backend_pdf import PdfPages

unique_names = set(checkframe.name.values)

curr = 1  # current plot
total = len(unique_names)  # total number of plots ()

with PdfPages('checkcoord.pdf') as pdf:
    for name in unique_names:
        region = checkframe[checkframe.name==name].RGI_REG.iloc[0]
        print(region)
        rgi_shp = list(glob(os.path.join(rgi_dir, "*", region + '_rgi50_*.shp')))[0]
        pdrgi = salem.utils.read_shapefile(rgi_shp, cached=True)
        lon, lat = np.nanmean(checkframe[checkframe.name == name].lon.values), np.nanmean(checkframe[checkframe.name == name].lat.values)
        pdrgi['DIST'] = haversine(lon, lat, pdrgi.CenLon.values, pdrgi.CenLat.values)
        sortrgi = pdrgi.sort_values(by='DIST')
        
        # For GoogleMap we need a lon lat range to generate the map
        mmlon = [np.nanmin(checkframe[checkframe.name == name].lon.values), np.nanmax(checkframe[checkframe.name == name].lon.values)]
        mmlat = [np.nanmin(checkframe[checkframe.name == name].lat.values), np.nanmax(checkframe[checkframe.name == name].lat.values)]
        
        for i in np.arange(0,7):
            rgig = sortrgi.iloc[i]
            # In case the glacier is a MultiPolygon we account for this here:
            if rgig.geometry.type == 'Polygon':
                x, y = rgig.geometry.exterior.xy
            elif rgig.geometry.type == 'MultiPolygon':
                # buffer is necessary as some multi-polygons are self-intersecting
                allparts = [p.buffer(0) for p in rgig.geometry] 
                rgig.geometry = shapely.ops.cascaded_union(allparts)
                x, y = rgig.geometry.exterior.xy

            mmlon = [np.min(np.append(mmlon, x)), np.max(np.append(mmlon, x))]
            mmlat = [np.min(np.append(mmlat, y)), np.max(np.append(mmlat, y))]
            
        # Prepare the figure
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
        ax1.set_title(name)
        
        local = datasets.GoogleVisibleMap(x=mmlon, y=mmlat) 
        local_map = Map(local.grid, countries=False, nx=640)
        local_map.set_lonlat_countours()

            
        # Plot glaciers
        colors = ['red', 'orange', 'green', 'blue', 'purple', 'magenta','red', 'orange', 'green', 'blue', 'purple', 'magenta']
        for i in np.arange(0,7):
            rgig = sortrgi.iloc[i]
            # In case the glacier is a MultiPolygon we (again) account for this here:
            if rgig.geometry.type == 'Polygon':
                x, y = rgig.geometry.exterior.xy
            elif rgig.geometry.type == 'MultiPolygon':
                # buffer is necessary as some multi-polygons are self-intersecting
                allparts = [p.buffer(0) for p in rgig.geometry] 
                rgig.geometry = shapely.ops.cascaded_union(allparts)
                x, y = rgig.geometry.exterior.xy
            
            px, py = rgig.CenLon, rgig.CenLat 
            local_map.set_geometry(shpg.Point(px, py), markersize=6, linewidth=0, color='black') 
            local_map.set_geometry(rgig.geometry.exterior, color=colors[i], linewidth=3, label=rgig.RGIId) 

        # Plot the point
        for j in range(len(checkframe[checkframe.name == name].lon.values)):
            px, py = checkframe[checkframe.name == name].lon.values[j], checkframe[checkframe.name == name].lat.values[j] #local.transform(lon, lat)
            local_map.set_geometry(shpg.Point(px, py), edgecolor='k', marker='x', linewidth=4, markersize=100, zorder=50,
                                   color=colors[j], text='%s,%s' % (px,py))
        
        local_map.set_rgb(local.get_vardata())
        local_map.visualize(ax=ax1, addcbar=False)

        local = datasets.GoogleVisibleMap(x=mmlon, y=mmlat, maptype='terrain')
        local_map.set_rgb(local.get_vardata())
        local_map.visualize(ax=ax2, addcbar=False)
        plt.subplots_adjust(left=0.04, right=0.80, top=0.94, bottom=0.07)
        plt.legend(bbox_to_anchor=(1.02, 1.), fontsize=18, loc=2, borderaxespad=0, frameon=False, numpoints=1,
                       scatterpoints=1)
        pdf.savefig(fig)
        plt.close()
        
        if curr % 5 == 0:
            print("%s / %s plots done." % (curr, total))
        curr += 1


11
11
C:\Users\jlandman\Anaconda3\lib\site-packages\pandas\core\generic.py:2698: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value
11
11
11
5 / 12 plots done.
10
13
13
11
13
10 / 12 plots done.
11
11

In [111]:
coord_correct = {'RHONE':(46.6200, 8.400000),  # took the coordinates furthe upstream
                'OBERSULZBACHKEES':(47.1110, 12.2931), # unclear if it makes sense that coordinates are separate; sources are different
                'VERNAGTFERNER':(46.880, 10.820), # took the coordinates further upstream
                'HALLSTAETTER':(47.482, 13.615),  # took the more central coordinates
                'VERMUNTGLETSCHER':(46.8533, 10.125), # took the more central and upstream coordinates
                'MIAOERGOU':(),  # no idea: sometimes "miaoergou" also refers to the glaciers of the whole mountain crest
                'QIYI':(39.2375, 97.7564),  # took the one within the glacier outlines
                'FINDELEN':(46.0000, 7.8700), # took the one within the glacier outlines
                'GEFRORENE WAND':(47.0721, 11.6831), # selected a totally new coordinate (two present coordinates are almost outside the polygon)
                'UNTERSULZBACHKEES':(47.1300, 12.35)} # took the upstream coordinate

# '54':() was removed as two different glaciers are meant (both named "54")
#  'HAXILEGEN':(), was removed as probably different glaciers are meant (rename, add extra info!?)