Modules



In [80]:

    
import pandas as pd
import numpy as np
import collections
from salem import datasets
import salem
from cleo import Map
import geopandas as gpd
import shapely.geometry as shpg
import shapely.ops
import matplotlib.pyplot as plt
from glob import glob
import os

Paths



In [81]:

    
t_path = 'C:\\Users\\jlandman\\Desktop\\GlaThiDa_2014\\T.csv'
tt_path = 'C:\\Users\\jlandman\\Desktop\\GlaThiDa_2014\\TT.csv'
ttt_path = 'C:\\Users\\jlandman\\Desktop\\GlaThiDa_2014\\TTT.csv'
rgi_dir = 'C:\\Users\\jlandman\\Desktop\\rgi50\\'

Read files



In [82]:

    
pdrgi = gpd.read_file(rgi_path)



In [104]:

    
t = pd.read_csv(t_path, skiprows=[0,1,3], encoding='iso8859_15', sep=';',low_memory=False)
tt = pd.read_csv(tt_path, skiprows=[0,1,3], encoding='iso8859_15', sep=';',low_memory=False)
ttt = pd.read_csv(ttt_path, skiprows=[0,1,3], encoding='iso8859_15', sep=';',low_memory=False)

Remove NaN lines



In [105]:

    
t = t.dropna( axis=0,how='all')
tt = tt.dropna( axis=0,how='all')
ttt = ttt.dropna( axis=0,how='all')
t.head(1)









    Out[105]:






  
    
      
      GlaThiDa_ID
      POLITICAL_UNIT
      GLACIER_NAME
      SOURCE_ID
      ID
      LAT
      LON
      YEAR
      AREA
      MEAN_SLOPE
      ...
      SURVEY_DATE
      SURVEY_METHOD
      NUMBER_OF_SURVEY_POINTS
      NUMBER_OF_SURVEY_PROFILES
      TOTAL_LENGTH_OF_SURVEY_PROFILES
      INTERPOLATION_METHOD
      INVESTIGATOR
      SPONSORING_AGENCY
      REFERENCES
      REMARKS
    
  
  
    
      0
      1.0
      SE
      ISFALLSGLAC
      WGI
      SE4B000E0006
      67.915
      18.5680
      1979.0
      1.3
      NaN
      ...
      19790399.0
      GPRt
      NaN
      NaN
      NaN
      NaN
      Schytt V. and others
      University of Iceland
      Björnsson, H., (1981).  Geogr. Ann.
      NaN
    
    
      1
      2.0
      SE
      RABOTS GLACIAER
      WGI
      SE4B000E1016
      67.894
      18.5344
      1979.0
      4.1
      NaN
      ...
      19790399.0
      GPRt
      NaN
      10.0
      NaN
      NaN
      Schytt V. and others
      University of Iceland
      Björnsson, H., (1981).  Geogr. Ann.
      NaN
    
    
      2
      3.0
      SE
      STORGLACIAEREN
      WGI
      SE4B000E0005
      67.900
      18.5700
      1979.0
      3.1
      NaN
      ...
      19790399.0
      GPRt
      NaN
      NaN
      NaN
      NaN
      Schytt V. and others
      University of Iceland
      Björnsson, H., (1981).  Geogr. Ann.
      NaN
    
    
      3
      4.0
      US
      SOUTH CASCADE
      WGI
      US2M00264006
      48.370
      -121.0500
      1975.0
      2.0
      NaN
      ...
      19759999.0
      DRIh
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      Driedger, C.L., and Kennard, P.M., (1986a). An...
      NaN
    
    
      4
      5.0
      CA
      ATHABASCA
      WGMS
      7
      52.200
      -117.2500
      9999.0
      3.8
      NaN
      ...
      99999999.0
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      Driedger, C.L., and Kennard, P.M., (1986a). An...
      NaN
    
  

5 rows × 24 columns

Find duplicate entries with differing coordinates



In [106]:

    
duplicates = [item for item, count in collections.Counter(t.GLACIER_NAME.values).items() if count > 1]
duplicates = [x for x in duplicates if str(x) != 'nan'] # remove nan entry as it also repeats



In [107]:

    
checkframe = pd.DataFrame([], columns =['name','lat','lon'])

ix=0
for dup in duplicates:
    latitudes = t[t.GLACIER_NAME==dup].LAT.values
    longitudes = t[t.GLACIER_NAME==dup].LON.values

    ct=0
    for i in range(1,len(latitudes)):
        if latitudes[0] != latitudes[i] or longitudes[0]!=longitudes[i]:
            checkframe.loc[ix, 'name'] = dup
            checkframe.loc[ix, 'lat'] = latitudes[i]
            checkframe.loc[ix, 'lon'] = longitudes[i]
            ix+=1
            
            if ct==0:
                checkframe.loc[ix, 'name'] = dup
                checkframe.loc[ix, 'lat'] = latitudes[0]
                checkframe.loc[ix, 'lon'] = longitudes[0]
                ix+=1
                ct+=1  
        
print(checkframe)









    



QIYI 39.2375 39.23 97.7564 97.75
UNTERSULZBACHKEES 47.1333 47.13 12.35 12.35
VERNAGTFERNER 46.88 46.877 10.82 10.817
HAXILEGEN 43.72 43.7 84.47 84.4
HAXILEGEN 43.72 43.72 84.47 84.38
HAXILEGEN 43.72 43.7 84.47 84.42
VERMUNTGLETSCHER 46.8533 46.85 10.125 10.13
GEFRORENE WAND 47.0653 47.0733 11.6792 11.685
GEFRORENE WAND 47.0653 47.067 11.6792 11.667
RHONE 46.62 46.5833 8.4 8.387138
FINDELEN 46.0 45.9979 7.87 7.8465
HALLSTAETTER 47.48 47.482 13.62 13.615
54 45.02 49.733 79.83 87.883
OBERSULZBACHKEES 47.1133 47.111 12.3017 12.2931
MIAOERGOU 43.05 43.03 94.33 94.33
                 name      lat      lon
0                QIYI    39.23    97.75
1                QIYI  39.2375  97.7564
2   UNTERSULZBACHKEES    47.13    12.35
3   UNTERSULZBACHKEES  47.1333    12.35
4       VERNAGTFERNER   46.877   10.817
5       VERNAGTFERNER    46.88    10.82
6           HAXILEGEN     43.7     84.4
7           HAXILEGEN    43.72    84.47
8           HAXILEGEN    43.72    84.38
9           HAXILEGEN     43.7    84.42
10   VERMUNTGLETSCHER    46.85    10.13
11   VERMUNTGLETSCHER  46.8533   10.125
12     GEFRORENE WAND  47.0733   11.685
13     GEFRORENE WAND  47.0653  11.6792
14     GEFRORENE WAND   47.067   11.667
15              RHONE  46.5833  8.38714
16              RHONE    46.62      8.4
17           FINDELEN  45.9979   7.8465
18           FINDELEN       46     7.87
19       HALLSTAETTER   47.482   13.615
20       HALLSTAETTER    47.48    13.62
21                 54   49.733   87.883
22                 54    45.02    79.83
23   OBERSULZBACHKEES   47.111  12.2931
24   OBERSULZBACHKEES  47.1133  12.3017
25          MIAOERGOU    43.03    94.33
26          MIAOERGOU    43.05    94.33

Decide manually which coordinates are "correct"



In [108]:

    
def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between one point 
    on the earth and an array of points (specified in decimal degrees)
    """
    
    # convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])

    # haversine formula 
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    c = 2 * np.arcsin(np.sqrt(a)) 
    r = 6371000 # Radius of earth in meters
    return c * r



In [109]:

    
r_shp = rgi_dir + '00_rgi50_regions\\00_rgi50_O1Regions.shp'
rgi_r = salem.utils.read_shapefile(r_shp)

rgi_r[['Primary_ID','Secondary_']]

region_candidates = []
for k, row in checkframe.iterrows():
    gp = shpg.Point(row.lon, row.lat)
    rgi_reg = []
    for i, r in rgi_r.iterrows():
        if r.geometry.contains(gp):
            rgi_reg.append(r['Secondary_'])
    assert len(rgi_reg) == 1
    region_candidates.append(rgi_reg[0])
    
checkframe['RGI_REG'] = region_candidates



In [110]:

    
from matplotlib.backends.backend_pdf import PdfPages

unique_names = set(checkframe.name.values)

curr = 1  # current plot
total = len(unique_names)  # total number of plots ()

with PdfPages('checkcoord.pdf') as pdf:
    for name in unique_names:
        region = checkframe[checkframe.name==name].RGI_REG.iloc[0]
        print(region)
        rgi_shp = list(glob(os.path.join(rgi_dir, "*", region + '_rgi50_*.shp')))[0]
        pdrgi = salem.utils.read_shapefile(rgi_shp, cached=True)
        lon, lat = np.nanmean(checkframe[checkframe.name == name].lon.values), np.nanmean(checkframe[checkframe.name == name].lat.values)
        pdrgi['DIST'] = haversine(lon, lat, pdrgi.CenLon.values, pdrgi.CenLat.values)
        sortrgi = pdrgi.sort_values(by='DIST')
        
        # For GoogleMap we need a lon lat range to generate the map
        mmlon = [np.nanmin(checkframe[checkframe.name == name].lon.values), np.nanmax(checkframe[checkframe.name == name].lon.values)]
        mmlat = [np.nanmin(checkframe[checkframe.name == name].lat.values), np.nanmax(checkframe[checkframe.name == name].lat.values)]
        
        for i in np.arange(0,7):
            rgig = sortrgi.iloc[i]
            # In case the glacier is a MultiPolygon we account for this here:
            if rgig.geometry.type == 'Polygon':
                x, y = rgig.geometry.exterior.xy
            elif rgig.geometry.type == 'MultiPolygon':
                # buffer is necessary as some multi-polygons are self-intersecting
                allparts = [p.buffer(0) for p in rgig.geometry] 
                rgig.geometry = shapely.ops.cascaded_union(allparts)
                x, y = rgig.geometry.exterior.xy

            mmlon = [np.min(np.append(mmlon, x)), np.max(np.append(mmlon, x))]
            mmlat = [np.min(np.append(mmlat, y)), np.max(np.append(mmlat, y))]
            
        # Prepare the figure
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
        ax1.set_title(name)
        
        local = datasets.GoogleVisibleMap(x=mmlon, y=mmlat) 
        local_map = Map(local.grid, countries=False, nx=640)
        local_map.set_lonlat_countours()

            
        # Plot glaciers
        colors = ['red', 'orange', 'green', 'blue', 'purple', 'magenta','red', 'orange', 'green', 'blue', 'purple', 'magenta']
        for i in np.arange(0,7):
            rgig = sortrgi.iloc[i]
            # In case the glacier is a MultiPolygon we (again) account for this here:
            if rgig.geometry.type == 'Polygon':
                x, y = rgig.geometry.exterior.xy
            elif rgig.geometry.type == 'MultiPolygon':
                # buffer is necessary as some multi-polygons are self-intersecting
                allparts = [p.buffer(0) for p in rgig.geometry] 
                rgig.geometry = shapely.ops.cascaded_union(allparts)
                x, y = rgig.geometry.exterior.xy
            
            px, py = rgig.CenLon, rgig.CenLat 
            local_map.set_geometry(shpg.Point(px, py), markersize=6, linewidth=0, color='black') 
            local_map.set_geometry(rgig.geometry.exterior, color=colors[i], linewidth=3, label=rgig.RGIId) 

        # Plot the point
        for j in range(len(checkframe[checkframe.name == name].lon.values)):
            px, py = checkframe[checkframe.name == name].lon.values[j], checkframe[checkframe.name == name].lat.values[j] #local.transform(lon, lat)
            local_map.set_geometry(shpg.Point(px, py), edgecolor='k', marker='x', linewidth=4, markersize=100, zorder=50,
                                   color=colors[j], text='%s,%s' % (px,py))
        
        local_map.set_rgb(local.get_vardata())
        local_map.visualize(ax=ax1, addcbar=False)

        local = datasets.GoogleVisibleMap(x=mmlon, y=mmlat, maptype='terrain')
        local_map.set_rgb(local.get_vardata())
        local_map.visualize(ax=ax2, addcbar=False)
        plt.subplots_adjust(left=0.04, right=0.80, top=0.94, bottom=0.07)
        plt.legend(bbox_to_anchor=(1.02, 1.), fontsize=18, loc=2, borderaxespad=0, frameon=False, numpoints=1,
                       scatterpoints=1)
        pdf.savefig(fig)
        plt.close()
        
        if curr % 5 == 0:
            print("%s / %s plots done." % (curr, total))
        curr += 1









    



11
11






    



C:\Users\jlandman\Anaconda3\lib\site-packages\pandas\core\generic.py:2698: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value






    



11
11
11
5 / 12 plots done.
10
13
13
11
13
10 / 12 plots done.
11
11



In [111]:

    
coord_correct = {'RHONE':(46.6200, 8.400000),  # took the coordinates furthe upstream
                'OBERSULZBACHKEES':(47.1110, 12.2931), # unclear if it makes sense that coordinates are separate; sources are different
                'VERNAGTFERNER':(46.880, 10.820), # took the coordinates further upstream
                'HALLSTAETTER':(47.482, 13.615),  # took the more central coordinates
                'VERMUNTGLETSCHER':(46.8533, 10.125), # took the more central and upstream coordinates
                'MIAOERGOU':(),  # no idea: sometimes "miaoergou" also refers to the glaciers of the whole mountain crest
                'QIYI':(39.2375, 97.7564),  # took the one within the glacier outlines
                'FINDELEN':(46.0000, 7.8700), # took the one within the glacier outlines
                'GEFRORENE WAND':(47.0721, 11.6831), # selected a totally new coordinate (two present coordinates are almost outside the polygon)
                'UNTERSULZBACHKEES':(47.1300, 12.35)} # took the upstream coordinate

# '54':() was removed as two different glaciers are meant (both named "54")
#  'HAXILEGEN':(), was removed as probably different glaciers are meant (rename, add extra info!?)

	GlaThiDa_ID	POLITICAL_UNIT	GLACIER_NAME	SOURCE_ID	ID	LAT	LON	YEAR	AREA	MEAN_SLOPE	...	SURVEY_DATE	SURVEY_METHOD	NUMBER_OF_SURVEY_POINTS	NUMBER_OF_SURVEY_PROFILES	TOTAL_LENGTH_OF_SURVEY_PROFILES	INTERPOLATION_METHOD	INVESTIGATOR	SPONSORING_AGENCY	REFERENCES	REMARKS
0	1.0	SE	ISFALLSGLAC	WGI	SE4B000E0006	67.915	18.5680	1979.0	1.3	NaN	...	19790399.0	GPRt	NaN	NaN	NaN	NaN	Schytt V. and others	University of Iceland	Björnsson, H., (1981). Geogr. Ann.	NaN
1	2.0	SE	RABOTS GLACIAER	WGI	SE4B000E1016	67.894	18.5344	1979.0	4.1	NaN	...	19790399.0	GPRt	NaN	10.0	NaN	NaN	Schytt V. and others	University of Iceland	Björnsson, H., (1981). Geogr. Ann.	NaN
2	3.0	SE	STORGLACIAEREN	WGI	SE4B000E0005	67.900	18.5700	1979.0	3.1	NaN	...	19790399.0	GPRt	NaN	NaN	NaN	NaN	Schytt V. and others	University of Iceland	Björnsson, H., (1981). Geogr. Ann.	NaN
3	4.0	US	SOUTH CASCADE	WGI	US2M00264006	48.370	-121.0500	1975.0	2.0	NaN	...	19759999.0	DRIh	NaN	NaN	NaN	NaN	NaN	NaN	Driedger, C.L., and Kennard, P.M., (1986a). An...	NaN
4	5.0	CA	ATHABASCA	WGMS	7	52.200	-117.2500	9999.0	3.8	NaN	...	99999999.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Driedger, C.L., and Kennard, P.M., (1986a). An...	NaN