Modules


In [1]:
import pandas as pd
import numpy as np
import unidecode
from difflib import SequenceMatcher
import salem
import matplotlib.pyplot as plt 
import re
import os
import shapely.geometry as shpg
import shapely.ops
import collections
from shapely.geometry import Point, shape
import fiona
% matplotlib inline

File paths


In [2]:
f_path = 'C:\\Users\\jlandman\\Desktop\\database_Fischer_et_al._2015_The_Cryosphere.txt'                 # Fischer database with swiss coordinates 
fll_path = 'C:\\Users\\jlandman\\Desktop\\SGI2010wgs84_shapefiles\\parameters_SGI2010.csv'               # Fischer database with lat/lon
a_path = 'C:\\Users\\jlandman\\Desktop\\DOI-WGMS-FoG-2015-11\\WGMS-FoG-2015-11-A-GENERAL-INFORMATION.csv'# FoG: A GENERAL
b_path = 'C:\\Users\\jlandman\\Desktop\\DOI-WGMS-FoG-2015-11\\WGMS-FoG-2015-11-B-STATE.csv'              # FoG: B STATE
d_path = 'C:\\Users\\jlandman\\Desktop\\DOI-WGMS-FoG-2015-11\\WGMS-FoG-2015-11-D-CHANGE.csv'             # FoG: D CHANGE
gl_path = 'C:\\Users\\jlandman\\Desktop\\glims_db_20160429\\glims_polygons_swiss_alps.txt'               # GLIMS DBF file
ch_adm_path = "C:\\Users\\jlandman\\Desktop\\CHE_adm_shp\\CHE_adm1.shp"                                  # Swiss Kanton borders
rgi_CE = 'C:\\Users\\jlandman\\Desktop\\rgi50\\11_rgi50_CentralEurope\\11_rgi50_CentralEurope.shp'       # RGI 5.0 Central Europe

Read files


In [3]:
pdf = pd.read_csv(f_path, sep = '\t', encoding='iso-8859-1')
pdfll = pd.read_csv(fll_path, sep= ';', encoding='iso-8859-1')  #, usecols=[2,3,6,14,15]
pda = pd.read_csv(a_path, encoding='iso-8859-1')
pdb = pd.read_csv(b_path, encoding='iso-8859-1')
pdd = pd.read_csv(d_path, encoding='iso-8859-1')
glims = pd.read_csv(gl_path, encoding='iso-8859-1')

Preselect FoG IDs in Switzerland


In [4]:
pda = pda[pda.POLITICAL_UNIT == 'CH']
pdb = pdb[pdb.WGMS_ID.isin(pda.WGMS_ID.values)]
pdd = pdd[pdd.WGMS_ID.isin(pdd.WGMS_ID.values)]

Haversine function


In [5]:
def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between one point 
    on the earth and an array of points (specified in decimal degrees)
    """
    
    # convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])

    # haversine formula 
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    c = 2 * np.arcsin(np.sqrt(a)) 
    r = 6371000 # Radius of earth in meters
    return c * r

Correct missing underscores and column names


In [6]:
pdf = pdf.rename(columns={'uncertainty_dvol_between_t1_and_t2_mio m3': 'uncertainty_dvol_between_t1_and_t2_mio_m3'})
pdfll = pdfll.rename(columns={'uncertainty_dvol_between_t1_and_t2_mio m3': 'uncertainty_dvol_between_t1_and_t2_mio_m3'})

pdfll = pdfll.rename(columns={'Unnamed: 15': 'Glacier_name_SGI2010'})
pdfll = pdfll.rename(columns={'Unnamed: 16': 'year'})

First approach: find matching glaciers based on FoG => compare names, area and location

Dictionary of strings and symbols that should be replaced prior to calculation of similarity score


In [7]:
name_adjust = {'gletscher':'g', 'glacier':'g' , 'vadret':'v', 'ghiacciaio':'g', 'vedretta':'v', 'ferner':'f', 'ä':'ae', 'ö':'oe', 'ü':'ue', 'é':'e', 'à':'a', 'è':'e', 'ô':'o', 'ß':'ss'}

Introduce FoG column with new names


In [8]:
pda['FoG_compname'] = ''

for initname in [n for n in pda.NAME.values]:
    name = initname.lower()
    for key in name_adjust:
        if key in name:
            name = name.replace(key, name_adjust[key])
    #pda.FoG_compname[pda.NAME == initname] = name
    pda.loc[pda.NAME == initname, 'FoG_compname'] = name

Introduce some new columns


In [9]:
pdf.COMPNAME = ''              # simplified name 
pda.MATCH_RATIO = np.nan       # string matching ratio 
pda.MATCH_NAME = ''            # Name of the FoG glacier that matches Mauro's name best
pda.CLOSEST = ''               # closest FoG glacier point
pda.DIST_TO_CLOSEST = np.nan   # distance of Mauro's point to the closest FoG point 
pda.AREA_CLOSEST = np.nan      # column with the area of Mauro's glacier found by string matching 
pda.AREA_MATCH = np.nan        # column with the area of Mauro's glacier found by string matching
pda.AREA = np.nan              # Area that will be grabbed from the PDB file, if present

Adjust also Mauro's names to make them comparable


In [10]:
for idx,cols in pdf.iterrows():
    # simplify name
    compname_mau = cols.Glacier_name_SGI2010.lower()
    for key in name_adjust:
        if key in compname_mau:
            # replace the umlauts etc.
            compname_mau = compname_mau.replace(key, name_adjust[key])
    
    # delete the bracket stuff in order to improve the ratio
    start = compname_mau.find('(')
    if start != -1:
        compname_mau = compname_mau[0:start]
    compname_mau = compname_mau.replace('*', '')
    compname_mau = compname_mau.strip()

    pdf.loc[pdf.Glacier_name_SGI2010 == cols.Glacier_name_SGI2010, 'COMPNAME'] = compname_mau

In [11]:
pdfll.COMPNAME = ''              # simplified name

In [12]:
for idx,cols in pdfll.iterrows():
    # simplify name
    compname_mau = cols.Names.lower()
    for key in name_adjust:
        if key in compname_mau:
            # replace the umlauts etc.
            compname_mau = compname_mau.replace(key, name_adjust[key])
    
    # delete the bracket stuff in order to improve the ratio
    start = compname_mau.find('(')
    if start != -1:
        compname_mau = compname_mau[0:start]
    compname_mau = compname_mau.replace('*', '')
    compname_mau = compname_mau.strip()

    pdfll.loc[pdfll.Names == cols.Names, 'COMPNAME'] = compname_mau

Find matching glaciers for the 159 swiss glaciers in PDA


In [ ]:
#if os.path.exists('assigned_automated.csv'):
#    pass

#else:
for fidx,fcols in pda.iterrows():

    # create an AREA column entry (from the PDB ("state") table)
    area_match = np.nan
    try: # take the latest area entry
        area_match = pdb[pdb.WGMS_ID == fcols.WGMS_ID].AREA.values[~np.isnan(pdb[pdb.WGMS_ID == fcols.WGMS_ID].AREA.values)][-1]
    except IndexError:
        area_match = np.nan
    pda.loc[pda.WGMS_ID == fcols.WGMS_ID, 'AREA'] = area_match


    # find biggest ratio of string matching and insert
    ratio = 0.0
    name = ''
    for cname in pdfll['COMPNAME'].values:
        curr_ratio = SequenceMatcher(None, cname, fcols.FoG_compname).ratio()

        if curr_ratio > ratio or ratio==0.0:  #the latter in order to catch the initial case
            ratio = curr_ratio
            name = cname

    pda.loc[pda.NAME == fcols.NAME, 'MATCH_RATIO'] = ratio
    pda.loc[pda.NAME == fcols.NAME, 'MATCH_NAME'] = name

    # insert the area (at t2, because this is the latest) of the glacier found by string matching
    pda.loc[pda.NAME == fcols.NAME, 'AREA_MATCH'] = pdfll[pdfll['COMPNAME'] == name]['area(km2)'].iloc[0]


    #find closest pdf glacier
    dist = np.nan
    close_name = ''
    for pdf_idx, pdf_cols in pdfll.iterrows():
        lat = pdfll[pdfll.Names == pdf_cols.Names]['y WGS84)'].values[0]
        lon = pdfll[pdfll.Names == pdf_cols.Names]['Location(x WGS84)'].values[0]
        curr_dist = haversine(lon, lat, fcols.LONGITUDE, fcols.LATITUDE)

        if curr_dist < dist or pd.isnull(dist): # the second is for the initial loop
            dist = curr_dist
            close_name = pdf_cols.COMPNAME

    pda.loc[pda.NAME == fcols.NAME, 'DIST_TO_CLOSEST'] = dist
    pda.loc[pda.NAME == fcols.NAME, 'CLOSEST'] = close_name

    print(fcols.NAME)
    # insert the area (at t2, because this is the latest) of the glacier ehic is found to be the closest
    pda.loc[pda.NAME == fcols.NAME, 'AREA_CLOSEST'] = pdfll[pdfll['COMPNAME'] == close_name]['area(km2)'].iloc[0]


    pda.to_csv('assigned_automated.csv')

In [14]:
pda.to_csv('assigned_automated.csv')

Okay the automated assignment is not always good (high polygon density, too many similar names...) => Check everything manually


In [15]:
i = 'BREITHORN'

In [16]:
ID = pda[pda.NAME == i].WGMS_ID.iloc[0]

In [17]:
pda[pda.NAME == i]#[['LATITUDE', 'LONGITUDE']]


Out[17]:
POLITICAL_UNIT NAME WGMS_ID RIVER_BASIN FREE_POSITION LOCAL_CODE LOCAL_PSFG GEN_LOCATION SPEC_LOCATION LATITUDE ... GEO-REGION_CODE GEO-SUBREGION_CODE FoG_compname AREA MATCH_RATIO MATCH_NAME AREA_MATCH DIST_TO_CLOSEST CLOSEST AREA_CLOSEST
711 CH BREITHORN 2311 4R014 4M 19 NaN NaN NaN 46.48 ... CEU CEU-01 breithorn NaN 0.947368 breithorng 2.71188 143.257626 tschingelhorn-ne 0.01787

1 rows × 28 columns


In [18]:
pdb[pdb.where(~pd.isnull(pdb.AREA)).WGMS_ID == ID].tail(10)#[['NAME', 'LENGTH', 'AREA']]


Out[18]:
POLITICAL_UNIT NAME WGMS_ID YEAR HIGHEST_ELEVATION MEDIAN_ELEVATION LOWEST_ELEVATION ELEVATION_UNC LENGTH LENGTH_UNC AREA AREA_UNC SURVEY_DATE SURVEY_PLATFORM_METHOD PUB_IN_FOG INVESTIGATOR SPONS_AGENCY REFERENCE REMARKS

In [360]:
pdfll[pdfll['Names'].str.contains('mont collon', case=False)]


Out[360]:
ID acquisition date Location(x WGS84) y WGS84) Location(x CH1903 y CH1903) area(km2) min_elevation (masl) max_elevation (masl) median_elevation (masl) average_elevation (masl) length (km) average_slope (degree) aspect Names Glacier_name_SGI2010 year COMPNAME
1081 B73_13 2010 7.50573 45.976799 605183.4 91865.6 0.16359 3420 3615 3572 3561.0 0.373 25.8 NW Mont Collon N-I* B73/13 2010 mont collon n-i
1082 B73_14 2010 7.49025 45.975102 603802.9 91572.5 5.43500 2168 3649 3086 3066.0 5.106 16.2 NW MONT COLLON GLACIER DU (Teilgl. von B73/32n) B73/14 2010 mont collon g du
1092 B73_31n 2010 7.50832 45.982201 605385.7 92357.7 0.02378 3067 3252 3169 3166.0 0.183 42.7 N Mont Collon-N-II* B73/31n 2010 mont collon-n-ii

In [225]:
links = {
    'A NEUVE GL. L\'':(['A NEUVE GLACIER DE L\'-S','A NEUVE GLACIER DE L\'-N'],['a neuve g de l\'-s','a neuve g de l\'-n'],['B85/07','B85/08'],True), # no area in FoG
    'ADLER':('ADLERGLETSCHER (Teilgl. von B56/03)','adlerg','B56/14n', True),  # no area in FoG, but quite obvious
    'ALBIGNA':('ALBIGNA VADREC D\' (Nr. 116)','albigna vadrec d\'', 'C84/16', True),  # ok, error in area in pdb  
    'ALLALIN':('Allalingletscher* (Teilgl. von B52/66n)','allaling', 'B52/29', True), #ok area 9.68/9.17
    'ALPETLI(KANDER)':('KANDERFIRN (Teilgl. von A55B/29n; Nr. 109)','kanderfirn','A55B/13',True),  # ok area 14/12
    'ALTELS':(['Altels-S','Altels-NW','Altels-SE'],['altels-s','altels-nw','altels-se'],['A55C/04','A55C/03','A55C/18n'],True),     # no area in FoG 
    'AMMERTEN':(['Ammerten-W','AMMERTEN-E (Nr.111)'],['ammerteng-w','ammerteng-e'],['A55F/07n','A55F/01'],True),   # ok area 1.89/(0.55+0.22)
    'AROLLA (BAS)':('MONT COLLON GLACIER DU (Teilgl. von B73/32n)','mont collon g du','B73/14',True),  # no equivalent in Mauro's DB (is included in Glacier du Mont Collon B73/14)
    'BALMHORN':('BALMHORNGLETSCHER (Teilgl. von A55B/42n)','balmhorng','A55B/18',True),  # ok (area: 1.7/1.9)
    'BASODINO':('BASODINO GH. DEL (Nr. 104)','basodino gh. del','C14/10',True),  # area bigger in Mauro's DB, even for only BASODINO GH. DEL (Nr. 104) (1.84/1.89). What to do with basodino-N and basodino-NW?
    'BELLA TOLA':('BELLA TOLA GLETSCHER (Nr. 21)','bella tola g','B61/02',True),  # ok even though areas 0.31/0.07
    'BIDER':('BIDERGLETSCHER','biderg','B53/08',True),  # ok (no area in FoG, but unique)
    'BIFERTEN':('BIFERTENFIRN (Nr. 77)','bifertenfirn','A50I/12',True),  # ok (area: 2.86/2.5)
    'BIRCH':('BIRCHGLETSCHER','birchg','B32/06',True),   # ok even though area 0.54/0.22
    'BIS':('BISGLETSCHER (Nr. 107)','bisg','B58/08',True),    # ok even though area 4.79/3.82
    'BLUEMLISALP':('BLÜMLISALPGLETSCHER (Nr. 64)','bluemlisalpg','A55B/02',True), # ok due to lat/lon area 2.22/2.98
    'BODMER':('BODMER','bodmerg','C02/02',True),   # Link should be okay but area 0.64/0.32
    'BOVEYRE':('BOVEIRE GLACIER DE (Nr. 41)','boveire g de','B84/04',True),  # ok (area 1.99/1.62)
    'BREITHORN':('BREITHORNGLETSCHER','breithorng','A54M/19',True),   # Wetterlückengletscher (not in FoG) is added and oberer breithorngletscher becomes separate, no area/length in FoG for BREITHORN
    'BRENEY':('BRENAY GLACIER DU (Nr. 36)','brenay g du','B82/19',True),   # ok areas 9.8/7.1
    'BRESCIANA':('BRESCIANA VADRECC DI (Nr. 103)','bresciana vadrecc di','C44/02',True),  # ok even though area 0.94/0.48
    'BRUNEGG':('BRUNEGGGLETSCHER (Teilgl. von B60/09; Nr. 20)','bruneggg','B60/20n',True),  # ok areas 6.1/5.5
    'BRUNNI':('BRUNNIFIRN (Nr. 72)','brunnifirn','A51D/15',True),  # ok areas 2.99/2.31
    'CALDERAS':('CALDERAS VADRET (Nr. 95)','calderas v','E35/17',True),   # ok even though areas 1.2/0.66
    'CAMBRENA':('Cambrena Vadret dal (Teilgl. von C93/09)','cambrena v dal','C93/11n',True),  # ok area 1.77/1.26; Cambrena-E* (Teilgl. von C93/09) C93/08 excluded due to FoG lat/lon lying upstream 
    'CAVAGNOLI':('CAVAGNÖÖ GH. DEL (Nr. 119)','cavagnoeoe gh. del','C14/17',True), # ok due to lat/lon area 1.32/0.44 (ok as compared with SGI1973)
    'CHEILLON':('CHEILON GLACIER DE (Nr. 29)','cheilon g de','B74/08',True), # okay even though area 4.73/3.6. Spelling?
    'CLARIDENFIRN':(['CLARIDEN FIRN-II','CLARIDEN FIRN-IV*','CLARIDEN FIRN-I','CLARIDEN FIRN-I (Spitzalpelifirn)*','CLARIDEN FIRN-III*'],['clariden firn-ii','clariden firn-iv','clariden firn-i','clariden firn-iii'],['A50I/19','A50I/20','A50I/23n','A50I/24n'],True),  # area in FoG-D:5.12, but consistent over time even though volume changes!?
    'CORBASSIERE':('Corbassière (Teilgl. von B83/03)*','corbassiere','B83/15n',True),  # ok. unclear if also Combin de Corbassière-E (Teilgl. von B83/03)* is meant (area: 0.4km2), but separated by ridge
    'CORNO':('CORNO GH. DEL (Nr. 120)','corno gh. del','C34/01',True),  # ok even though area 0.27/0.1
    'CROSLINA':('CROSLINA GRANDE GH. DI (Nr. 121)','croslina grande gh. di','C32/02',True), # ok even though area 0.47/0.11 (lat/lon!)
    'DAMMA':('DAMMAGLETSCHER (Nr. 70)','dammag','A51F/10',True),  # ok even though area 6.32/4.24
    'DOLENT GL. DU':('DOLENT GLACIER DU','dolent g du','B85/04',True),  # ok even though noe area given in FoG
    'DUNGEL':('TUNGELGLETSCHER (Teilgl. von A56D/09n, Nr. 112)','tungelg','A56D/01',True),  #ok area 1.2/0.93
    'EIGER':('EIGERGLETSCHER (Nr. 59)','eigerg','A54M/03',True),   # ok area 2.27/1.53
    'EIGER (WEST)':('Eiger Südwestgrat*','eiger suedwestgrat','A54M/02',True),  # unclear, possibly  A54M/02
    'EN DARREY':('EN DARREY GLACIER DE L\' (Nr. 30)','en darrey g de l\'','B74/11',True), #ok (areas 1.86/1.28)
    'FEE NORTH':(['FEEGLETSCHER-S-I','FEEGLETSCHER-S-II','Feegletscher-N-I (Alphubel)* (Teilgl. von B53/16n)','Feegletscher-N-I (Täschhorn)* (Teilgl. von B53/16n)','Feegletscher-N-I (Dom)* (Teilgl. von B53/16n)','FEEGLETSCHER-N-II',],['feeg-s-i','feeg-s-ii','feeg-n-i','feeg-n-i','feeg-n-i','feeg-n-ii'],['B53/14n','B53/15n','B53/17n','B53/18n','B53/19n','B53/20n'],True),  # Feechopf-W* B55/17 ???
    'FERPECLE':('FERPÈCLE GLACIER DE (Nr. 25)','ferpecle g de','B72/11',True),  # ok areas 9.79/9.0
    'FIESCHER':('FIESCHERGLETSCHER VS (Teilgl. von B40/14n, Nr. 4)','fiescherg vs','B40/07',True), # ok area 33.06/29.48
    'FINDELEN':('Findelengletscher * (Teilgl. von B56/03)','findeleng','B56/16n',True),  # ok even though area 19/14
    'FIRNALPELI':(['FIRNALPELIGLETSCHER-E (Nr. 75)','FIRNALPELIFIRN'],['firnalpelig-e','firnalpelifirn'],['A51H/13','A51H/23n'],True),  # FoG lat/lon unclear; must be this combination due to elevation extent
    'FORNO':('FORNO VADREC DEL (Nr. 102)','forno vadrec del','C83/12',True), # ok area 8.7/5.3; NICHT Ofenhorn-W* (lat/lon!)
    'GAMCHI':('GAMCHIGLETSCHER (Nr. 61)','gamchig','A55A/04',True),  # ok area (1.73/1.23)
    'GAULI':('GAULIGLETSCHER (Teilgl. von A54I/19n) Nr. 52','gaulig','A54I/05',True),  # ok (area 13.7/11.4)
    'GELTEN':('GELTENGLETSCHER-W','gelteng-w','A56D/05',True),  # ok should receive the length changes from "GELTEN" entry
    'GIETRO':('GIETRO GLACIER DU (Nr. 37)','gietro g du','B82/14',True),  # ok (area 5.54/5.16)
    'GLAERNISCH':('GLÄRNISCHFIRN (Nr. 80)','glaernischfirn','A50K/04',True), # ok (area 2.09/1.41)
    'GORNER':('GRENZGLETSCHER (Teilgl. von B56/07)','grenzg','B56/22n',True),  # problem: gorner not in Mauro's DB => it's named as "Grenz" there
    'GRAND DESERT':('GRAND DESERT (Nendaz*) (Nr. 31)','grand desert','B75/06',True),  # ok area 1.85/1.06
    'GRAND PLAN NEVE':('PLAN NEVE-E (Nr. 45)','plan neve-e','B17/03',True), # is plan neve-e (area 0.12/0.18) due to lat/lon of FoG point, elevation confirms
    'GRIES':('GRIESGLETSCHER (Nr. 3)','griesg','B45/04',True),  # ok (area 4.83/4.78)
    'GRIESS(KLAUSEN)':(['GRIESSFIRN-I (Nr. 74)','Griessfirn-II*'],['griessfirn-i','griessfirn-ii'],['A51C/02','A51C/01'],True),  # probably more:   and  
    'GRIESSEN(OBWA.)':('GRIESSENFIRN','GRIESSENFIRN','A51H/02',True),  # ok (area 1.27/0.86)
    'GRIESSERNU':('GRIESSERNU GLETSCHER','griessernu g','C02/06',True), # ok even though no area in FoG
    'GROSSER ALETSCH':('GROSSER ALETSCH GLETSCHER (Teilgl. von B36/49n) Nr. 5','grosser aletsch g','B36/26',True), # ok (area 81.3/78.3)
    'GRUBEN':(['GRÜEBUGLETSCHER-N-II (Teilgl. von B51/17n)','GRÜEBUGLETSCHER-S (Teilgl. von B51/17n)','GRÜEBUGLETSCHER-N-I  (Teilgl. von B51/17n)'],['grueebug-n-ii','grueebug-s','grueebug-n-i'],['B51/04','B51/05','B51/16n'],True), # ok area (1.32/(0.18+0.94+0.08)), NAME SHOULD  BE CHANGED IN FoG
    'GUTZ':('GUTZGLETSCHER','gutzg','A54L/02',True),  # ok even though no area in FoG
    'HANGENDE':('HANGENDE GLETSCHER','hangende g', 'B52/27',True), # ok due to lat/lon
    'HINDRE SCHMADRI':('HINDRE SCHMADRIGLETSCHER','hindre schmadrig','A54M/16',True),  # ok no area in FoG
    'HOHLAUB':(['Hohlaubgrat-E* (Teilgl. von B52/67n)','Hohlaub Gletscher* (Teilgl. von B52/67n)'],['hohlaubgrat-e','hohlaub g'],['B52/31','B52/32'],True), # both  and 
    'HOMATTU':(['HOMATTUGLETSCHER-II','HOMATTUGLETSCHER-I'],['homattug-ii','homattug-i'],['B47/05','C04/01'],True), # ok no area in FoG  
    'HUEFI':('HÜFIFIRN (Nr. 73)','huefifirn','A51D/10',True),  # ok (area 13.73/12.72)
    'KALTWASSER':('CHALTWASSERGLETSCHER (Nr. 7)','chaltwasserg','B47/04',True),  # ok (areas 18.5/1.49)
    'KEHLEN':('CHELENGLETSCHER (Totalgl.; Nr. 68)','cheleng','A51F/15',True),  # ok even though area 1.73/3.15
    'KESSJEN':('CHESSJENGLETSCHER-E (Nr. 12)','chessjeng-e','B52/33',True), #ok (areas 0.19/0.19) there is now chessjengl.-w!
    'LAEMMERN (WILDSTRUBEL)':('WILDSTRUBELGLETSCHER (Teilgl. von A55C/24n) Nr. 63','wildstrubelg','A55C/13',True), # ok even though area 3.15/2.34
    'LANG':('Langgletscher (Totalgl.; Nr. 18)','langg','B31/19n',True),  # ok areas 10.03/8.26
    'LAVAZ':(['LAVAZ GLATSCHER DA (Nr. 82)','Lavaz-W*'],['lavaz glatscher da','lavaz-w'],['A14F/15','A14F/16'],True),  # ok area 1-76/(0.7+0.09)
    'LENTA':('LÄNTAGLETSCHER (Nr. 84)','laentag','A14D/17',True),  # ok area 1.4/0.81
    'LIMMERN':('LIMMERNFIRN (Nr. 78)','limmernfirn','A50I/06',True), # ok (area 2.41/1.89)
    'LISCHANA':(['TRIAZZA VADRET DA'],['triazza v da'],['E02/05'],True),  # vadret da triazza is one of the two remnants (one is no longer mapped); LEAVE BRACKETS IN ORDER TO ASSIGN PARENT ID CORRECTLY!
    'MAIGHELS EAST BRANCH':('MAIGHELS GLATSCHER DA-E','maighels glatscher da-e','A14I/04',True),  # ok but no area in FoG
    'MAIGHELS WEST BRANCH':('MAIGHELS GLATSCHER DA-W','maighels glatscher da-w','A14I/05',True),  # ok but no area in FoG
    'MARTINETS':('MARTINETS GLACIER DES (Nr. 46)','martinets g des','B17/08',True), # ok area 0.59/0.36
    'MINSTIGER':('MINSTIGERGLETSCHER','minstigerg','B41/07',True),  # ok areas 3.09/2.25
    'MITTELALETSCH':('MITTELALETSCHGLETSCHER (Teilgl. von B36/49n) Nr. 106','mittelaletschg','B36/21',True), # ok area 8.5/6.8
    'MOIRY':('MOIRY GLACIER DE (Nr. 24)','moiry g de','B64/02',True), # ok area 6.11/4.89
    'MOMING':('MOMING GLACIER DE (Nr. 23)','moming g de','B62/10',True),  # ok , maybe also Pointe Nord de Moming-SE* and Blanc de Moming-W*
    'MONT DURAND':('MONT DURAND GLACIER DU (Nr. 35)','mont durand g du','B82/36',True),  # ok area 7.59/6.09
    'MONT FORT (ZINAL)':(['PETIT M. FORT GLACIER DU','M. FORT GLACIER DU','Mont Fort-E*'],['petit m. fort g du','m. fort g du','mont fort-e'],['B75/07','B75/09','B75/16n'],True),  # not clear, probably PETIT M. FORT GLACIER DU (B75/07), but area is still too low
    'MONT MINE':('MONT MINÉ GLACIER DU (Nr. 26)','mont mine g du','B72/15',True), # ok areas 10.3/9.9
    'MONTO MORO GL.':('Monte Moro-W*','monte moro-w','B52/21',True),  # only a remnant obviously
    'MORTERATSCH':('MORTERATSCH VADRET DA (Totalgl.; Nr. 94)','morteratsch v da','E22/03',True), # ok areas 17/15
    'MURTEL':('MURTEL VADRET DAL','murtel v dal', 'E23/16',True),  # ok, NOT E24/04!!! 
    'MUTT':('MUTTGLETSCHER (Nr. 2)','muttg','B44/03',True), # ok, area 0.57/0.36
    'MUTTEN':('Muttengletscher* (Teilgl. von A51E/23)','mutteng','A51E/56n',True), #ok, no areas in FoG
    'OB.GRINDELWALD':('OBERER GRINDELWALDGLETSCHER (Nr. 57)','oberer grindelwaldg','A54L/04',True), # ok areas 10.07/8.41
    'OBERAAR':('OBERAARGLETSCHER (Teilgl. von A54G/35n) Nr. 50','oberaarg','A54G/03',True), # ok areas 5.23/4.10
    'OBERALETSCH':('OBERALETSCH GLETSCHER (Totalgl.; Nr. 6)','oberaletsch g','B36/01',True),  # ok areas 21/17
    'OFENTAL':('OFENTAL GLETSCHER (Nr. 9)','ofental g','B52/17',True),  #ok even though areas 0.4/0.04...possibly one remnant missing in Mauro's DB (see swisstopo)
    'OTEMMA':('Otemma (Teilgl. von B82/27)*','otemma','B82/51n',True),  # ok areas 16.55/12.59
    'PALUE':('Palü Vadret da (Teilgl. von C93/04)','palue v da','C93/10n',True),  # ok areas 6.62/5.26
    'PANEYROSSE':('PANEIROSSE GLACIER DE (Nr. 44)','paneirosse g de','B17/02',True),  # ok areas 0.45/0.3
    'PARADIES':('PARADIESGLETSCHER (Nr. 86)','PARADIESGLETSCHER (Nr. 86)','A13N/06',True),  # ok 4.6/1.8
    'PARADISINO (CAMPO)':('CAMP VEDREIT DA (Nr. 101)','camp vedreit da','C95/01',True),  # ok area 0.55/0.26
    'PIERREDAR':('PIERREDAR GLACIER DE (Nr. 49)','pierredar g de','B16/05',True), # ok areas 0.67/0.3
    'PIZOL':('PIZOLGLETSCHER (Nr. 81)','pizolg','A50D/01',True), # ok areas 0.32/0.08
    'PLAINE MORTE':(['GLACIER DE LA PLAINE MORTE (Nr. 65)','PLAINE MORTE-W GLACIER DE LA','PLAINE MORTE-E GLACIER DE LA'],['g de la plaine morte','plaine morte-w g de la','plaine morte-e g de la'],['A55F/03','B23/09n','B24/04n'],True), # no area in FoG
    'PLATTALVA':('GRIESSFIRN-N (Plattalva, Nr. 114)','griessfirn-n','A50I/07',True),  # ok area 0.71/0.33
    'PORCHABELLA':('PORCHABELLA VADRET DA (Nr. 88)','porchabella v da','A12E/04',True),  # ok area 2.59/1.67
    'PRAPIO':('PRAPIO GLACIER DU (Nr. 48)','prapio g du','B16/03',True),   # ok area 0.36/0.21
    'PUNTEGLIAS':('Bündner Tödi*','buendner toedi','A14M/08',True),  #  sounds strange but ok area 0.93/0.67
    'RHONE':('Rhonegletscher* (Teilgl. von B43/03)','rhoneg','B43/12n',True),  # ok areas 15.8/15.31
    'RIED':('RIEDGLETSCHER (Nr. 17)','riedg','B54/03',True),  # ok areas 8.26/7.31
    'ROSEG':('ROSEG VADRET DA (Nr. 92)','roseg v da','E23/11',True),  # ok areas 8.71/6.81
    'ROSENLAUI':('ROSENLAUIGLETSCHER (Nr. 56)','rosenlauig','A54J/02',True),  # ok areas 5.9/5.4
    'ROSSBODEN':('ROSSBODEGLETSCHER (Nr. 105)','rossbodeng','C02/04',True),  # ok areas 1.89/1.18
    'ROTFIRN NORD':('Schattmigstock* (Rotfirn-N, Nr. 69)','schattmigstock','A51F/13',True),  #  ok area 1.21/0.91
    'ROTTAL':('ROTTALGLETSCHER-NW (Teilgl. von B52/02)','rottalg-nw','B52/37n',True),  #  ok no area in FoG
    'SALEINA':('Saleina* (Teilgl. von B85/16)','saleina','B85/29n',True),  # is , but area is too high (6.54) compared to FoG (5.03)
    'SANKT ANNA':('ST. ANNAFIRN (Nr. 67)','st. annafirn','A51E/12',True),  # ok even though areas 0.41/0.22 (might be debris problem)
    'SARDONA':('Sardonagletscher-II*','sardonag-ii','A15B/06n',True),  # should be Sardonagletscher-II*, but area is too high (0.45, FoG only 0.38)
    'SCALETTA':('SCALETTAGLETSCHER (Nr. 115)','scalettag','A12D/03',True),  # ok even though area 0.66/0.21 (debris?)
    'SCHOENBUEHL GL.':(['SCHÖNBÜHLGLETSCHER-SE','SCHÖNBÜHLGLETSCHER-NW'],['schoenbuehlg-se','schoenbuehlg-nw'],['B36/31','B36/56n'],True),  # FoG-D area (1957):1.43/(0.57+0.43)
    'SCHWARZ':('SCHWARZGLETSCHER (Nr. 62)','schwarzg','A55C/05',True),  #  okay areas 1.6/1.09
    'SCHWARZBACH':('SCHWARZBACHFIRN','schwarzbachfirn','A51E/08',True),  #  okay (not area in FoG)
    'SCHWARZBERG':('Schwarzberggletscher* (Teilgl. von B52/24)','schwarzbergg','B52/63n',True),  #  ok area 5.17/5.33
    'SESVENNA':('Sesvenna Vadret da-E (Teilgl. von E03/04)','sesvenna v da-e','E03/11n',True),  #  ok area 0.67/0.33
    'SEEWJINEN':('SEEWJINEN GLETSCHER','seewjinen g','B52/22',True),  # ok no area in FoG
    'SEX ROUGE':('SEX ROUGE GLACIER DU (Nr. 47)','sex rouge g du','B16/01',True), # ok even though area 0.72/0.27
    'SILLERN':('SILLERE GLETSCHER','sillere g','A55B/11',True),  #  ok (no area in FoG)
    'SILVRETTA':('SILVRETTAGLETSCHER (Nr. 90)','silvrettag','A10G/05',True),  # ok areas 2.74/2.67
    'SIRWOLTE':('Griessernuhorn-N*','griessernuhorn-n','C03/04',True),  #should be Griessernuhorn-N* (c03/04) => NAME CHANGE IN FoG
    'STEIN':('STEINGLETSCHER (Nr. 53)','steing','B47/01',True),  # ok even though area in Mauro's DB (5.68) slightly bigger than in FoG (5.6)
    'STEINLIMMI':('STEINLIMIGLETSCHER (Nr. 54)','steinlimig','A54E/13',True),  # ok areas 2.21/1.59
    'SULZ':('HINTERSULZFIRN (Nr. 79)','hintersulzfirn','A50I/02',True),  #  must be HINTERSULZFIRN (Nr. 79), A50I/02,  (lat/lon), but area is bigger (0.26) than in FoG (0.2)
    'SURETTA':(['SURETTAGLETSCHER-E (Piz Por*)','SURETTAGLETSCHER-W (Hauptgl., Nr. 87)'],['surettag-e','surettag-w'],['A13I/01','A13I/02'],True), # must be ,  and maybe also Suretta Lückli*. FoG point unclear
    'TIATSCHA':('TIATSCHA VADRET (La Cudera, Nr. 96)','tiatscha v','E50/07',True), # okay areas 2.11/1.82
    'TIEFEN':('TIEFENGLETSCHER (Nr. 66)','tiefeng','A51E/37', True), # ok even though area 3.17/1.99
    'TOURNELON BLANCE':(['Tournelon Blanc-SE*','Tournelon Blanc-E*','Tournelon Blanc-NE*','Col du Tournelon Blanc*'],['tournelon blanc-se','tournelon blanc-e','tournelon blanc-ne','col du tournelon blanc'],['B82/42','B82/43','B82/44','B82/53n'],True),  # defined as given polygons as only special event in FoG 
    'TRIENT':('TRIENT GLACIER DU (Nr. 43)','trient g du','B90/02',True),  # ok area 6.58/5.82
    'TRIEST GL.':('DRIESTGLETSCHER','driestg','B36/17',True),  # ok no area in FoG
    'TRIFT (GADMEN)':('TRIFTGLETSCHER (Nr. 55)','triftg','A54E/24',True),  # ok area 15.33/14.9
    'TSANFLEURON':('TSANFLEURON GLACIER DE (Nr. 33)','tsanfleuron g de','B22/01',True),  # ok area 3.78/2.64
    'TSCHIERVA':(['TSCHIERVA VADRETTIN DA','TSCHIERVA VADRET DA (Nr. 93)'],['tschierva vtin da','tschierva v da'],['E23/04','E23/06'],True),  # area is 6.83/(0.4+5.81); FoG lat/lon suggests to include also the vadrettin
    'TSCHINGEL':(['TSCHINGELFIRN (Nr. 60)','Tschingelspitz-S*','Tschingelgrat-S*'],['tschingelfirn','tschingelspitz-s','tschingelgrat-s'],['A54M/21','A54M/51n','A54M/52n'],True),  # area 6.18/(5.22+0.01+0.006)
    'TSEUDET':('TSEUDET GLACIER DE (Nr. 40)','tseudet g de','B84/17',True),  # ok area 1.76/1.43
    'TSIDJIORE NOUVE':('TSIJIORE NOUVE GLACIER DE (Nr. 28)','tsijiore nouve g de','B73/16',True), # ok even though area 3.12/2.72
    'TURTMANN (WEST)':('TURTMANNGLETSCHER (Teilgl. von B60/09)','turtmanng','B60/21n',True),  # chaos in FoG: sometimes brunegggletscher is included in turtmanngletscher (see state table). TURTMANNGLETSCHER (Teilgl. von B60/09) is only about half (5.5) of turtmann-w in FoG (11km2)
    'UNT.GRINDELWALD':(['OBERS ISCHMEER (Teilgl. von A54L/19)','FIESCHERGLETSCHER BE (Teilgl. von A54L/19)'],['obers ischmeer','fiescherg be'],['A54L/31n','A54L/36n'],True),  # leave Unt. Grindelw. and make it parent
    'UNTERAAR':('UNTERAARGLETSCHER (Teilgl.von A54G/50n) Nr. 51','unteraarg','A54G/11',True), # ok area 22.7/22.5
    'VAL TORTA':('VALTORTA VADRET','valtorta v','E46/06',True),  # ok area 0.17/0.06
    'VALLEGGIA':('VALLEGGIA GH. DI (Nr. 117)','valleggia gh. di','C33/08',True),  # ok area 0.59/0.30
    'VALSOREY':('Valsorey (Teilgl. von B84/15)*','valsorey','B84/27n',True),  #  ok area 2.34/1.9
    'VERSTANKLA':('Verstanclagletscher (Teilgl. von A10G/08)','verstanclag','A10G/24n',True), #ok area 1.06/0.71
    'VORAB':('VORAB GLATSCHER DIL (Nr. 85)','vorab glatscher dil','A14P/01',True), # ok area 2.51/1.22, could also be Vorabsattel-W* (but drain ins other valley)
    'VORDRE SCHMADRI':('VORDRE SCHMADRIGLETSCHER','vordre schmadrig','A54M/15',True),  # ok no area in FoG
    'WALLENBUR':('WALLENBURFIRN (Nr. 71)','wallenburf','A51F/24',True), # ok area 1.7/1.41
    'WANNENHORN GL. N':('WANNENHORNGLETSCHER-NW (Teilgl. von B36/57n)','wannenhorng-nw','B36/32',True), # must be this one due to FoG lat/lon
    'WANNENHORN GL. S':('WANNENHORNGLETSCHER-SE (Teilgl. von B36/57n)','wannenhorng-se','B36/33',True), # must be this one due to FoG lat/lon
    'WITENWASSEREN':('WITENWASSERENGLETSCHER','witenwassereng','A51E/20',True), # no FoG area given....(could also include Witenwasseren-W*, but drains in another valley)
    'ZENBAECHEN GL.':('ZENBAECHENGLETSCHER','zenbaecheng','B36/18',True), # no FoG area given
    'ZINAL':('ZINAL GLACIER DE (Nr. 22)','zinal g de','B63/05',True),  # ok area 16/13.3 
    'ZMUTT':('ZMUTTGLETSCHER (Nr. 15)','zmuttg','B57/05',True),  # ok area 17.4/13.7    
}

# removed:
# 'WANNENHORN GL.':('','','',False),  # is only parent glacier, parent IDs for S/N already present, so can be deleted from dict
# 'TAELLIBODEN':('','','',False),  # no longer digitized in Mauro's inventory => seems to be totally disappeared on orthophoto
# 'RAETZLI (PLAINE MORTE)':('GLACIER DE LA PLAINE MORTE (Nr. 65)','g de la plaine morte','A55F/03',True),  # more or less the same as PLAINE MORTE, min_elevation in PDB except for confusing entry by Huss 2011
# 'FLUCHTHORN GL.':('','','', False), # should be Fluchthorn-NE* B52/26 and maybe also Fluchthorn-E* B52/25 (area 0.37/(0.26+0.01)); problem is hangender ferner inbetween; is taken out due to Michi's suggestion to keep old glacier (unknown which it is and include a duplicate)

Begin to establish new dataset containing the columns from the CHANGE file

1) Determine which glaciers glaciers are already in FoG, which are problematic and which need a new ID


In [227]:
take_over_id = {}                                    # those glaciers that can take over a FoG ID
problems = {}                                        # those glaciers where it is unclear whether they can take over an ID 
new_id = pdfll.Glacier_name_SGI2010.values.tolist()  # those glaciers from Mauro's DB that need a new ID (all - (take_over_id + problems))

print(len(new_id))
print('A54E/24' in new_id)
for key,value in links.items():
    # assign take over/problems
    if value[-1] == True:
        take_over_id[key] = value
    else:
        problems[key] = value
        
    # check which still need a new ID
    if value[-1] == True: 
        if isinstance(value[-2], str):
            new_id.remove(value[-2])
        elif isinstance(value[-2], list):
            pass
            for element in value[-2]:
                new_id.remove(element)
        else:
            raise ValueError('%s neither list nor string' % value[-2])
        
        
print(sum([len(problems[key][-2]) for key,value in problems.items()])) # sum of all SGI2010 short names => should be zero for problems
print(sum([len(take_over_id[key][-2]) if isinstance(take_over_id[key][-2],list) else 1 for key,value in take_over_id.items()])) # # sum of all SGI2010 short names
print(len(new_id)) # sum of all SGI2010 short names that need a new ID in FoG


1419
True
0
187
1232

ATTENTION: As long as not all problems are solved, we generate too many new IDs!!

Make some new raw DataFrames and adjust columns


In [338]:
new_pda = pd.DataFrame(columns=pda.columns.values)
new_pdb = pd.DataFrame(columns=pdb.columns.values)
new_pdd = pd.DataFrame(columns=pdd.columns.values)

In [341]:
new_pda = new_pda[['POLITICAL_UNIT', 'NAME', 'WGMS_ID', 'RIVER_BASIN', 'FREE_POSITION',
                                   'LOCAL_CODE', 'LOCAL_PSFG', 'GEN_LOCATION', 'SPEC_LOCATION', 'LATITUDE', 'LONGITUDE', 
                                   'PRIM_CLASSIFIC', 'FORM', 'FRONTAL_CHARS', 'EXPOS_ACC_AREA', 'EXPOS_ABL_AREA', 
                                   'PARENT_GLACIER', 'REMARKS']]#, 'GEO-REGION_CODE', 'GEO-SUBREGION_CODE', 'AREA']]

new_pdb = new_pdb[['POLITICAL_UNIT', 'NAME', 'WGMS_ID', 'YEAR', 'HIGHEST_ELEVATION','MEDIAN_ELEVATION', 
                                   'LOWEST_ELEVATION', 'ELEVATION_UNC', 'LENGTH', 'LENGTH_UNC', 'AREA', 'AREA_UNC', 
                                   'SURVEY_DATE', 'SURVEY_PLATFORM_METHOD', 'INVESTIGATOR', 'SPONS_AGENCY', 
                                   'REFERENCE', 'REMARKS']]

new_pdd = new_pdd[['POLITICAL_UNIT', 'NAME', 'WGMS_ID', 'YEAR', 'LOWER_BOUND','UPPER_BOUND', 
                                   'AREA_SURVEY_YEAR', 'AREA_CHANGE', 'AREA_CHANGE_UNC', 'THICKNESS_CHG', 
                                   'THICKNESS_CHG_UNC', 'VOLUME_CHANGE', 'VOLUME_CHANGE_UNC', 'SURVEY_DATE', 
                                   'SD_PLATFORM_METHOD', 'REFERENCE_DATE', 'RD_PLATFORM_METHOD', 
                                   'INVESTIGATOR', 'SPONS_AGENCY', 'REFERENCE', 'REMARKS']]

Create some new arbitrary IDs, beginning at given number


In [339]:
begin_new_id = 4585  # where to start with assigning new WGMS IDs
new_id_range = list(range(begin_new_id, begin_new_id+len(pdf)+1,1))

Some correction rules to transform Mauro's names to FoG names


In [340]:
name_adjust_FoG = {'gletscher':'', 'ferner':'','gh.':'ghiacciaio', 'gl.':'','teilgl. von':'part of', 'teil ':'part ', ' von ':' of ', '*':'', 'nr.':'no.', 'haupt':'main', 'zus\'gesetzt':'combined', 'nur fläche':'only area', 'nur':'only', 'flaeche':'area', 'sammler':'group of glacierets', 'ost':'e','nord':'n','ä':'ae', 'ö':'oe', 'ü':'ue', 'é':'e', 'à':'a', 'è':'e', 'ô':'o', 'ß':'ss'} # 'firn':'',
hot_list = ['glacier', 'vadret', 'ghiacciaio', 'vedretta', 'glatscher', 'vadret', 'vadrec.'] # vadrec. is important as also 'vadrecc' appears (regex search) 
special_names ={'GLACIER DE LA PLAINE MORTE (Nr. 65)':'PLAINE MORTE, GLACIER DE LA',
             'PLAINE MORTE-W GLACIER DE LA':'PLAINE MORTE GLACIER DE LA-W',
             'PLAINE MORTE-E GLACIER DE LA':'PLAINE MORTE GLACIER DE LA-E'}

Function to convert Mauro's name to FoG format


In [342]:
def name_to_FoG(name, replace_dict, hot_list, special_names):
    
    # check if it is a very special name
    if name in special_names.keys():
        return special_names[name]
    
    # to make it easier comparable
    name = name.lower()
    
    # replace some predefined stuff
    for key in replace_dict.keys():
        #key_regex = re.compile(key)
        #if 'teil' in name:
        #    print(name)
        if key in name:
        #if re.match(key_regex, name):
            name = name.replace(key, replace_dict[key])
        #print(name)
    
    # replace the bracket stuff
    #start = name.find('(')
    #if start != -1:
    #    name = name[0:start]
    #name = name.replace('*', '')
    #name = name.strip()
    
    # if the name is already sorted in the desired way (e.g. 'lai blau glatscher dil'), insert a comma as specified by Michi
    
    
    # reorder the words
    splitname = name.split(' ')
    if len(splitname) >= 3:
        for hotname in hot_list:
            hotname_regex = re.compile(hotname)  # regex search due to "vadrecc" problem
            if re.match(hotname_regex, name) and len(splitname)>1:
                print(name, hotname,splitname)
                try:
                    resort_ix = splitname.index(hotname)
                except:
                    resort_ix = splitname.index(hotname_regex)
                if resort_ix == 0: # e.g. "glacier de BLA"
                    name = splitname[-1]+', '+' '.join(splitname[:-2])
                    print(name)
                elif resort_ix == 1: # e.g. "BLA glacier de"
                    name = splitname[0]+', '+' '.join(splitname[1:])
                else:
                    pass
    
    # strip name (there might be spaces from string rebuilding)
    name = name.strip()
    return name.upper()

Constants that can be inserted glacier-independent


In [343]:
# some constants
POL_UNIT = 'CH'
RGI_REGION = 'Central Europe'
RGI_SUBREGION = 'Alps'
MISS_VAL = 9999
SD_PLATFORM_METHOD = 'aC'
RD_PLATFORM_METHOD = 'aC'
SD_METHOD = 'PL'
RD_METHOD = 'M'
REFERENCE = 'Fischer et al. (2014); Arctic, Antarctic and Alpine Research, 46(4), pp.933-945'
AGENCY = 'Department of Geosciences, University of Fribourg, 1700 Fribourg, Switzerland'
INVESTIGATOR = 'Fischer et al.'
PUB_DATE = int(2016)
LENGTH_UNC_FACTOR = 0.05   # actually 2-5%, but specification says "maximum" error
# ELEV_UNC = 3.0   # vertical accuracy 1-3m for swissimage (see Fischer et al. 2014); not used as also digitizing error and terrain steepness plays a role

Function to determine the area uncertainty (unused)


In [344]:
def area_unc(area):
    AREA_UNC_FACTOR_S = 0.076  # for Small glaciers (<1km2)
    AREA_UNC_FACTOR_M = 0.047  # for Medium glaciers (1-5km2)
    AREA_UNC_FACTOR_L = 0.076  # for Large glaciers (>5km2)
    
    if area <1.:
        area_uncert = area * AREA_UNC_FACTOR_S
    elif 1. <= area <= 5.:
        area_uncert = area * AREA_UNC_FACTOR_M
    elif area > 5.:
        area_uncert = area * AREA_UNC_FACTOR_L
    return area_uncert

Function to create new entries with ID already in FoG


In [345]:
def create_entries_id_present(pda, pdd, pdfll, pdf, new_pda, new_pdb, new_pdd, take_over_dict, new_ids):

    for key,val in take_over_id.items():

        # two new DFs for the different FoG files
        glacier_pdd = pd.DataFrame(columns=new_pdd.columns.values)
        glacier_pda = pd.DataFrame(columns=new_pda.columns.values)
        glacier_pdb = pd.DataFrame(columns=new_pdb.columns.values)

        # present WGMS ID
        gid = pda[pda.NAME == key].WGMS_ID.values[0]

        # Mauros glaciers entry: 
        if isinstance(val[-2], str): # if there is only one equivalent: fill only PDB and PDD and leave PDA as it is
            mg = pdf[pdf.Code_SGI2010 == val[-2]]
            mg_ll = pdfll[pdfll.Glacier_name_SGI2010 == val[-2]]
            
            # set the REMARKS (added at the end)
            REMARKS_pdd = ''
            REMARKS_pdb = ''

            if not mg.empty:
                glacier_pdd.loc[gid, 'POLITICAL_UNIT'] = POL_UNIT
                glacier_pdd.loc[gid, 'NAME'] = pda[pda.WGMS_ID == gid].NAME.iloc[0]
                glacier_pdd.loc[gid, 'WGMS_ID'] = gid
                glacier_pdd.loc[gid, 'YEAR'] = mg.t2_year.iloc[0]
                glacier_pdd.loc[gid, 'LOWER_BOUND'] = MISS_VAL
                glacier_pdd.loc[gid, 'UPPER_BOUND'] = MISS_VAL
                glacier_pdd.loc[gid, 'AREA_SURVEY_YEAR'] = round(mg.area_t2_km2.iloc[0],3)
                glacier_pdd.loc[gid, 'AREA_CHANGE'] = round(mg.area_t2_km2.iloc[0] - mg.area_t1_km2.iloc[0]) * 1000  # *1000: unit difference (1000m2 and km2)
                #glacier_pdd.loc[gid, 'AREA_CHANGE_UNC'] = round((area_unc(mg.area_t1_km2.iloc[0]) + area_unc(mg.area_t2_km2.iloc[0]))* 1000, 2)  # error is addition of both errors for t1 and t2 in worst case; *1000 for unit
                glacier_pdd.loc[gid, 'THICKNESS_CHG'] = round(((mg.dvol_mio_m3_between_t1_and_t2.iloc[0] * 10**6) / ((mg.area_t2_km2.iloc[0] + mg.area_t1_km2.iloc[0]) * 0.5 * 10**6)) * 1000, -1) # *1000 for conversion from m thickness change to mm change
                #glacier_pdd.loc[gid, 'THICKNESS_CHG_UNC'] = np.nan  # berechnen? AREA_UNC fehlt und der Fehler durch Mittelung
                glacier_pdd.loc[gid, 'VOLUME_CHANGE'] = round(mg.dvol_mio_m3_between_t1_and_t2.iloc[0] *1000)   # *1000: unit difference (1000m3 / 1000000m3)
                glacier_pdd.loc[gid, 'VOLUME_CHANGE_UNC'] = round(mg.uncertainty_dvol_between_t1_and_t2_mio_m3.iloc[0]) # *1000:unit difference (1000m3 / 1000000m3)
                glacier_pdd.loc[gid, 'SURVEY_DATE'] = int(mg.t2_year *10000 + 9999)  # in order to fulfill the requirements (month/day unknown)
                glacier_pdd.loc[gid, 'SD_PLATFORM_METHOD'] = SD_PLATFORM_METHOD # must be determined
                REMARKS_pdd = REMARKS_pdd + ' Survey date method: %s.' % SD_METHOD
                glacier_pdd.loc[gid, 'REFERENCE_DATE'] = int(mg.t1_year *10000 + 9999)  # in order to fulfill the requirements (month/day unknown)
                glacier_pdd.loc[gid, 'RD_PLATFORM_METHOD'] = RD_PLATFORM_METHOD   # must be determined
                REMARKS_pdd = REMARKS_pdd + ' Reference date method: %s.' % RD_METHOD
                #glacier_pdd.PUB_IN_FOG = PUB_DATE      # not needed in data submission form
                glacier_pdd.loc[gid, 'INVESTIGATOR'] = INVESTIGATOR
                glacier_pdd.loc[gid, 'SPONS_AGENCY'] = AGENCY
                glacier_pdd.loc[gid, 'REFERENCE'] = REFERENCE
                REMARKS_pdd = REMARKS_pdd + ' ID SGI 1973: %s.' % mg.ID_SGI1973.iloc[0]
                REMARKS_pdd = REMARKS_pdd + ' ID SGI 2010: %s.' % mg.Code_SGI2010.iloc[0]
                # at the very end 
                glacier_pdd.loc[gid, 'REMARKS'] = REMARKS_pdd
                
                new_pdd = pd.concat([new_pdd, glacier_pdd], ignore_index=True)

                
                # fill PDB                                           
                glacier_pdb.loc[gid, 'POLITICAL_UNIT'] = POL_UNIT
                glacier_pdb.loc[gid, 'NAME'] = pda[pda.WGMS_ID == gid].NAME.iloc[0]
                glacier_pdb.loc[gid, 'WGMS_ID'] = gid
                glacier_pdb.loc[gid, 'YEAR'] = mg.t2_year.iloc[0]
                glacier_pdb.loc[gid, 'HIGHEST_ELEVATION'] = mg_ll['max_elevation (masl)'].iloc[0]
                glacier_pdb.loc[gid, 'MEDIAN_ELEVATION'] = mg_ll['median_elevation (masl)'].iloc[0]
                glacier_pdb.loc[gid, 'LOWEST_ELEVATION'] = mg_ll['min_elevation (masl)'].iloc[0]
                #glacier_pdb.loc[gid, 'ELEVATION_UNC'] = np.nan
                glacier_pdb.loc[gid, 'LENGTH'] = round(mg_ll['length (km)'].iloc[0],2)
                #glacier_pdb.loc[gid, 'LENGTH_UNC'] = round(mg_ll['length (km)'].iloc[0] * LENGTH_UNC_FACTOR, -1)  # round to next ten meters
                glacier_pdb.loc[gid, 'AREA'] = round(mg_ll['area(km2)'].iloc[0],3)
                #glacier_pdb.loc[gid, 'AREA_UNC'] = round(area_unc(mg.area_t1_km2.iloc[0]), 2)  # round to 0.01km2
                glacier_pdb.loc[gid, 'SURVEY_DATE'] = mg_ll.year.iloc[0]
                glacier_pdb.loc[gid, 'SURVEY_PLATFORM_METHOD'] = SD_PLATFORM_METHOD
                REMARKS_pdb = REMARKS_pdb + ' Survey date method: %s.' % SD_METHOD
                glacier_pdb.loc[gid, 'INVESTIGATOR'] = INVESTIGATOR
                glacier_pdb.loc[gid, 'SPONS_AGENCY'] = AGENCY
                glacier_pdb.loc[gid, 'REFERENCE'] = REFERENCE
                glacier_pdb.loc[gid, 'REMARKS'] = REMARKS_pdb
                                                           
                new_pdb = pd.concat([new_pdb, glacier_pdb], ignore_index=True)

                                                           
        elif isinstance(val[-2], list):  ## multiple polygons with PARENT_GLACIER

            for sgiid in val[-2]:

                # Mauro's glacier
                mg = pdf[pdf.Code_SGI2010 == sgiid]
                mg_ll = pdfll[pdfll.Glacier_name_SGI2010 == sgiid]

                # Parent glacier ID
                pg_id = gid
                # new ID for the entries themselves and pop the index so that it is not used twice
                new_id = new_ids[0]
                new_ids.pop(0)


                # set the REMARKS (added at the end)
                REMARKS_pdd = ''
                REMARKS_pda = ''
                REMARKS_pdb = ''

                if not mg.empty:
                    glacier_pdd.loc[gid, 'POLITICAL_UNIT'] = POL_UNIT
                    glacier_pdd.loc[gid, 'NAME'] = name_to_FoG(mg.Glacier_name_SGI2010.iloc[0], name_adjust_FoG, hot_list, special_names)
                    glacier_pdd.loc[gid, 'WGMS_ID'] = new_id
                    glacier_pdd.loc[gid, 'YEAR'] = mg.t2_year.iloc[0]
                    glacier_pdd.loc[gid, 'LOWER_BOUND'] = MISS_VAL
                    glacier_pdd.loc[gid, 'UPPER_BOUND'] = MISS_VAL
                    glacier_pdd.loc[gid, 'AREA_SURVEY_YEAR'] = round(mg.area_t2_km2.iloc[0],3)
                    glacier_pdd.loc[gid, 'AREA_CHANGE'] = round((mg.area_t2_km2.iloc[0] - mg.area_t1_km2.iloc[0]) * 1000)  # *1000: unit difference (1000m2 and km2)
                    #glacier_pdd.loc[gid, 'AREA_CHANGE_UNC'] = round((area_unc(mg.area_t1_km2.iloc[0]) + area_unc(mg.area_t2_km2.iloc[0]))* 1000, -1)  # error is addition of both errors for t1 and t2 in worst case; *1000 for unit
                    glacier_pdd.loc[gid, 'THICKNESS_CHG'] = round(((mg.dvol_mio_m3_between_t1_and_t2.iloc[0] * 10**6) / ((mg.area_t2_km2.iloc[0] + mg.area_t1_km2.iloc[0]) * 0.5 * 10**6)) * 1000) # *1000 for conversion from m thickness change to mm change
                    #glacier_pdd.loc[gid, 'THICKNESS_CHG_UNC'] = np.nan  # berechnen? AREA_UNC fehlt und der Fehler durch Mittelung
                    glacier_pdd.loc[gid, 'VOLUME_CHANGE'] = round(mg.dvol_mio_m3_between_t1_and_t2.iloc[0] *1000)   # *1000: unit difference (1000m3 / 1000000m3)
                    glacier_pdd.loc[gid, 'VOLUME_CHANGE_UNC'] = round(mg.uncertainty_dvol_between_t1_and_t2_mio_m3.iloc[0]) # *1000:unit difference (1000m3 / 1000000m3)
                    glacier_pdd.loc[gid, 'SURVEY_DATE'] = int(mg.t2_year *10000 + 9999)  # in order to fulfill the requirements (month/day unknown)
                    glacier_pdd.loc[gid, 'SD_PLATFORM_METHOD'] = SD_PLATFORM_METHOD # must be determined
                    REMARKS_pdd = REMARKS_pdd + ' Survey date method: %s.' % SD_METHOD
                    glacier_pdd.loc[gid, 'REFERENCE_DATE'] = int(mg.t1_year.iloc[0] *10000 + 9999)  # in order to fulfill the requirements (month/day unknown)
                    glacier_pdd.loc[gid, 'RD_PLATFORM_METHOD'] = RD_PLATFORM_METHOD   # must be determined
                    REMARKS_pdd = REMARKS_pdd + ' Reference date method: %s.' % RD_METHOD
                    #glacier_pdd.PUB_IN_FOG = PUB_DATE      # not needed in data submission form
                    glacier_pdd.loc[gid, 'INVESTIGATOR'] = INVESTIGATOR
                    glacier_pdd.loc[gid, 'SPONS_AGENCY'] = AGENCY
                    glacier_pdd.loc[gid, 'REFERENCE'] = REFERENCE
                    REMARKS_pdd = REMARKS_pdd + ' ID SGI 1973: %s.' % mg.ID_SGI1973.iloc[0]
                    REMARKS_pdd = REMARKS_pdd + ' ID SGI 2010: %s.' % mg.Code_SGI2010.iloc[0]
                    # at the very end 
                    glacier_pdd.loc[gid, 'REMARKS'] = REMARKS_pdd

                    new_pdd = pd.concat([new_pdd, glacier_pdd], ignore_index=True)
                                                               
                                                               
                    # Fill PDB
                    glacier_pdb.loc[gid, 'POLITICAL_UNIT'] = POL_UNIT
                    glacier_pdb.loc[gid, 'NAME'] = name_to_FoG(mg_ll.Names.iloc[0], name_adjust_FoG, hot_list, special_names)
                    glacier_pdb.loc[gid, 'WGMS_ID'] = new_id
                    glacier_pdb.loc[gid, 'YEAR'] = mg.t2_year.iloc[0]
                    glacier_pdb.loc[gid, 'HIGHEST_ELEVATION'] = mg_ll['max_elevation (masl)'].iloc[0]
                    glacier_pdb.loc[gid, 'MEDIAN_ELEVATION'] = mg_ll['median_elevation (masl)'].iloc[0]
                    glacier_pdb.loc[gid, 'LOWEST_ELEVATION'] = mg_ll['min_elevation (masl)'].iloc[0]
                    #glacier_pdb.loc[gid, 'ELEVATION_UNC'] = np.nan
                    glacier_pdb.loc[gid, 'LENGTH'] = round(mg_ll['length (km)'].iloc[0], 2)
                    #glacier_pdb.loc[gid, 'LENGTH_UNC'] = mg_ll['length (km)'].iloc[0] * LENGTH_UNC_FACTOR
                    glacier_pdb.loc[gid, 'AREA'] = round(mg_ll['area(km2)'].iloc[0],3)
                    #glacier_pdb.loc[gid, 'AREA_UNC'] = round(area_unc(mg.area_t1_km2.iloc[0]), 2)
                    glacier_pdb.loc[gid, 'SURVEY_DATE'] = mg_ll.year.iloc[0]
                    glacier_pdb.loc[gid, 'SURVEY_PLATFORM_METHOD'] = SD_PLATFORM_METHOD
                    REMARKS_pdb = REMARKS_pdb + ' Survey date method: %s.' % SD_METHOD
                    glacier_pdb.loc[gid, 'INVESTIGATOR'] = INVESTIGATOR
                    glacier_pdb.loc[gid, 'SPONS_AGENCY'] = AGENCY
                    glacier_pdb.loc[gid, 'REFERENCE'] = REFERENCE
                    glacier_pdb.loc[gid, 'REMARKS'] = REMARKS_pdb

                    new_pdb = pd.concat([new_pdb, glacier_pdb], ignore_index=True)


                if not mg_ll.empty:
                    glacier_pda.loc[gid, 'POLTITICAL_UNIT'] = POL_UNIT
                    glacier_pda.loc[gid, 'NAME'] = name_to_FoG(mg_ll.Names.iloc[0].upper(),name_adjust_FoG, hot_list, special_names)
                    glacier_pda.loc[gid, 'WGMS_ID'] = new_id 
                    glacier_pda.loc[gid, 'LATITUDE'] = round(mg_ll['y WGS84)'].iloc[0], 6)
                    glacier_pda.loc[gid, 'LONGITUDE'] = round(mg_ll['Location(x WGS84)'].iloc[0], 6)
                    glacier_pda.loc[gid, 'REGION'] = RGI_REGION
                    glacier_pda.loc[gid, 'SUBREGION'] = RGI_SUBREGION
                    glacier_pda.loc[gid, 'PARENT_GLACIER'] = pg_id
                    glacier_pda.loc[gid, 'SGI_2010_ID'] = sgiid

                new_pda = pd.concat([new_pda, glacier_pda], ignore_index=True)

    return new_pda, new_pdb, new_pdd, new_ids

In [346]:
new_pda, new_pdb, new_pdd, new_id_range = create_entries_id_present(pda, pdd, pdfll, pdf, new_pda, new_pdb, new_pdd, take_over_id, new_id_range)


glacier de la plaine morte (no. 65) glacier ['glacier', 'de', 'la', 'plaine', 'morte', '(no.', '65)']
65), glacier de la plaine morte

Function to create entries with ID that has to be introduced into FoG


In [348]:
def create_entries_new_id(pda, pdd, pdfll, pdf, new_pda, new_pdb, new_pdd, new_id_list, new_ids):

    for id_2010 in new_id_list:

        # two new DFs for the different FoG files
        glacier_pdd = pd.DataFrame(columns=new_pdd.columns.values)
        glacier_pda = pd.DataFrame(columns=new_pda.columns.values)
        glacier_pdb = pd.DataFrame(columns=new_pdb.columns.values)

        # Mauro's entry: 
        mg = pdf[pdf.Code_SGI2010 == id_2010]
        mg_ll = pdfll[pdfll.Glacier_name_SGI2010 == id_2010]
        
        # new ID for the entries themselves and pop the index so that it is not used twice
        new_id = new_ids[0]
        new_ids.pop(0)

        # set the REMARKS (added at the end)
        REMARKS_pdd = ''
        REMARKS_pdb = ''

        glacier_pda.loc[new_id, 'POLTITICAL_UNIT'] = POL_UNIT
        glacier_pda.loc[new_id, 'NAME'] = name_to_FoG(mg_ll.Names.iloc[0].upper(),name_adjust_FoG, hot_list, special_names)
        glacier_pda.loc[new_id, 'WGMS_ID'] = int(new_id) 
        glacier_pda.loc[new_id, 'LATITUDE'] = round(mg_ll['y WGS84)'].iloc[0], 6)
        glacier_pda.loc[new_id, 'LONGITUDE'] = round(mg_ll['Location(x WGS84)'].iloc[0], 6)
        glacier_pda.loc[new_id, 'REGION'] = RGI_REGION
        glacier_pda.loc[new_id, 'SUBREGION'] = RGI_SUBREGION
        glacier_pda.loc[new_id, 'SGI_2010_ID'] = id_2010

        new_pda = pd.concat([new_pda, glacier_pda], ignore_index=True)


        if not mg.empty:
            glacier_pdd.loc[new_id, 'POLITICAL_UNIT'] = POL_UNIT
            glacier_pdd.loc[new_id, 'NAME'] = name_to_FoG(mg_ll.Names.iloc[0].upper(),name_adjust_FoG, hot_list, special_names)
            glacier_pdd.loc[new_id, 'WGMS_ID'] = new_id
            glacier_pdd.loc[new_id, 'YEAR'] = mg.t2_year.iloc[0]
            glacier_pdd.loc[new_id, 'LOWER_BOUND'] = MISS_VAL
            glacier_pdd.loc[new_id, 'UPPER_BOUND'] = MISS_VAL
            glacier_pdd.loc[new_id, 'AREA_SURVEY_YEAR'] = round(mg.area_t2_km2.iloc[0],3)
            glacier_pdd.loc[new_id, 'AREA_CHANGE'] = round((mg.area_t2_km2.iloc[0] - mg.area_t1_km2.iloc[0]) * 1000)  # *1000: unit difference (1000m2 and km2)
            #glacier_pdd.loc[new_id, 'AREA_CHANGE_UNC'] = round((area_unc(mg.area_t1_km2.iloc[0]) + area_unc(mg.area_t2_km2.iloc[0]))* 1000, -1)  # error is addition of both errors for t1 and t2 in worst case; *1000 for unit
            glacier_pdd.loc[new_id, 'THICKNESS_CHG'] = round(((mg.dvol_mio_m3_between_t1_and_t2.iloc[0] * 10**6) / ((mg.area_t2_km2.iloc[0] + mg.area_t1_km2.iloc[0]) * 0.5 * 10**6)) * 1000) # *1000 for conversion from m thickness change to mm change
            #glacier_pdd.loc[new_id, 'THICKNESS_CHG_UNC'] = np.nan  # berechnen? AREA_UNC fehlt und der Fehler durch Mittelung
            glacier_pdd.loc[new_id, 'VOLUME_CHANGE'] = round(mg.dvol_mio_m3_between_t1_and_t2.iloc[0] *1000)   # *1000: unit difference (1000m3 / 1000000m3)
            glacier_pdd.loc[new_id, 'VOLUME_CHANGE_UNC'] = round(mg.uncertainty_dvol_between_t1_and_t2_mio_m3.iloc[0]) # *1000:unit difference (1000m3 / 1000000m3)
            glacier_pdd.loc[new_id, 'SURVEY_DATE'] = int(mg.t2_year *10000 + 9999)  # in order to fulfill the requirements (month/day unknown)
            glacier_pdd.loc[new_id, 'SD_PLATFORM_METHOD'] = SD_PLATFORM_METHOD # must be determined
            REMARKS_pdd = REMARKS_pdd + ' Survey date method: %s.' % SD_METHOD
            glacier_pdd.loc[new_id, 'REFERENCE_DATE'] = int(mg.t1_year *10000 + 9999)  # in order to fulfill the requirements (month/day unknown)
            glacier_pdd.loc[new_id, 'RD_PLATFORM_METHOD'] = RD_PLATFORM_METHOD   # must be determined
            REMARKS_pdd = REMARKS_pdd + ' Reference date method: %s.' % RD_METHOD
            #glacier_pdd.PUB_IN_FOG = PUB_DATE      # not needed in data submission form
            glacier_pdd.loc[new_id, 'INVESTIGATOR'] = INVESTIGATOR
            glacier_pdd.loc[new_id, 'SPONS_AGENCY'] = AGENCY
            glacier_pdd.loc[new_id, 'REFERENCE'] = REFERENCE
            REMARKS_pdd = REMARKS_pdd + ' ID SGI 1973: %s.' % mg.ID_SGI1973.iloc[0]
            REMARKS_pdd = REMARKS_pdd + ' ID SGI 2010: %s.' % mg.Code_SGI2010.iloc[0]
            # at the very end 
            glacier_pdd.loc[new_id, 'REMARKS'] = REMARKS_pdd

            new_pdd = pd.concat([new_pdd, glacier_pdd], ignore_index=True)
                                                          
                                                          
            # Fill PDB
            glacier_pdb.loc[new_id, 'POLITICAL_UNIT'] = POL_UNIT
            glacier_pdb.loc[new_id, 'NAME'] = name_to_FoG(mg_ll.Names.iloc[0].upper(),name_adjust_FoG, hot_list, special_names)
            glacier_pdb.loc[new_id, 'WGMS_ID'] = new_id
            glacier_pdb.loc[new_id, 'YEAR'] = mg.t2_year.iloc[0]
            glacier_pdb.loc[new_id, 'HIGHEST_ELEVATION'] = mg_ll['max_elevation (masl)'].iloc[0]
            glacier_pdb.loc[new_id, 'MEDIAN_ELEVATION'] = mg_ll['median_elevation (masl)'].iloc[0]
            glacier_pdb.loc[new_id, 'LOWEST_ELEVATION'] = mg_ll['min_elevation (masl)'].iloc[0]
            #glacier_pdb.loc[new_id, 'ELEVATION_UNC'] = np.nan
            glacier_pdb.loc[new_id, 'LENGTH'] = round(mg_ll['length (km)'].iloc[0], 2)
            #glacier_pdb.loc[new_id, 'LENGTH_UNC'] = mg_ll['length (km)'].iloc[0] * LENGTH_UNC_FACTOR
            glacier_pdb.loc[new_id, 'AREA'] = round(mg_ll['area(km2)'].iloc[0], 3)
            #glacier_pdb.loc[new_id, 'AREA_UNC'] = round(area_unc(mg.area_t1_km2.iloc[0]), 2)
            glacier_pdb.loc[new_id, 'SURVEY_DATE'] = mg_ll.year.iloc[0]
            glacier_pdb.loc[new_id, 'SURVEY_PLATFORM_METHOD'] = SD_PLATFORM_METHOD
            REMARKS_pdb = REMARKS_pdb + ' Survey date method: %s.' % SD_METHOD
            glacier_pdb.loc[new_id, 'INVESTIGATOR'] = INVESTIGATOR
            glacier_pdb.loc[new_id, 'SPONS_AGENCY'] = AGENCY
            glacier_pdb.loc[new_id, 'REFERENCE'] = REFERENCE
            glacier_pdb.loc[new_id, 'REMARKS'] = REMARKS_pdb

            new_pdb = pd.concat([new_pdb, glacier_pdb], ignore_index=True)
                                                          
    return new_pda, new_pdb, new_pdd, new_ids

In [349]:
(new_pda, new_pdb, new_pdd, new_id_range) = create_entries_new_id(pda, pdd, pdfll, pdf, new_pda, new_pdb, new_pdd, new_id, new_id_range)

Find the GLIMS IDs in PDA

We have already put the same DF into both functions taking over IDs and creating new ones, so we don't need to concat them again


In [351]:
new_pda['GLIMS_ID'] = np.nan

ct=0
for sgi_2010_id in new_pda.SGI_2010_ID.values:
    try:
        new_pda.loc[new_pda[new_pda.SGI_2010_ID == sgi_2010_id].index, 'GLIMS_ID'] = glims[glims.local_id == sgi_2010_id].glac_id.iloc[0]
    except IndexError:
        #print('Not in GLIMS:',new_pda[new_pda.SGI_2010_ID == sgi_2010_id].NAME.iloc[0], sgi_2010_id)
        ct += 1
        continue

# Some glaciers don't have a GLIMS entry due to their size (~0.2km2)
print('%s glaciers are non im GLIMS' % ct)


142 glaciers are non im GLIMS

There are still name duplicates: Try and get the Kanton abbreviations to make them separable more easily (doesn't always work, but at least sometimes...)


In [352]:
# read Kanton borders
fc = fiona.open(ch_adm_path)

In [ ]:
namedupl_pda = [item for item, count in collections.Counter(new_pda.NAME.values).items() if count > 1]

for name in namedupl_pda:
    id_list = new_pda[new_pda.NAME == name].WGMS_ID.values
    
    for wgms_id in id_list:
        print(wgms_id)
        lon = new_pda[new_pda.WGMS_ID == wgms_id].LONGITUDE.iloc[0]
        lat = new_pda[new_pda.WGMS_ID == wgms_id].LATITUDE.iloc[0]
        print(lon, lat)
        point = Point(lon,lat)
        
        for feature in fc:
            if shape(feature['geometry']).contains(point):
                new_pda.loc[new_pda[new_pda.WGMS_ID == wgms_id].index,'NAME'] = name + ' ' + feature['properties']['HASC_1'].split('.')[1]

In [ ]:
namedupl_pdb = [item for item, count in collections.Counter(new_pdb.NAME.values).items() if count > 1]

for name in namedupl_pdb:
    id_list = new_pdb[new_pdb.NAME == name].WGMS_ID.values
    
    for wgms_id in id_list:
        print(wgms_id)
        try:
            lon = new_pda[new_pda.WGMS_ID == wgms_id].LONGITUDE.iloc[0]
            lat = new_pda[new_pda.WGMS_ID == wgms_id].LATITUDE.iloc[0]
            print(lon, lat)
            point = Point(lon,lat)
        except IndexError: # FoG entry already present
            print(name,wgms_id)
            continue
        
        for feature in fc:
            if shape(feature['geometry']).contains(point):
                new_pdb.loc[new_pdb[new_pdb.WGMS_ID == wgms_id].index,'NAME'] = name + ' ' + feature['properties']['HASC_1'].split('.')[1]

In [ ]:
namedupl_pdd = [item for item, count in collections.Counter(new_pdd.NAME.values).items() if count > 1]

for name in namedupl_pdd:
    id_list = new_pdd[new_pdd.NAME == name].WGMS_ID.values
    
    for wgms_id in id_list:
        print(wgms_id)
        try:
            lon = new_pda[new_pda.WGMS_ID == wgms_id].LONGITUDE.iloc[0]
            lat = new_pda[new_pda.WGMS_ID == wgms_id].LATITUDE.iloc[0]
            print(lon, lat)
            point = Point(lon,lat)
        except IndexError: # FoG entry already present
            print(name,wgms_id)
            continue
        
        for feature in fc:
            if shape(feature['geometry']).contains(point):
                new_pdd.loc[new_pdd[new_pdd.WGMS_ID == wgms_id].index,'NAME'] = name + ' ' + feature['properties']['HASC_1'].split('.')[1]

Sort the columns in order to be able to insert this easily into the data submission form


In [356]:
new_pda = new_pda[['POLTITICAL_UNIT', 'NAME', 'WGMS_ID', 'RIVER_BASIN', 'FREE_POSITION', 'LOCAL_CODE', 'LOCAL_PSFG', 'GEN_LOCATION', 'SPEC_LOCATION', 'LATITUDE', 'LONGITUDE', 'PRIM_CLASSIFIC', 'FORM', 'FRONTAL_CHARS', 'EXPOS_ACC_AREA', 'EXPOS_ABL_AREA', 'PARENT_GLACIER', 'REMARKS', 'REGION', 'SUBREGION', 'GLIMS_ID']]
new_pdb = new_pdb[['POLITICAL_UNIT', 'NAME', 'WGMS_ID', 'YEAR', 'HIGHEST_ELEVATION', 'MEDIAN_ELEVATION', 'LOWEST_ELEVATION', 'ELEVATION_UNC', 'LENGTH', 'LENGTH_UNC', 'AREA', 'AREA_UNC', 'SURVEY_DATE', 'SURVEY_PLATFORM_METHOD', 'INVESTIGATOR', 'SPONS_AGENCY', 'REFERENCE', 'REMARKS']]
new_pdd = new_pdd[['POLITICAL_UNIT', 'NAME', 'WGMS_ID', 'YEAR', 'LOWER_BOUND', 'UPPER_BOUND', 'AREA_SURVEY_YEAR', 'AREA_CHANGE', 'AREA_CHANGE_UNC', 'THICKNESS_CHG', 'THICKNESS_CHG_UNC', 'VOLUME_CHANGE', 'VOLUME_CHANGE_UNC', 'SURVEY_DATE', 'SD_PLATFORM_METHOD', 'REFERENCE_DATE', 'RD_PLATFORM_METHOD', 'INVESTIGATOR', 'SPONS_AGENCY', 'REFERENCE', 'REMARKS']]

Convert to Excel format


In [357]:
new_pda.to_excel('c:\\users\\jlandman\\Desktop\\PDA_submission.xlsx', index=False)

In [358]:
new_pdb.to_excel('c:\\users\\jlandman\\Desktop\\PDB_submission.xlsx', index=False)

In [359]:
new_pdd.to_excel('c:\\users\\jlandman\\Desktop\\PDD_submission.xlsx', index=False)

In [209]:
len(list(set(links.keys())-set(new_pda.NAME.values)))


Out[209]:
153

In [211]:
len(list(set(new_pda.NAME.values)-set([name_to_FoG(i,name_adjust_FoG, hot_list, special_names) for i in listlist])))


Out[211]:
1221

Links to RGI

Check is point is within an RGI polygon


In [ ]:
rgi = fiona.open(rgi_CE)
rgi_links = new_pda.copy()
rgi_links_present = pda.copy()
rgi_links['RGI_ID'] = np.nan
rgi_links_present['RGI_ID'] = np.nan

for fog_id in rgi_links.WGMS_ID.values:
    ix = rgi_links.index[rgi_links.WGMS_ID == fog_id]
    print(fog_id)
    lon = rgi_links[rgi_links.WGMS_ID == fog_id].LONGITUDE.iloc[0]
    lat = rgi_links[rgi_links.WGMS_ID == fog_id].LATITUDE.iloc[0]
    print(lon, lat)
    point = Point(lon,lat)

    for feature in rgi:
        if shape(feature['geometry']).contains(point):
            print(feature['properties']['RGIId'], ix)
            rgi_links.loc[ix, 'RGI_ID'] = feature['properties']['RGIId']

            
rgi_links.to_csv('sgi2010_rgi_links.csv')

In [213]:
len(rgi_links.dropna(subset=['RGI_ID']))


Out[213]:
814