Modules


In [1]:
import pandas as pd
import numpy as np
import os

Files


In [2]:
michi = pd.read_csv('C:\\users\\jlandman\\Desktop\\michi.csv', encoding='latin-1', sep=',')
lecl = pd.read_csv('C:\\users\\jlandman\\Desktop\\lecl_extra.csv', encoding='latin-1')
world_links = pd.read_csv('C:\\Users\\jlandman\\Desktop\\Manual_links_WGMS_to_RGI_GlaThiDa_Leclercq_WORLD_20160212.csv', encoding='latin-1')
alps_links = pd.read_csv('C:\\Users\\jlandman\\Desktop\\ALPS_LINKS_FROM_GH.csv', encoding='latin-1')
sgi_links = pd.read_csv('C:\\Users\\jlandman\\Documents\\github\\glaciers-cci5\\glaciers_cci5\\sgi2010_rgi_links.csv', encoding='latin-1')
#ak_links = pd.read_csv('C:\\Users\\jlandman\\Desktop\\LeBris_Paul_to_FG\\Links_GLIMS_FoG_AK_final_v2.csv', encoding='latin-1')
#fog_2016_a = pd.read_csv('C:\\Users\\jlandman\\Desktop\\DOI-WGMS-FoG-2016-08\\WGMS-FoG-2016-08-A-GLACIER.csv', index_col='WGMS_ID', encoding='latin-1')

Some file manipulations


In [3]:
sgi_links = sgi_links.rename(columns={'POLTITICAL_UNIT':'POLITICAL_UNIT'})

In [4]:
# Select status = True in the manually checked files
lecl_true = lecl[lecl['status'] == True]
world_true = world_links[world_links['status'] == True]
alps_true = alps_links[alps_links['status'] == True]

In [5]:
# make WGMS_ID Integers
lecl_true['WGMS_ID'] = lecl_true['WGMS_ID'].astype(int)
world_true['WGMS_ID'] = world_true['WGMS_ID'].astype(int)
alps_true['WGMS_ID'] = alps_true['WGMS_ID'].astype(int)
sgi_links['WGMS_ID'] = sgi_links['WGMS_ID'].astype(int)
#ak_links['WGMS_ID'] = ak_links['WGMS_ID'].astype(int)


C:\Users\jlandman\Anaconda3\lib\site-packages\ipykernel\__main__.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
C:\Users\jlandman\Anaconda3\lib\site-packages\ipykernel\__main__.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
C:\Users\jlandman\Anaconda3\lib\site-packages\ipykernel\__main__.py:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

In [6]:
# set WGMS_ID as index
michi_ind = michi.set_index('WGMS_ID')
lecl_ind = lecl_true.set_index('WGMS_ID')
world_ind = world_true.set_index('WGMS_ID')
alps_ind = alps_true.set_index('WGMS_ID')
sgi_ind = sgi_links.set_index('WGMS_ID')
#ak_ind = ak_links.set_index('WGMS_ID')

In [7]:
lecl_ind.rename(columns={'RGI_ID': 'RGI_ID_LEC'}, inplace=True)
world_ind.rename(columns={'RGI_ID': 'RGI_ID_WORLD'}, inplace=True)
alps_ind.rename(columns={'RGI_ID': 'RGI_ID_ALPS'}, inplace=True)
sgi_ind.rename(columns={'RGI_ID': 'RGI_ID_SGI'}, inplace=True)
#ak_ind.rename(columns={'GLIMS_ID': 'GLIMS_ID_AK'}, inplace=True)

In [8]:
sgi_ind.head()


Out[8]:
Unnamed: 0 POLITICAL_UNIT NAME RIVER_BASIN FREE_POSITION LOCAL_CODE LOCAL_PSFG GEN_LOCATION SPEC_LOCATION LATITUDE ... FORM FRONTAL_CHARS EXPOS_ACC_AREA EXPOS_ABL_AREA PARENT_GLACIER REMARKS REGION SUBREGION GLIMS_ID RGI_ID_SGI
WGMS_ID
4585 0 CH GRUEEBU-N-II (PART OF B51/17N) NaN NaN NaN NaN NaN NaN 46.171600 ... NaN NaN NaN NaN 460.0 NaN Central Europe Alps G007989E46172N RGI50-11.02463
4586 1 CH GRUEEBU-S (PART OF B51/17N) NaN NaN NaN NaN NaN NaN 46.167099 ... NaN NaN NaN NaN 460.0 NaN Central Europe Alps G007996E46168N RGI50-11.02464
4587 2 CH GRUEEBU-N-I (PART OF B51/17N) NaN NaN NaN NaN NaN NaN 46.176102 ... NaN NaN NaN NaN 460.0 NaN Central Europe Alps G007986E46177N NaN
4588 3 CH FEE-S-I NaN NaN NaN NaN NaN NaN 46.065800 ... NaN NaN NaN NaN 392.0 NaN Central Europe Alps G007919E46068N RGI50-11.02635
4589 4 CH FEE-S-II NaN NaN NaN NaN NaN NaN 46.059601 ... NaN NaN NaN NaN 392.0 NaN Central Europe Alps G007892E46055N RGI50-11.02635

5 rows × 22 columns


In [9]:
len(sgi_ind)


Out[9]:
1284

Check Michi's list for RGI duplicates (some might result from automatic assignment, see e.g. Feegletscher where Mauro's small polygons fall within one bigger RGI polygon)


In [10]:
print(len(np.unique([i for i in michi.RGI_ID.values if isinstance(i, str)])))
print(len([i for i in michi.RGI_ID.values if isinstance(i, str)]))
print(len(set([i for i in michi.RGI_ID.values if isinstance(i,str)])))
dup_RGI = np.unique([x for x in michi.RGI_ID.values.tolist() if michi.RGI_ID.values.tolist().count(x) > 1])
list(dup_RGI).remove('nan')


898
966
898

In [11]:
michi[michi.RGI_ID.isin(dup_RGI)].sort_values(by='RGI_ID')


Out[11]:
POLITICAL_UNIT NAME WGMS_ID PSFG_ID WGI_ID GLIMS_ID RGI_ID REMARKS
3594 US SPIDER 3941 NaN NaN NaN RGI50-02.00974 NaN
2218 IS SKEIDARARJOEKULL E1 3116 IS0117A IS0000V0117A NaN RGI50-02.00974 NaN
2896 KZ KAVRAYSKIY 1078 NaN NaN NaN RGI50-03.01427 NaN
269 CA LAIKA 1412 CA0720 NaN NaN RGI50-03.01427 NaN
2081 GL FREYA 3350 NaN NaN NaN RGI50-05.20030 NaN
2299 IT CAGAMEI IV 2543 IT0535B IT4L01132008 NaN RGI50-05.20030 NaN
3121 NZ ST.MARY 3022 NaN NZ0711B39000 NaN RGI50-07.00382 NaN
3303 SJ DAUDBREEN 3915 NaN NaN NaN RGI50-07.00382 NaN
3410 SJ WALDEMARBREEN 2307 SJ15403 NaN NaN RGI50-07.00492 NaN
3364 SJ NATHORSTBREEN 3499 SJ13214 SJ4W00103214 NaN RGI50-07.00492 NaN
120 AT KALSER BAERENKOPF K. 2676 NaN NaN NaN RGI50-07.01100 NaN
3320 SJ FRIDTJOVBREEN 3494 SJ13708 SJ4W00103708 NaN RGI50-07.01100 NaN
2918 NO BLAAISEN 1328 NO7421 NO4A000DZ011 NaN RGI50-08.00710 NaN
3509 US HANGING 3389 NaN NaN NaN RGI50-08.00710 NaN
3543 US MC CARTY 3396 NaN NaN NaN RGI50-08.01432 NaN
2925 NO BONDHUSBREA 318 NO20408 NO4A000AJ002 NaN RGI50-08.01432 NaN
2914 NO AUSTRE MEMURUBREEN 1317 NO0053A NO4A000AB033 NaN RGI50-08.01655 NaN
3487 US DOUBLE 3383 NaN NaN NaN RGI50-08.01655 NaN
277 CA MUD 3416 NaN NaN NaN RGI50-08.01736 NaN
2942 NO HARBARDSBREEN 2320 NO30704 NO4A000A2004 NaN RGI50-08.01736 NaN
2994 NO TUNSBERGDALSBREEN 1316 NO3100 NO4A000A4007 NaN RGI50-08.02379 NaN
3324 SJ GRUMANTBREEN 3496 SJ14201 SJ4W00104201 NaN RGI50-08.02379 NaN
214 AT ZETTALUNITZ/MULLWITZ K. 578 AT0508 ATJ131I0S054 NaN RGI50-11.00190 NaN
148 AT MUTMAL F. 506 AT0227 ATJ143O0E108 NaN RGI50-11.00190 NaN
1239 CH PIZOL 417 CH0081 CHR0135000D1 NaN RGI50-11.00638 NaN
3266 SE PASSUSJIETNA W 345 SE0796 SEB000E0Z012 NaN RGI50-11.00638 NaN
117 AT JAMTAL F. 480 AT0106 ATJ143S0N019 NaN RGI50-11.00781 NaN
731 CH FUORCLA D'UREZZAS 5862 NaN NaN G010162E46849N RGI50-11.00781 NaN
1507 CH SURETTA 411 CH0087 CHR013103I 2 NaN RGI50-11.00797 NaN
151 AT OCHSENTALER G. 483 AT0103 ATR013I0L008 NaN RGI50-11.00797 NaN
... ... ... ... ... ... ... ... ...
3189 PK SIACHEN 3666 NaN NaN NaN RGI50-14.07524 NaN
1953 CO BOLIVAR 2783 NaN NaN NaN RGI50-14.07524 NaN
2150 IN SHAUNE GARANG 1048 IN0084 IN5Q220 1984 NaN RGI50-14.12323 NaN
1996 ES INFIERNO E 957 ES2020 ES4O001105 4 NaN RGI50-14.12323 NaN
2206 IS MULAJOEKULL S 3105 IS0311A IS0000V0311B NaN RGI50-14.19543 NaN
2144 IN NEH NAR 3930 NaN NaN NaN RGI50-14.19543 NaN
3251 RU ULLUMALIENDERKU 833 RU3024 RU4G00307004 NaN RGI50-15.03507 NaN
3005 NP AX010 906 NP0005 NP0 XXX00005 NaN RGI50-15.03507 NaN
1902 CN HAILUOGOU 849 CN0031 CN5K6120F003 NaN RGI50-15.07886 NaN
3236 RU MURKAR 776 RU3020 RU4G00307030 NaN RGI50-15.07886 NaN
2180 IS GLJUFURARJOEKULL 3080 IS0103 IS0000V00103 NaN RGI50-15.09026 NaN
1915 CN NAIMONA NYI 3991 NaN NaN NaN RGI50-15.09026 NaN
1918 CN PARLUNG NO. 94 3987 CN0094 CN5O2820A010 NaN RGI50-15.11693 NaN
2176 IS FLAAJOEKULL E 148 3076 IS1930C NaN NaN RGI50-15.11693 NaN
1921 CN PARLUNG ZANGBO: NO. 12 3986 CN0012 CN5O2820B000 NaN RGI50-15.11962 NaN
2175 IS FLAAJOEKULL E 146 3075 IS1930D NaN NaN RGI50-15.11962 NaN
217 BO ZONGO 1503 BO5150 BO00XXX00001 NaN RGI50-16.00543 NaN
2892 KZ DZHAMBUL 1099 NaN NaN NaN RGI50-16.00543 NaN
1986 EC ANTIZANA15ALPHA 1624 EC1DA15 EC1D30370015 NaN RGI50-16.01339 NaN
223 CA ASULKAN 1401 CA0185 NaN NaN RGI50-16.01339 NaN
215 BO CHACALTAYA 1505 BO5180 BO00XXX00002 NaN RGI50-16.01447 NaN
2893 KZ GERASIMOV 1100 NaN NaN NaN RGI50-16.01447 NaN
2856 KE LEWIS 695 KE0008 KEE021D00001 NaN RGI50-16.01638 NaN
3325 SJ HAABERGBREEN 3497 SJ14202 SJ4W00104202 NaN RGI50-16.01638 NaN
3142 PE ARTESONRAJU 3292 PE0003 PEP005C0GH01 NaN RGI50-16.02444 NaN
2257 IT BADILETTO 2491 IT0385 IT4L01104006 NaN RGI50-16.02444 NaN
1884 CL TORO 1 3980 NaN NaN NaN RGI50-17.15113 NaN
2167 IS EYJABAKKAJOEKULL 3069 IS2300 IS0000V02300 NaN RGI50-17.15113 NaN
2169 IS EYVINDSTUNGNAK 3070 IS1627 NaN NaN RGI50-17.15114 NaN
1885 CL TORO 2 3981 NaN NaN NaN RGI50-17.15114 NaN

131 rows × 8 columns


In [12]:
print(len([i for i in michi_ind.RGI_ID.values if isinstance(i, str)]))


966

Make all changes on a copy of the file


In [13]:
michi_update = michi_ind.copy()

Join RGI links of other files based on FoG_ID (Index)


In [14]:
michi_update = michi_update.join([lecl_ind.RGI_ID_LEC, world_ind.RGI_ID_WORLD, alps_ind.RGI_ID_ALPS, sgi_ind.RGI_ID_SGI], how='outer')

In [15]:
michi_update.update(sgi_ind, overwrite=False)

In [16]:
michi_update[pd.isnull(michi_update.POLITICAL_UNIT)]


Out[16]:
POLITICAL_UNIT NAME PSFG_ID WGI_ID GLIMS_ID RGI_ID REMARKS RGI_ID_LEC RGI_ID_WORLD RGI_ID_ALPS RGI_ID_SGI
WGMS_ID
3640 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

In [17]:
for i in range(len(michi_update.RGI_ID_LEC.values)):
    if isinstance(michi_update.RGI_ID_LEC.values[i], str):
        print(michi_update.RGI_ID_LEC.values[i])


RGI50-11.03437
RGI50-11.02923
RGI50-11.02835
RGI50-11.03067
RGI50-11.03503
RGI50-11.01876
RGI50-11.01450
RGI50-11.02979
RGI50-11.02249
RGI50-11.02771
RGI50-11.02728
RGI50-11.02595
RGI50-11.02641
RGI50-11.02490
RGI50-11.01698
RGI50-11.02848
RGI50-11.02607
RGI50-11.02715
RGI50-11.02245
RGI50-11.02144
RGI50-11.02119
RGI50-11.00848
RGI50-11.00638
RGI50-11.00872
RGI50-11.01120
RGI50-11.01193
RGI50-11.01621
RGI50-11.01346
RGI50-11.01270
RGI50-11.01222
RGI50-11.01144
RGI50-11.01328
RGI50-11.02756
RGI50-11.02815
RGI50-11.01987
RGI50-11.01478
RGI50-11.01238
RGI50-11.00781
RGI50-11.00897
RGI50-11.00957
RGI50-11.00343
RGI50-11.00376
RGI50-11.00325
RGI50-11.00787
RGI50-11.00836
RGI50-11.00871
RGI50-11.00929
RGI50-11.00887
RGI50-11.00886
RGI50-11.00958
RGI50-11.00992
RGI50-11.00687
RGI50-11.00746
RGI50-11.00002
RGI50-11.00003
RGI50-11.00181
RGI50-11.00291
RGI50-11.00124
RGI50-11.00106
RGI50-11.00067
RGI50-11.00278
RGI50-11.00164
RGI50-11.00190
RGI50-11.00135
RGI50-11.00110
RGI50-11.00068
RGI50-11.00415
RGI50-11.00459
RGI50-11.00469
RGI50-11.00577
RGI50-11.00548
RGI50-11.00518
RGI50-11.02973
RGI50-11.02351
RGI50-11.01863
RGI50-11.02437
RGI50-11.01974
RGI50-11.02967
RGI50-11.00541
RGI50-11.01946

See all cases where there is more than one link and compare if they are the same


In [18]:
michi_update[michi_update[['RGI_ID_LEC', 'RGI_ID_WORLD', 'RGI_ID_ALPS', 'RGI_ID_SGI']].isnull().sum(axis=1) <3][:20]


Out[18]:
POLITICAL_UNIT NAME PSFG_ID WGI_ID GLIMS_ID RGI_ID REMARKS RGI_ID_LEC RGI_ID_WORLD RGI_ID_ALPS RGI_ID_SGI
WGMS_ID
352 FR GEBROULAZ FR0009 FRN010406E06 NaN RGI50-11.03432 NaN RGI50-11.03437 NaN RGI50-11.03432 NaN
353 FR MER DE GLACE FR0003 FRN010306A01 NaN RGI50-11.02923 NaN RGI50-11.02923 RGI50-11.02923 RGI50-11.02923 NaN
354 FR ARGENTIERE FR0002 FRN010305A08 NaN RGI50-11.02835 NaN RGI50-11.02835 NaN RGI50-11.02835 NaN
356 FR SAINT SORLIN FR0015 FRN010602B09 NaN RGI50-11.03503 NaN RGI50-11.03503 NaN RGI50-11.03503 NaN
359 CH GRIES CH0003 CHN012405004 NaN RGI50-11.01876 NaN RGI50-11.01876 NaN RGI50-11.01876 NaN
389 CH FINDELEN CH0016 CHN012506003 NaN RGI50-11.02789 NaN NaN RGI50-11.02789 RGI50-11.02789 NaN
417 CH PIZOL CH0081 CHR0135000D1 NaN RGI50-11.00638 NaN RGI50-11.00638 RGI50-11.00638 RGI50-11.00638 NaN
463 CH BASODINO CH0104 CHL012104010 NaN RGI50-11.01987 NaN RGI50-11.01987 NaN RGI50-11.01987 NaN
473 CH RHONE CH0001 CHN012403003 NaN RGI50-11.01238 NaN RGI50-11.01238 NaN RGI50-11.01238 NaN
476 AT FILLECK K. AT0601B ATJ143S0A098 NaN RGI50-11.00073 NaN NaN RGI50-11.00073 RGI50-11.00073 NaN
480 AT JAMTAL F. AT0106 ATJ143S0N019 NaN RGI50-11.00781 NaN RGI50-11.00781 NaN RGI50-11.00781 NaN
482 AT VERMUNT G. AT0104 ATR013I0L007 NaN RGI50-11.00807 NaN NaN RGI50-11.00807 RGI50-11.00807 NaN
483 AT OCHSENTALER G. AT0103 ATR013I0L008 NaN RGI50-11.00797 NaN NaN RGI50-11.00797 RGI50-11.00797 NaN
491 AT HINTEREIS F. AT0209 ATJ143O0E125 NaN RGI50-11.00897 NaN RGI50-11.00897 NaN RGI50-11.00897 NaN
507 AT KESSELWAND F. AT0226 ATJ143O0E129 NaN RGI50-11.00787 NaN RGI50-11.00787 NaN RGI50-11.00787 NaN
510 AT LANGTALER F. AT0223 ATJ143O0E072 NaN RGI50-11.00929 NaN RGI50-11.00929 RGI50-11.00929 RGI50-11.00929 NaN
535 AT HALLSTAETTER G. AT1102 ATJ142T0R002 NaN RGI50-11.00002 NaN RGI50-11.00002 RGI50-11.00002 RGI50-11.00002 NaN
566 AT PASTERZE AT0704 ATJ131M0O027 NaN RGI50-11.00106 NaN RGI50-11.00106 NaN RGI50-11.00106 NaN
578 AT ZETTALUNITZ/MULLWITZ K. AT0508 ATJ131I0S054 NaN RGI50-11.00190 NaN RGI50-11.00190 RGI50-11.00190 RGI50-11.00190 NaN
661 IT LUNGA (VEDRETTA) / LANGENF. IT0733 IT4L00112128 NaN RGI50-11.01776 NaN NaN RGI50-11.01776 RGI50-11.01776 NaN

In [19]:
michi_update[~pd.isnull(michi_update.RGI_ID.values)][0:25]# & michi_update[['RGI_ID_LEC', 'RGI_ID_WORLD', 'RGI_ID_ALPS', 'RGI_ID_SGI']].isnull().sum(axis=1) <3]


Out[19]:
POLITICAL_UNIT NAME PSFG_ID WGI_ID GLIMS_ID RGI_ID REMARKS RGI_ID_LEC RGI_ID_WORLD RGI_ID_ALPS RGI_ID_SGI
WGMS_ID
24 CA TIEDEMANN CA2040 CA0 XXX00017 NaN RGI50-02.05862 NaN NaN RGI50-02.05862 NaN NaN
32 CA ALEXANDER CA0133 CA0N00200001 NaN RGI50-01.05355 NaN NaN RGI50-01.05355 NaN NaN
47 CA BRIDGE CA0275 CA0M20000001 NaN RGI50-02.04119 NaN NaN RGI50-02.04119 NaN NaN
53 CA WARD H. I. RISE CA2318 CA0 XXX00003 NaN RGI50-03.04079 NaN NaN RGI50-03.04079 NaN NaN
66 CA BENCH CA0234 CA0 XXX00013 NaN RGI50-02.05986 NaN NaN RGI50-02.05986 NaN NaN
138 US GILMAN US1321 USN000C00063 NaN RGI50-01.20830 NaN NaN RGI50-01.20830 NaN NaN
201 US NISQUALLY US2027 USM000B00129 NaN RGI50-02.00644 NaN NaN RGI50-02.00644 NaN NaN
203 US EMMONS US2022 USM000B00127 NaN RGI50-02.00651 NaN NaN RGI50-02.00651 NaN NaN
232 GL AMITSULOQ ICE CAP GL0004 GL2U1DG16166 NaN RGI50-05.00446 NaN NaN RGI50-05.00446 NaN NaN
238 GL QAPIARFIUP SER. GL0009 GL2U1DB10003 NaN RGI50-05.00115 NaN NaN RGI50-05.00115 NaN NaN
240 GL VALHALTINDE GL0001 GL2U1AG05008 NaN RGI50-05.05149 NaN NaN NaN NaN NaN
286 NO HOEGTUVBREEN NO5507 NO4A000CX007 NaN RGI50-08.02616 NaN NaN NaN NaN NaN
318 NO BONDHUSBREA NO20408 NO4A000AJ002 NaN RGI50-08.01432 NaN NaN RGI50-08.01432 NaN NaN
327 SE PARTEJEKNA SE0763 SEB000E08002 NaN RGI50-08.00928 NaN NaN RGI50-08.00928 NaN NaN
345 SE PASSUSJIETNA W SE0796 SEB000E0Z012 NaN RGI50-11.00638 NaN NaN NaN NaN NaN
346 DE SCHNEEFERNER N DE0001 DE4J14400001 NaN RGI50-11.03974 NaN NaN NaN RGI50-11.03974 NaN
352 FR GEBROULAZ FR0009 FRN010406E06 NaN RGI50-11.03432 NaN RGI50-11.03437 NaN RGI50-11.03432 NaN
353 FR MER DE GLACE FR0003 FRN010306A01 NaN RGI50-11.02923 NaN RGI50-11.02923 RGI50-11.02923 RGI50-11.02923 NaN
354 FR ARGENTIERE FR0002 FRN010305A08 NaN RGI50-11.02835 NaN RGI50-11.02835 NaN RGI50-11.02835 NaN
356 FR SAINT SORLIN FR0015 FRN010602B09 NaN RGI50-11.03503 NaN RGI50-11.03503 NaN RGI50-11.03503 NaN
357 FR SARENNES FR0029 FRN010603A02 NaN RGI50-11.03515 NaN NaN NaN RGI50-11.03515 NaN
359 CH GRIES CH0003 CHN012405004 NaN RGI50-11.01876 NaN RGI50-11.01876 NaN RGI50-11.01876 NaN
389 CH FINDELEN CH0016 CHN012506003 NaN RGI50-11.02789 NaN NaN RGI50-11.02789 RGI50-11.02789 NaN
408 CH SILVRETTA CH0090 CHR013100G 5 NaN RGI50-11.00804 NaN NaN NaN RGI50-11.00804 NaN
410 CH PORCHABELLA CH0088 CHR013102E 4 NaN RGI50-11.00807 NaN NaN NaN NaN NaN

In [20]:
michi_update.columns.values


Out[20]:
array(['POLITICAL_UNIT', 'NAME', 'PSFG_ID', 'WGI_ID', 'GLIMS_ID', 'RGI_ID',
       'REMARKS', 'RGI_ID_LEC', 'RGI_ID_WORLD', 'RGI_ID_ALPS', 'RGI_ID_SGI'], dtype=object)

In [21]:
michi_update[['RGI_ID', 'RGI_ID_LEC', 'RGI_ID_WORLD', 'RGI_ID_ALPS', 'RGI_ID_SGI']] = michi_update[['RGI_ID', 'RGI_ID_LEC', 'RGI_ID_WORLD', 'RGI_ID_ALPS', 'RGI_ID_SGI']].astype(str)

In [22]:
for i in range(len(michi_update.RGI_ID_LEC.values)):
    if isinstance(michi_update.RGI_ID_LEC.values[i], float):
        print(michi_update.RGI_ID_LEC.values[i])

Check where Michi's RGI_ID does not fit my findings


In [23]:
michi_update.loc[(michi_update.RGI_ID != michi_update.RGI_ID_LEC) 
                 & (michi_update.RGI_ID_LEC != 'nan') 
                 & (michi_update.RGI_ID != 'nan')]


Out[23]:
POLITICAL_UNIT NAME PSFG_ID WGI_ID GLIMS_ID RGI_ID REMARKS RGI_ID_LEC RGI_ID_WORLD RGI_ID_ALPS RGI_ID_SGI
WGMS_ID
352 FR GEBROULAZ FR0009 FRN010406E06 NaN RGI50-11.03432 NaN RGI50-11.03437 nan RGI50-11.03432 nan
589 AT HORN K. (ZILLER) AT0402 ATJ143Z0I075 NaN RGI50-11.01776 NaN RGI50-11.00459 nan nan nan

In [24]:
michi_update.loc[(michi_update.RGI_ID != michi_update.RGI_ID_WORLD) 
                 & (michi_update.RGI_ID_WORLD != 'nan') 
                 & (michi_update.RGI_ID != 'nan')]


Out[24]:
POLITICAL_UNIT NAME PSFG_ID WGI_ID GLIMS_ID RGI_ID REMARKS RGI_ID_LEC RGI_ID_WORLD RGI_ID_ALPS RGI_ID_SGI
WGMS_ID
732 KG ABRAMOV KG4101 KGXA31000040 NaN RGI50-13.54430 NaN nan RGI50-13.18096 nan nan

In [25]:
michi_update.loc[(michi_update.RGI_ID != michi_update.RGI_ID_ALPS) 
                 & (michi_update.RGI_ID_ALPS != 'nan') 
                 & (michi_update.RGI_ID != 'nan')]


Out[25]:
POLITICAL_UNIT NAME PSFG_ID WGI_ID GLIMS_ID RGI_ID REMARKS RGI_ID_LEC RGI_ID_WORLD RGI_ID_ALPS RGI_ID_SGI
WGMS_ID

In [26]:
michi_update.loc[(michi_update.RGI_ID != michi_update.RGI_ID_SGI) 
                 & (michi_update.RGI_ID_SGI != 'nan') 
                 & (michi_update.RGI_ID != 'nan')]


Out[26]:
POLITICAL_UNIT NAME PSFG_ID WGI_ID GLIMS_ID RGI_ID REMARKS RGI_ID_LEC RGI_ID_WORLD RGI_ID_ALPS RGI_ID_SGI
WGMS_ID
5868 CH FENGA VADRET DA-N-II (PART OF E73/04) NaN NaN G010233E46896N RGI50-11.0072 NaN nan nan nan RGI50-11.00723

Check if everything went right


In [27]:
# This is Chorabari glacier -> no RGI equivalent, would be confusing
michi_update = michi_update.drop(3640)

In [28]:
michi_update[pd.isnull(michi_update['NAME'])]


Out[28]:
POLITICAL_UNIT NAME PSFG_ID WGI_ID GLIMS_ID RGI_ID REMARKS RGI_ID_LEC RGI_ID_WORLD RGI_ID_ALPS RGI_ID_SGI
WGMS_ID

In [29]:
michi_update[pd.isnull(michi_update['POLITICAL_UNIT'])]


Out[29]:
POLITICAL_UNIT NAME PSFG_ID WGI_ID GLIMS_ID RGI_ID REMARKS RGI_ID_LEC RGI_ID_WORLD RGI_ID_ALPS RGI_ID_SGI
WGMS_ID

Compile new data set by merging the columns together and put remarks:


In [30]:
# Replace back 'nan' to np.nan
michi_update = michi_update.replace('nan', np.nan)

In [31]:
np.unique(michi_update.REMARKS.values)


Out[31]:
array([ nan,  nan,  nan, ...,  nan,  nan,  nan])

In [32]:
michi_update.REMARKS = np.nan

In [33]:
# This is necessary to make the remarks extendable in the next step (convert np.nan to empty string)
michi_update.REMARKS = michi_update.REMARKS.fillna('')

In [34]:
# ATTENTION, ORDER! 
# we FIRST fill the df with the manual links and then only the np.nan values with the automatic links so that 
# manual links (potentially "more correct") are not overwritten by potentially false geometric links
michi_update['RGI_ID_new'] = np.nan
michi_update['RGI_ID_new'] = np.where(~pd.isnull(michi_update.RGI_ID_LEC), michi_update.RGI_ID_LEC, np.nan)
michi_update['RGI_ID_new'] = np.where(pd.isnull(michi_update.RGI_ID_new), michi_update.RGI_ID_WORLD, michi_update.RGI_ID_new)
michi_update['RGI_ID_new'] = np.where(pd.isnull(michi_update.RGI_ID_new), michi_update.RGI_ID_ALPS, michi_update.RGI_ID_new)
# now it's time to fill the remarks column with "RGI linked checked manually", where there is an entry in RGI_ID_new
michi_update.loc[~pd.isnull(michi_update['RGI_ID_new']), 'REMARKS'] = michi_update['REMARKS'] + 'RGI link checked manually'
print(len(michi_update[michi_update.REMARKS == 'RGI link checked manually']))
michi_update['RGI_ID_new'] = np.where(pd.isnull(michi_update.RGI_ID_new), michi_update.RGI_ID_SGI, michi_update.RGI_ID_new)
michi_update.loc[(~pd.isnull(michi_update['RGI_ID_new'])) & (michi_update['REMARKS'].str.len() == 0.), 'REMARKS'] = michi_update['REMARKS'] + 'RGI link automated'
print(len(michi_update[michi_update.REMARKS == 'RGI link checked manually']))


159
159

In [35]:
michi_update.columns.values


Out[35]:
array(['POLITICAL_UNIT', 'NAME', 'PSFG_ID', 'WGI_ID', 'GLIMS_ID', 'RGI_ID',
       'REMARKS', 'RGI_ID_LEC', 'RGI_ID_WORLD', 'RGI_ID_ALPS',
       'RGI_ID_SGI', 'RGI_ID_new'], dtype=object)

In [36]:
michi_update = michi_update[['POLITICAL_UNIT', 'NAME', 'PSFG_ID', 'WGI_ID', 'GLIMS_ID', 'RGI_ID_new', 'REMARKS']]
michi_update


Out[36]:
POLITICAL_UNIT NAME PSFG_ID WGI_ID GLIMS_ID RGI_ID_new REMARKS
WGMS_ID
0 CA WHITE CA2340 CAR014E00015 NaN NaN
1 CA BABY CA0205 CAR014E00009 NaN NaN
2 CA SOUTH ICE CAP CA1961 CA002R700001 NaN NaN
3 CA NADAHINI CA1402 CA0N00300001 NaN NaN
4 CA BERENDON CA0240 CA0N00B00001 NaN NaN
5 CA NEW MOON CA1430 CA0N00100001 NaN NaN
6 CA UTEM CA2190 CA0M22200001 NaN NaN
7 CA ATHABASCA CA0190 CA0A12100001 NaN NaN
8 CA SASKATCHEWAN CA1905 CA0B22200001 NaN NaN
9 CA DEER LAKE CA0420 CAM000C00007 NaN NaN
10 CA BUGABOO CA0290 CA0M12100002 NaN NaN
11 CA BERM CA0245 CAM000C00001 NaN NaN
12 CA HAVOC CA0840 CAM000C00006 NaN NaN
13 CA SURF CA1986 CAM000C00011 NaN NaN
14 CA WAVE CA2330 CAM000C00005 NaN NaN
15 CA TERRIFIC CA2025 CAM000C00009 NaN NaN
16 CA MEIGHEN ICE CAP CA1335 CA002R900001 NaN NaN
17 CA CLENDENNING CA0335 CAN000C00003 NaN NaN
18 CA STAIRCASE CA1973 CA0M20000006 NaN NaN
19 CA SPHINX CA1965 CA0M20000003 NaN NaN
20 CA FLEUR D. NEIGES CA0675 CAM000C00003 NaN NaN
21 CA GRIFFIN CA0784 CA0M20000004 NaN NaN
22 CA THUNDERCLAP CA2035 CA0M20000005 NaN NaN
23 CA KOKANEE CA1190 CA0M13100001 NaN NaN
24 CA TIEDEMANN CA2040 CA0 XXX00017 NaN RGI50-02.05862 RGI link checked manually
25 CA ATAVIST CA0187 CA0 XXX00012 NaN NaN
26 CA APE CA0170 CAM000D00001 NaN NaN
27 CA FYLES CA0698 CAM000C00008 NaN NaN
28 CA NOEICK CA1465 CA0 XXX00015 NaN NaN
29 CA PURGATORY CA1690 CA0 XXX00016 NaN NaN
... ... ... ... ... ... ... ...
5840 CH VILUOCH-W NaN NaN NaN RGI50-11.01274 RGI link automated
5841 CH SCALETTAPASS-E NaN NaN NaN RGI50-11.01182 RGI link automated
5842 CH VALLORGIA VADRET NaN NaN NaN RGI50-11.01181 RGI link automated
5843 CH PUNTOTA VADRET DA-II NaN NaN NaN RGI50-11.01200 RGI link automated
5844 CH PUNTOTA-VADRET DA-III NaN NaN G009977E46689N NaN
5845 CH SARSURA VADRET DA-E (T OF E44/04) NaN NaN NaN RGI50-11.01180 RGI link automated
5846 CH SARSURA VADRET DA (PART OF E44/04) NaN NaN NaN RGI50-11.01165 RGI link automated
5847 CH GLETSCHTAELI NaN NaN NaN NaN
5848 CH GRIALETSCH VADRET DA NaN NaN NaN RGI50-11.01173 RGI link automated
5849 CH CCHILBIRITZEN NaN NaN G009960E46700N RGI50-11.01169 RGI link automated
5850 CH RADOENT VADRET DA NaN NaN NaN NaN
5851 CH SCHWARZHORN GR NaN NaN G009942E46739N NaN
5852 CH CHAMPATSCH-N NaN NaN NaN RGI50-11.01079 RGI link automated
5853 CH JOERIFLESS-S NaN NaN G009984E46766N NaN
5854 CH SAGLIAINS VADRET NaN NaN G010062E46811N RGI50-11.00936 RGI link automated
5855 CH MUNTANELLAS NaN NaN G010071E46804N RGI50-11.00956 RGI link automated
5856 CH MAISAS VADRET DA LAS NaN NaN NaN RGI50-11.00880 RGI link automated
5857 CH ANSCHATSCHA NaN NaN G010120E46812N NaN
5858 CH PLAN RAI NaN NaN NaN RGI50-11.00868 RGI link automated
5859 CH VERMUNT VADRET-E (PART OF E51/04) NaN NaN G010136E46846N RGI50-11.00842 RGI link automated
5860 CH TUOI VADRET NaN NaN G010149E46849N RGI50-11.00829 RGI link automated
5861 CH VERMUNT VADRET-W (PART OF E51/04) NaN NaN G010125E46847N RGI50-11.00838 RGI link automated
5862 CH FUORCLA D'UREZZAS NaN NaN G010162E46849N RGI50-11.00781 RGI link automated
5863 CH UREZZAS VADRET D' NaN NaN G010181E46853N RGI50-11.00809 RGI link automated
5864 CH CHALAUS VADRET DA NaN NaN G010189E46860N RGI50-11.00793 RGI link automated
5865 CH FUTSCHOEL VADRET NaN NaN G010207E46864N RGI50-11.00790 RGI link automated
5866 CH TASNA VADRET DA-E NaN NaN G010256E46863N RGI50-11.00785 RGI link automated
5867 CH FENGA VADRET DA-S-II (PART OF E73/07N) NaN NaN G010239E46874N RGI50-11.00765 RGI link automated
5868 CH FENGA VADRET DA-N-II (PART OF E73/04) NaN NaN G010233E46896N RGI50-11.00723 RGI link automated
5869 CH TURTMANN NaN CHN012600000 NaN NaN

3692 rows × 7 columns


In [37]:
print(len(np.unique([i for i in michi_update.RGI_ID_new.values if isinstance(i, str)])))
print(len([i for i in michi_update.RGI_ID_new.values if isinstance(i, str)]))
print(len(set([i for i in michi_update.RGI_ID_new.values if isinstance(i,str)])))
dup_RGI_update = np.unique([x for x in michi_update.RGI_ID_new.values.tolist() if michi_update.RGI_ID_new.values.tolist().count(x) > 1])
list(dup_RGI_update).remove('nan')
michi_update.loc[michi_update.RGI_ID_new.isin(dup_RGI_update), ['POLITICAL_UNIT','NAME','RGI_ID_new'] ].sort_values(by='RGI_ID_new')#.to_csv('c:\\users\\jlandman\\Desktop\\Wrong_links_FoG_RGI_fuer_Fabi.csv')


941
973
941
Out[37]:
POLITICAL_UNIT NAME RGI_ID_new
WGMS_ID
5862 CH FUORCLA D'UREZZAS RGI50-11.00781
480 AT JAMTAL F. RGI50-11.00781
4964 CH CHLI SPANNORT-III RGI50-11.01006
4963 CH CHLI SPANNORT-I RGI50-11.01006
4954 CH CHLI SPANNORT-II RGI50-11.01006
4966 CH UNTER ROTEGG RGI50-11.01019
4959 CH TITLIS RGI50-11.01019
4900 CH STOESSENFIRN-II RGI50-11.01085
4899 CH STOESSENFIRN-I RGI50-11.01085
4975 CH STEIN (NO. 53) RGI50-11.01144
448 CH STEIN RGI50-11.01144
5116 CH OCHS-N RGI50-11.01346
443 CH UNTERER GRINDELWALD RGI50-11.01346
4610 CH FIESCHER BE (PART OF A54L/19) RGI50-11.01346
4609 CH OBERS ISCHMEER (PART OF A54L/19) RGI50-11.01346
5326 CH NAMENLOS VS RGI50-11.01450
360 CH GROSSER ALETSCH RGI50-11.01450
5016 CH VORD. TIERBERG-NW RGI50-11.01485
5038 CH HINT. TIERBERG-NE RGI50-11.01485
5147 CH HORN-N RGI50-11.01551
5146 CH LOUWIHORN RGI50-11.01551
5244 CH INNER TAL-SW (PART OF B30/20N) RGI50-11.01791
5243 CH UESSER TAL (TOTAL) RGI50-11.01791
5242 CH TELLIN-NE RGI50-11.01791
5329 CH NESTHORN-N RGI50-11.01827
5295 CH BREITHORN-SE RGI50-11.01827
3346 IT CARESER OCCIDENTALE RGI50-11.01834
3345 IT CARESER ORIENTALE RGI50-11.01834
635 IT CARESER RGI50-11.01834
5330 CH BEICHGRAT-SE-III RGI50-11.01905
5312 CH BEICHGRAT-SE-II (PART OF B36/01) RGI50-11.01905
5811 CH CORVATSCH-S (PART OF E23/18) RGI50-11.01962
5810 CH CORVATSCH-N (PART OF E23/18) RGI50-11.01962
4634 CH HOMATTU-I RGI50-11.02389
4633 CH HOMATTU-II RGI50-11.02389
5537 CH DIABLONS -N RGI50-11.02504
5534 CH DIABLONS -S RGI50-11.02504
5542 CH WEISSHORN GLACIER DU-N-II RGI50-11.02577
5538 CH WEISSHORN GLACIER DU-N-I RGI50-11.02577
380 CH MOIRY RGI50-11.02595
5553 CH BOUQUETINS RGI50-11.02595
4590 CH FEE-N-I (ALPHUBEL) (PART OF B53/16N) RGI50-11.02607
4592 CH FEE-N-I (DOM) (PART OF B53/16N) RGI50-11.02607
4591 CH FEE-N-I (TAESCHHORN) (PART OF B53/16N) RGI50-11.02607
392 CH FEE NORTH RGI50-11.02607
4588 CH FEE-S-I RGI50-11.02635
4589 CH FEE-S-II RGI50-11.02635
5469 CH MELLICH RGI50-11.02724
5478 CH MELLICH-S (PART OF B55/35N) RGI50-11.02724
5479 CH LAENGFLUE-N (PART OF B55/35N) RGI50-11.02758
5470 CH LAENGFLUE-E (PART OF B55/38N) RGI50-11.02758
5489 CH OBERER THEODUL RGI50-11.02845
5490 CH FURGG- RGI50-11.02845
5487 CH BREITHORN (PART OF B56/07) RGI50-11.02848
391 CH GORNER RGI50-11.02848
5488 CH UNTERER THEODUL (PART OF B56/07) RGI50-11.02848

In [38]:
# According to Horst, they shouldn't be deleted

# this indice should be deleted
delete = [
    5862,    # shares a polygon with Jamtalferner in RGI (ice connected), but would be major error
    4966,    # Titlis and Unter Rotegg are separated in FoG
    4959,
    
]
# OBERS ISCHMEER/FIESCHER/OCHS-N?

Corrections for 'problem glaciers'


In [39]:
# Gebroulaz: two RGI Polygons, but only one is captured

#Fenga: RGI50-11.00723 OK

#Horn K. (Ziller): RGI50-11.00459 OK

Output


In [40]:
michi_update = michi_update.rename({'RGI_ID_new':'RGI_ID'})
michi_update.to_csv('c:\\users\\jlandman\\Desktop\\WGMS-FoG-2016-08-AA-GLACIER-ID-LUT_updated.csv', encoding='latin-1')

Cross-check with links from RGI web site


In [41]:
website = pd.read_csv('C:\\Users\\jlandman\\Desktop\\00_rgi50_links\\00_rgi50_links.csv', skiprows=2, encoding='latin-1')

In [42]:
website = website.set_index('FoGId')

In [43]:
all_ind = np.hstack((website.index.values, michi_update.index.values))

In [44]:
all_ind.shape


Out[44]:
(3860,)

In [45]:
dupl_ind = np.unique([i for i in all_ind if all_ind.tolist().count(i) > 1])

In [46]:
len(dupl_ind)


Out[46]:
149

In [47]:
for i in dupl_ind:
    if not (pd.isnull(website.loc[i, 'GLIMSId']) or pd.isnull(michi_update.loc[i, 'GLIMS_ID'])):
        if website.loc[i, 'GLIMSId'] != michi_update.loc[i, 'GLIMS_ID']:
            print(i, website.loc[i, 'Name'],website.loc[i, 'GLIMSId'], michi_update.loc[i, 'GLIMS_ID'])
    if not (pd.isnull(website.loc[i, 'RGIId']) or pd.isnull(michi_update.loc[i, 'RGI_ID_new'])):
        if (website.loc[i, 'RGIId'] != michi_update.loc[i, 'RGI_ID_new']):
            print(i, website.loc[i, 'Name'],website.loc[i, 'RGIId'], michi_update.loc[i, 'RGI_ID_new'])


352 Gebroulaz                                        RGI50-11.03432 RGI50-11.03437

In [ ]: