In [1]:
import pandas as pd
import numpy as np
import pickle
import os
root_directory = 'D:/github/w_vattenstatus/ekostat_calculator'#"../" #os.getcwd()
workspace_directory = root_directory + '/workspaces' 
resource_directory = root_directory + '/resources'
#alias = 'lena'
user_id = 'test_user' #kanske ska vara off_line user?
workspace_alias = 'lena_indicator' # kustzonsmodellen_3daydata
# workspace_alias = 'kustzonsmodellen_3daydata'

# ## Initiate EventHandler
print(root_directory)
paths = {'user_id': user_id, 
         'workspace_directory': root_directory + '/workspaces', 
         'resource_directory': root_directory + '/resources', 
         'log_directory': 'D:/github' + '/log', 
         'test_data_directory': 'D:/github' + '/test_data',
         'cache_directory': 'D:/github/w_vattenstatus/cache'}


D:/github/w_vattenstatus/ekostat_calculator

In [17]:
sharkweb_matchfile = pd.read_csv(open(paths['resource_directory']+'/mappings/water_body_match_sharweb_SVAR_2012_2.txt'), sep = '\t')

In [3]:
current_matchfile = pd.read_csv(open(paths['resource_directory']+'/mappings/water_body_match.txt'), sep = '\t')
current_matchfile_new = pd.read_csv(open(paths['resource_directory']+'/mappings/water_body_match_new.txt'), sep = '\t')

In [35]:
ix_12s = sharkweb_matchfile.loc[sharkweb_matchfile['TYPE_AREA_CODE'] == '12s'].index
ix_12n = sharkweb_matchfile.loc[sharkweb_matchfile['TYPE_AREA_CODE'] == '12n'].index
ix_1s = sharkweb_matchfile.loc[sharkweb_matchfile['TYPE_AREA_CODE'] == '1s'].index
ix_1n = sharkweb_matchfile.loc[sharkweb_matchfile['TYPE_AREA_CODE'] == '1n'].index

sharkweb_matchfile['TYPE_AREA_SUFFIX'] = ''
sharkweb_matchfile.loc[ix_1n, 'TYPE_AREA_SUFFIX'] = 'n'
sharkweb_matchfile.loc[ix_12n, 'TYPE_AREA_SUFFIX'] = 'n'
sharkweb_matchfile.loc[ix_1s, 'TYPE_AREA_SUFFIX'] = 's'
sharkweb_matchfile.loc[ix_12s, 'TYPE_AREA_SUFFIX'] = 's'

In [38]:
sharkweb_matchfile['TYPE_AREA_NAME'] = ''
for type_code in sharkweb_matchfile.TYPE_AREA_CODE.unique():
    ix = sharkweb_matchfile.loc[sharkweb_matchfile['TYPE_AREA_CODE'] == type_code].index
    type_code_new = current_matchfile.loc[current_matchfile.TYPE_AREA_CODE == type_code].TYPE_AREA_CODE.unique()
    type_suffix_new = current_matchfile.loc[current_matchfile.TYPE_AREA_CODE == type_code].TYPE_AREA_SUFFIX.unique()
    type_name_new = current_matchfile.loc[current_matchfile.TYPE_AREA_CODE == type_code].TYPE_AREA_NAME.unique()
    type_suffix = sharkweb_matchfile.loc[sharkweb_matchfile.TYPE_AREA_CODE == type_code].TYPE_AREA_SUFFIX.unique()
    print(type_code, type_suffix_new, type_code_new, type_suffix, type_name_new)
    try:
        sharkweb_matchfile.loc[ix, 'TYPE_AREA_NAME'] = type_name_new[0]
    except IndexError as e:
        print(type_code, e)


23 [nan] ['23'] [''] ['Bottenviken, yttre kustvatten']
22 [nan] ['22'] [''] ['Bottenviken, inre kustvatten']
12n [nan] ['12n'] ['n'] ['Östergötlands samt Stockholms skärgård, mellankustvatten, norra']
17 [nan] ['17'] [''] ['Södra Bottenhavet, yttre kustvatten']
16 [nan] ['16'] [''] ['Södra Bottenhavet, inre kustvatten']
15 [nan] ['15'] [''] ['Stockholms skärgård, yttre kustvatten']
24 [nan] ['24'] [''] ['Stockholms inre skärgård och Hallsfjärden']
12s ['n' nan] ['12s'] ['s'] ['Östergötlands samt Stockholms skärgård, mellankustvatten, södra']
3 [nan] ['3'] [''] ['Västkustens yttre kustvatten, Skagerrak']
14 [nan] ['14'] [''] ['Östergötlands yttre kustvatten']
1n ['n'] ['1n'] ['n'] ['Västkustens inre kustvatten, norra']
10 [nan] ['10'] [''] ['Östra Ölands, sydöstra Gotlands kustvatten samt Gotska sandön']
2 [nan] ['2'] [''] ['Västkustens fjordar']
4 [nan] ['4'] [''] ['Västkustens yttre kustvatten, Kattegatt']
1s ['s'] ['1s'] ['s'] ['Västkustens inre kustvatten, södra']
8 [nan] ['8'] [''] ['Blekinge skärgård och Kalmarsund, inre kustvatten']
0 [] [] [''] []
0 index 0 is out of bounds for axis 0 with size 0
18 [nan] ['18'] [''] ['Norra Bottenhavet, Höga kusten, inre kustvatten']
19 [nan] ['19'] [''] ['Norra Bottenhavet, Höga kusten, yttre kustvatten']
13 [nan] ['13'] [''] ['Östergötlands inre skärgård']
5 [nan] ['5'] [''] ['Södra Halland och norra Öresunds kustvatten'
 'Södra Hallands och norra Öresunds kustvatten']
6 [nan] ['6'] [''] ['Öresunds kustvatten']
7 [nan] ['7'] [''] ['Skånes kustvatten']
11 [nan] ['11'] [''] ['Gotlands västra och norra kustvatten']
9 [nan] ['9'] [''] ['Blekinge skärgård och Kalmarsund, yttre kustvatten']
25 [nan] ['25'] [''] ['Göta Älvs- och Nordre Älvs estuarie']
20 [nan] ['20'] [''] ['Norra Kvarkens inre kustvatten']
21 [nan] ['21'] [''] ['Norra Kvarkens yttre kustvatten']

In [39]:
sharkweb_matchfile.to_csv(paths['resource_directory']+'/mappings/water_body_match_sharkweb_SVAR_2012_2_extended_new.txt', sep = '\t')

In [4]:
missing_wb = pd.read_csv(open('D:\github\w_vattenstatus\dokumentation\HID_not_matching_VISS_EU_CD_SVAR_2012_2.txt'), sep = '\t')   
missing_wb.head()


Out[4]:
used_as HID
0 VISS_EU_CD SE647050-213980
1 VISS_EU_CD SE634210-202020
2 VISS_EU_CD SE641840-211540
3 VISS_EU_CD SE641720-211520
4 VISS_EU_CD SE634110-201920

In [29]:
ix_list = []
for HID in missing_wb.HID:
    ix = sharkweb_matchfile.loc[sharkweb_matchfile['HID'] == HID.strip('SE')].index
    print('HID: {} in SVAR2012 file used as VISS_EU_CD in sharkweb'.format(HID.strip('SE')))
    ix_list.append(sharkweb_matchfile.loc[sharkweb_matchfile['HID'] == HID.strip('SE')].index[0])
    print(sharkweb_matchfile.loc[sharkweb_matchfile['HID'] == HID.strip('SE')][['WATER_BODY_NAME','HID','VISS_EU_CD']])
    #print(sharkweb_matchfile.loc[sharkweb_matchfile['HID'] == HID.strip('SE')][['WATER_BODY_NAME']].values[0])
    #print(sharkweb_matchfile.loc[sharkweb_matchfile['HID'] == HID.strip('SE')][['HID']].values[0])
    #print(sharkweb_matchfile.loc[sharkweb_matchfile['HID'] == HID.strip('SE')][['VISS_EU_CD']].values[0])


HID: 647050-213980 in SVAR2012 file used as VISS_EU_CD in sharkweb
                 WATER_BODY_NAME            HID       VISS_EU_CD
589  S m Bottenvikens kustvatten  647050-213980  SE648760-213140
HID: 634210-202020 in SVAR2012 file used as VISS_EU_CD in sharkweb
    WATER_BODY_NAME            HID       VISS_EU_CD
463        Holmsund  634210-202020  SE634200-202033
HID: 641840-211540 in SVAR2012 file used as VISS_EU_CD in sharkweb
         WATER_BODY_NAME            HID       VISS_EU_CD
459  Inre Lövselefjärden  641840-211540  SE641745-211570
HID: 641720-211520 in SVAR2012 file used as VISS_EU_CD in sharkweb
          WATER_BODY_NAME            HID       VISS_EU_CD
458  Yttre Lövselefjärden  641720-211520  SE641745-211570
HID: 634110-201920 in SVAR2012 file used as VISS_EU_CD in sharkweb
        WATER_BODY_NAME            HID       VISS_EU_CD
461  Yttre Österfjärden  634110-201920  SE634200-202033
HID: 673283-158060 in SVAR2012 file used as VISS_EU_CD in sharkweb
    WATER_BODY_NAME            HID       VISS_EU_CD
567   Yttre Fjärden  673283-158060  SE604200-171765
HID: 634350-202000 in SVAR2012 file used as VISS_EU_CD in sharkweb
       WATER_BODY_NAME            HID       VISS_EU_CD
462  Inre Österfjärden  634350-202000  SE634200-202033
HID: 590020-114520 in SVAR2012 file used as VISS_EU_CD in sharkweb
     WATER_BODY_NAME            HID         VISS_EU_CD
659  Inre Idefjorden  590020-114520  SENO590020-114520
HID: 590860-113810 in SVAR2012 file used as VISS_EU_CD in sharkweb
    WATER_BODY_NAME            HID         VISS_EU_CD
660      Idefjorden  590860-113810  SENO590860-113810
HID: 590900-112300 in SVAR2012 file used as VISS_EU_CD in sharkweb
        WATER_BODY_NAME            HID         VISS_EU_CD
661  Inre Singlefjorden  590900-112300  SENO590900-112300
HID: 590670-111380 in SVAR2012 file used as VISS_EU_CD in sharkweb
    WATER_BODY_NAME            HID         VISS_EU_CD
663   Singlefjorden  590670-111380  SENO590670-111380
HID: 585660-112590 in SVAR2012 file used as VISS_EU_CD in sharkweb
                        WATER_BODY_NAME            HID       VISS_EU_CD
466  N n Bohusläns skärgårds kustvatten  585660-112590  SE585750-105940

In [40]:
#to_save = sharkweb_matchfile.loc[sharkweb_matchfile.index[ix_list]].copy()
                       
sharkweb_matchfile.loc[sharkweb_matchfile.index[ix_list]].to_csv('D:/github/w_vattenstatus/dokumentation/VISS_EU_CD_missmatch_with_HID.txt', sep = '\t')

In [61]:
sharkweb_matchfile.to_csv(paths['resource_directory']+'/mappings/water_body_match_sharkweb_SVAR_2012_2_extended_changed_HID_to_EU_CD.txt', sep = '\t')

In [13]:
current_matchfile.head()


Out[13]:
OBJECTID * Shape * EU_CD MS_CD VISS_EU_CD VISS_MS_CD NAMN TYP_NFS06 CATEGORY WB ... DATUM ObjVer PopNamn GVatten UTL_EU_CD UTL_MS_CD Shape_Length Shape_Area TYPOMR_KOD TYPOMRNAMN
0 4 Polygon SE65E5B59B-D89F-4775-8AC7-D36AD908B1EE WA33184982 SE0101010301-C SE590670-111380 Singlefjorden 1 CW Y ... 2016-10-26 2016 Singlefjorden_(11.14,59.05) Y NO0101010301-C 0101010301-C 47717.47829 17438059.64 1n 1n - Västkustens inre kustvatten, norra
1 80 Polygon SE19D16CCE-760C-4A16-AF1C-105908B8D133 WA80555093 SE570900-121060 SE570900-121060 Balgöarkipelagen 1 CW Y ... 2006-12-31 2010 Balgöarkipelagen_(57.15,12.16) N 49499.97615 26418718.15 1s 1s - Västkustens inre kustvatten, södra
2 83 Polygon SE8F73DEDC-7746-4981-B416-138E4353CD37 WA85895430 SE571240-121000 SE571240-121000 Klosterfjorden 1 CW Y ... 2006-12-31 2010 Klosterfjorden_(57.21,12.2) N 19262.91922 8789632.95 1s 1s - Västkustens inre kustvatten, södra
3 86 Polygon SE16A044F8-3910-4A50-B53A-F73732C1D3D0 WA34827948 SE571720-120640 SE571720-120640 Vändelsöarkipelagen 1 CW Y ... 2006-12-31 2010 Vändelsöarkipelagen_(57.28,12.1) N 49283.29668 32727530.74 1s 1s - Västkustens inre kustvatten, södra
4 90 Polygon SE15DAF483-4DF0-4C96-998F-4F905C6F3CAC WA95954732 SE572072-115880 SE572072-115880 Varren 1 CW Y ... 2006-12-31 2010 Varren_(57.35,11.98) N 18627.73628 4863325.46 1s 1s - Västkustens inre kustvatten, södra

5 rows × 25 columns


In [12]:
current_matchfile.columns


Out[12]:
Index(['OBJECTID *', 'Shape *', 'EU_CD', 'MS_CD', 'VISS_EU_CD', 'VISS_MS_CD',
       'NAMN', 'TYP_NFS06', 'CATEGORY', 'WB', 'District', 'COMP_AUTH',
       'Country', 'Version', 'URL_VISS', 'DATUM', 'ObjVer', 'PopNamn',
       'GVatten', 'UTL_EU_CD', 'UTL_MS_CD', 'Shape_Length', 'Shape_Area',
       'TYPOMR_KOD', 'TYPOMRNAMN'],
      dtype='object')

In [36]:
sharkweb_matchfile.head()


Out[36]:
WATER_BODY_NAME Vattendist OMRTYP DATUM OLD_HID TYP_NFS06 HID DIST_CD VISS_EU_CD WATER_DISTRICT_CODE ... URL_VISS ObjVer PopNamn GVatten UTL_EU_CD UTL_MS_CD Shape_Length TYPOMR_KOD TYPOMRNAMN TYPE_AREA_SUFFIX
0 S. Seskaröfjärden sek namn 1 2 2006-12-31 NaN 23 654100-234100 SE1 SE654100-234100 SE1 ... http://www.viss.lansstyrelsen.se/waters.aspx?... 2010 S. Seskaröfjärden sek namn_(65.65,23.71) N 108577.45400 23 23 - Bottenviken, yttre kustvatten
1 Båtöfjärden 1 2 2006-12-31 NaN 22 654110-224850 SE1 SE654110-224850 SE1 ... http://www.viss.lansstyrelsen.se/waters.aspx?... 2010 Båtöfjärden_(65.69,22.83) N 60805.19025 22 22 - Bottenviken, inre kustvatten
2 Tistersöfjärden 1 2 2006-12-31 NaN 22 654200-222920 SE1 SE654200-222920 SE1 ... http://www.viss.lansstyrelsen.se/waters.aspx?... 2010 Tistersöfjärden_(65.7,22.49) N 22594.66892 22 22 - Bottenviken, inre kustvatten
3 Fjuksöfjärden 1 2 2006-12-31 NaN 22 653900-223280 SE1 SE653900-223280 SE1 ... http://www.viss.lansstyrelsen.se/waters.aspx?... 2010 Fjuksöfjärden_(65.64,22.54) N 44114.24039 22 22 - Bottenviken, inre kustvatten
4 Hamnöfjärden 1 2 2006-12-31 NaN 22 653740-222800 SE1 SE653740-222800 SE1 ... http://www.viss.lansstyrelsen.se/waters.aspx?... 2010 Hamnöfjärden_(65.62,22.47) N 24283.61669 22 22 - Bottenviken, inre kustvatten

5 rows × 37 columns


In [17]:



Out[17]:
[]

In [2]:
', '.join(['a','b','c'])


Out[2]:
'a, b, c'

In [5]:
sharkweb_matchfile.head()


Out[5]:
WATER_BODY_NAME Vattendist OMRTYP DATUM OLD_HID TYP_NFS06 HID DIST_CD VISS_EU_CD WATER_DISTRICT_CODE COUNTRY TYPE_AREA_CODE TYPE_AREA_NO Shape_Leng Shape_Area
0 S. Seskaröfjärden sek namn 1 2 2006-12-31 NaN 23 654100-234100 SE1 SE654100-234100 SE1 SE 23 23 108577.45400 2.019670e+08
1 Båtöfjärden 1 2 2006-12-31 NaN 22 654110-224850 SE1 SE654110-224850 SE1 SE 22 22 60805.19025 4.579085e+07
2 Tistersöfjärden 1 2 2006-12-31 NaN 22 654200-222920 SE1 SE654200-222920 SE1 SE 22 22 22594.66892 2.559865e+07
3 Fjuksöfjärden 1 2 2006-12-31 NaN 22 653900-223280 SE1 SE653900-223280 SE1 SE 22 22 44114.24039 4.612177e+07
4 Hamnöfjärden 1 2 2006-12-31 NaN 22 653740-222800 SE1 SE653740-222800 SE1 SE 22 22 24283.61669 1.647274e+07

In [41]:
new_df = pd.DataFrame()
new_df2 = new_df.append(pd.DataFrame(data = [[4,6]], index = [9], columns = ['A','B']))
new_df2 = new_df2.append(pd.DataFrame(data = [[8,10]], index = [5], columns = ['A','B']))

In [39]:
new_df2.head()


Out[39]:
A B
9 4 6
5 8 10

In [42]:
new_df = pd.DataFrame()
new_df3 = new_df.append(pd.DataFrame(data = [[4,6]], index = [9], columns = ['C','D']))
new_df3 = new_df3.append(pd.DataFrame(data = [[8,10]], index = [5], columns = ['C','D']))

In [59]:
new_df2.merge(new_df3, left_index = True, right_index = True).ix[[9,5]]


Out[59]:
A B C D
9 4 6 4 6
5 8 10 8 10

In [ ]:


In [53]:
sharkweb_matchfile.loc[sharkweb_matchfile.index[[4,8,3,21]]]


---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-53-fbb69a4e4177> in <module>()
----> 1 sharkweb_matchfile.loc[sharkweb_matchfile[[4,8,3,21]]]

C:\Anaconda3\envs\LenaEnv\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
   2051         if isinstance(key, (Series, np.ndarray, Index, list)):
   2052             # either boolean or fancy integer index
-> 2053             return self._getitem_array(key)
   2054         elif isinstance(key, DataFrame):
   2055             return self._getitem_frame(key)

C:\Anaconda3\envs\LenaEnv\lib\site-packages\pandas\core\frame.py in _getitem_array(self, key)
   2096         else:
   2097             indexer = self.ix._convert_to_indexer(key, axis=1)
-> 2098             return self.take(indexer, axis=1, convert=True)
   2099 
   2100     def _getitem_multilevel(self, key):

C:\Anaconda3\envs\LenaEnv\lib\site-packages\pandas\core\generic.py in take(self, indices, axis, convert, is_copy, **kwargs)
   1667         new_data = self._data.take(indices,
   1668                                    axis=self._get_block_manager_axis(axis),
-> 1669                                    convert=True, verify=True)
   1670         result = self._constructor(new_data).__finalize__(self)
   1671 

C:\Anaconda3\envs\LenaEnv\lib\site-packages\pandas\core\internals.py in take(self, indexer, axis, verify, convert)
   3953         n = self.shape[axis]
   3954         if convert:
-> 3955             indexer = maybe_convert_indices(indexer, n)
   3956 
   3957         if verify:

C:\Anaconda3\envs\LenaEnv\lib\site-packages\pandas\core\indexing.py in maybe_convert_indices(indices, n)
   1871     mask = (indices >= n) | (indices < 0)
   1872     if mask.any():
-> 1873         raise IndexError("indices are out-of-bounds")
   1874     return indices
   1875 

IndexError: indices are out-of-bounds

In [ ]:


In [ ]: