In [1]:
import pandas as pd
import numpy as np
import pickle
import os
# --- Notebook configuration ---------------------------------------------
# NOTE(review): these are absolute paths to one local checkout; adjust them
# before running the notebook on another machine.
root_directory = 'D:/github/w_vattenstatus/ekostat_calculator'  # earlier alternatives: "../", os.getcwd()
workspace_directory = root_directory + '/workspaces'
resource_directory = root_directory + '/resources'
user_id = 'test_user'  # maybe this should be an off-line user?
workspace_alias = 'lena_indicator'  # alternative workspace: 'kustzonsmodellen_3daydata'

# ## Initiate EventHandler
print(root_directory)

# Every location used further down, collected in one dictionary. The
# workspace and resource entries reuse the variables defined above.
paths = {
    'user_id': user_id,
    'workspace_directory': workspace_directory,
    'resource_directory': resource_directory,
    'log_directory': 'D:/github' + '/log',
    'test_data_directory': 'D:/github' + '/test_data',
    'cache_directory': 'D:/github/w_vattenstatus/cache',
}


D:/github/w_vattenstatus/ekostat_calculator

In [3]:
# Load the SHARKweb water body match file (tab separated).
# The file is still opened in text mode with the platform default encoding,
# but inside a `with` block so the handle is closed once parsing is done
# (previously the handle returned by open() was never closed).
# NOTE(review): the file name reads 'sharweb', not 'sharkweb'; it is left
# unchanged here - confirm it matches the file on disk.
with open(paths['resource_directory']+'/mappings/water_body_match_sharweb_SVAR_2012_2.txt') as sharkweb_handle:
    sharkweb_matchfile = pd.read_csv(sharkweb_handle, sep='\t')

In [4]:
# Load the water body match file currently used by the calculator (tab
# separated). The file is still opened in text mode with the platform default
# encoding, but inside a `with` block so the handle is closed once parsing is
# done (previously the handle returned by open() was never closed).
with open(paths['resource_directory']+'/mappings/water_body_match.txt') as current_handle:
    current_matchfile = pd.read_csv(current_handle, sep='\t')

In [20]:
# For every type-area code found in the SHARKweb file, print what the current
# match file holds for that code: the code itself, its suffix value(s) and its
# name(s). An empty array means the code is missing from the current file.
for type_code in sharkweb_matchfile['TYPE_AREA_CODE'].unique():
    same_type_rows = current_matchfile[current_matchfile['TYPE_AREA_CODE'] == type_code]
    type_code_new, type_suffix_new, type_name_new = (
        same_type_rows[column].unique()
        for column in ('TYPE_AREA_CODE', 'TYPE_AREA_SUFFIX', 'TYPE_AREA_NAME')
    )
    print(type_code, type_code_new, type_suffix_new, type_name_new)


23 ['23'] [nan] ['Bottenviken, yttre kustvatten']
22 ['22'] [nan] ['Bottenviken, inre kustvatten']
12n ['12n'] [nan] ['Östergötlands samt Stockholms skärgård, mellankustvatten, norra']
17 ['17'] [nan] ['Södra Bottenhavet, yttre kustvatten']
16 ['16'] [nan] ['Södra Bottenhavet, inre kustvatten']
15 ['15'] [nan] ['Stockholms skärgård, yttre kustvatten']
24 ['24'] [nan] ['Stockholms inre skärgård och Hallsfjärden']
12s ['12s'] ['n' nan] ['Östergötlands samt Stockholms skärgård, mellankustvatten, södra']
3 ['3'] [nan] ['Västkustens yttre kustvatten, Skagerrak']
14 ['14'] [nan] ['Östergötlands yttre kustvatten']
1n ['1n'] ['n'] ['Västkustens inre kustvatten, norra']
10 ['10'] [nan] ['Östra Ölands, sydöstra Gotlands kustvatten samt Gotska sandön']
2 ['2'] [nan] ['Västkustens fjordar']
4 ['4'] [nan] ['Västkustens yttre kustvatten, Kattegatt']
1s ['1s'] ['s'] ['Västkustens inre kustvatten, södra']
8 ['8'] [nan] ['Blekinge skärgård och Kalmarsund, inre kustvatten']
0 [] [] []
18 ['18'] [nan] ['Norra Bottenhavet, Höga kusten, inre kustvatten']
19 ['19'] [nan] ['Norra Bottenhavet, Höga kusten, yttre kustvatten']
13 ['13'] [nan] ['Östergötlands inre skärgård']
5 ['5'] [nan] ['Södra Halland och norra Öresunds kustvatten'
 'Södra Hallands och norra Öresunds kustvatten']
6 ['6'] [nan] ['Öresunds kustvatten']
7 ['7'] [nan] ['Skånes kustvatten']
11 ['11'] [nan] ['Gotlands västra och norra kustvatten']
9 ['9'] [nan] ['Blekinge skärgård och Kalmarsund, yttre kustvatten']
25 ['25'] [nan] ['Göta Älvs- och Nordre Älvs estuarie']
20 ['20'] [nan] ['Norra Kvarkens inre kustvatten']
21 ['21'] [nan] ['Norra Kvarkens yttre kustvatten']

In [89]:
# Extend the SHARKweb match file, row by row, with the columns that exist only
# in the current match file.
#
# Matching strategy for each SHARKweb row:
#   1. rows of the current match file with the same VISS_EU_CD;
#   2. if there are none, the code is printed and rows with the same water
#      body name are tried instead (code and name are printed when found).
# When several rows match, the values of the first one are used. Rows with no
# match at all get no values assigned.
#
# The former `isinstance(..., pd.Series)` guard was removed: selecting one
# column with a boolean mask through .loc always returns a Series, so that
# branch could never run.
#
# NOTE(review): 'WATER_BODY_NAME' and 'NAMN' do not appear in every column
# listing displayed elsewhere in this notebook - confirm the column names
# against the files that are actually loaded.
add_cols = [c for c in current_matchfile.columns if c not in sharkweb_matchfile.columns]
for ix in sharkweb_matchfile.index:
    code = sharkweb_matchfile.loc[ix, 'VISS_EU_CD']
    name = sharkweb_matchfile.loc[ix, 'WATER_BODY_NAME']

    # Look the matching rows up once per water body instead of once per column.
    matched_rows = current_matchfile.loc[current_matchfile['VISS_EU_CD'] == code]
    if matched_rows.empty:
        print(code)
        # Fall back to matching on the water body name.
        matched_rows = current_matchfile.loc[current_matchfile['NAMN'] == name]
        if matched_rows.empty:
            continue
        print(code, name)

    for ac in add_cols:
        sharkweb_matchfile.loc[ix, ac] = matched_rows[ac].values[0]


SE594211-193824
SE591454-192215
SE582950-192156
SE574755-181120
SE573224-190746
SE582008-105731
SE570714-115613
SE603634-183531
SE583649-180707
SE580109-171030
SE595913-190752
SE632213-201821
SE634223-210932
SE555420-145140
SE555851-160709
SE615085-130626
SE551617-133102
SE650320-220650
SE654640-233190
SE654640-233190 Bodöfjärden
SE653415-221340
SE652020-211930
SE654416-230000
SE654416-230000 Storöfjärden
SE658436-162998
SE620333-175418
SENO590020-114520
SENO590020-114520 Inre Idefjorden
SENO590860-113810
SENO590860-113810 Idefjorden
SENO590900-112300
SENO590900-112300 Inre Singlefjorden
SENO591150-113700
SENO590670-111380
SENO590670-111380 Singlefjorden
SE574050-114780
SE574050-114780 Rivö fjord
SE570714-115613

In [63]:
# True when no row of the current match file carries the VISS_EU_CD held in `code`.
not (current_matchfile['VISS_EU_CD'] == code).any()


Out[63]:
True

In [55]:
# Show the VISS_EU_CD currently held in `code`.
# NOTE(review): `code` is assigned inside the column-extension loop, so the
# value shown depends on which cell was executed last.
code


Out[55]:
'SE654100-234100'

In [26]:
# First value of column `ac` among the rows of the current match file whose
# VISS_EU_CD equals `code` (both names are leftovers from the extension loop).
code_mask = current_matchfile['VISS_EU_CD'] == code
current_matchfile.loc[code_mask, ac].values[0]


Out[26]:
'Bottenviken, yttre kustvatten'

In [16]:
# List the column names of the current match file.
current_matchfile.columns


Out[16]:
Index(['WATERBODY_NAME', 'TYPE_AREA_CODE', 'TYPE_AREA_NAME', 'TYPE_AREA_NO',
       'TYPE_AREA_SUFFIX', 'COUNTRY', 'URL_VISS', 'EU_CD', 'MS_CD',
       'VISS_EU_CD', 'VISS_MS_CD', 'WATER_DISTRICT_CODE',
       'WATER_DISTRICT_NAME'],
      dtype='object')

In [83]:
# List the column names of the SHARKweb match file.
sharkweb_matchfile.columns


Out[83]:
Index(['WATERBODY_NAME', 'Vattendist', 'OMRTYP', 'DATUM', 'OLD_HID',
       'TYP_NFS06', 'HID', 'DIST_CD', 'VISS_EU_CD', 'WATER_DISTRICT_CODE',
       'COUNTRY', 'TYPE_AREA_CODE', 'TYPE_AREA_NO', 'Shape_Leng',
       'Shape_Area'],
      dtype='object')

In [90]:
# Save the extended SHARKweb match file as a tab-separated text file.
# NOTE(review): with the pandas default (index=True) the row index is written
# as an unnamed first column - confirm that this is intended.
extended_matchfile_path = (
    paths['resource_directory']
    + '/mappings/water_body_match_sharkweb_SVAR_2012_2_extended_new.txt'
)
sharkweb_matchfile.to_csv(extended_matchfile_path, sep='\t')

In [17]:



Out[17]:
[]

In [2]:
# Scratch check of str.join: the separator is placed between the items only.
', '.join('abc')


Out[2]:
'a, b, c'

In [5]:
# Preview the first rows of the SHARKweb match file.
sharkweb_matchfile.head()


Out[5]:
WATER_BODY_NAME Vattendist OMRTYP DATUM OLD_HID TYP_NFS06 HID DIST_CD VISS_EU_CD WATER_DISTRICT_CODE COUNTRY TYPE_AREA_CODE TYPE_AREA_NO Shape_Leng Shape_Area
0 S. Seskaröfjärden sek namn 1 2 2006-12-31 NaN 23 654100-234100 SE1 SE654100-234100 SE1 SE 23 23 108577.45400 2.019670e+08
1 Båtöfjärden 1 2 2006-12-31 NaN 22 654110-224850 SE1 SE654110-224850 SE1 SE 22 22 60805.19025 4.579085e+07
2 Tistersöfjärden 1 2 2006-12-31 NaN 22 654200-222920 SE1 SE654200-222920 SE1 SE 22 22 22594.66892 2.559865e+07
3 Fjuksöfjärden 1 2 2006-12-31 NaN 22 653900-223280 SE1 SE653900-223280 SE1 SE 22 22 44114.24039 4.612177e+07
4 Hamnöfjärden 1 2 2006-12-31 NaN 22 653740-222800 SE1 SE653740-222800 SE1 SE 22 22 24283.61669 1.647274e+07

In [41]:
# Demo frame with columns A and B and the unsorted integer labels 9 and 5,
# assembled from two one-row frames.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; pd.concat is available in old and new versions alike.
new_df = pd.DataFrame()  # empty frame, kept because the name was defined here before
new_df2 = pd.concat([
    pd.DataFrame(data=[[4, 6]], index=[9], columns=['A', 'B']),
    pd.DataFrame(data=[[8, 10]], index=[5], columns=['A', 'B']),
])

In [39]:
# Preview the first demo frame.
new_df2.head()


Out[39]:
A B
9 4 6
5 8 10

In [42]:
# Second demo frame: same labels (9, 5) and values as the first one, but with
# columns C and D, assembled from two one-row frames.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; pd.concat is available in old and new versions alike.
new_df = pd.DataFrame()  # empty frame, kept because the name was defined here before
new_df3 = pd.concat([
    pd.DataFrame(data=[[4, 6]], index=[9], columns=['C', 'D']),
    pd.DataFrame(data=[[8, 10]], index=[5], columns=['C', 'D']),
])

In [59]:
# Join the two demo frames on their index, then pick the rows labelled 9 and 5
# in that order.
# `.loc` replaces `.ix`, which was deprecated in pandas 0.20 and removed in
# pandas 1.0. The index holds integers, so `.ix` already selected by label
# here and `.loc` returns the same rows.
new_df2.merge(new_df3, left_index=True, right_index=True).loc[[9, 5]]


Out[59]:
A B C D
9 4 6 4 6
5 8 10 8 10

In [ ]:


In [53]:
# Pick the rows at positions 4, 8, 3 and 21 of the SHARKweb match file, in
# that order.
# NOTE(review): the IndexError traceback stored as this cell's output shows
# different code (a column lookup with [[4,8,3,21]]); it appears to come from
# an earlier version of the cell - re-run to refresh the output.
row_positions = [4, 8, 3, 21]
sharkweb_matchfile.iloc[row_positions]


---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-53-fbb69a4e4177> in <module>()
----> 1 sharkweb_matchfile.loc[sharkweb_matchfile[[4,8,3,21]]]

C:\Anaconda3\envs\LenaEnv\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
   2051         if isinstance(key, (Series, np.ndarray, Index, list)):
   2052             # either boolean or fancy integer index
-> 2053             return self._getitem_array(key)
   2054         elif isinstance(key, DataFrame):
   2055             return self._getitem_frame(key)

C:\Anaconda3\envs\LenaEnv\lib\site-packages\pandas\core\frame.py in _getitem_array(self, key)
   2096         else:
   2097             indexer = self.ix._convert_to_indexer(key, axis=1)
-> 2098             return self.take(indexer, axis=1, convert=True)
   2099 
   2100     def _getitem_multilevel(self, key):

C:\Anaconda3\envs\LenaEnv\lib\site-packages\pandas\core\generic.py in take(self, indices, axis, convert, is_copy, **kwargs)
   1667         new_data = self._data.take(indices,
   1668                                    axis=self._get_block_manager_axis(axis),
-> 1669                                    convert=True, verify=True)
   1670         result = self._constructor(new_data).__finalize__(self)
   1671 

C:\Anaconda3\envs\LenaEnv\lib\site-packages\pandas\core\internals.py in take(self, indexer, axis, verify, convert)
   3953         n = self.shape[axis]
   3954         if convert:
-> 3955             indexer = maybe_convert_indices(indexer, n)
   3956 
   3957         if verify:

C:\Anaconda3\envs\LenaEnv\lib\site-packages\pandas\core\indexing.py in maybe_convert_indices(indices, n)
   1871     mask = (indices >= n) | (indices < 0)
   1872     if mask.any():
-> 1873         raise IndexError("indices are out-of-bounds")
   1874     return indices
   1875 

IndexError: indices are out-of-bounds

In [ ]:


In [ ]: