In [4]:
    
# Reload when code changed:
%load_ext autoreload
%autoreload 2
%pwd
import sys
path = "../"
sys.path.append(path)
#os.path.abspath("../")
print(os.path.abspath(path))
    
    
In [5]:
    
# Imports for the notebook. NOTE(review): every statement in this cell is
# repeated in the following import cell, which additionally imports logging,
# numpy and json.
import os 
import core
import importlib
# Re-import `core` so the module object reflects the current source on disk.
importlib.reload(core) 
import pandas as pd
from event_handler import EventHandler
# Show which file `core` was loaded from, then display the pandas version
# (last expression of the cell).
print(core.__file__)
pd.__version__
    
    
    Out[5]:
In [6]:
    
# Imports used by the rest of the notebook. Statement order is preserved
# because the reloads below are order-sensitive.
import os
import core
import logging
import importlib
# Re-import `core` so the module object reflects the current source on disk.
importlib.reload(core)
# Best effort: shut down logging and reload the logging module
# (presumably to drop handlers left over from a previous run -- confirm).
try:
    logging.shutdown()
    importlib.reload(logging)
except Exception as exc:
    # Was a bare `except: pass`, which also swallowed KeyboardInterrupt /
    # SystemExit and hid every failure. Still non-fatal, but now visible.
    print('Could not reset logging: {!r}'.format(exc))
import pandas as pd
import numpy as np
import json
from event_handler import EventHandler
# Show which file `core` was loaded from, then display the pandas version.
print(core.__file__)
pd.__version__
    
    
    Out[6]:
In [7]:
    
def print_workspaces():
    """Print the workspaces returned for the notebook-level ``user_id``.

    Reads the globals ``ekos`` and ``user_id``. Sends ``{'user_id': user_id}``
    to ``ekos.request_workspace_list`` and prints every entry of the
    response's ``'workspaces'`` list as ``key:<TAB>value`` lines (keys in
    sorted order), framed by ruler lines. Returns nothing.
    """
    heavy_rule = '='*100
    light_rule = '-'*100
    response = ekos.request_workspace_list({'user_id': user_id})

    print('')
    print(heavy_rule)
    print('Workspaces for user: {}'.format(user_id))
    print('')
    for workspace in response['workspaces']:
        print(light_rule)
        for field in sorted(workspace.keys()):
            print('{}:\t{}'.format(field, workspace[field]))
        print('')
    print(heavy_rule)
    
    
def print_json(data):
    """Print ``data`` serialised as JSON with 2-space indentation and sorted keys."""
    print(json.dumps(data, sort_keys=True, indent=2))
    
In [10]:
    
# Notebook configuration: directories are derived from root_directory.
root_directory = "../" #os.getcwd()
# os.path.join avoids the doubled separator ("..//workspaces") that plain
# string concatenation produced, because root_directory already ends in "/".
workspace_directory = os.path.join(root_directory, 'workspaces')
resource_directory = os.path.join(root_directory, 'resources')
alias = 'lena'
user_id = 'test_user' # maybe this should be an off-line user?
    
In [283]:
    
# Create the EventHandler (given root_directory) and bind it to `ekos`;
# the following cells make all their workspace requests through it.
ekos = EventHandler(root_directory)
    
    
    
In [10]:
    
# Remove all workspaces belonging to test user 
# ekos.remove_test_user_workspaces()
    
In [10]:
    
# Remove selected workspace: look up the unique id registered for the alias
# 'lena_newdata'; the delete call that would use it is currently commented out.
# NOTE(review): this cell is placed before the cell that copies
# 'default_workspace' to 'lena_newdata' -- confirm the lookup works on a fresh run.
workspace_uuid = ekos.get_unique_id_for_alias(user_id, 'lena_newdata')
#ekos.delete_workspace(user_id = user_id, unique_id = workspace_uuid, permanently=True)
    
    
    
    Out[10]:
In [7]:
    
#default_workspace = core.WorkSpace(alias = 'default_workspace', 
#                                   unique_id = 'default_workspace', 
#                                   parent_directory=workspace_directory,
#                                   resource_directory=resource_directory, 
#                                   user_id = 'default')
    
In [9]:
    
#default_workspace.step_0.print_all_paths()
    
In [8]:
    
#default_workspace.import_default_data()
    
In [257]:
    
# Copy the workspace with alias 'default_workspace' to the target alias
# 'lena_newdata' for user_id.
ekos.copy_workspace(user_id = user_id, source_alias = 'default_workspace', target_alias = 'lena_newdata')
    
    
    
    Out[257]:
In [284]:
    
# Ask the event handler to load the workspace registered under the alias
# 'lena_newdata' for user_id.
ekos.load_workspace(user_id, alias = 'lena_newdata')
    
    
    
    Out[284]:
In [285]:
    
workspace_uuid = ekos.get_unique_id_for_alias(user_id, 'lena_newdata')
    
In [286]:
    
workspace_uuid
    
    Out[286]:
In [287]:
    
ekos.workspaces
    
    Out[287]:
In [129]:
    
# Import the default data for the workspace.
# NOTE(review): the keyword is called workspace_alias but it receives
# workspace_uuid (the unique id, not the alias 'lena_newdata') -- confirm
# which identifier import_default_data expects.
ekos.import_default_data(user_id, workspace_alias = workspace_uuid)
    
    
    Out[129]:
In [288]:
    
ekos.load_data(user_id = user_id, unique_id = workspace_uuid)
    
    
In [17]:
    
#os.path.exists(w.paths['directory_path_input_data']+'/exports')
    
In [289]:
    
# Fetch the workspace object from the event handler (both the unique id and
# the alias are passed) and bind it to `w`, the handle used by the
# workspace cells that follow.
w = ekos.get_workspace(user_id, unique_id = workspace_uuid, alias = 'lena_newdata')
    
    
In [290]:
    
len(w.data_handler.get_all_column_data_df())
    
    Out[290]:
In [265]:
    
#w.data_handler.physical_chemical.filter_parameters
    
In [266]:
    
# Value stored under 'use_parameters' in the filter_parameters mapping of the
# physical_chemical data handler.
use_parameters = w.data_handler.physical_chemical.filter_parameters['use_parameters']
print(use_parameters)
    
    
In [267]:
    
# Value stored under 'compulsory_fields' in the same filter_parameters mapping.
compulsory_fields = w.data_handler.physical_chemical.filter_parameters['compulsory_fields']
# Concatenation of both collections; relies on `use_parameters` having been
# assigned by the previous cell.
display_columns = compulsory_fields + use_parameters
print(display_columns)
    
    
In [38]:
    
df = w.data_handler.get_all_column_data_df()
# Rows with no SEA_AREA_NAME, limited to the listed columns. Rows and columns
# are selected in one .loc call instead of the chained df[mask].loc[:, cols];
# the stray mid-cell `df.columns` expression was dropped because only the
# last expression of a cell is displayed.
df.loc[df.SEA_AREA_NAME.isnull(),
       ['DEPH', 'SALT_BTL',
        'SDATE', 'VISS_EU_CD', 'WATER_BODY_NAME', 'SEA_BASIN',
        'SECCHI', 'TEMP_BTL', 'VISS_EU_ID', 'WATER_DISTRICT', 'WATER_TYPE_AREA', 'MONTH']]
    
    Out[38]:
In [268]:
    
# Rows of the handler's all_data frame that have a WATER_BODY_NAME,
# limited to the listed columns.
df = w.data_handler.all_data
has_water_body = df.WATER_BODY_NAME.notnull()
df.loc[has_water_body,
       ['DEPH', 'SALT_BTL', 'SDATE', 'VISS_EU_CD', 'WATER_BODY_NAME', 'SEA_BASIN',
        'SECCHI', 'TEMP_BTL', 'VISS_EU_ID', 'WATER_DISTRICT', 'WATER_TYPE_AREA', 'MONTH']]
    
    Out[268]:
In [102]:
    
#lv_workspace.data_handler. __getattribute__('physical_chemical_model').column_data['../ekostat_calculator/workspaces/lv/input_data/raw_data/110001_PROFILER_alldepths_SE652400-223501_toolbox.dat']
    
In [22]:
    
#lv_workspace.print_all_paths()
    
In [269]:
    
# show available waterbodies
workspace_data = w.data_handler.get_all_column_data_df()
lst = workspace_data.WATER_BODY_NAME.unique()
print('WATER_BODY_NAME in workspace:\n{}'.format('\n'.join(lst)))
    
    
In [270]:
    
workspace_data.columns
    
    Out[270]:
In [134]:
    
f0 = w.get_data_filter_object(step=0) 
f0.include_list_filter
    
    Out[134]:
In [135]:
    
# Set the step-0 'include_list' filters for STATN and MYEAR on the workspace.
# Both lists are empty here.
# NOTE(review): presumably an empty include list means "no restriction" --
# confirm against set_data_filter.
# NOTE(review): the commented-out calls use filter_name='WATERBODY_NAME',
# while the data column read elsewhere in this notebook is WATER_BODY_NAME.
#include_WB = ['Norrbottens skärgårds kustvatten']#,
                #'N S M Bottenhavets kustvatten'] 
include_stations = [] 
#exclude_WB = ['Norrbottens skärgårds kustvatten'] 
include_years = [] 
#w.set_data_filter(step=0, filter_type='include_list', filter_name='WATERBODY_NAME', data=include_WB)
w.set_data_filter(step=0, filter_type='include_list', filter_name='STATN', data=include_stations) 
#w.set_data_filter(step=0, filter_type='exclude_list', filter_name='WATERBODY_NAME', data=exclude_WB) 
w.set_data_filter(step=0, filter_type='include_list', filter_name='MYEAR', data=include_years)
    
    
    Out[135]:
In [136]:
    
f0.exclude_list_filter
    
    Out[136]:
In [291]:
    
w.apply_data_filter(step = 0) # This sets the first level of data filter in the IndexHandler
    
    
In [ ]:
    
    
In [292]:
    
# Data remaining after the step-0 filter, and how many rows it has.
data_after_first_filter = w.get_filtered_data(step=0) # step=0 means first filter 
print('{} rows matching the filter criteria'.format(len(data_after_first_filter)))
    
    
    
In [241]:
    
# show available waterbodies
lst = data_after_first_filter.WATER_BODY_NAME.unique()
print('Waterbodies in workspace dataset:\n{}'.format('\n'.join(lst)))
    
    
In [139]:
    
#ekos.copy_subset(user_id, 
#                 workspace_alias='lena_newdata', 
#                 workspace_uuid=None, 
#                 subset_source_alias='default_subset', 
#                 subset_source_uuid='default_subset', 
#                 subset_target_alias='A')
    
In [293]:
    
# Resolve the unique id for subset alias 'A' in workspace alias 'lena_newdata'.
# NOTE(review): the copy_subset call with subset_target_alias='A' in the
# preceding cell is commented out, so subset 'A' presumably has to exist
# already -- confirm on a fresh run.
subset_uuid = ekos.get_unique_id_for_alias(user_id, workspace_alias = 'lena_newdata', subset_alias = 'A')
# Display the workspace's subset list (last expression of the cell).
w.get_subset_list()
    
    Out[293]:
In [294]:
    
w.get_step_object(step = 2, subset = subset_uuid).allowed_data_filter_steps
    
    Out[294]:
In [295]:
    
f1 = w.get_data_filter_object(subset = subset_uuid, step=1) 
f1.include_list_filter
    
    Out[295]:
In [296]:
    
w.apply_data_filter(subset = subset_uuid, step = 1)
    
    
In [297]:
    
df_step1 = w.get_filtered_data(step = 1, subset = subset_uuid)
df_step1.columns
    
    
    Out[297]:
In [298]:
    
w.get_step_object(step = 2, subset = subset_uuid).load_indicator_settings_filters()
    
In [299]:
    
w.get_step_object(step = 2, subset = subset_uuid).indicator_data_filter_settings
    
    Out[299]:
In [300]:
    
dinw_filter_set = w.get_step_object(step = 2, subset = subset_uuid).get_indicator_data_filter_settings('din_winter')
    
In [301]:
    
dinw_filter_set.settings.df
    
    Out[301]:
In [ ]:
    
    
In [302]:
    
wb_list = df_step1.VISS_EU_CD.unique()
len(wb_list)
    
    Out[302]:
In [303]:
    
# Apply the step-2 'din_winter' indicator data filter of the subset once per
# water body code in wb_list, printing a separator and the code before each call.
for wb in wb_list:
    print('*'*30, wb, sep='\n')
    w.apply_indicator_data_filter(
        step = 2,
        subset = subset_uuid,
        indicator = 'din_winter',
        water_body = wb,
    )
    
    
    
In [304]:
    
print(len(w.index_handler.booleans['step_0'][subset_uuid]['step_1']['step_2'].keys()))
w.index_handler.booleans['step_0'][subset_uuid]['step_1']['step_2'].keys()
    
    
    Out[304]:
In [305]:
    
w.index_handler.booleans['step_0'][subset_uuid]['step_1']['step_2']['SE654470-222700'].keys()
    
    Out[305]:
In [88]:
    
#w.apply_data_filter(subset = 'f270cc60-a18f-4153-93f9-4d30c0246162', step = 2)
    
In [306]:
    
w.index_handler.booleans.keys()
    
    Out[306]:
In [165]:
    
#temp_df = w.get_filtered_data(step = 2, subset = subset_uuid, indicator = 'din_winter', water_body = 'SE654470-222700')
#temp_df.loc[(temp_df['MONTH'].isin([11, 12, 1, 2])) & (temp_df['SEA_AREA_CODE'].isin(['654470-222700']))][['MONTH', 'SEA_AREA_NAME', 'SEA_AREA_CODE', 'WATER_TYPE_AREA']]
    
In [307]:
    
w.get_filtered_data(step = 2, subset = subset_uuid, indicator = 'din_winter', water_body = 'SE654470-222700').MONTH.unique()#[['MONTH', 'SEA_AREA_NAME', 'SEA_AREA_CODE']]
    
    
    Out[307]:
In [309]:
    
water_body = 'SE654470-222700'
din_columns = ['SDATE','MONTH', 'WATER_BODY_NAME', 'VISS_EU_CD', 'DIN','SALT_CTD', 'SALT_BTL']
# Step-2 din_winter data for the water body, keeping only rows in which all
# selected columns are non-null (thresh equals the column count, i.e. 7).
temp_df = w.get_filtered_data(step = 2,
                              subset = subset_uuid,
                              indicator = 'din_winter',
                              water_body = water_body)[din_columns].dropna(thresh=len(din_columns))
# Match on the full VISS_EU_CD code. The previous water_body.strip('SE')
# removed the 'SE' prefix (strip() treats its argument as a set of characters
# to trim from both ends), so the printed selection could not match the full
# codes that the displayed selection below -- and the water_body keyword --
# use, and it disagreed with that selection.
is_water_body = temp_df['VISS_EU_CD'].isin([water_body])
print('Waterbodys left: {}'.format(temp_df.loc[is_water_body]['WATER_BODY_NAME'].unique()))
temp_df.loc[is_water_body]
    
    
    
    Out[309]:
In [310]:
    
# Display the indicators reported as available for the subset at step 2.
# NOTE(review): the subset is given as the alias 'A' here, while the other
# `w` calls in this notebook pass subset_uuid -- confirm which one is expected.
w.get_available_indicators(subset= 'A', step=2)
    
    
    
    
    
    
    
    
    
    
    
    Out[310]:
In [130]:
    
# For every indicator in the workspace config, try to select its parameter
# columns from the step-1 data of the subset. The selection result is
# discarded; the loop only reports (prints) the KeyError raised when a
# selection fails.
for indicator, parameters in w.cfg['indicators'].items():
    print(indicator, parameters)
    try:
        # thresh = len(parameters): keep rows where all parameters are non-null.
        w.get_filtered_data(step = 1, subset = subset_uuid)[parameters].dropna(thresh = len(parameters))
    except KeyError as e:
        print(e)
        #if (w.get_filtered_data(step = 0)[parameters].dropna().count() >0).all():
        #    print(indicator)
    
    
    
In [114]:
    
# Override the parameter list of the din_winter indicator (this mutates the
# workspace config in place).
w.cfg['indicators']['din_winter']=['DIN', 'SALT_CTD']
# Build the frame once instead of twice: DIN and SALT_CTD from the step-1
# data, converted with pd.to_numeric, keeping rows where both are non-null.
# (The mid-cell `w.cfg['indicators']` expression was dropped; it displayed nothing.)
din_salt = w.get_filtered_data(step = 1)[['DIN', 'SALT_CTD']].apply(pd.to_numeric).dropna(thresh=2)
if len(din_salt) > 0:
    print(din_salt)
    
    
    
In [116]:
    
w.get_filtered_data(step = 1)[['DIN', 'SALT_CTD']].apply(pd.to_numeric).dropna(thresh=2)
    
    
    Out[116]:
In [239]:
    
# Must set filter to subset for len(boolean) to work when calling get_filtered_data, if no filters are set, boolean is None
exclude_year = [1985, 1986, 1987] 
lv_workspace.get_subset_object('A').set_data_filter(step=1, filter_type='exclude_list', filter_name='YEAR', data=exclude_year)
    
    Out[239]:
In [109]:
    
B = lv_workspace.get_subset_object('B')
print(B.get_step_list())
    
    
In [27]:
    
os.listdir(B.paths['subset_directory'])
    
    Out[27]:
In [96]:
    
#include_WB = ['Norrbottens skärgårds kustvatten']
#lv_workspace.set_data_filter(step=1, subset='B', filter_type='include_list', filter_name='SEA_AREA_NAME', data=include_WB)
lv_workspace.get_subset_object('B').get_step_object('step_1').show_settings()
lv_workspace.get_subset_object('B').get_data_filter_object(step=1).exclude_list_filter
    
    
    Out[96]:
In [40]:
    
B2 = B.get_step_object('step_2')
B2.load_indicator_settings_filters()
    
In [41]:
    
B2.print_all_paths()
#B2.add_files_from_workstep(default_workspace.get_step_0_object())
    
    
In [42]:
    
B2.paths['directory_paths']['indicator_settings']
#os.listdir(A1.paths['directory_paths']['indicator_settings'])
    
    Out[42]:
In [43]:
    
B2.allowed_indicator_settings_steps
    
    Out[43]:
In [233]:
    
(lv_workspace.get_filtered_data(step = 0)[['DIN', 'SALT_BTL']].dropna().count() > 0).all()
    
    
    Out[233]:
In [208]:
    
# Print each indicator whose parameters are all present as columns in the
# step-0 data and all have at least one row where every parameter is non-null.
# The data is fetched once; the filters are not modified inside the loop.
step_0_data = lv_workspace.get_filtered_data(step = 0)
for indicator, parameters in lv_workspace.cfg['indicators'].items():
    print(indicator, parameters)
    # The original test, columns.isin(parameters).all(), was inverted: it asked
    # whether every column of the frame is one of the parameters. The intended
    # check is that every parameter is a column.
    if set(parameters).issubset(step_0_data.columns):
        print(step_0_data.columns)
        if (step_0_data[parameters].dropna().count() >0).all():
            print(indicator)
    
    
In [189]:
    
lv_workspace.get_filtered_data(step = 2, subset = 'B') # the water_body and subset keywords do not seem to work; all waterbodies are returned
    
    
    Out[189]:
In [190]:
    
lv_workspace.cfg['indicators']
    
    Out[190]:
In [240]:
    
lv_workspace.get_available_indicators(subset = 'B', step = 'step_2')
    
    
    Out[240]:
In [241]:
    
lv_workspace.load_indicators(subset = 'B')
    
    
In [242]:
    
s = lv_workspace.get_subset_object('B').indicator_objects['dip_winter']
s.get_filtered_data(subset = 'B', step = 'step_2')
    
    Out[242]:
In [120]:
    
# Keep a handle on the 'ntot_winter' entry of indicator_ref_settings for
# step_2 of subset 'B'.
B2_NTOT_WINTER_SETTINGS = lv_workspace.get_subset_object('B').get_step_object('step_2').indicator_ref_settings['ntot_winter']
# NOTE: a mid-cell expression; its value is not displayed.
lv_workspace.get_subset_object('B').get_step_object('step_2').indicator_ref_settings['ntot_winter'].allowed_variables
# TODO: rewrite this as
# lv_workspace.get_indicator_ref_settings(step = , subset = , indicator = , waterbody/type)
# which gives the same result as:
#lv_workspace.get_subset_object('B').get_step_object('step_2').indicator_ref_settings['ntot_winter'].settings.ref_columns
# Last expression of the cell, so this value is what gets displayed.
lv_workspace.get_subset_object('B').get_step_object('step_2').indicator_ref_settings['ntot_winter'].settings.get_value('EK G/M', 22)
#print(B2_NTOT_WINTER_SETTINGS)
#B2_NTOT_WINTER_SETTINGS.get_value('2', 'DEPTH_INTERVAL')
    
    Out[120]:
In [124]:
    
av = lv_workspace.get_subset_object('B').get_step_object('step_2').indicator_data_filter_settings['ntot_winter'].allowed_variables
lv_workspace.get_subset_object('B').get_step_object('step_2').indicator_data_filter_settings['ntot_winter'].settings.df[av]
    
    Out[124]:
In [119]:
    
lv_workspace.get_subset_object('B').get_step_object('step_2').indicator_data_filter_settings['ntot_winter'].settings.df
    
    Out[119]:
In [67]:
    
B2_NTOT_WINTER_SETTINGS.settings.mapping_water_body['N m Bottenvikens kustvatten']
    
    Out[67]:
In [35]:
    
f1_A = lv_workspace.get_data_filter_object(step=1, subset='A') 
f1_A.include_list_filter
    
    Out[35]:
In [136]:
    
lv_workspace.get_data_filter_info(step=1, subset='A')
    
    Out[136]:
In [36]:
    
f1_A.exclude_list_filter
    
    Out[36]:
In [21]:
    
f0.include_list_filter
    
    Out[21]:
In [22]:
    
lv_workspace.apply_subset_filter(subset='A') # Not handled properly by the IndexHandler
    
    Out[22]:
In [23]:
    
# Data remaining after the step-1 filter of subset 'A'.
# The keyword was `level=1`; every other get_filtered_data call in this
# notebook (including those on lv_workspace) uses `step=`, so the call is
# aligned with them. NOTE(review): confirm against the current
# get_filtered_data signature.
data_after_subset_filter = lv_workspace.get_filtered_data(step=1, subset='A')
# Message typo fixed ("mathing" -> "matching") to agree with the step-0 cell.
print('{} rows matching the filter criteria'.format(len(data_after_subset_filter)))
data_after_subset_filter.head()
    
    
    Out[23]:
In [24]:
    
# show available waterbodies
lst = data_after_subset_filter.SEA_AREA_NAME.unique()
print('Waterbodies in subset:\n{}'.format('\n'.join(lst)))
    
    
In [24]:
    
import numpy as np
np.where(lv_workspace.index_handler.subset_filter)
    
    Out[24]:
In [25]:
    
f = lv_workspace.get_data_filter_object(step=1, subset='A')
    
In [26]:
    
f.all_filters
    
    Out[26]:
In [27]:
    
f.exclude_list_filter
    
    Out[27]:
In [28]:
    
f.include_list_filter
    
    Out[28]:
In [29]:
    
s = lv_workspace.get_step_1_object('A')
    
In [30]:
    
s.data_filter.all_filters
    
    Out[30]:
In [31]:
    
f0 = lv_workspace.get_data_filter_object(step=0)
    
In [32]:
    
f0.exclude_list_filter
    
    Out[32]:
In [33]:
    
f0.include_list_filter
    
    Out[33]:
In [ ]:
    
    
In [ ]:
    
lv_workspace.initiate_quality_factors()
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]: