In [ ]:
# Reload when code changed:
%load_ext autoreload
%autoreload 2
%pwd
import os 
import sys
path = "../"
sys.path.append(path)
#os.path.abspath("../")
print(os.path.abspath(path))

In [ ]:
import pandas as pd
import numpy as np
import json
import timeit
import time
import core
import importlib
# Re-import the project package so that edits made since the kernel started
# are picked up.
importlib.reload(core)
import logging

# Best-effort reset of the logging module (presumably to drop state left by an
# earlier run of this cell - confirm). A failure must not stop the notebook,
# but it is reported instead of being silently swallowed.
try:
    logging.shutdown()
    importlib.reload(logging)
except Exception as error:
    print('Could not reset logging: {}'.format(error))
from event_handler import EventHandler

# Show which copy of `core` was imported and the pandas version in use.
print(core.__file__)
pd.__version__

Load directories


In [ ]:
# Root of the ekostat_calculator checkout. The default is a fixed local path;
# set the EKOSTAT_ROOT_DIRECTORY environment variable to run the notebook from
# another location without editing this cell.
root_directory = os.environ.get('EKOSTAT_ROOT_DIRECTORY', 'D:/github/ekostat_calculator')
workspace_directory = root_directory + '/workspaces'
resource_directory = root_directory + '/resources'
user_id = 'test_user'  # maybe this should be an offline user?
workspace_alias = 'lena_indicator'

Initiate EventHandler


In [ ]:
# Keyword arguments for EventHandler. The workspace and resource directories
# reuse the variables derived from root_directory so that each path is built
# in exactly one place.
print(root_directory)
paths = {'user_id': user_id,
         'workspace_directory': workspace_directory,
         'resource_directory': resource_directory,
         'log_directory': 'D:/github' + '/log',
         'test_data_directory': 'D:/github' + '/test_data'}

In [ ]:
# Build the EventHandler from `paths` and report the wall-clock time it took.
t0 = time.time()
ekos = EventHandler(**paths)
# Example request round-trip, kept for reference:
#   request = ekos.test_requests['request_workspace_list']
#   response = ekos.request_workspace_list(request)
#   ekos.write_test_response('request_workspace_list', response)
separator = '-' * 50
print(separator)
elapsed = time.time() - t0
print('Time for request: {}'.format(elapsed))
# Earlier form of the call: ekos = EventHandler(root_directory)

Load existing workspace


In [ ]:
# One-off step, disabled: copy the default workspace to a new alias.
#ekos.copy_workspace(source_uuid='default_workspace', target_alias='lena_1')

In [ ]:
# Print the workspaces via the EventHandler.
ekos.print_workspaces()

In [ ]:
# Resolve the configured workspace alias to its unique id. `workspace_alias`
# is used rather than a literal so the alias is defined in one place only.
workspace_uuid = ekos.get_unique_id_for_alias(workspace_alias=workspace_alias)
print(workspace_uuid)

In [ ]:
# Round-trip: look the alias up again from the unique id (overwrites `workspace_alias`).
workspace_alias = ekos.get_alias_for_unique_id(workspace_unique_id = workspace_uuid)

In [ ]:
# Load the workspace identified by `workspace_uuid`.
ekos.load_workspace(unique_id = workspace_uuid)
# I often get an error message here:
# AttributeError: module 'core' has no attribute 'ParameterMapping'

In [ ]:
# Disabled: import the default data into the workspace.
#ekos.import_default_data(workspace_alias = workspace_alias)

Load all data in workspace


In [ ]:
# Disabled: call delete_alldata_export() on the workspace.
#ekos.get_workspace(unique_id = workspace_uuid, alias = workspace_alias).delete_alldata_export()

In [ ]:
# Load the data of the workspace (uncomment the magic below to time the call).
#%%timeit
ekos.load_data(unique_id = workspace_uuid)

In [ ]:
# Keep a handle to the workspace object; most cells below go through `w`.
w = ekos.get_workspace(unique_id = workspace_uuid)
# Length of the frame returned by get_all_column_data_df().
len(w.data_handler.get_all_column_data_df())

Step 0


In [ ]:
# Preview the first rows of the data handler's all_data.
w.data_handler.all_data.head()

In [ ]:
# Unique id of the subset with the alias 'period_2007-2012_refvalues_2017'.
subset_uuid = ekos.get_unique_id_for_alias(workspace_alias = workspace_alias, subset_alias = 'period_2007-2012_refvalues_2017')

In [ ]:
# Show the data filter info for step 1 of that subset.
w.get_data_filter_info(step = 1, subset = subset_uuid)

Apply first data filter


In [ ]:
# Apply the step 0 data filter on the workspace (no subset is passed at this step).
w.apply_data_filter(step = 0) # This sets the first level of data filter in the IndexHandler

Step 1: Set subset filter


In [ ]:
# One-off step, disabled: create the subset as a copy of the default subset.
#w.copy_subset(source_uuid='default_subset', target_alias='period_2007-2012_refvalues_2017')

In [ ]:
# Resolve the subset alias to its unique id and list the subsets of the workspace.
subset_uuid = ekos.get_unique_id_for_alias(workspace_alias = workspace_alias, subset_alias = 'period_2007-2012_refvalues_2017')
print(w.get_subset_list())

# Step 1 data filter object of the subset; print its include-list filter.
f1 = w.get_data_filter_object(subset = subset_uuid, step=1) 
print(f1.include_list_filter)

In [ ]:
# Display the subset id.
subset_uuid

In [ ]:
# Set the step 1 include-list filter 'MYEAR' of the subset to the years
# 2007-2012 (passed as strings), then print the include-list filter again.
year_strings = [str(year) for year in range(2007, 2013)]
w.set_data_filter(
    subset=subset_uuid,
    step=1,
    filter_type='include_list',
    filter_name='MYEAR',
    data=year_strings,
)

f1 = w.get_data_filter_object(subset=subset_uuid, step=1)
print(f1.include_list_filter)

In [ ]:
# Apply the step 1 data filter for the subset.
w.apply_data_filter(subset = subset_uuid, step = 1)

# Filtered data after step 1; reused further down (water body and type area lists).
df_step1 = w.get_filtered_data(step = 1, subset = subset_uuid)
# Disabled: column selection that keeps only rows with a BQIm value.
#df_step1[['SDATE', 'YEAR', 'MONTH', 'POSITION', 'VISS_EU_CD', 'WATER_TYPE_AREA', 'DEPH', 'MNDEP', 'MXDEP','BQIm']].dropna(subset = ['BQIm'])

In [ ]:
# Columns available in the step 1 data.
df_step1.columns

Step 2

Load indicator settings filter


In [ ]:
# Load the indicator settings filters on the step 2 object of the subset.
w.get_step_object(step = 2, subset = subset_uuid).load_indicator_settings_filters()

In [ ]:
# WATERBODY_NAME stored in the water body mapping for one water body id.
w.mapping_objects['water_body']['SE570900-121060']['WATERBODY_NAME']

In [ ]:
# Data filter settings for the 'oxygen' indicator.
w.get_step_object(step = 2, subset = subset_uuid).get_indicator_data_filter_settings('oxygen')

In [ ]:
# Mapping entry under the key '23' (presumably a type area number - confirm).
w.mapping_objects['water_body']['23']

In [ ]:
# Check which type get_value returns for one variable of the 'bqi' reference settings.
type(w.get_step_object(step = 2, subset = subset_uuid).get_indicator_ref_settings('bqi').get_value(variable = 'LEVEL_DEPH_INTERVAL', water_body = 'SE575340-113000'))
# Disabled alternatives: the same kind of lookup for 'oxygen' and 'biov'.
#w.get_step_object(step = 2, subset = subset_uuid).get_indicator_ref_settings('oxygen').get_value(variable = 'HG_VALUE_LIMIT', water_body = 'SE625000-180075')
#w.get_step_object(step = 2, subset = subset_uuid).get_indicator_ref_settings('biov').get_ref_value(water_body = 'SE581700-113000')

In [ ]:
# 'bqi' reference settings for one water body; `ix` holds the index labels of
# the rows whose LEVEL_DEPH_INTERVAL equals '5-20'.
# NOTE(review): this reuses the name `df`, which other cells assign to data selections.
df = w.get_step_object(step = 2, subset = subset_uuid).get_indicator_ref_settings('bqi').get_value(water_body = 'SE575340-113000')
ix = df.loc[df['LEVEL_DEPH_INTERVAL'] == '5-20'].index
ix[0]

In [ ]:
# The same lookup again, displayed in full.
w.get_step_object(step = 2, subset = subset_uuid).get_indicator_ref_settings('bqi').get_value(water_body = 'SE575340-113000')

In [ ]:
# The same lookup, restricted to the first matching row label in `ix`.
w.get_step_object(step = 2, subset = subset_uuid).get_indicator_ref_settings('bqi').get_value(water_body = 'SE575340-113000').loc[ix[0]]

In [ ]:
# Reference settings held by the step 2 object.
w.get_step_object(step = 2, subset = subset_uuid).indicator_ref_settings

In [ ]:
# Entry for 'oxygen' in the step object's private _indicator_setting_files.
w.get_step_object(step = 2, subset = subset_uuid)._indicator_setting_files['oxygen']

In [ ]:
# Indicators available for the subset at step 2.
indicator_list = w.get_available_indicators(subset= subset_uuid, step=2)

In [ ]:
indicator_list

In [ ]:
# Unique water body ids present in the step 1 data.
wb_list = df_step1['VISS_EU_CD'].unique()
print('number of waterbodies in step 1: {}'.format(len(wb_list)))

# Type area label: the text before the first '-', stripped of surrounding
# whitespace and of leading zeros.
type_areas = df_step1['WATER_TYPE_AREA'].unique()
typeA_list = [area.split('-')[0].strip().lstrip('0') for area in type_areas]
print('number of type areas in step 1: {}'.format(len(typeA_list)))

In [ ]:
# Report every water body id from the step 1 data that cannot be looked up in
# the water body mapping object.
for wb in wb_list:
    try:
        w.mapping_objects['water_body'][wb]
    except (KeyError, AttributeError):
        # A missing key in a mapping normally raises KeyError; AttributeError
        # is still caught in case the mapping object signals a miss that way.
        print('no match for {}'.format(wb))
        print('*************************************')

In [ ]:
# Water body ids used for spot checks in the cells below.
test_wb = [
    'SE584340-174401',
    'SE581700-113000',
    'SE654470-222700',
    'SE570900-121060',
    'SE633000-195000',
    'SE625000-180075',
    'SE601440-184000',
    'SE612791-171130',
    'SE572072-115880',
    'SE582147-111771',
    'SE572227-115662',
    'SE580688-114860',
    'SE575500-113750',
    'SE591400-183200',
    'SE575370-164220',
    'SE573940-163560',
    'SE565400-163600',
    'SE570080-163430',
    'SE565800-163000',
    'SE570340-163710',
    'SE570500-163750',
]

Apply indicator filter


In [ ]:
# Disabled check: pair the cleaned type area labels with the raw values.
#list(zip(typeA_list, df_step1.WATER_TYPE_AREA.unique()))

# Apply the step 2 indicator data filter for each listed indicator.
# Other indicator names used in this notebook: 'bqi', 'biov', 'chl', 'secchi', 'oxygen'.
indicator_list = ['din_winter', 'ntot_summer', 'ntot_winter', 'dip_winter', 'ptot_summer', 'ptot_winter']
for indicator in indicator_list:
    # The call can optionally be limited with water_body_list = test_wb.
    w.apply_indicator_data_filter(step=2,
                                  subset=subset_uuid,
                                  indicator=indicator)
    # Report progress per indicator. Printing the water body mapping for `wb`
    # here would depend on a variable left over from an unrelated loop and
    # would show the same water body on every iteration.
    print('indicator data filter applied: {}'.format(indicator))
    print('*************************************')

In [ ]:
# Named water body ids used as examples in the cells below.
byfjorden = 'SE582000-115270'
gullmarn = 'SE581700-113000'
björnöfjärden = 'SE591400-183200'
print(test_wb[1])

# Name and maximum depth (from the hypsographs mapping) for one water body.
example_wb = 'SE625000-180075'
water_body_name = w.mapping_objects['water_body'][example_wb]['WATERBODY_NAME']
print(water_body_name)
w.mapping_objects['hypsographs'].get_max_depth_of_water_body(example_wb)

In [ ]:
# Step 2 'din_winter' data for the water body in `gullmarn`, limited to the
# listed columns; rows without a DIN value are dropped.
din_columns = ['DIN', 'NTOT', 'SALT', 'DEPH', 'VISS_EU_CD', 'WATER_TYPE_AREA', 'SDATE', 'YEAR', 'POSITION']
din_data = w.get_filtered_data(subset=subset_uuid, step=2, water_body=gullmarn, indicator='din_winter')
df = din_data[din_columns].dropna(subset=['DIN'], how='all')
df.head()

In [ ]:
# Aggregate `df` to one row per (SDATE, YEAR, POSITION).
agg_dict1 = {
    'DIN': 'mean',
    'SALT': 'mean',
    'DEPH': 'count',
    'VISS_EU_CD': 'max',
    'WATER_TYPE_AREA': 'max',
}
agg_dict2 = {'NTOT': 'mean'}
aggregations = dict(agg_dict1)
aggregations.update(agg_dict2)

grouped = df.groupby(['SDATE', 'YEAR', 'POSITION'])
by_date = grouped.agg(aggregations).reset_index()
# DEPH was aggregated with 'count', so the column is renamed accordingly.
by_date = by_date.rename(columns={'DEPH': 'DEPH_count'})
# Flag column, initialised to False for every row.
by_date['highest_DIN'] = False

In [ ]:
# Preview the first rows of the per-date aggregation.
by_date.head()

In [ ]:
# Flag, for every (YEAR, POSITION) group, the row that holds the highest DIN
# value. idxmax is evaluated once per group by the groupby itself and the
# resulting row labels are set in a single assignment.
highest_din_rows = by_date.groupby(['YEAR', 'POSITION'])['DIN'].idxmax()
by_date.loc[highest_din_rows, 'highest_DIN'] = True
by_date.head()

In [ ]:
# Step 2 'oxygen' data for the water body in `björnöfjärden`, limited to the
# listed columns; rows without a DOXY value are dropped. Overwrites `df`.
df = w.get_filtered_data(subset = subset_uuid, step= 2, water_body = björnöfjärden, indicator = 'oxygen')[['DOXY', 'DOXY_CTD','DOXY_BTL','DEPH', 'SDATE','YEAR','POSITION']].dropna(subset = ['DOXY'], how = 'all')
# Disabled: export the selection to a tab-separated text file.
#df.to_csv('D:/Temp/Björnöfjärden.txt', sep = '\t')

In [ ]:
# 'frac_area' of the hypsograph rows for SE581700-113000 where 'Djup' equals 60
# ('Djup' is Swedish for depth).
w.mapping_objects['hypsographs'].wb_df['SE581700-113000'].loc[w.mapping_objects['hypsographs'].wb_df['SE581700-113000']['Djup']==60, 'frac_area']

In [ ]:
# Disabled: show the whole hypsograph frame for that water body.
#w.mapping_objects['hypsographs'].wb_df['SE581700-113000']

In [ ]:
# Step 2 'oxygen' data for water body SE581700-113000; rows without a DOXY
# value are dropped. Overwrites `df`.
df = w.get_filtered_data(subset=subset_uuid, step=2, water_body='SE581700-113000', indicator='oxygen')[['DOXY', 'DEPH', 'SDATE', 'YEAR', 'POSITION']].dropna(subset=['DOXY'], how='all')
maxD = w.mapping_objects['hypsographs'].get_max_depth_of_water_body('SE581700-113000')

# Per position: print the rows with DEPH > 50, then the 0.25 quantile of DOXY
# among the rows with DEPH > maxD - 10.
for key, group in df.groupby('POSITION'):
    q = group.loc[group['DEPH'] > maxD - 10, 'DOXY'].quantile(0.25)
    print(group.loc[group['DEPH'] > 50])
    print(q)

In [ ]:
# Keys stored in the index handler's nested booleans for the first test water body at step 2.
w.index_handler.booleans['step_0'][subset_uuid]['step_1']['step_2'][test_wb[0]].keys()#['SE584340-174401'].keys()

In [ ]:
# Water body and indicator inspected in the cells below.
wb = test_wb[0]#'SE583926-161744' #type area 22
#wb = 'SE654470-222700' #type area 13
indicator = 'din_winter'

In [ ]:
# Summarise the step 2 data for the selected water body and indicator. The
# filtered frame is fetched once and reused for all printed summaries.
indicator_data = w.get_filtered_data(step=2, subset=subset_uuid, water_body=wb, indicator=indicator)
print(indicator_data.MONTH.unique())
print(indicator_data.DEPH.min(), indicator_data.DEPH.max())
print(indicator_data.VISS_EU_CD.unique())
# This lookup passes no indicator: type areas in the water body's step 2 data.
w.get_filtered_data(step=2, subset=subset_uuid, water_body=wb).WATER_TYPE_AREA.unique()

In [ ]:
# Step 2 'secchi' data for one water body: rows with a SECCHI value, one row
# per (SDATE, VISS_EU_CD, SECCHI) combination, first rows only.
w.get_filtered_data(step = 2, subset = subset_uuid, indicator = 'secchi', water_body = 'SE591400-183200').dropna(subset = ['SECCHI']).drop_duplicates(subset = ['SDATE', 'VISS_EU_CD', 'SECCHI'])[['SDATE','VISS_EU_CD','SECCHI','DEPH']].head()

In [ ]:
# NOTE(review): the first expression is not the last line of the cell, so it is not displayed.
w.mapping_objects['quality_element'].cfg['indicators']
# Split the first field of the row for `indicator` on ', ' and strip each item.
[item.strip() for item in w.mapping_objects['quality_element'].cfg['indicators'].loc[indicator][0].split(', ')]

Step 3: Load indicator objects


In [ ]:
# Run indicator_setup on the step 3 object of the subset.
w.get_step_object(step = 3, subset = subset_uuid).indicator_setup(subset_unique_id = subset_uuid) 
# Optional argument that could be added to the call above:
#, indicator_list = ['din_winter', 'dip_winter']

In [ ]:
# Type area (including suffix) for one water body, via the step 3 object's mapping.
w.get_step_object(step = 3, subset = subset_uuid).mapping_objects['water_body'].get_type_area_for_water_body('SE574050-114780', include_suffix=True)

CALCULATE STATUS


In [ ]:
# Calculate status for the listed indicators on the step 3 object.
w.get_step_object(step = 3, subset = subset_uuid).calculate_status(indicator_list = ['ntot_winter', 'dip_winter', 'ptot_summer', 'ptot_winter'])
# Indicator names to choose from when editing the list above:
#'din_winter','ntot_summer', 'ntot_winter', 'dip_winter', 'ptot_summer', 'ptot_winter'

In [ ]:
# Keys of water_body_indicator_df on the 'ntot_winter' indicator object.
w.get_step_object(step = 3, subset = subset_uuid).indicator_objects['ntot_winter'].water_body_indicator_df.keys()

In [ ]:
# Entry for one water body in that same water_body_indicator_df.
w.get_step_object(step = 3, subset = subset_uuid).indicator_objects['ntot_winter'].water_body_indicator_df['SE581700-113000']

In [ ]:
# NOTE(review): displays whatever `df` was assigned last; several cells above overwrite it.
df

In [ ]:
# Build a combined "<row label>_<water body id>" label for every row of `df`.
# NOTE(review): the oxygen selections assigned to `df` earlier do not keep
# VISS_EU_CD, so this cell needs a `df` that still has that column (e.g. the
# din_winter selection) - confirm which frame is intended.
df['new_index'] = [str(row_label) + '_' + water_body for row_label, water_body in zip(df.index, df.VISS_EU_CD)]
# set_index returns a new frame, so the result is kept in its own variable;
# `df` itself is left with its index unchanged, which keeps the cell re-runnable.
df_indexed = df.set_index(keys='new_index')
df_indexed.index

In [ ]:
# Name of the step 3 object.
w.get_step_object(step = 3, subset = subset_uuid).name

In [ ]:
# Indicator object for the currently selected `indicator`.
ind_obj = w.get_step_object(step = 3, subset = subset_uuid).indicator_objects[indicator]

In [ ]:
# Entry for the first test water body in the indicator object's water_body_indicator_df.
ind_obj.water_body_indicator_df[test_wb[0]]#.keys()

#df

In [ ]:
# Display the per-date aggregation built earlier.
by_date

In [ ]:
# Combine the classification results of all water bodies (keys starting with
# 'SE') into one frame and print every other entry except 'all_data'.
print(ind_obj.classification_results[wb].keys())
water_body_frames = []
for key, value in ind_obj.classification_results.items():
    if key == 'all_data':
        # Skipped on purpose: neither concatenated nor printed.
        continue
    if key.startswith('SE'):
        water_body_frames.append(value)
    else:
        print('**************************************')
        print(key)
        print(value)
# Concatenate once after the loop instead of growing a frame per iteration.
# False is kept as the value for "no water body results".
concat_df = pd.concat(water_body_frames) if water_body_frames else False

In [ ]:
%matplotlib inline
import seaborn as sns
for name, group in ind_obj.classification_results['status_by_date'].groupby('VISS_EU_CD'):
    #group['date'] = pd.to_datetime(group.SDATE)
    #group.dropna(subset = ['date', 'DIN'])
    #sns.tsplot(data = group.to_dict(), time = 'SDATE', value = 'DIN', condition = 'STATUS', legend = True)
    group.plot('SDATE', ['DIN', 'REFERENCE_VALUE'], title = name + group.WATER_TYPE_AREA.values[0], marker ='*')

In [ ]:
# Title string for the current `name` and `group`, which are left over from the plotting loop.
name + group.WATER_TYPE_AREA.values[0]