In [1]:
# Reload when code changed:
%load_ext autoreload
%autoreload 2
%pwd
import os
import sys
# Make the project root (one level up) importable so `core`, `event_handler`
# etc. can be found by the import cell below.
path = "../"
sys.path.append(path)
#os.path.abspath("../")
print(os.path.abspath(path))
In [2]:
# Third-party, stdlib and project imports. `core` is reloaded so local code
# edits are picked up without restarting the kernel (%autoreload also helps).
import pandas as pd
import numpy as np
import json
import timeit
import importlib
import logging

import core
importlib.reload(core)  # was done twice in the original; once is enough

# Best-effort reset of the logging module between reloads. Failures are
# deliberately ignored, but use `except Exception` rather than a bare except
# so KeyboardInterrupt/SystemExit are not swallowed.
try:
    logging.shutdown()
    importlib.reload(logging)
except Exception:
    pass

from event_handler import EventHandler

print(core.__file__)
pd.__version__
Out[2]:
In [3]:
# Directory layout and identifiers used throughout the notebook.
root_directory = "../"  # os.getcwd()
# os.path.join avoids the '..//workspaces' double slash produced by plain
# string concatenation of "../" + '/workspaces'.
workspace_directory = os.path.join(root_directory, 'workspaces')
resource_directory = os.path.join(root_directory, 'resources')
alias = 'lena'
user_id = 'test_user'  # maybe this should be an off-line user? (TODO confirm)
workspace_alias = 'lena_indicator'
In [99]:
# Create the event handler that owns all workspaces under root_directory.
ekos = EventHandler(root_directory)
In [100]:
ekos.load_workspace(user_id, alias = workspace_alias)
# I often get this error message here:
# AttributeError: module 'core' has no attribute 'ParameterMapping'
# NOTE(review): presumably caused by importlib.reload(core) leaving stale
# class references around — restarting the kernel usually clears it. TODO confirm.
Out[100]:
In [101]:
# Resolve the workspace alias to its UUID (used for all later lookups).
workspace_uuid = ekos.get_unique_id_for_alias(user_id, workspace_alias)
print(workspace_uuid)
In [102]:
#ekos.import_default_data(user_id, workspace_alias = workspace_alias, force = True)
In [103]:
#ekos.get_workspace(user_id, unique_id = workspace_uuid, alias = workspace_alias).delete_alldata_export()
In [104]:
#%%timeit
# Load the workspace data and show the total number of rows available.
ekos.load_data(user_id = user_id, unique_id = workspace_uuid)
w = ekos.get_workspace(user_id, unique_id = workspace_uuid, alias = workspace_alias)
len(w.data_handler.get_all_column_data_df())
Out[104]:
In [105]:
w.apply_data_filter(step = 0) # This sets the first level of data filter in the IndexHandler
In [106]:
# Apply the step-1 data filter for subset 'A' and keep the filtered frame.
subset_uuid = ekos.get_unique_id_for_alias(user_id, workspace_alias = workspace_alias, subset_alias = 'A')
print(w.get_subset_list())
f1 = w.get_data_filter_object(subset = subset_uuid, step=1)
print(f1.include_list_filter)
w.apply_data_filter(subset = subset_uuid, step = 1)
df_step1 = w.get_filtered_data(step = 1, subset = subset_uuid)
print(df_step1.columns)
#df_step1[['SDATE', 'YEAR', 'MONTH', 'POSITION', 'VISS_EU_CD', 'WATER_TYPE_AREA', 'DEPH', 'MNDEP', 'MXDEP','BQIm']].dropna(subset = ['BQIm'])
In [107]:
# Settings file registered for the 'oxygen' indicator at step 2.
w.get_step_object(step = 2, subset = subset_uuid)._indicator_setting_files['oxygen']
Out[107]:
In [108]:
indicator_list = w.get_available_indicators(subset= 'A', step=2)
In [109]:
# Summarise how many water bodies / type areas survive the step-1 filter.
wb_list = df_step1.VISS_EU_CD.unique()
print('number of waterbodies in step 1: {}'.format(len(wb_list)))
# Type-area code is the part of WATER_TYPE_AREA before '-', with leading zeros stripped.
typeA_list = [row.split('-')[0].strip().lstrip('0') for row in df_step1.WATER_TYPE_AREA.unique()]
print('number of type areas in step 1: {}'.format(len(typeA_list)))
In [110]:
#list(zip(typeA_list, df_step1.WATER_TYPE_AREA.unique()))
# NOTE(review): indicator_list from the cell above is overwritten here —
# only 'oxygen' is actually processed.
indicator_list = ['oxygen']#,'bqi','din_winter','ntot_summer', 'ntot_winter', 'dip_winter', 'ptot_summer', 'ptot_winter', 'biov', 'chl', 'secchi']
for indicator in indicator_list:
w.apply_indicator_data_filter(step = 2,
subset = subset_uuid,
indicator = indicator)
In [111]:
w.index_handler.booleans['step_0'][subset_uuid]['step_1']['step_2']['SE584340-174401'].keys()
Out[111]:
In [112]:
# Pick a water body / type area to inspect (the later assignment to wb wins).
wb = 'SE583926-161744' # type area 22
#wb = 'SE654470-222700' # type area 13
wb = 'SE582000-112350' #type_area 1-n
type_area = '2'#'01s - Västkustens inre kustvatten'
#w.index_handler.booleans['step_0'][subset_uuid]['step_1']['step_2'][type_area]['din_winter']['boolean']
indicator = 'oxygen'
In [113]:
# Inspect the step-2 filtered data for the chosen water body.
# NOTE(review): the original passed the literal string 'indicator' as the
# indicator argument; the variable `indicator` ('oxygen', set above) is almost
# certainly what was intended — confirm against get_filtered_data's contract.
# The four identical calls are hoisted into one.
df_step2 = w.get_filtered_data(step = 2, subset = subset_uuid, water_body = wb, indicator = indicator)
print(df_step2.MONTH.unique())
print(df_step2.DEPH.min(), df_step2.DEPH.max())
print(df_step2.VISS_EU_CD.unique())
w.get_filtered_data(step = 2, subset = subset_uuid, water_body = wb).WATER_TYPE_AREA.unique()
Out[113]:
In [114]:
# Indicators configured per quality element; second line extracts the list
# belonging to the chosen `indicator` (comma-separated in the config).
w.mapping_objects['quality_element'].cfg['indicators']
[item.strip() for item in w.mapping_objects['quality_element'].cfg['indicators'].loc[indicator][0].split(', ')]
Out[114]:
In [115]:
# Set up the step-3 indicator object(s) for the chosen indicator.
w.get_step_object(step = 3, subset = subset_uuid).indicator_setup(subset_unique_id = subset_uuid, indicator_list = [indicator])
#, indicator_list = ['din_winter', 'dip_winter']
In [116]:
w.get_step_object(step = 3, subset = subset_uuid).indicator_objects[indicator].get_ref_value_type(water_body = wb)
In [117]:
# Keep a handle on the step-3 indicator object for the cells below.
ind_obj = w.get_step_object(step = 3, subset = subset_uuid).indicator_objects[indicator]
In [118]:
ind_obj.set_water_body_indicator_df(water_body = wb)
In [119]:
ind_obj.get_water_body_indicator_df(wb).dropna(subset = ['DOXY'])#.POSITION.unique()
In [120]:
# Step-2 filtered data restricted to the indicator's own column list.
df = ind_obj.get_filtered_data(subset = ind_obj.subset, step = 'step_2', water_body = wb)[ind_obj.column_list]
In [28]:
# For each sampling position: take DOXY values measured within tol_BW metres
# of the water body's maximum depth (bottom water), and report the mean of the
# values below the 25th percentile of that bottom-water sample.
# Use the workspace-relative resources directory instead of the original
# machine-specific absolute path ('D:/github/ekostat_calculator/...').
H = core.Hypsograph(file_path = resource_directory + '/mappings/hypsographs.txt')
tol_BW = 5  # bottom-water tolerance: metres above the max depth
maxD = H.get_max_depth_of_water_body(wb)
for key, group in df.dropna(subset = ['DOXY']).groupby(['POSITION']):
    # (the original also called group.reset_index() and recomputed this mask
    # twice; both were dead work and are removed here)
    near_bottom = group['DEPH'] > maxD - tol_BW
    q = group.loc[near_bottom, 'DOXY'].quantile(0.25)
    mean_below_q = group.loc[group['DOXY'] < q, 'DOXY'].mean()
    print('mean', mean_below_q)
    print(key, q)
In [49]:
#df[df['MONTH'].isin(list(range(1,5+1)))]
In [127]:
# Reference value for the chosen type area / water body.
#ind_obj.ref_settings.get_value(variable = 'REF_VALUE_LIMIT', type_area = type_area)
ind_obj.ref_settings.get_ref_value(type_area = type_area, water_body = wb)
#ind_obj.ref_settings.get_value(variable = 'HG_VALUE_LIMIT', water_body = 'SE581740-114820') # Havstensfjorden 'SE581740-114820'
#ind_obj.ref_settings.get_value(variable = 'PB_VALUE_LIMIT', water_body = wb)
Out[127]:
In [128]:
# Run the status classification, then dump every result except the bulky keys.
ind_obj.calculate_status(water_body = wb)
In [129]:
print(ind_obj.classification_results[wb].keys())
for key, value in ind_obj.classification_results[wb].items():
if key == 'all_data' or key == 'mean_EQR_by_date' or key == 'number_of_values':
pass
else:
print('**************************************')
print(key)
print(value)
In [98]:
# NOTE(review): `dw_obj` is not defined anywhere above in this notebook —
# these cells rely on hidden kernel state from an earlier session. TODO fix
# before a clean Restart & Run All.
dw_obj.classification_results[wb]['mean_EQR_by_year']#.dropna(subset = ['mean_ek_value'])
Out[98]:
In [100]:
dw_obj.water_body_indicator_df[wb].dropna(subset = ['REFERENCE_VALUE'])
Out[100]:
In [30]:
def get_EK(x):
    """Ecological quotient (EK) for one row: DIN / REFERENCE_VALUE, capped at 1.

    NaN quotients propagate unchanged (NaN > 1 evaluates to False).
    """
    ratio = x.DIN / x.REFERENCE_VALUE
    return 1 if ratio > 1 else ratio
# NOTE(review): this mutates the stored water-body frame in place by adding
# an 'ek_value' column (df aliases dw_obj.water_body_indicator_df[wb]).
df = dw_obj.water_body_indicator_df[wb]
df['ek_value'] = df.apply(get_EK, axis = 1)
In [31]:
# Raw (un-capped) quotient, for comparison with the capped ek_value column.
df[dw_obj.indicator_parameter]/df.REFERENCE_VALUE
Out[31]:
In [32]:
# 1) Aggregate ek_value per sampling occasion (SDATE) within each year.
by_date = df.groupby(['SDATE', 'YEAR'],).ek_value.agg(['count', 'min', 'max', 'mean']).reset_index()
# by_date.to_csv(self.paths['results'] +'/' + self.name + water_body +'by_occation.txt', sep='\t')
by_date.rename(columns={'mean':'mean_ek_value', 'count': 'number_of_values'}, inplace=True) # Cant use "mean" below
by_date
Out[32]:
In [34]:
# Remove occasions with not enough samples
# Or use count as a flag for what to display for the user?
by_date['all_ok'] = True
# DataFrame.set_value was deprecated in pandas 0.21 and removed in 1.0;
# use the same boolean .loc pattern as the yearly aggregation cell below.
by_date.loc[by_date['number_of_values'] < 1, 'all_ok'] = False
Out[34]:
In [35]:
# 2) The mean EK per parameter is computed for each year (Swedish note below).
"""
2) Medelvärdet av EK för varje parameter beräknas för varje år.
"""
by_year = by_date.groupby('YEAR').mean_ek_value.agg(['count', 'min', 'max', 'mean']).reset_index()
by_year.rename(columns={'mean':'mean_ek_value', 'count': 'number_of_dates'}, inplace=True)
by_year['all_ok'] = True
# Flag years that do not have enough sampling dates.
by_year.loc[by_year['number_of_dates'] < 1, 'all_ok'] = False
# by_year.to_csv(self.paths['results'] +'/' + self.name + water_body + 'by_year.txt', sep='\t')
by_year
Out[35]:
In [36]:
# Distribution of the yearly mean EK values over the whole period.
by_period = by_year[['mean_ek_value']].describe()
by_period
Out[36]:
In [37]:
"""
3) Medelvärdet av EK för varje parameter och vattenförekomst (beräknas för minst
en treårsperiod)
"""
# (3: mean EK per parameter and water body, computed over at least a
#  three-year period — hence the `count < 3` check below.)
by_period = by_year[['mean_ek_value']].describe()#.agg(['count', 'min', 'max', 'mean'])
by_period = by_period.transpose()
#by_period#.loc['mean', 'mean_ek_value']
by_period['all_ok'] = True
# Series.get_value was deprecated in pandas 0.21 and removed in 1.0;
# use the .at scalar accessor instead.
if by_period.at['mean_ek_value', 'count'] < 3:
    by_period['all_ok'] = False
by_period
Out[37]:
In [40]:
# Series.get_value was removed in pandas 1.0 — use .at for scalar access.
by_period.at['mean_ek_value', 'mean']
Out[40]:
In [29]:
# Quick sanity check of DataFrame.describe() on a frame with NaN rows.
# The stray '...' console-continuation marker on the second line made this
# cell a SyntaxError; it is removed here.
temp_df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'],
                       index=pd.date_range('1/1/2000', periods=10))
temp_df.iloc[3:7] = np.nan
temp_df.describe()
Out[29]:
In [30]:
def set_above_one_value(x):
    """Clip a quotient at 1 (EK values are defined on the [0, 1] scale).

    NaN inputs pass through unchanged (NaN > 1 evaluates to False).
    """
    return 1 if x > 1 else x
# Same capping as get_EK above, expressed column-wise on the stored frame
# (mutates dw_obj.water_body_indicator_df[wb] in place by adding 'EK').
dw_obj.water_body_indicator_df[wb]['EK'] = dw_obj.water_body_indicator_df[wb]['DIN']/dw_obj.water_body_indicator_df[wb]['REFERENCE_VALUE']
dw_obj.water_body_indicator_df[wb]['EK'] = dw_obj.water_body_indicator_df[wb]['EK'].apply(set_above_one_value)
dw_obj.water_body_indicator_df[wb]['EK']
Out[30]:
In [31]:
# Non-null DIN values for type area 22 after the step-2 din_winter filter.
dw_obj.get_filtered_data(subset = subset_uuid, step = 'step_2', type_area = 22, indicator = 'din_winter')[['DIN']].dropna()
Out[31]:
In [120]:
# NOTE(review): `lv_workspace` is never defined in this notebook — hidden
# kernel state from an earlier session. TODO fix before a clean re-run.
B2_NTOT_WINTER_SETTINGS = lv_workspace.get_subset_object('B').get_step_object('step_2').indicator_ref_settings['ntot_winter']
lv_workspace.get_subset_object('B').get_step_object('step_2').indicator_ref_settings['ntot_winter'].allowed_variables
# rewrite as:
# lv_workspace.get_indicator_ref_settings(step = , subset = , indicator = , waterbody/type)
# should give the same result as:
#lv_workspace.get_subset_object('B').get_step_object('step_2').indicator_ref_settings['ntot_winter'].settings.ref_columns
lv_workspace.get_subset_object('B').get_step_object('step_2').indicator_ref_settings['ntot_winter'].settings.get_value('EK G/M', 22)
#print(B2_NTOT_WINTER_SETTINGS)
#B2_NTOT_WINTER_SETTINGS.get_value('2', 'DEPTH_INTERVAL')
Out[120]:
In [124]:
# Data-filter settings for ntot_winter, restricted to the allowed variables.
av = lv_workspace.get_subset_object('B').get_step_object('step_2').indicator_data_filter_settings['ntot_winter'].allowed_variables
lv_workspace.get_subset_object('B').get_step_object('step_2').indicator_data_filter_settings['ntot_winter'].settings.df[av]
Out[124]:
In [119]:
# Full (unrestricted) settings frame, for comparison.
lv_workspace.get_subset_object('B').get_step_object('step_2').indicator_data_filter_settings['ntot_winter'].settings.df
Out[119]:
In [67]:
# Water-body mapping lookup for one named coastal water.
B2_NTOT_WINTER_SETTINGS.settings.mapping_water_body['N m Bottenvikens kustvatten']
Out[67]:
In [35]:
# Step-1 include/exclude filters for subset 'A'.
f1_A = lv_workspace.get_data_filter_object(step=1, subset='A')
f1_A.include_list_filter
Out[35]:
In [136]:
lv_workspace.get_data_filter_info(step=1, subset='A')
Out[136]:
In [36]:
f1_A.exclude_list_filter
Out[36]:
In [21]:
# NOTE(review): `f0` is first defined further down (the In [31] cell) — this
# cell only works when executed out of order. TODO reorder for a clean
# top-to-bottom run.
f0.include_list_filter
Out[21]:
In [22]:
lv_workspace.apply_subset_filter(subset='A') # Not handled properly by the IndexHandler
Out[22]:
In [23]:
# NOTE(review): the original comment said "level=0 means first filter" while
# the call passes level=1 — confirm which level the subset filter lives on.
data_after_subset_filter = lv_workspace.get_filtered_data(level=1, subset='A')
print('{} rows matching the filter criteria'.format(len(data_after_subset_filter)))  # fixed typo 'mathing'
data_after_subset_filter.head()
Out[23]:
In [24]:
# show available waterbodies
# (SEA_AREA_NAME is used as the display name here)
lst = data_after_subset_filter.SEA_AREA_NAME.unique()
print('Waterbodies in subset:\n{}'.format('\n'.join(lst)))
In [24]:
import numpy as np  # NOTE(review): redundant — numpy is already imported in the import cell at the top
# Row indices currently selected by the subset filter boolean mask.
np.where(lv_workspace.index_handler.subset_filter)
Out[24]:
In [25]:
# Inspect the step-1 data-filter object for subset 'A'.
f = lv_workspace.get_data_filter_object(step=1, subset='A')
In [26]:
f.all_filters
Out[26]:
In [27]:
f.exclude_list_filter
Out[27]:
In [28]:
f.include_list_filter
Out[28]:
In [29]:
# The same filters reached via the step-1 object itself.
s = lv_workspace.get_step_1_object('A')
In [30]:
s.data_filter.all_filters
Out[30]:
In [31]:
# Step-0 (workspace-level) data filter. NOTE(review): f0 is already used in
# the In [21] cell further up — out-of-order execution.
f0 = lv_workspace.get_data_filter_object(step=0)
In [32]:
f0.exclude_list_filter
Out[32]:
In [33]:
f0.include_list_filter
Out[33]:
In [ ]:
In [ ]:
# Initialise quality-factor objects (cell left unexecuted in this session).
lv_workspace.initiate_quality_factors()
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: