This demo notebook is intended for data managers only!
The submission_forms package provides a collection of components that support the management of information about data ingest activities (data transport, data checking, data publication and data archival):
Example workflows at other data centers:
Data ingest request via
Data ingest request workflow:
In [1]:
# Enable IPython's autoreload extension in case you want to change imported
# module code while working with this notebook
# -- (for development and testing purposes only)
%load_ext autoreload
%autoreload 2
In [16]:
# Example option strings for the status variables of a project form.
# (The commented-out lines at the end of this cell would generate an empty
# project form including all options for variables.)
# e.g.:
ACTIVITY_STATUS = "0:open, 1:in-progress ,2:action-required, 3:paused,4:closed"
ERROR_STATUS = "0:open,1:ok,2:error"
ENTITY_STATUS = "0:open,1:stored,2:submitted,3:re-opened,4:closed"
CHECK_STATUS = "0:open,1:warning, 2:error,3:ok"
# Package import; a later cell calls dkrz_forms.checks.* via this name
import dkrz_forms
# Disabled example: generate an empty 'ESGF_replication' project form and
# print the options of its sub.activity.status field
#from dkrz_forms import form_handler, utils
#sf_t = utils.generate_project_form('ESGF_replication')
#print(checks.get_options(sf_t.sub.activity.status))
Data managers have two separate application scenarios for data ingest information management:
Alternative A)
Alternative B) (not yet documented, only prototype)
In [4]:
## To do: include different examples how to query for data ingest activities based on different properties
#info_file = "/home/stephan/tmp/Repos/form_repo/test/test_testsuite_123.json"
#info_file = "/home/stephan/Forms/local_repo/test/test_testsuite_1.json"
#info_file = "/home/stephan/Forms/local_repo/CORDEX/CORDEX_kindermann_2.json"
info_file = "/opt/jupyter/notebooks/form_directory/CORDEX/CORDEX_mm_mm.json"
from dkrz_forms import form_handler, utils,checks,wflow_handler
from datetime import datetime
my_form = utils.load_workflow_form(info_file)
In [5]:
# Get the workflow description of the loaded form and show its values
# as the cell output
wflow_dict = wflow_handler.get_wflow_description(my_form)
list(wflow_dict.values())
Out[5]:
In [6]:
# Call rename_action for 'data_submission_review' on the form; the return
# value is shown as the cell output.
# NOTE(review): what exactly is renamed is not visible in this notebook --
# confirm in wflow_handler.
wflow_handler.rename_action('data_submission_review',my_form)
Out[6]:
In [7]:
# Start the 'data_submission_review' action for the named person and keep
# working with the form object returned by start_action
my_form = wflow_handler.start_action('data_submission_review',my_form,"stephan kindermann")
In [8]:
# Update the 'data_submission_review' action. The returned form is bound to
# my_form, the name every other cell uses, so the update is not lost in a
# stray variable.
my_form = wflow_handler.update_action('data_submission_review',my_form,"stephan kindermann")
In [9]:
# Report describing the outcome of the review; handed over to finish_action
review_report = {
    'comment': 'needed to change and correct submission form',
    'additional_info': "mail exchange with a@b with respect to question ...",
}
# Finish the 'data_submission_review' action. The returned form is bound to
# my_form, the name the following cells inspect and save.
my_form = wflow_handler.finish_action('data_submission_review',my_form,"stephan kindermann",review_report)
In [10]:
# IPython introspection: show object information for the form
?my_form
In [11]:
# IPython introspection: show object information for the form's `sub` part
?my_form.sub
In [ ]:
# Display the report attached to the outgoing entity of the `sub` step.
# (A trailing "." left over from tab completion made this cell a SyntaxError.)
my_form.sub.entity_out.report
In [12]:
# Save the form, passing "init" as the accompanying message; the object
# returned by save_form is kept as sf
sf = form_handler.save_form(my_form, "init")
In [138]:
# Build the check report for the "sub" part of the form and display it
report = checks.check_report(my_form,"sub")
checks.display_report(report)
In [13]:
# Show the check status of the incoming entity of the review (`rev`) step
my_form.rev.entity_in.check_status
Out[13]:
In [14]:
# Show the ticket URL stored in the activity part of the `sub` step
my_form.sub.activity.ticket_url
Out[14]:
In [17]:
# Check only the "sub" part of the form and display the outcome.
# `checks` is the dkrz_forms.checks module imported directly in the
# form-loading cell, the same module the other check cells use.
part = checks.check_step_form(my_form, "sub")
checks.display_check(part, "sub")
In [18]:
## global check: run the generic check on the whole form and display
## the results together with the form
res = checks.check_generic_form(my_form)
checks.display_checks(my_form,res)
In [19]:
# Print selected fields of the form: status of the `sub` step's outgoing
# entity, the form_json of the `rev` step's incoming entity, and the ticket
# id of the `sub` step's activity
print(my_form.sub.entity_out.status)
print(my_form.rev.entity_in.form_json)
print(my_form.sub.activity.ticket_id)
In [20]:
# Print the workflow attribute of the form
print(my_form.workflow)
Each workflow_step dictionary is structured consistently according to
In [21]:
# Shortcut to the review (`rev`) step of the form
review = my_form.rev
# IPython introspection: show object information for its activity part
?review.activity
In [22]:
# Display the review (`rev`) step of the form.
# (A trailing "." left over from tab completion made this cell a SyntaxError.)
my_form.rev
In [23]:
# Reload the form from file and work on its review (`rev`) step
workflow_form = utils.load_workflow_form(info_file)
review = workflow_form.rev
# any additional information keys can be added,
# yet they are invisible to generic information management tools ..
# --- review started: record status, start time, comment and responsible person
# NOTE(review): "1:in-review" and "3:accepted" are not among the options in the
# ACTIVITY_STATUS string defined earlier in this notebook -- confirm against
# the project configuration.
workflow_form.status = "review"
review.activity.status = "1:in-review"
review.activity.start_time = str(datetime.now())
review.activity.review_comment = "data volume check to be done"
review.agent.responsible_person = "sk"
# first save: records the start of the review
sf = form_handler.save_form(workflow_form, "sk: review started")
# --- review finished: record result status, ticket id and end time
review.activity.status = "3:accepted"
review.activity.ticket_id = "25389"
review.activity.end_time = str(datetime.now())
# outgoing entity of the review step: comment, tag and validation report
review.entity_out.comment = "This submission is related to submission abc_cde"
review.entity_out.tag = "sub:abc_cde" # tags are used to relate different forms to each other
review.entity_out.report = {'x':'y'} # result of validation in a dict (self defined properties)
# ToDo: test and document save_form for data managers (config setting for repo)
# second save: records the finished review
sf = form_handler.save_form(workflow_form, "kindermann: form_review()")
Comment: alternatively, workflow_step-related information can also be provided and assigned directly via dictionaries in tools. This is only recommended for data managers, who must make sure that the structure is consistent with the preconfigured one given in config/project_config.py.
In [24]:
# Reload the form from file and take a shortcut to its `ing` step
# (referred to as the ingest workflow step in the following cells)
workflow_form = utils.load_workflow_form(info_file)
ingest = workflow_form.ing
In [25]:
# IPython introspection: show object information for the outgoing entity
# of the ingest step
?ingest.entity_out
In [26]:
# overall form status and ingest activity status
workflow_form.status = "ingest"
ingest.activity.status = "started"
# agent related info
ingest.agent.responsible_person = "hdh"
# activity related info
ingest.activity.start_time = str(datetime.now())
ingest.activity.comment = "data pull: credentials needed for remote site"
# Save with a message that describes this step. The earlier message,
# "kindermann: form_review()", was carried over from the review cell and
# mislabelled the ingest start.
sf = form_handler.save_form(workflow_form, "hdh: ingest started")
In [27]:
# Mark the ingest activity as completed and record its end time
ingest.activity.status = "completed"
ingest.activity.end_time = str(datetime.now())
# report of the ingest process (entity_out of ingest workflow step)
ingest_report = ingest.entity_out
ingest_report.tag = "a:b:c" # tag structure to be defined
ingest_report.status = "completed"
# free entries for detailed report information
ingest_report.report.remote_server = "gridftp.awi.de://export/data/CMIP6/test"
ingest_report.report.server_credentials = "in server_cred.krb keypass"
ingest_report.report.target_path = ".."
# Save with a message that describes this step. The earlier message,
# "kindermann: form_review()", was carried over from the review cell and
# mislabelled the ingest completion.
sf = form_handler.save_form(workflow_form, "hdh: ingest completed")
In [ ]:
# Display the detailed report entries of the ingest report.
# (A trailing "." left over from tab completion made this cell a SyntaxError.)
ingest_report.report
In [28]:
# datetime is needed for the time stamps below (it is also imported in the
# form-loading cell; repeating it keeps this section runnable on its own)
from datetime import datetime
# Reload the form from file and take a shortcut to its `qua` step
workflow_form = utils.load_workflow_form(info_file)
qua = workflow_form.qua
In [29]:
# Quality assurance started: set the overall form status, the responsible
# person, and the status and start time of the qua activity
workflow_form.status = "quality assurance"
qua.agent.responsible_person = "hdh"
qua.activity.status = "starting"
qua.activity.start_time = str(datetime.now())
# Save the form to record the start of the quality assurance step
sf = form_handler.save_form(workflow_form, "hdh: qa start")
In [30]:
# Quality assurance finished: mark the outgoing entity as completed and
# attach the QA result as a free-form report dictionary.
# NOTE(review): unlike the ingest and publication cells, the status and
# end_time of qua.activity are not updated here -- confirm this is intended.
qua.entity_out.status = "completed"
qua.entity_out.report = {
    "QA_conclusion": "PASS",
    "project": "CORDEX",
    "institute": "CLMcom",
    "model": "CLMcom-CCLM4-8-17-CLM3-5",
    "domain": "AUS-44",
    "driving_experiment": ["ICHEC-EC-EARTH"],
    "experiment": ["history", "rcp45", "rcp85"],
    "ensemble_member": ["r12i1p1"],
    "frequency": ["day", "mon", "sem"],
    # list of annotations; each one names the frequencies and variables it applies to
    "annotation": [
        {
            "scope": ["mon", "sem"],
            "variable": ["tasmax", "tasmin", "sfcWindmax"],
            "caption": "attribute <variable>:cell_methods for climatologies requires <time>:climatology instead of time_bnds",
            "comment": "due to the format of the data, climatology is equivalent to time_bnds",
            "severity": "note",
        },
    ],
}
# Save the form to record the finished quality assurance step
sf = form_handler.save_form(workflow_form, "hdh: qua complete")
In [31]:
# Reload the form from file and mark the start of the publication step
workflow_form = utils.load_workflow_form(info_file)
workflow_form.status = "publishing"
# Shortcut to the `pub` step; record responsible person, status and start time
pub = workflow_form.pub
pub.agent.responsible_person = "katharina"
pub.activity.status = "starting"
pub.activity.start_time = str(datetime.now())
# Save the form to record the start of the publication step
sf = form_handler.save_form(workflow_form, "kb: publishing")
In [32]:
# Publication finished: mark the activity as completed
pub.activity.status = "completed"
pub.activity.comment = "..."
# Record the actual completion time, as the review and ingest steps do
# (this used to store the placeholder string "..")
pub.activity.end_time = str(datetime.now())
pub.activity.report = {'model':"MPI-M"} # activity related report information
pub.entity_out.report = {'model':"MPI-M"} # the report of the publication action - all info characterizing the publication
# Save the form to record the finished publication step
sf = form_handler.save_form(workflow_form, "kb: published")
In [33]:
# Save the current workflow form once more, with a message marking this
# demo run; the object returned by save_form is kept as sf
sf = form_handler.save_form(workflow_form, "kindermann: form demo run 1")
In [34]:
# Show the commit hash stored in the `sub` activity of the saved form
sf.sub.activity.commit_hash
Out[34]:
In [ ]: