Representation of data submission workflow components based on W3C-PROV


In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from prov.model import ProvDocument
d1 = ProvDocument()
d1.deserialize?

Model is along the concept described in https://www.w3.org/TR/prov-primer/


In [3]:
from IPython.display import display, Image
Image(filename='key-concepts.png')


Out[3]:

In [5]:
from dkrz_forms import form_handler
#from project_cordex import cordex_dict
#from project_cordex import name_space

# add namespaces for submission provenance capture

#for key,value in name_space.iteritems():
#    d1.add_namespace(key,value)

#d1.add_namespace()
# to do: look into some predefined vocabs, e.g. dublin core, iso19139,foaf  etc.

d1.add_namespace("enes_entity",'http://www.enes.org/enes_entitiy#')
d1.add_namespace('enes_agent','http://www.enes.org/enes_agent#')
d1.add_namespace('data_collection','http://www.enes.org/enes_entity/file_collection')
d1.add_namespace('data_manager','http://www.enes.org/enes_agent/data_manager')
d1.add_namespace('data_provider','http://www.enes.org/enes_agent/data_provider')
d1.add_namespace('subm','http://www.enes.org/enes_entity/data_submsission')
d1.add_namespace('foaf','http://xmlns.com/foaf/0.1/')


Out[5]:
<Namespace: foaf {http://xmlns.com/foaf/0.1/}>

Example name spaces

(from DOI: 10.3390/ijgi5030038 , mehr unter https://github.com/tsunagun/vocab/blob/master/all_20130125.csv)

owl      Web Ontology Language    http://www.w3.org/2002/07/owl#
dctype   DCMI Type Vocabulary     http://purl.org/dc/dcmitype/
dco      DCO  Ontology            http://info.deepcarbon.net/schema#
prov     PROV Ontology            http://www.w3.org/ns/prov#
skos     Simple Knowledge
         Organization System      http://www.w3.org/2004/02/skos/core#
foaf     FOAF Ontology            http://xmlns.com/foaf/0.1/
vivo     VIVO Ontology            http://vivoweb.org/ontology/core#
bibo     Bibliographic Ontology   http://purl.org/ontology/bibo/
xsd      XML Schema Datatype      http://www.w3.org/2001/XMLSchema#
rdf      Resource Description
         Framework                http://www.w3.org/1999/02/22-rdf-syntax-ns#
rdfs     Resource Description
         Framework Schema         http://www.w3.org/2000/01/rdf-schema#    

In [5]:
# later: organize things in bundles
data_manager_ats = {'foaf:givenName':'Peter','foaf:mbox':'lenzen@dkzr.de'}

d1.entity('sub:empty')
def add_stage(agent,activity,in_state,out_state):
    # in_stage exists, out_stage is generated
    d1.agent(agent, data_manager_ats)
    d1.activity(activity)
    d1.entity(out_state)
   
    d1.wasGeneratedBy(out_state,activity)
    d1.used(activity,in_state)
    d1.wasAssociatedWith(activity,agent)
    d1.wasDerivedFrom(out_state,in_state)

In [6]:
import json
form_file = open('/home/stephan/tmp/CORDEX/Kindermann_test1.json',"r")
json_info = form_file.read()
#json_info["__type__"] = "sf",
form_file.close()
sf_dict = json.loads(json_info)


---------------------------------------------------------------------------
IOError                                   Traceback (most recent call last)
<ipython-input-6-b1c64640fd41> in <module>()
      1 import json
----> 2 form_file = open('/home/stephan/tmp/CORDEX/Kindermann_test1.json',"r")
      3 json_info = form_file.read()
      4 #json_info["__type__"] = "sf",
      5 form_file.close()

IOError: [Errno 2] No such file or directory: '/home/stephan/tmp/CORDEX/Kindermann_test1.json'

In [7]:
sf = form_handler.dict_to_form(sf_dict)

In [8]:
print sf.__dict__


{u'status': u'published', u'first_name': u'Stephan', u'last_name': u'Kindermann', u'sub': {u'status': [u'stored'], u'package_name': u'Kindermann_test1.json', u'timestamp': u'2016-03-28 19:25:28.340678', u'ticket_url': u'https://dm-rt.dkrz.de/Ticket/Display.html?id=22252', u'checks_done': u'cordex form check v0.1', u'repo': u'/home/stephan/tmp/CORDEX', u'ticket_id': 22252, u'package_path': u'/home/stephan/tmp/CORDEX/Kindermann_test1.json', u'form_name': u'Kindermann_test1', u'responsible_person': u'pl'}, u'keyword': u'test1', u'data_path': u'/scratch/b20030/data/cordex/', u'pub': {u'started': u'2016-03-28 19:24:52.312040', u'search_string': u'project=cordex&model=hmoc&institute=MPI-M', u'finished': u'2016-03-28 19:25:28.340155', u'responsible_person': u'kberger'}, u'email': u'snkinder@freenet.de', u'institute_id': u'MPI-M', u'check_status': u'not checked', u'ing': {u'started': u'2016-03-28 19:17:10.601303', u'target_path': u'/scratch/bb0303/data/cordex/test1', u'finished': u'2016-03-28 19:18:11.767079', u'responsible_person': u'pl'}, u'submission_type': u'test', u'institution': u'DKRZ', u'che': {u'started': u'2016-03-28 19:22:40.212107', u'finished': u'2016-03-28 19:24:13.371716', u'tool_version': u'qa_dkrz_v1.1', u'responsible_person': u'hdh', u'results': u'/path/to/results'}}

In [9]:
data_provider = sf.first_name+'_'+sf.last_name
submission_manager = sf.sub['responsible_person']
ingest_manager = sf.ing['responsible_person']
qa_manager = sf.ing['responsible_person']
publication_manager =  sf.pub['responsible_person']

add_stage(agent='data_provider:test_user_id',activity='subm:submit',in_state="subm:empty",out_state='subm:out1_sub')
add_stage(agent='data_manager:peter_lenzen_id',activity='subm:review',in_state="subm:out1_sub",out_state='subm:out1_rev')
add_stage(agent='data_manager:peter_lenzen_id',activity='subm:ingest',in_state="subm:out1_rev",out_state='subm:out1_ing')
add_stage(agent='data_manager:hdh_id',activity='subm:check',in_state="subm:out1_ing",out_state='subm:out1_che')
add_stage(agent='data_manager:katharina_b_id',activity='subm:publish',in_state="subm:out1_che",out_state='subm:out1_pub')
add_stage(agent='data_manager:lta_id',activity='subm:archive',in_state="subm:out1_pub",out_state='subm:out1_arch')

In [ ]:

assign information to provenance graph nodes and edges


In [30]:
%matplotlib inline
d1.plot()



In [ ]:
d1.wasAttributedTo(data_submission,'????')

Transform submission object to a provenance graph


In [19]:
#d1.get_records()
submission = d1.get_record('subm:out1_sub')[0]
review = d1.get_record('subm:out1_rev')[0]
ingest = d1.get_record('subm:out1_ing')[0]
check = d1.get_record('subm:out1_che')[0]
publication = d1.get_record('subm:out1_pub')[0]
lta = d1.get_record('subm:out1_arch')[0]

In [20]:
res = form_handler.prefix_dict(sf.sub,'sub',sf.sub.keys())
res['sub:status']="fertig"
print res
ing = form_handler.prefix_dict(sf.ing,'ing',sf.ing.keys())
che = form_handler.prefix_dict(sf.che,'che',sf.che.keys())
pub = form_handler.prefix_dict(sf.pub,'pub',sf.pub.keys())


{u'sub:ticket_url': u'https://dm-rt.dkrz.de/Ticket/Display.html?id=22252', u'sub:checks_done': u'cordex form check v0.1', u'sub:package_name': u'Kindermann_test1.json', u'sub:status': 'fertig', u'sub:timestamp': u'2016-03-28 19:25:28.340678', u'sub:package_path': u'/home/stephan/tmp/CORDEX/Kindermann_test1.json', u'sub:form_name': u'Kindermann_test1', u'sub:responsible_person': u'pl', u'sub:ticket_id': 22252, u'sub:repo': u'/home/stephan/tmp/CORDEX'}

In [21]:
submission.add_attributes(res)
ingest.add_attributes(ing)
check.add_attributes(che)
publication.add_attributes(pub)

In [29]:
che_act = d1.get_record('subm:check') 
tst = che_act[0]
test_dict = {'subm:test':'test'}
tst.add_attributes(test_dict)

In [32]:
print tst
tst.FORMAL_ATTRIBUTES
tst.


activity(subm:check, -, -, [subm:test="test"])
Out[32]:
(<QualifiedName: prov:startTime>, <QualifiedName: prov:endTime>)

In [ ]:
che_act = d1.get_record('subm:check') 
#tst.formal_attributes
#tst.FORMAL_ATTRIBUTES
tst.add_attributes({'foaf:name':'tst'})
print tst.attributes
#for i in tst:
 #   print i
#tst.insert([('subm:givenName','sk')])

In [ ]:
import sys
sys.path.append('/home/stephan/Repos/ENES-EUDAT/submission_forms')
from dkrz_forms import form_handler
sf,repo = form_handler.init_form("CORDEX")



init_dict = sf.__dict__ 
sub_form = form_handler.prefix(sf,'subm',sf.__dict__.keys())            

sub_dict = sub_form.__dict__

#init_state = d1.get_record('subm:empty')[0]
#init_state.add_attributes(init_dict)

sub_state = d1.get_record('subm:out1_sub')[0]
init_state.add_attributes(sub_dict)

In [ ]:
tst_dict = {'test1':'val1','test2':'val2'}
tst = form_handler.submission_form(tst_dict) 
print tst.__dict__

In [ ]:
print result.__dict__

In [ ]:
dict_from_class(sf)

In [ ]: