Outline:
In [ ]:
    
import json
from etk.etk import ETK
from etk.extractors.glossary_extractor import GlossaryExtractor
from etk.etk_module import ETKModule
    
In [23]:
    
# Create the ETK instance that the following cells use to build the document,
# load the glossary, and supply the default tokenizer.
etk = ETK()
    
In [24]:
    
# Toy input document: a single "projects" list whose entries each carry a
# short name and a free-text description that mentions contributor names.
sample_input = dict(
    projects=[
        dict(
            name="etk",
            description="version 2 of etk, implemented by Runqi, Dongyu, Sylvia, Amandeep and others.",
        ),
        dict(
            name="rltk",
            description="record linkage toolkit, implemented by Pedro, Mayank, Yixiang and several students.",
        ),
    ]
)
    
In [25]:
    
# Wrap the sample input in an ETK document; later cells select segments from
# it, run the extractor through it, and print its value.
doc = etk.create_document(sample_input)
    
    
In [26]:
    
# Load the glossary file first, then hand it to a GlossaryExtractor together
# with the label "name_extractor" and ETK's default tokenizer, configured with
# case_sensitive=False and ngrams=1.
# NOTE(review): the glossary path is relative, so this presumably depends on
# the notebook's working directory — confirm before running elsewhere.
name_glossary = etk.load_glossary("./examples/hello_world/names.txt")
name_extractor = GlossaryExtractor(
    name_glossary,
    "name_extractor",
    etk.default_tokenizer,
    case_sensitive=False,
    ngrams=1,
)
    
In [27]:
    
# Select two views of the same document: the description field of every
# project, and the project entries themselves. The loop in the next cell
# pairs them up positionally.
descriptions = doc.select_segments("projects[*].description")
projects = doc.select_segments("projects[*]")
    
In [28]:
    
# For each (description, project) pair, run the name extractor over the
# description and store the result on the project under the key "members".
# NOTE(review): zip pairs the two selections by position and stops at the
# shorter one, so this assumes every project has exactly one description and
# that both selections come back in the same order — confirm.
for d, p in zip(descriptions, projects):
    names = doc.extract(name_extractor, d)
    p.store(names, "members")
    
In [29]:
    
# Pretty-print the document's value as JSON (2-space indent) so the effect of
# the extraction/store step above can be inspected.
print(json.dumps(doc.value, indent=2))
    
    
In [30]: