Write YAML files for a small set of material documents that together cover every property listed under `has`, so that the examples include keys not currently accounted for by the API repository.


In [ ]:
import json
import os
import yaml
from bson.json_util import dumps as bson_dumps
from mongogrant import Client

# Alias string handed to mongogrant to obtain the database handle.
# NOTE(review): the "ro:" prefix presumably requests read-only access — confirm.
MATERIALS_DB_ALIAS = "ro:prod/mp_emmet_prod"

client = Client()
db = client.db(MATERIALS_DB_ALIAS)

In [ ]:
# Collect a set of material documents that together cover every distinct,
# non-empty value found in the "has" field.
#
# Each pass asks the server for the document with the largest "has" array
# among those that still contribute at least one uncovered property, records
# it, and removes the properties it covers from the remaining set.
props = list(filter(None, db.materials.distinct("has")))
props_remaining = list(props)
docs = []
while props_remaining:
    candidates = list(db.materials.aggregate([
        {"$match": {"has": {"$in": props_remaining}}},
        {"$project": {"task_id": 1,
                      "has": 1,
                      # Non-array "has" values count as zero so they sort last.
                      "nhas": {"$cond":
                               {"if": {"$isArray": "$has"},
                                "then": {"$size": "$has"},
                                "else": 0}}}},
        {"$sort": {"nhas": -1}},
        {"$limit": 1}
    ]))
    if not candidates:
        # Nothing matches any more (e.g. the collection changed after the
        # distinct() call); stop instead of raising IndexError. Whatever is
        # left in props_remaining was not covered.
        break
    doc_with_most_props = candidates[0]
    docs.append(doc_with_most_props)

    # "$in" also matches documents whose "has" is a single scalar value.
    # Calling set() on a string would split it into characters, never remove
    # the property from props_remaining, and loop forever — so wrap scalars
    # as a one-element set instead.
    has = doc_with_most_props["has"]
    covered = set(has) if isinstance(has, list) else {has}
    props_remaining = list(set(props_remaining) - covered)

In [ ]:
# The entries in `docs` carry only projected fields, so each full material
# document is fetched again by its task_id before being written out.
material_ids = [summary["task_id"] for summary in docs]
for mid in material_ids:
    material = db.materials.find_one({'task_id': mid})
    # Round-trip through bson's JSON encoder so the result contains only
    # plain JSON-compatible values that yaml.safe_dump can serialise.
    doc = json.loads(bson_dumps(material))

    # One YAML file per material, named after its task_id.
    with open(f"{mid}.yaml", "w") as f:
        yaml.safe_dump(doc, stream=f, default_flow_style=False)

Fetch and store example task documents


In [ ]:
from collections import defaultdict

# Map each key found under "blessed_tasks" (a task type, going by the map's
# name) to the list of values stored for it across all material documents.
# Only the "blessed_tasks" field is projected from each document.
task_type_map = defaultdict(list)
for doc in db.materials.find({}, ["blessed_tasks"]):
    # The projection simply omits the field for documents that lack it, and
    # the stored value may be null; treat both as "no blessed tasks" rather
    # than failing with KeyError / AttributeError part-way through the scan.
    blessed_tasks = doc.get("blessed_tasks") or {}
    for k, v in blessed_tasks.items():
        task_type_map[k].append(v)

In [ ]:
# Fetch one example task document per task type: the first task id that was
# recorded for that type in task_type_map.
docs = []
for task_ids in task_type_map.values():
    tid = task_ids[0]
    task_doc = db.tasks.find_one({"task_id": tid})
    # find_one returns None when no task has this id; skip such dangling
    # references so later cells can safely index every entry of `docs`.
    if task_doc is not None:
        docs.append(task_doc)

In [ ]:
# Write each example task document to task_<task_id>.yaml.
#
# `docs` already holds the complete task documents (they were fetched without
# a projection), so they are serialised directly instead of being queried
# from the database a second time.
for task_doc in docs:
    if task_doc is None:
        # A lookup that found nothing leaves None in the list; skip it.
        continue
    tid = task_doc["task_id"]
    # Round-trip through bson's JSON encoder so the result contains only
    # plain JSON-compatible values that yaml.safe_dump can serialise.
    doc = json.loads(bson_dumps(task_doc))

    with open(f"task_{tid}.yaml", "w") as f:
        yaml.safe_dump(doc, f, default_flow_style=False)