Fetch material documents that include keys not currently accounted for by the API repository, and write each one to a YAML file.
In [ ]:
import json
import os
import yaml
from bson.json_util import dumps as bson_dumps
from mongogrant import Client
# mongogrant client, constructed with no arguments here.
client = Client()
# Database handle for the "ro:prod/mp_emmet_prod" spec
# (presumably read-only access to the production emmet DB -- TODO confirm).
db = client.db("ro:prod/mp_emmet_prod")
In [ ]:
# Greedily select material documents until every distinct, truthy "has"
# value is carried by at least one selected document.
props = [p for p in db.materials.distinct("has") if p]
props_remaining = list(props)
docs = []

# Length of the "has" array; a non-array "has" counts as 0.
has_count = {
    "$cond": {
        "if": {"$isArray": "$has"},
        "then": {"$size": "$has"},
        "else": 0,
    }
}

while props_remaining:
    # Of the documents that still carry an uncovered value, take the one
    # with the longest "has" array.
    pipeline = [
        {"$match": {"has": {"$in": props_remaining}}},
        {"$project": {"task_id": 1, "has": 1, "nhas": has_count}},
        {"$sort": {"nhas": -1}},
        {"$limit": 1},
    ]
    doc_with_most_props = list(db.materials.aggregate(pipeline))[0]
    docs.append(doc_with_most_props)

    # Everything the chosen document carries is now covered.
    covered = set(doc_with_most_props["has"])
    props_remaining = list(set(props_remaining) - covered)
In [ ]:
# Write the full record of each selected material to "<task_id>.yaml".
# The entries in `docs` are looked up again by task_id before being dumped.
material_ids = [d["task_id"] for d in docs]
for mid in material_ids:
    material = db.materials.find_one({"task_id": mid})
    # Round-trip through bson's JSON encoder so the result is made of plain
    # JSON types before it is handed to yaml.safe_dump.
    doc = json.loads(bson_dumps(material))
    with open(f"{mid}.yaml", "w") as f:
        yaml.safe_dump(doc, stream=f, default_flow_style=False)
In [ ]:
from collections import defaultdict

# Collect, for every key found in any material's "blessed_tasks" mapping
# (presumably a task type -- confirm against the schema), the list of values
# stored under that key across all materials.
task_type_map = defaultdict(list)
for doc in db.materials.find({}, ["blessed_tasks"]):
    # A projected query omits the key for documents that lack the field (and
    # the field may be null), so fall back to an empty mapping rather than
    # raising KeyError / AttributeError part-way through the scan.
    blessed = doc.get("blessed_tasks") or {}
    for k, v in blessed.items():
        task_type_map[k].append(v)
In [ ]:
# For each key of task_type_map, fetch the task document whose task_id is
# the first value recorded under that key. find_one yields None when no task
# matches, and such entries are kept in the list as-is.
docs = [
    db.tasks.find_one({"task_id": task_ids[0]})
    for task_ids in task_type_map.values()
]
In [ ]:
# Write each task document held in `docs` to "task_<task_id>.yaml".
for d in docs:
    if d is None:
        # find_one returns None when no task matched; there is nothing to
        # write, and indexing None would raise TypeError.
        continue
    tid = d["task_id"]
    # `docs` already holds the complete task documents, so serialize them
    # directly instead of querying db.tasks a second time per task.
    doc = json.loads(bson_dumps(d))
    with open(f"task_{tid}.yaml", "w") as f:
        yaml.safe_dump(doc, f, default_flow_style=False)