Searches using `Forge.search()` are limited to 10,000 results. However, there are two methods to circumvent this restriction: `Forge.aggregate_sources()` and `Forge.aggregate()`.
In [1]:
import json
from collections import Counter

from mdf_forge.forge import Forge
In [2]:
# Instantiate Forge once; the aggregation calls in the cells below go through this object.
mdf = Forge()
In [3]:
# Start by aggregating every entry from the nist_xps_db source, then report
# how many entries came back.
all_entries = mdf.aggregate_sources("nist_xps_db")
entry_total = len(all_entries)
print(entry_total)
In [4]:
# Now, let's parse out the energy_uncertainty_ev and print the results for analysis.
# Only entries whose resource_type is "record" are tallied. A record that has no
# energy_uncertainty_ev value (or no nist_xps_db_v1 block) is counted under the key 0.
uncertainties = Counter(
    record.get("nist_xps_db_v1", {}).get("energy_uncertainty_ev", 0)
    for record in all_entries
    if record["mdf"]["resource_type"] == "record"
)
# Counter is a dict subclass, so it serializes exactly like the plain dict would.
print(json.dumps(uncertainties, sort_keys=True, indent=4, separators=(',', ': ')))
In [5]:
# Next, aggregate everything that has "Ga" in the list of elements and report
# how many results came back.
all_results = mdf.aggregate("material.elements:Ga")
result_total = len(all_results)
print(result_total)
In [6]:
# Now, let's parse out the elements in each record and keep a running tally to print out.
# Only entries whose resource_type is "record" are tallied; every element listed in a
# record (including "Ga" itself) adds one to that element's count.
elements = Counter()
for record in all_results:
    if record["mdf"]["resource_type"] == "record":
        # Counter.update() with an iterable adds one per item, replacing the
        # manual "already seen?" branch.
        # NOTE(review): assumes record["material"]["elements"] is a list of
        # element symbols, as the query comment suggests — confirm.
        elements.update(record["material"]["elements"])
# Counter is a dict subclass, so it serializes exactly like the plain dict would.
print(json.dumps(elements, sort_keys=True, indent=4, separators=(',', ': ')))
In [ ]: