This notebook documents the process of preparing structures from a ZIP archive of CIF files for submission to the Materials Project (MP).
We first need a filename for the ZIP archive.
In [ ]:
# Absolute path to the ZIP archive of CIF files. This is hard-coded to one
# user's machine; edit it before running the notebook elsewhere.
zipfilename = '/Users/dwinston/Dropbox/best/structures/ever.zip'
Let's create a list of structures from the ZIP archive's CIF files. Anything invalid about the ZIP archive or CIF files will raise an exception here.
In [ ]:
from zipfile import ZipFile
from pymatgen.io.cif import CifParser
# Collect every structure parsed from every entry of the ZIP archive.
structures = []
# Open the archive with a context manager so it is closed even when a CIF
# file fails to parse; previously the handle was never closed at all.
with ZipFile(zipfilename, 'r') as myzip:
    for name in myzip.namelist():
        with myzip.open(name) as cif_file:
            # get_structures() returns a list, so extend rather than append.
            structures.extend(CifParser(cif_file).get_structures())
In [ ]:
# Display how many structures were parsed from the archive.
len(structures)
Reject structures that are already on the Materials Project (MP) web site.
In [ ]:
from pymatgen import MPRester
# Ask MP about each structure and split them into "already known" (their
# ids are gathered in mp_ids) and "new" (kept in new_structures).
mpr = MPRester()
mp_ids = []
new_structures = []
for s in structures:
    matching_ids = mpr.find_structure(s)
    if len(matching_ids) == 0:
        # Nothing matched on MP, so this structure is a candidate.
        new_structures.append(s)
    else:
        mp_ids.extend(matching_ids)
if mp_ids:
    print("Filtered out structures already on MP: {}".format(mp_ids))
In [ ]:
# Display how many structures remain after filtering out those found on MP.
len(new_structures)
Create a mock "job" for each structure, and then simulate the checks the submission processor does to reject jobs. The structures that pass here will actually spawn a ready workflow, so we will filter for such structures.
In [ ]:
from pymatgen import Composition
from pymatgen.util.provenance import StructureNL
def get_meta_from_structure(structure):
    """Return the metadata dict that "fills out" a job document.

    Used by `structure_to_mock_job`. The result describes the structure's
    site count, element symbols, formulas in several forms, chemical system,
    and its ordered/valid flags.
    """
    composition = structure.composition
    # Unique element symbols in alphabetical order.
    symbols = sorted({el.symbol for el in composition.elements})

    meta = {}
    meta['nsites'] = len(structure)
    meta['elements'] = symbols
    meta['nelements'] = len(symbols)
    meta['formula'] = composition.formula
    meta['reduced_cell_formula'] = composition.reduced_formula
    # Re-parse the reduced formula to get it with alphabetical ordering.
    reduced = Composition(composition.reduced_formula)
    meta['reduced_cell_formula_abc'] = reduced.alphabetical_formula
    meta['anonymized_formula'] = composition.anonymized_formula
    meta['chemsystem'] = '-'.join(symbols)
    meta['is_ordered'] = structure.is_ordered
    meta['is_valid'] = structure.is_valid()
    return meta
def structure_to_mock_job(structure):
    """Wrap `structure` in a StructureNL and return it as a mock job dict.

    The dict is the StructureNL's dict form, supplemented with the metadata
    from `get_meta_from_structure` (unless 'is_valid' is already present) and
    then updated with the dict form of the sorted structure.
    """
    # A StructureNL needs at least one author. This is only a mock job, so
    # any placeholder will do.
    mock_author = {"name": "Evgraf Fedorov", "email": "symmetry@ftw.org"}
    snl = StructureNL(structure, [mock_author])

    job = snl.as_dict()
    if 'is_valid' not in job:
        job.update(get_meta_from_structure(snl.structure))
    job.update(snl.structure.get_sorted_structure().as_dict())
    return job
# Constants used by job_is_submittable below. They mirror the checks in
# mpworks.processors.process_submissions.SubmissionProcessor#submit_new_workflow
MAX_SITES = 200 # SubmissionProcessor.MAX_SITES above
# Element symbols rejected as having no POTCAR. Listed inline instead of
# from mpworks.workflows.wf_utils import NO_POTCARS
NO_POTCARS = ['Po', 'At', 'Rn', 'Fr', 'Ra', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr']
def job_is_submittable(job):
    """Return True if the mock `job` passes every submission check.

    A job is rejected, with the reason printed, when its structure has more
    than MAX_SITES sites, is flagged invalid, contains an element listed in
    NO_POTCARS, or is disordered. The checks run in that order and only the
    first failing one is reported.

    Args:
        job: dict as produced by `structure_to_mock_job`; must contain the
            'is_valid', 'elements' and 'is_ordered' keys.

    Returns:
        True if the job would be accepted, False otherwise.
    """
    snl = StructureNL.from_dict(job)
    nsites = len(snl.structure.sites)

    if nsites > MAX_SITES:
        reason = 'too many sites ({})'.format(nsites)
    elif not job['is_valid']:
        reason = 'invalid structure (atoms too close)'
    elif set(NO_POTCARS).intersection(job['elements']):
        reason = 'invalid element (No POTCAR)'
    elif not job['is_ordered']:
        reason = 'invalid structure (disordered)'
    else:
        return True

    # print(...) with a single argument behaves the same on Python 2 and 3,
    # unlike the print statement, which is a SyntaxError on Python 3.
    print('REJECTED WORKFLOW FOR {} - {}'.format(snl.structure.formula, reason))
    return False
# new_structures no longer needs its own name; carry on with `structures`.
structures = new_structures
# Keep only the structures whose mock job passes every submission check.
submittables = [s for s in structures
                if job_is_submittable(structure_to_mock_job(s))]
If there are issues with the metadata, an exception will be raised on attempting to create `snl_list`.
In [ ]:
# No longer need a separate reference for submittables; from here on
# `structures` refers to the same list.
structures = submittables
# List of (name, email) pairs
authors = [
    ('Evgraf Fedorov', 'symmetry@ftw.org'),
    ('Arthur Schoenflies', 'art@berlin.de'),
]
# BibTeX string of references. This is a raw string because the BibTeX
# contains the sequences \{ and \}, which are invalid escape sequences in an
# ordinary string literal (a warning on recent Python versions). The
# resulting text is unchanged.
references = r"""
@article{Graf1961,
author = {Graf, Donald L},
journal = {American Mineralogist},
number = {11},
pages = {1283--1316},
title = {{Crystallographic tables for the rhombohedral carbonates}},
volume = {46},
year = {1961}
}
@article{Akao_1977,
author = {Akao, M and Iwai, S},
doi = {10.1107/s0567740877005834},
journal = {Acta Crystallogr Sect B},
month = {apr},
number = {4},
pages = {1273--1275},
publisher = {International Union of Crystallography ({\{}IUCr{\}})},
title = {{The hydrogen bonding of hydromagnesite}},
url = {http://dx.doi.org/10.1107/s0567740877005834},
volume = {33},
year = {1977}
}
"""
# Projects? List of strings.
projects = []
# Remarks? List of strings.
remarks = []
# Build the StructureNL list from the structures and the metadata defined
# above; problems with the metadata surface as an exception here.
snl_list = StructureNL.from_structures(
    structures,
    authors,
    references=references,
    projects=projects,
    remarks=remarks,
)
In [ ]:
# Using v1 endpoint: create a client pointed explicitly at the v1 REST API.
mpr = MPRester(endpoint="https://www.materialsproject.org/rest/v1")
# NOTE(review): the submission call is left commented out, presumably so that
# running the notebook does not submit anything by accident. Uncomment to
# actually submit snl_list.
#mpr.submit_snl(snl_list)