In [ ]:
import json, os
from mpcontribs.client import Client
from pymatgen import Composition, Structure, MPRester
In [ ]:
# Name of the MPContribs project; reused below as the project key/filter in
# the client calls and stored on every contribution.
project = 'MnO2_phase_selection'
# MPContribs API client. The argument is a placeholder string.
# NOTE(review): supply a real key at run time and avoid committing it.
client = Client('your-api-key-here')
# Materials Project REST client, used later through find_structure().
# No key is passed here - presumably it is read from the environment or a
# config file; TODO confirm.
mpr = MPRester()
Retrieve and update project info
In [ ]:
# Fetch the project's entry, requesting all fields. Nothing is assigned; as
# the last expression of the cell the result is presumably just displayed.
client.projects.get_entry(pk=project, _fields=['_all']).result()
In [ ]:
# Maps the short `phase` label stored on each hull entry (key) to the name
# under which that phase's rows are looked up in the formatted-entries file
# (value).
phase_names = {
    'beta': 'Pyrolusite',
    'gamma': 'Intergrowth',
    'ramsdellite': 'Ramsdellite',
    'alpha': 'Hollandite',
    'lambda': 'Spinel',
    'delta': 'Layered',
    'other': 'Other',
}
# Disabled: would store the mapping on the project itself.
# NOTE(review): the key below contains U+2212 (minus sign), not an ASCII
# hyphen - confirm the intended key before re-enabling.
# client.projects.update_entry(pk=project, project={'other': {'phase−names': phase_names}}).result()
Create contributions
In [ ]:
# mp_contrib_phases: data/MPContrib_formatted_entries.json
# hull_states: data/MPContrib_hull_entries.json
#
# Load every JSON file found directly inside ./data into `data`, keyed by the
# file name (e.g. data['MPContrib_hull_entries.json']).
data = {}
# The context manager closes the directory iterator even if a file fails to
# parse part-way through.
with os.scandir('data') as entries:
    for fn in entries:
        # Skip sub-directories and stray non-JSON files (checkpoint folders,
        # OS metadata, ...) that would otherwise make open()/json.load() fail.
        if not (fn.is_file() and fn.name.endswith('.json')):
            continue
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(fn, 'r', encoding='utf-8') as f:
            data[fn.name] = json.load(f)
In [ ]:
# Fallback rows used when a phase name has no entry of its own in the
# formatted-entries file. Each row is indexed positionally by the consumer:
#   [0] formula string, [1] value reported as 'ΔH', [2] value reported as 'GS',
#   [3] value reported as 'ΔH|hyd' (a string here means "not available").
other = [
    ['LiMnO2', -3.064, 'Y', '--'],
    ['KMnO2', -2.222, 'Y', '--'],
    ['Ca0.5MnO2', -2.941, 'Y', '--'],
    ['Na0.5MnO2', -1.415, 'Y', '--'],
]
In [ ]:
from itertools import islice
def chunks(data, SIZE=500):
    """Yield successive pieces of *data* holding at most *SIZE* items each.

    Args:
        data: A dict or a sliceable sequence (e.g. a list). Dicts are split
            into smaller dicts that preserve iteration order; any other
            input is split by slicing, so pieces have the type slicing
            returns.
        SIZE: Maximum number of items per piece; must be at least 1.

    Yields:
        The pieces of *data*, in order. Nothing is yielded for empty input.

    Raises:
        ValueError: If *SIZE* is smaller than 1. Because this is a
            generator, the error surfaces on first iteration.
    """
    # A negative step would make range() empty and silently drop all data;
    # zero would raise an unrelated-looking range() error. Fail clearly.
    if SIZE < 1:
        raise ValueError(f'SIZE must be a positive integer, got {SIZE!r}')

    # The input type cannot change mid-iteration, so branch once up front.
    if isinstance(data, dict):
        keys = iter(data)
        for _ in range(0, len(data), SIZE):
            # islice advances the shared key iterator by up to SIZE keys.
            yield {k: data[k] for k in islice(keys, SIZE)}
    else:
        for start in range(0, len(data), SIZE):
            yield data[start:start + SIZE]
In [ ]:
# Build `contributions`: one entry per identifier, shaped as
#   {'contrib': <contribution dict>, 'structures': [<structure dict>, ...]}.
# Hull entries that resolve to the same identifier share a single
# contribution and each add one structure to it.
contributions = {}
is_public = True
# Loop-invariant lookup, done once instead of once per hull entry.
formatted_entries = data['MPContrib_formatted_entries.json']

for hstate in data['MPContrib_hull_entries.json']:
    phase = hstate['phase']
    composition = Composition.from_dict(hstate['c'])
    structure = Structure.from_dict(hstate['s'])

    # Rows for this phase; phases without their own section fall back to the
    # `other` table.
    phase_name = phase_names[phase]
    phase_data = formatted_entries.get(phase_name, other)
    if not phase_data:
        print('no data found for', composition, phase_name)
        continue

    # Pick the first row whose formula matches this entry's composition.
    for values in phase_data:
        if Composition(values[0]) == composition:
            contrib_data = {'GS': values[2], 'ΔH': f'{values[1]} eV/mol'}
            # A string in the last column marks the value as unavailable.
            if not isinstance(values[3], str):
                contrib_data['ΔH|hyd'] = f'{values[3]} eV/mol'
            break
    else:
        # No row matched: skip this hull entry entirely.
        print('no data found for', composition, phase)
        continue

    # Query the Materials Project only for entries that are actually kept, so
    # discarded entries do not cost a remote call. Use the first matching id
    # when there is one, otherwise fall back to the integer formula.
    mpids = mpr.find_structure(structure)
    comp = composition.get_integer_formula_and_factor()[0]
    identifier = mpids[0] if mpids else comp

    contrib = {
        'project': project,
        'is_public': is_public,
        'identifier': identifier,
        'data': contrib_data,
    }
    sdct = {'label': '2018/02/16', 'is_public': is_public}
    sdct.update(structure.as_dict())

    # The first entry seen for an identifier supplies the contribution; later
    # ones only append their structure.
    # TODO add contrib hdata? (data of later entries sharing an identifier is
    # currently dropped)
    entry = contributions.setdefault(
        identifier, {'contrib': contrib, 'structures': []}
    )
    # Structures are named '<formula>-<n>', n counting from 0 per identifier.
    sdct['name'] = f"{comp}-{len(entry['structures'])}"
    entry['structures'].append(sdct)

len(contributions)
In [ ]:
# clean up: delete the project's existing contributions in requests limited to
# 250 entries each, repeating until the response reports nothing is left.
while True:
    resp = client.contributions.delete_entries(project=project, _limit=250).result()
    print(resp['count'], 'contributions deleted')
    if not resp['has_more']:
        break

# submit: create contributions in batches of 250, then attach each batch's
# structures to the newly created contributions.
for batch_no, batch in enumerate(chunks(contributions, SIZE=250)):
    payload = [entry['contrib'] for entry in batch.values()]
    created = client.contributions.create_entries(contributions=payload).result()
    print(batch_no, created['count'], 'contributions created')

    # Link every structure to the id of the contribution created for its
    # identifier.
    create_structures = []
    for new_contrib in created['data']:
        for sdct in batch[new_contrib['identifier']]['structures']:
            sdct['contribution'] = new_contrib['id']
            create_structures.append(sdct)

    print('submit', len(create_structures), 'structures ...')
    # Structures are sent in smaller requests of at most 100.
    for sub_no, subchunk in enumerate(chunks(create_structures, SIZE=100)):
        result = client.structures.create_entries(structures=subchunk).result()
        print(sub_no, result['count'], 'structures created')