In [ ]:
import json, os
from mpcontribs.client import Client
from pymatgen import Composition, Structure, MPRester

In [ ]:
# Name of the MPContribs project all contributions below are attached to.
project = 'MnO2_phase_selection'
# Read the API key from the MPCONTRIBS_API_KEY environment variable so that a
# real key never has to be committed with the notebook; the placeholder is
# only used when the variable is not set.
client = Client(os.environ.get('MPCONTRIBS_API_KEY', 'your-api-key-here'))
# Materials Project REST client, used further down to match structures to
# existing MP entries. Constructed without an explicit key.
mpr = MPRester()

Retrieve and update project info


In [ ]:
# Request every field of the project entry and display the resolved result.
project_request = client.projects.get_entry(pk=project, _fields=['_all'])
project_request.result()

In [ ]:
# Maps the short phase key used in the hull entries to the phase name used as
# a key in the formatted entries. Built from ordered pairs so the insertion
# order stays explicit.
phase_names = dict([
    ('beta', 'Pyrolusite'),
    ('gamma', 'Intergrowth'),
    ('ramsdellite', 'Ramsdellite'),
    ('alpha', 'Hollandite'),
    ('lambda', 'Spinel'),
    ('delta', 'Layered'),
    ('other', 'Other'),
])
# client.projects.update_entry(pk=project, project={'other': {'phase−names': phase_names}}).result()

Create contributions


In [ ]:
# Load every JSON file found in the local `data` directory into `data`,
# keyed by file name. Files referenced by the cells below:
# mp_contrib_phases: data/MPContrib_formatted_entries.json
# hull_states: data/MPContrib_hull_entries.json
data = {}
with os.scandir('data') as entries:  # context manager releases the directory handle
    for fn in entries:
        # Skip sub-directories and stray non-JSON files (e.g. notebook
        # checkpoints) instead of failing inside open()/json.load().
        if not (fn.is_file() and fn.name.endswith('.json')):
            continue
        # JSON is UTF-8 by specification; do not rely on the platform default.
        with open(fn, 'r', encoding='utf-8') as f:
            data[fn.name] = json.load(f)

In [ ]:
# Fallback rows used when the formatted entries have no key for a phase name.
# Each row is [formula, ΔH value, GS flag, hydration value]; the '--' string
# in the last column is what makes the contribution loop skip 'ΔH|hyd'.
other = [
    [formula, delta_h, 'Y', '--']
    for formula, delta_h in (
        ('LiMnO2', -3.064),
        ('KMnO2', -2.222),
        ('Ca0.5MnO2', -2.941),
        ('Na0.5MnO2', -1.415),
    )
]

In [ ]:
from itertools import islice

def chunks(data, SIZE=500):
    """Yield successive pieces of `data` holding at most `SIZE` items each.

    Args:
        data: a dict, or any object supporting `len()` and slicing.
        SIZE: maximum number of items per yielded piece.

    Yields:
        For a dict, sub-dicts with up to `SIZE` keys taken in iteration
        order; otherwise slices `data[start:start + SIZE]`.
    """
    if isinstance(data, dict):
        # One shared key iterator: every islice() call resumes where the
        # previous piece stopped.
        remaining_keys = iter(data)
        for _ in range(0, len(data), SIZE):
            yield {key: data[key] for key in islice(remaining_keys, SIZE)}
    else:
        for start in range(0, len(data), SIZE):
            yield data[start:start + SIZE]

In [ ]:
# Build `contributions`: identifier -> {'contrib': {...}, 'structures': [...]}.
# One contribution is kept per identifier; further hull entries that resolve
# to the same identifier only add another structure to it.
contributions = {}
is_public = True
formatted_entries = data['MPContrib_formatted_entries.json']

for hstate in data['MPContrib_hull_entries.json']:
    phase = hstate['phase']
    composition = Composition.from_dict(hstate['c'])

    # Rows for this phase; fall back to the `other` table when the formatted
    # entries have no key for the phase name.
    phase_name = phase_names[phase]
    phase_data = formatted_entries.get(phase_name, other)
    if not phase_data:
        print('no data found for', composition, phase_name)
        continue

    # Pick the first row whose formula (values[0]) equals this composition.
    for values in phase_data:
        if Composition(values[0]) == composition:
            contrib_data = {'GS': values[2], 'ΔH': f'{values[1]} eV/mol'}
            # A string in the last column (e.g. '--') means no hydration value.
            if not isinstance(values[3], str):
                contrib_data['ΔH|hyd'] = f'{values[3]} eV/mol'
            break
    else:
        print('no data found for', composition, phase)
        continue

    # Only entries that survived the checks above reach this point, so the
    # structure is parsed and looked up on the Materials Project just for
    # entries that will actually be contributed.
    structure = Structure.from_dict(hstate['s'])
    mpids = mpr.find_structure(structure)
    comp = composition.get_integer_formula_and_factor()[0]
    # Use the first id returned by the lookup, else the integer formula.
    identifier = mpids[0] if mpids else comp
    contrib = {
        'project': project, 'is_public': is_public,
        'identifier': identifier, 'data': contrib_data,
    }

    sdct = {'label': '2018/02/16', 'is_public': is_public}
    sdct.update(structure.as_dict())

    if identifier in contributions:
        # Name the extra structure by its position in the existing list.
        structures = contributions[identifier]['structures']
        sdct['name'] = f'{comp}-{len(structures)}'
        structures.append(sdct)
        # TODO add contrib hdata?
    else:
        sdct['name'] = f'{comp}-0'
        contributions[identifier] = {'contrib': contrib, 'structures': [sdct]}

len(contributions)

In [ ]:
# clean up: delete the project's existing contributions, up to 250 per
# request, until the service reports nothing is left
while True:
    deleted = client.contributions.delete_entries(project=project, _limit=250).result()
    print(deleted['count'], 'contributions deleted')
    if not deleted['has_more']:
        break

# submit: create contributions in chunks of 250, then attach each chunk's
# structures to the contribution ids returned by the service
for i, chunk in enumerate(chunks(contributions, SIZE=250)):
    payload = [entry['contrib'] for entry in chunk.values()]
    created_contribs = client.contributions.create_entries(contributions=payload).result()
    print(i, created_contribs['count'], 'contributions created')

    create_structures = []
    for created_contrib in created_contribs['data']:
        # Link every structure stored under this identifier to the new id.
        for s in chunk[created_contrib['identifier']]['structures']:
            s['contribution'] = created_contrib['id']
            create_structures.append(s)

    print('submit', len(create_structures), 'structures ...')
    # Structures are uploaded in smaller batches of 100.
    for j, subchunk in enumerate(chunks(create_structures, SIZE=100)):
        created_structures = client.structures.create_entries(structures=subchunk).result()
        print(j, created_structures['count'], 'structures created')