In [ ]:
import os
from time import sleep
from ase.db import connect
from urllib.request import urlretrieve
from mpcontribs.client import Client

In [ ]:
# Name of the target MPContribs project and URL of the database file to download.
project = 'dtu'
db = 'https://cmr.fysik.dtu.dk/_downloads/mp_gllbsc.db'
# Read the API key from the environment so a real key never has to be saved in
# the notebook; the placeholder is only used when MPCONTRIBS_API_KEY is unset.
client = Client(os.environ.get('MPCONTRIBS_API_KEY', 'your-api-key-here'))

Retrieve and update project info


In [ ]:
# Optional: uncomment to fetch the project entry, or to set the database URL on it.
# client.projects.get_entry(pk=project, _fields=['_all']).result()
# client.projects.update_entry(pk=project, project={'urls': {'DTU|DB': db}}).result()

Create contributions


In [ ]:
# The local file name is whatever follows the last '/' of the download URL.
dbfile = db.rpartition('/')[2]

# Download only when no local copy exists yet, so re-running the cell is cheap.
if not os.path.exists(dbfile):
    urlretrieve(db, dbfile)

# Open the database and report how many rows match the 'mpid' selection.
con = connect(dbfile)
nr_mpids = con.count(selection='mpid')
print(nr_mpids)

In [ ]:
from itertools import islice

def chunks(data, SIZE=500):
    """Yield consecutive pieces of `data`, each holding at most `SIZE` items.

    A dict is split into smaller dicts that follow its iteration order; any
    other input is split with slicing (`data[start:start + SIZE]`), so each
    piece has whatever type slicing `data` returns.

    Args:
        data: a dict, or an object supporting `len()`, `iter()` and slicing.
        SIZE: maximum number of items per piece.

    Yields:
        The pieces of `data`, in order; nothing for empty input.
    """
    cursor = iter(data)
    starts = range(0, len(data), SIZE)
    if isinstance(data, dict):
        # Draw keys from one shared iterator so every piece continues where
        # the previous one stopped.
        for _ in starts:
            yield {key: data[key] for key in islice(cursor, SIZE)}
    else:
        for begin in starts:
            yield data[begin:begin + SIZE]

In [ ]:
# Walk over all database rows matching the 'mpid' selection, build one
# contribution dict per row and upload pending contributions in batches.
#
# `contributions` buffers pending uploads (its values are dicts with a
# 'contrib' key, see the flush step below); `existing` collects the
# identifiers already present in the project.
contributions, existing = {}, []
# Number of buffered contributions that triggers an upload.
batch_size = 1000

for idx, row in enumerate(con.select('mpid')):
    # Progress report every 100 rows.
    if not idx%100:
        print(idx, len(contributions))               

    # Flush: once the buffer holds at least `batch_size` entries, submit it
    # in chunks of 250 contributions per request.
    if len(contributions) >= batch_size:
        for i, chunk in enumerate(chunks(contributions, SIZE=250)):
            contribs = [c['contrib'] for c in chunk.values()]
            created = client.contributions.create_entries(contributions=contribs).result()
            print(i, created['count'], 'contributions created')    

        # Emptying both containers makes the next step re-fetch the
        # identifiers that are now stored in the project.
        contributions.clear()
        existing.clear()
    
    # On the first row and right after a flush, page through the project's
    # contributions (250 per request, identifier field only) until the
    # response reports `has_more` as false.
    if not len(contributions) and not len(existing):
        has_more = True
        while has_more:
            skip = len(existing)
            contribs = client.contributions.get_entries(
                project=project, _skip=skip, _limit=250, _fields=['identifier']
            ).result()
            existing += [c['identifier'] for c in contribs['data']]
            has_more = contribs['has_more']

        print(len(existing), 'already uploaded.')
        # NOTE(review): `existing` is not consulted in the lines below;
        # presumably it is used further down to skip duplicates -- confirm.


    
    # The identifier is the row's `mpid` value with an 'mp-' prefix.
    identifier = f'mp-{row.mpid}'
    print(idx, identifier)
    # Every value is formatted as a string with an ' eV' suffix. The
    # Kohn-Sham entries are the GLLB-SC gaps minus `row.gllbsc_disc`.
    # NOTE(review): `contrib` is not added to `contributions` in this part of
    # the loop -- presumably that happens later in the cell; confirm.
    contrib = {'project': project, 'identifier': identifier, 'is_public': True, 'data': {
        'ΔE|KS': { # kohn-sham band gap
            'indirect': f'{row.gllbsc_ind_gap - row.gllbsc_disc} eV',
            'direct': f'{row.gllbsc_dir_gap - row.gllbsc_disc} eV'            
        },
        'ΔE|QP': { # quasi particle band gap
            'indirect': f'{row.gllbsc_ind_gap} eV',
            'direct': f'{row.gllbsc_dir_gap} eV'           
        },
        'C': f'{row.gllbsc_disc} eV' # derivative discontinuity
    }}