In [ ]:
import os
from time import sleep
from ase.db import connect
from urllib.request import urlretrieve
from mpcontribs.client import Client
In [ ]:
# Name of the MPContribs project that the contributions below are uploaded to.
project = 'dtu'
# Download URL of the ASE database file holding the source data.
db = 'https://cmr.fysik.dtu.dk/_downloads/mp_gllbsc.db'
# Prefer an API key supplied through the environment so that a real secret
# never has to be written into (and committed with) this notebook. The
# placeholder is kept as the fallback, which preserves the previous behaviour
# when the variable is not set.
client = Client(os.environ.get('MPCONTRIBS_API_KEY', 'your-api-key-here'))
Retrieve and update project info
In [ ]:
# client.projects.get_entry(pk=project, _fields=['_all']).result()
# client.projects.update_entry(pk=project, project={'urls': {'DTU|DB': db}}).result()
Create contributions
In [ ]:
# The local cache file is named after the last path component of the URL.
dbfile = db.rsplit('/', 1)[-1]
if not os.path.exists(dbfile):
    # Download under a temporary name and rename only once the transfer has
    # finished. Writing straight to `dbfile` would leave a truncated file
    # behind after an interrupted download, and the existence check above
    # would then accept it as a complete database on the next run.
    partial_dbfile = dbfile + '.part'
    urlretrieve(db, partial_dbfile)
    os.replace(partial_dbfile, dbfile)

con = connect(dbfile)
# Number of rows matched by the 'mpid' selection (the same selection the
# upload loop iterates over).
nr_mpids = con.count(selection='mpid')
print(nr_mpids)
In [ ]:
from itertools import islice
def chunks(data, SIZE=500):
    """Yield consecutive pieces of ``data`` holding at most ``SIZE`` items.

    Args:
        data: Either a ``dict`` or an object supporting ``len()`` and slicing
            (e.g. a list, tuple or string).
        SIZE: Maximum number of items per yielded piece; must be >= 1.

    Yields:
        For a ``dict``: new dicts containing up to ``SIZE`` key/value pairs
        each, taken in the dict's iteration order. For anything else: the
        slices ``data[i:i + SIZE]``.

    Raises:
        ValueError: If ``SIZE`` is smaller than 1. Because this is a
            generator, the error surfaces when iteration starts.
    """
    # A negative step would make range() empty and silently drop every item;
    # fail loudly instead (zero already raised ValueError inside range()).
    if SIZE < 1:
        raise ValueError(f'SIZE must be a positive integer, got {SIZE!r}')

    # The container type cannot change between iterations, so decide once.
    if isinstance(data, dict):
        # One shared key iterator: each islice() call resumes where the
        # previous chunk stopped.
        keys = iter(data)
        for _ in range(0, len(data), SIZE):
            yield {key: data[key] for key in islice(keys, SIZE)}
    else:
        for start in range(0, len(data), SIZE):
            yield data[start:start + SIZE]
In [ ]:
contributions, existing = {}, []
batch_size = 1000
for idx, row in enumerate(con.select('mpid')):
if not idx%100:
print(idx, len(contributions))
if len(contributions) >= batch_size:
for i, chunk in enumerate(chunks(contributions, SIZE=250)):
contribs = [c['contrib'] for c in chunk.values()]
created = client.contributions.create_entries(contributions=contribs).result()
print(i, created['count'], 'contributions created')
contributions.clear()
existing.clear()
if not len(contributions) and not len(existing):
has_more = True
while has_more:
skip = len(existing)
contribs = client.contributions.get_entries(
project=project, _skip=skip, _limit=250, _fields=['identifier']
).result()
existing += [c['identifier'] for c in contribs['data']]
has_more = contribs['has_more']
print(len(existing), 'already uploaded.')
identifier = f'mp-{row.mpid}'
print(idx, identifier)
contrib = {'project': project, 'identifier': identifier, 'is_public': True, 'data': {
'ΔE|KS': { # kohn-sham band gap
'indirect': f'{row.gllbsc_ind_gap - row.gllbsc_disc} eV',
'direct': f'{row.gllbsc_dir_gap - row.gllbsc_disc} eV'
},
'ΔE|QP': { # quasi particle band gap
'indirect': f'{row.gllbsc_ind_gap} eV',
'direct': f'{row.gllbsc_dir_gap} eV'
},
'C': f'{row.gllbsc_disc} eV' # derivative discontinuity
}}