In [ ]:
import json
from pandas import DataFrame
from pymatgen import MPRester, Structure
from mpcontribs.client import Client
In [ ]:
# Load the matbench log10(K_VRH) dataset: a JSON payload holding 'data'
# rows and 'columns' headers, materialized as a pandas DataFrame.
with open('/global/homes/h/huck/log_kvrh', 'r') as fh:
    data = json.load(fh)
df = DataFrame(data['data'], columns=data['columns'])
In [ ]:
# Connection setup: the MPContribs project slug, the MPContribs API client,
# and the Materials Project REST interface (MPRester picks up its key from
# the user's pymatgen configuration when called without arguments).
project = 'matbench_v1'
mpr = MPRester()
client = Client('your-api-key-here')
Get project info:
In [ ]:
client.projects.get_entry(pk=project).result()
Create contributions with data and add structures:
In [ ]:
from itertools import islice

def chunks(data, SIZE=500):
    """Yield successive chunks of *data*, each holding at most SIZE items.

    Dicts are split into sub-dicts that preserve insertion order; lists
    (or any sliceable sequence) are sliced. The type dispatch is hoisted
    out of the loop instead of re-running isinstance per chunk, and the
    iterator is only built for the dict path where it is actually used.
    """
    if isinstance(data, dict):
        it = iter(data)
        for _ in range(0, len(data), SIZE):
            yield {k: data[k] for k in islice(it, SIZE)}
    else:
        for i in range(0, len(data), SIZE):
            yield data[i:i+SIZE]
In [ ]:
# Clean up: delete this project's existing contributions in batches of 250,
# looping until the API reports that none remain.
has_more = True
while has_more:
    deleted = client.contributions.delete_entries(project=project, _limit=250).result()
    print(deleted['count'], 'contributions deleted')
    has_more = deleted['has_more']
In [ ]:
# Main upload loop: for each dataset row, match the structure against the
# Materials Project, accumulate contribution payloads, and flush them to the
# API in batches of `batch_size`.
contributions, existing, uploaded = {}, [], None
batch_size = 500
for idx, (s, kvrh) in enumerate(zip(df['structure'], df['log10(K_VRH)'])):
    # Flush a full batch: create the contributions first, then attach their
    # structures using the IDs assigned by the server.
    if len(contributions) >= batch_size:
        for i, chunk in enumerate(chunks(contributions, SIZE=250)):
            contribs = [c['contrib'] for c in chunk.values()]
            created = client.contributions.create_entries(contributions=contribs).result()
            print(i, created['count'], 'contributions created')
            create_structures = []
            for contrib in created['data']:
                identifier = contrib['identifier']
                # BUGFIX: this inner loop previously rebound the outer row
                # variable `s`, so the row processed right after a flush used
                # a stale structure dict from the flushed chunk. A distinct
                # name keeps the outer `s` intact.
                for sdata in chunk[identifier]['structures']:
                    sdata['contribution'] = contrib['id']
                    create_structures.append(sdata)
            print('submit', len(create_structures), 'structures ...')
            for j, subchunk in enumerate(chunks(create_structures, SIZE=100)):
                created = client.structures.create_entries(structures=subchunk).result()
                print(j, created['count'], 'structures created')
        contributions.clear()
        existing.clear()
    # (Re)load the identifiers already on the server so they can be skipped;
    # runs on the first iteration and again after every flush.
    if not len(contributions) and not len(existing):
        has_more = True
        while has_more:
            skip = len(existing)
            contribs = client.contributions.get_entries(
                project=project, _skip=skip, _limit=250, _fields=['identifier']
            ).result()
            existing += [c['identifier'] for c in contribs['data']]
            has_more = contribs['has_more']
        uploaded = len(existing)
        print(uploaded, 'already uploaded.')
    # Fast-skip rows assumed (by position) to be already uploaded.
    if idx < uploaded:
        continue
    structure = Structure.from_dict(s)
    matches = mpr.find_structure(structure)
    if not matches:
        print('no match for idx', idx)
        continue
    identifier = matches[0]
    if identifier in existing:
        continue
    if identifier in contributions:
        print(idx, identifier, 'already parsed')
        continue
    contrib = {'project': project, 'identifier': identifier, 'data': {'K|VRH': kvrh}}
    sdct = dict(name=structure.composition.reduced_formula, label='2020/02/02')
    sdct.update(structure.as_dict())
    contributions[identifier] = {'contrib': contrib, 'structures': [sdct]}
Retrieve and download contributions:
In [ ]:
# Retrieve the first 100 contribution IDs for this project; the cell output
# compares the project-wide total against the number returned in this page.
contribs = client.contributions.get_entries(project=project, _fields=['id'], _limit=100).result()
contribs['total_count'], len(contribs['data'])