In [ ]:
from __future__ import print_function
from shutil import copyfile
import os, subprocess, pipes, re, json, glob, yaml, sys
from collections import Counter
from tqdm import tqdm
from fireworks.core.launchpad import LaunchPad
from boltons.tbutils import ParsedException
from pymongo import MongoClient
GARDEN = '/global/projecta/projectdirs/matgen/garden/'
In [ ]:
# Connect to the FireWorks LaunchPad configured in my_launchpad.yaml and
# report the total number of fireworks as a connectivity sanity check.
lpad = LaunchPad.from_file('my_launchpad.yaml')
lpad.fireworks.count()
In [ ]:
# Load the production materials DB settings (host/port/credentials) from YAML.
# Use a context manager so the file handle is closed, and yaml.safe_load
# because yaml.load without an explicit Loader is deprecated and will execute
# arbitrary YAML tags; a credentials file only needs plain scalars.
materials_prod_config_path = os.path.join('materials_db_prod.yaml')
with open(materials_prod_config_path, 'r') as materials_prod_config_file:
    config = yaml.safe_load(materials_prod_config_file)
In [ ]:
# Connect to the materials ("jp") database using the loaded config.
# j=False: writes do not wait for journal acknowledgement.
conn = MongoClient(config['host'], config['port'], j=False)
db_jp = conn[config['db']]
db_jp.authenticate(config['user_name'], config['password'])
db_jp.materials.count()  # sanity check: number of materials documents
In [ ]:
# Connect (read-only) to the VASP tasks database using tasks_db.json.
# Open the config file with a context manager so the handle is closed
# promptly (the original open() handle was never closed).
with open('tasks_db.json') as tasks_db_file:
    vasp_config = json.load(tasks_db_file)
vasp_conn = MongoClient(vasp_config['host'], vasp_config['port'], j=False)
db_vasp = vasp_conn[vasp_config['database']]
db_vasp.authenticate(vasp_config['readonly_user'], vasp_config['readonly_password'])
db_vasp.tasks.count()  # sanity check: number of task documents
In [ ]:
!cat tasks_db.json
In [ ]:
def print_categories(cats):
    """Print the ten largest categories and the sum of their sizes.

    cats maps a category label to the list of fw_ids assigned to it.
    For each of the ten biggest categories (by member count) one line is
    printed: size, the member list, and the label; finally the total size.
    """
    sizes = Counter({label: len(members) for label, members in cats.items()})
    total = 0
    for label, size in sizes.most_common(10):
        print(size, '\t', cats[label], '\t', label)
        total += size
    print(total)
In [ ]:
# All possible FireWorks states, used below to bucket fireworks/workflows.
states = ["RUNNING", "WAITING", "FIZZLED", "READY", "COMPLETED", "RESERVED", "ARCHIVED", "DEFUSED", "PAUSED"]
In [ ]:
# Known submission batches: the "remarks" tags attached to SNLs by the
# various projects that submitted structures.
user_remarks = [
    "new ICSD batch", "Pauling file", "Heusler ABC2 phases",
    "proton conducting materials for fuel cells", "solid solution metal", "solid solution oxide", "intermetallic",
    "CNGMD Nitrides", "MAGICS calculation of band structures of 2D TMDC stacked heterostructures"
]
In [ ]:
#user_query = {"spec.task_type": "Add to SNL database", "spec.snl.about.remarks": "MP user submission"}
# Root fireworks of user-submitted workflows for one batch;
# user_remarks[-3] selects "intermetallic".
user_query = {"spec.task_type": "Add to SNL database", "spec.snl.about.remarks": user_remarks[-3]}
fw_ids_user = lpad.fireworks.find(user_query, {'fw_id': 1, '_id': 0}).distinct('fw_id')
print(len(fw_ids_user), 'user-submitted workflows')
In [ ]:
# For every user-submitted root firework: defuse runaway workflows
# (> 20 nodes, likely endless structure-optimization chains) and tally the
# state of the firework that precedes the band-structure controller.
counter = Counter()
for root_fw_id in fw_ids_user:
    # print(root_fw_id)
    wflows = list(lpad.workflows.find({'nodes': root_fw_id}, ['nodes', 'state', 'links']))
    if len(wflows) > 1:
        # a root firework should belong to exactly one workflow; skip anomalies
        print('\tmultiple workflows for', root_fw_id)
        continue
    wf = wflows[0]
    # fw_id -> {'spec.task_type', 'state'} for every node in this workflow
    fws = {}
    for fw in lpad.fireworks.find(
        {'fw_id': {'$in': wf['nodes']}}, {'fw_id': 1, 'spec.task_type': 1, 'state': 1, '_id': 0}
    ):
        fw_id = fw.pop('fw_id')
        fws[fw_id] = fw
    for fw_id, fw in fws.items():
        # pause controller tasks (problems with band structure calcs)
        # if fw['spec']['task_type'] == 'Controller: add Electronic Structure v2' and \
        #    fw['state'] in ['WAITING', 'READY']:
        #     lpad.pause_fw(fw_id)
        #     fws[fw_id]['state'] = 'PAUSED'
        #     print('\tpaused', fw_id)
        # defuse workflows with more than 20 tasks (endless SO?)
        if wf['state'] != 'COMPLETED' and len(wf['nodes']) > 20 and \
                fw['state'] not in ['COMPLETED', 'DEFUSED', 'PAUSED']:
            try:
                lpad.defuse_fw(fw_id)
                fws[fw_id]['state'] = 'DEFUSED'
                print('\tdefused', fw_id)
            except Exception as ex:
                # defusing failed -> force the firework to FIZZLED directly
                print('\tdefusing', fw_id, 'failed:', str(ex))
                lpad.fireworks.update_one({'fw_id': fw_id}, {"$set":{"state":"FIZZLED"}})
                print('\t', fw_id, 'set to FIZZLED')
    if fws[root_fw_id]['state'] == 'COMPLETED':
        # walk down the workflow until the parent of the electronic-structure
        # controller is reached; count that firework's state
        current_fw_id = root_fw_id
        while 1:
            daughters = wf['links'][str(current_fw_id)]
            if not daughters:
                raise ValueError('why did I get here?')
            if len(daughters) == 1:
                #print('daughter:', current_fw_id, daughters[0], fws[daughters[0]]['spec']['task_type'])
                if fws[daughters[0]]['spec']['task_type'] == 'Controller: add Electronic Structure v2':
                    counter[fws[current_fw_id]['state']] += 1
                    break
                else:
                    current_fw_id = daughters[0]
            else:
                # branching point: follow the structure-optimization branch
                so_task_found = False
                for daughter in daughters:
                    if fws[daughter]['spec']['task_type'] == 'GGA optimize structure (2x)':
                        current_fw_id = daughter
                        so_task_found = True
                        break
                if not so_task_found:
                    raise ValueError('SO task not found!')
    else:
        # root not completed -> count the root's own state
        counter[fws[root_fw_id]['state']] += 1
print(counter)
print('total =', sum(counter.values()))
In [ ]:
# Bucket the "solid solution metal" batch fireworks by state; for states
# that represent runnable work also show the task-type distribution.
vw_fws = {}
for state in states:
    vw_fws[state] = list(lpad.fireworks.find({
        "state": state, "$or": [
            {"spec.snl.about.remarks": "solid solution metal"},
            {"spec.mpsnl.about.remarks": "solid solution metal"}
        ]
    }, ['spec.task_type', 'fw_id']))
    if vw_fws[state]:
        print(state, len(vw_fws[state]))
        if state in ['RUNNING', 'READY', 'RESERVED']:
            print(Counter(fw['spec']['task_type'] for fw in vw_fws[state]))
In [ ]:
# READY fireworks from generic MP user submissions (excluding the named
# batches above) that either never ran VASP or ran on /oasis/ (XSEDE).
priority_user_query = {
    "$and": [
        {"$or": [
            {"spec.snl.about.remarks": {"$in": ["MP user submission"], "$nin": user_remarks}},
            {"spec.mpsnl.about.remarks": {"$in": ["MP user submission"], "$nin": user_remarks}},
        ]}, {"$or": [
            {"spec.prev_vasp_dir": {"$exists": 0}},
            {"spec.prev_vasp_dir": {"$regex": "/oasis/"}},
        ]}
    ]
}
priority_user_fws = {}
for state in states:
    if state == 'READY':  # only the READY bucket is of interest here
        state_query = {'state': state}
        state_query.update(priority_user_query)
        priority_user_fws[state] = list(lpad.fireworks.find(state_query, {
            "fw_id": 1, "spec.task_type": 1, "spec.prev_vasp_dir": 1, "_id": 0}))
        nr_fws = len(priority_user_fws[state])
        if nr_fws > 0:
            # print the fw_ids of the root 'Add to SNL database' tasks
            add_to_snl = []
            for d in priority_user_fws[state]:
                if d['spec']['task_type'] == 'Add to SNL database':
                    add_to_snl.append(d['fw_id'])
            print(' '.join(map(str, add_to_snl)))
            print('{} {} user-submitted XSEDE tasks'.format(nr_fws, state))
print('DONE')
In [ ]:
# 118151 = {Ti,Zr,Hf}-Zn-N piezoelectricity study -> ALL COMPLETED 2017-01-24
# 114781 = Kitchaev Workflows
# 115780 = Heusler ABC2 phases
# 89070 = Silvana Botti Perovskite Structures
submission_group_id = 89070
# Bucket all workflows containing a user-submitted root firework by state.
query = {'nodes': {'$in': fw_ids_user}}
# if user_query["spec.snl.about.remarks"] == "MP user submission":
#     print('FYI: only looking at workflows with submission_group_id', submission_group_id)
#     query.update({'metadata.submission_group_id': submission_group_id})
wflows = {}
total_wflows = float(lpad.workflows.find(query).count())
wflows_projection = {'fw_states': 1, 'parent_links': 1, 'links': 1, 'nodes': 1, '_id': 0, 'state': 1}
for state in states:
    state_query = {'state': state}
    state_query.update(query)
    wflows[state] = list(lpad.workflows.find(state_query, wflows_projection))
    nr_wflows = len(wflows[state])
    if nr_wflows > 0:
        if state == 'FIZZLED':
            # show the first node of each fizzled workflow for follow-up
            print([wf['nodes'][0] for wf in wflows[state]])
        wflows_fraction = nr_wflows / total_wflows
        print('{} {} workflows ({:.1f}%)'.format(nr_wflows, state, wflows_fraction*100.))
print(int(total_wflows), 'workflows in total')
In [ ]:
def find_root_node(wflow):
    """Return the fw_id of the workflow's root node (the node with no parent).

    wflow['nodes'][0] is not necessarily the root node, so scan all nodes
    and return the first one that never appears as a key in parent_links
    (parent_links keys are stringified fw_ids).

    Raises ValueError when every node has a parent (malformed workflow) --
    the original silently returned None in that case, which surfaced later
    as a confusing KeyError; raising here mirrors find_last_node below.
    """
    parent_links_keys = wflow['parent_links'].keys()
    for node in wflow['nodes']:
        if str(node) in parent_links_keys:
            continue
        return node
    raise ValueError('root node not found!')
In [ ]:
# For each workflow in the chosen state, descend from the root to the
# firework actually responsible, and group those fw_ids by task_type.
state = 'FIZZLED' # workflow state
rerun_fws = []
fw_ids_state = {}  # task_type -> [fw_id, ...]
for wflow in wflows[state]:
    root_fw_id = find_root_node(wflow)
    # descend links until fizzled firework found
    fw_id = root_fw_id
    check_states = [state] if state != 'RUNNING' else ['READY', 'RESERVED']
    while 1:
        current_state = wflow['fw_states'][str(fw_id)]
        if current_state == 'RUNNING':
            print(fw_id, 'is RUNNING -> probably need to do `lpad rerun_fws -i {}`'.format(fw_id))
            break
        if current_state in check_states:
            task_type = lpad.fireworks.find_one({'fw_id': fw_id}, {'spec.task_type': 1})['spec']['task_type']
            if task_type not in fw_ids_state:
                fw_ids_state[task_type] = [int(fw_id)]
            else:
                fw_ids_state[task_type].append(int(fw_id))
            # workflow-cached state can lag; re-check the firework document
            alt_state = lpad.fireworks.find_one({'fw_id': fw_id}, {'state': 1, '_id': 0})['state']
            if alt_state == 'RESERVED':
                rerun_fws.append(str(fw_id))
            break
        # if multiple children use non-waiting fw
        # NOTE(review): if ALL children are WAITING, fw_id stays unchanged and
        # this while-loop never terminates -- TODO confirm that cannot happen.
        children = wflow['links'][str(fw_id)]
        for child in children:
            if wflow['fw_states'][str(child)] != 'WAITING':
                fw_id = child
if rerun_fws:
    print('lpad rerun_fws -i', ' '.join(rerun_fws))
for k,v in fw_ids_state.items():
    #if 'GGA' not in k: continue
    print(k, v)
    # for fw_id in v:
    #     launches = lpad.launches.find({'fw_id': fw_id}, {'launch_dir': 1})
    #     for launch in launches:
    #         if not 'oasis' in launch['launch_dir']:
    #             print ('\t', fw_id, launch['launch_dir'])
In [ ]:
# Group the not-yet-completed fireworks of RUNNING workflows by their state.
fw_ids_incomplete = {}  # fw_state -> [fw_id, ...]
for wflow in wflows['RUNNING']:
    for fw_id, fw_state in wflow['fw_states'].items():
        if fw_state != 'COMPLETED':
            if fw_state not in fw_ids_incomplete:
                fw_ids_incomplete[fw_state] = [int(fw_id)]
            else:
                fw_ids_incomplete[fw_state].append(int(fw_id))
print(fw_ids_incomplete)
In [ ]:
# Collect the full node lists of the workflows containing these fw_ids.
nodes = []
for d in lpad.workflows.find({'nodes': {'$in':[1370872,1566138,1566120,1566104,1566099,1567504,1567491,1563287,1652717]}}, {'_id': 0, 'nodes': 1}):
    nodes += d['nodes']
print(nodes)
In [ ]:
# fw_ids_state maps task_type -> list of fw_ids, so the values must be
# flattened before being handed to $in (the original passed lists-of-lists,
# which never match any fw_id -- this was the FIXME).
query = {'fw_id': {'$in': [fw_id for fw_ids in fw_ids_state.values() for fw_id in fw_ids]}}
projection = {'fw_id': 1, 'launches': 1, '_id': 0}
fws = list(lpad.fireworks.find(query, projection))
assert(len(fws) == len(wflows[state]))
In [ ]:
# Load the cache of task_id -> fw_id mappings already checked for projections.
with open('task_fw_ids_wBS.json', 'r') as f:
    task_fw_ids_wBS = json.load(f)
print(len(task_fw_ids_wBS), 'tasks already checked for projections')
In [ ]:
# For each cached firework, find its 'VASP db insertion' daughter.
# .values() replaces the Python-2-only .itervalues() so this cell also runs
# on Python 3 (on Python 2 .values() returns a list -- same iteration).
vasp_fw_ids = []
for fw_id in task_fw_ids_wBS.values():
    wf = lpad.workflows.find_one({'nodes': fw_id}, {'_id': 0, 'links': 1})
    for daughter in wf['links'][str(fw_id)]:
        fw = lpad.fireworks.find_one(
            {'fw_id': daughter, 'spec.task_type': 'VASP db insertion'}, {'fw_id': 1, '_id': 0}
        )
        if fw:
            vasp_fw_ids.append(fw['fw_id'])
            break
len(vasp_fw_ids)
In [ ]:
# Drop the update_duplicates flag from the first task of those fireworks.
lpad.fireworks.update_many(
    {'fw_id': {'$in': vasp_fw_ids}},
    {'$unset' : {'spec._tasks.0.update_duplicates' : 1}}
).raw_result
In [ ]:
print(
    lpad.fireworks.find({'state': 'READY', 'spec.task_type': 'VASP db insertion'}).count(),
    'VASP db insertion tasks ready to run'
)
In [ ]:
# Load tasks without band structures and fetch their completed fireworks.
with open('task_fw_ids_woBS.json', 'r') as f:
    task_fw_ids_woBS = json.loads(f.read())
print(len(task_fw_ids_woBS), 'tasks without BS')
# NOTE(review): on Python 3, .values() is a view; pymongo's $in may need
# list(task_fw_ids_woBS.values()) -- confirm the interpreter version.
fws = lpad.fireworks.find(
    {'fw_id': {'$in': task_fw_ids_woBS.values()}, 'state': 'COMPLETED'},
    {'launches': 1, 'fw_id': 1, '_id': 0}
)
print('{}/{} fireworks found'.format(fws.count(), len(task_fw_ids_woBS)))
# for XSEDE: rsync the launch blocks to Mendel from the compute resource:
#   rsync -avz block_* mendel:/global/projecta/projectdirs/matgen/garden/
In [ ]:
# Build fws_info: fw_id -> {'launch_dir': ...}, de-duplicating fireworks
# that share the same launch directory (recorded under 'duplicates').
fws_info = {}
no_launches_found = []
for fw in fws:
    if not fw['launches']:
        no_launches_found.append(fw['fw_id'])
        continue
    # only the most recent launch is considered
    launch_id = fw['launches'][-1]
    launch = lpad.launches.find_one({'launch_id': launch_id}, {'launch_dir': 1, '_id': 0})
    launch_dir = launch['launch_dir']
    launch_dir_exists = False
    for fw_id, fw_info in fws_info.items():
        if launch_dir == fw_info['launch_dir']:
            launch_dir_exists = True
            break
    if launch_dir_exists:
        # fw_id still points at the entry that already owns this launch_dir
        if 'duplicates' in fws_info[fw_id]:
            fws_info[fw_id]['duplicates'].append(fw['fw_id'])
        else:
            fws_info[fw_id]['duplicates'] = [fw['fw_id']]
        continue
    fws_info[fw['fw_id']] = {'launch_dir': launch_dir.strip()}
if len(no_launches_found) > 0:
    print('launches not found for', len(no_launches_found), 'fireworks')
In [ ]:
# Count how many fireworks were folded into another entry as duplicates.
# .items() replaces the Python-2-only .iteritems() so this cell also runs
# on Python 3 (identical iteration on Python 2).
nr_duplicates = 0
for fw_id, fw_info in fws_info.items():
    if 'duplicates' in fw_info:
        nr_duplicates += len(fw_info['duplicates'])
print(nr_duplicates, '/', len(fws), 'workflows have duplicate launch_dirs =>',
      len(fws)-nr_duplicates, 'unique launch_dirs')
In [ ]:
def get_dest_blocks(s):
    """Split a launch_dir path at its block_/launcher_ segment.

    Returns [prefix, remainder] where remainder starts with 'block_' or
    'launcher_', e.g. '/a/b/block_x/launcher_y' -> ['/a/b', 'block_x/launcher_y'].
    Splits at the FIRST matching marker (the original used str.split and
    silently fell through when a marker occurred more than once).

    Raises ValueError for paths containing neither marker -- the original
    raised a bare IndexError on a[1] in that case.
    """
    path = s.strip()
    for marker in ('/block_', '/launcher_'):
        head, sep, tail = path.partition(marker)
        if sep:
            return [head, marker[1:] + tail]
    raise ValueError('no block_/launcher_ segment in {!r}'.format(s))
In [ ]:
def parse_launchdirs():
    """Resolve each firework's launch_dir, falling back to its GARDEN copy.

    Mutates the global fws_info in place: when the recorded launch_dir does
    not exist locally, it is replaced with the corresponding directory under
    GARDEN, or None when that does not exist either. Finally prints how many
    directories were found.

    Uses .items()/.values() instead of the Python-2-only
    .iteritems()/.itervalues() so the cell also runs on Python 3.
    """
    for fw_id, fw_info in fws_info.items():
        launch_dir = fw_info['launch_dir']
        if not os.path.exists(launch_dir):
            dest, block = get_dest_blocks(launch_dir)
            launch_dir = os.path.join(GARDEN, block)
            fw_info['launch_dir'] = launch_dir if os.path.exists(launch_dir) else None
        # 'compgen -G "$i/*.out" >> ~/launchdirs_exist_outfiles.txt; '
        # 'compgen -G "$i/*.error" >> ~/launchdirs_exist_outfiles.txt; '
    print('found {}/{} launch directories'.format(
        sum(bool(fw_info['launch_dir']) for fw_info in fws_info.values()), len(fws_info)
    ))
In [ ]:
parse_launchdirs()
In [ ]:
def get_file_path(extension, dirlist):
    """Return the first path in dirlist whose extension matches.

    'vasp.out' is skipped so that looking for '.out' finds the FireWorks
    job output rather than the VASP output. Returns None when nothing
    matches.
    """
    for path in dirlist:
        base = os.path.basename(path)
        if base == 'vasp.out':
            continue
        if os.path.splitext(base)[1] == extension:
            return path
    return None
In [ ]:
def scan_errors_warnings(f):
    """Return the first line of file object f that mentions an error/warning.

    A line matches when its lower-cased text contains 'error:' or
    'warning:'. The matching line is returned stripped; None when no line
    matches.
    """
    hits = (line.strip() for line in f.readlines()
            if 'error:' in line.strip().lower() or 'warning:' in line.strip().lower())
    return next(hits, None)
In [ ]:
# Scan each resolved launch directory for error evidence: a traceback in
# *.error, error/warning lines in *.out, and the tail of vasp.out.
# NOTE(review): this loop checks 'remote_dir' and reads 'local_dir', but the
# cells above only ever set 'launch_dir' on fws_info entries -- as written,
# every entry would be tagged 'remote_dir not found'. Presumably written for
# an earlier rsync-based variant of the cells above; confirm before reuse.
for fw_id, fw_info in tqdm(fws_info.items()):
    fw_info['errors'] = []
    if 'remote_dir' not in fw_info:
        fw_info['errors'].append('remote_dir not found')
        continue
    local_dir = fw_info['local_dir']
    if not os.path.exists(local_dir):
        fw_info['errors'].append('local_dir not found')
        continue
    ls = glob.glob(os.path.join(local_dir, '*'))
    if not ls:
        fw_info['errors'].append('no files found in local_dir')
        continue
    error_file = get_file_path('.error', ls)
    if error_file is not None:
        # look for a traceback in *.error
        with open(error_file, 'r') as f:
            fcontent = f.read()
            # try progressively looser delimiters after the traceback
            match = re.search('Traceback((.+\n)+)Traceback', fcontent)
            if not match:
                match = re.search('Traceback((.+\n)+)INFO', fcontent)
            if not match:
                match = re.search('Traceback((.+\n)+)$', fcontent)
            if match:
                fw_info['errors'].append('Traceback'+match.group(1))
            else:
                # NOTE(review): f was already consumed by f.read() above, so
                # readlines() in scan_errors_warnings returns [] and scan is
                # always None here -- likely needs f.seek(0); confirm.
                scan = scan_errors_warnings(f)
                if scan:
                    fw_info['errors'].append(scan)
    # look into .out file
    # NOTE(review): get_file_path may return None; open(None) would raise --
    # presumably a .out file always exists in these launch dirs; confirm.
    out_file = get_file_path('.out', ls)
    with open(out_file, 'r') as f:
        scan = scan_errors_warnings(f)
        if scan:
            fw_info['errors'].append(scan)
    # look into vasp.out
    vasp_out = os.path.join(local_dir, 'vasp.out')
    if os.path.exists(vasp_out):
        with open(vasp_out, 'r') as f:
            vasp_out_tail = f.readlines()[-1].strip()
        fw_info['errors'].append(' -- '.join(['vasp.out', vasp_out_tail]))
    # FIXME .out and .error for non-reservation mode one directory up
In [ ]:
def add_fw_to_category(fw_id, key, cats):
    """Append fw_id to the category list cats[key], creating it on first use."""
    cats.setdefault(key, []).append(fw_id)
In [ ]:
# Categorize each firework by its FIRST parsed error: missing directories,
# parsed tracebacks (by exception type/message/location), custodian
# handler/action dicts embedded in the message, or the raw error text.
categories = {}
for fw_id, fw_info in fws_info.iteritems():
    if not fw_info['errors']:
        add_fw_to_category(fw_id, 'no errors parsed', categories)
        continue
    for error in fw_info['errors']:
        if 'remote_dir' in error or 'local_dir' in error:
            add_fw_to_category(fw_id, error, categories)
        elif error.startswith('Traceback'):
            # categorize by exception type, truncated message and source line
            exc = ParsedException.from_string(error)
            msg = exc.exc_msg[:50]
            match = re.search('errors reached: (.*)', msg)
            if match:
                msg = match.group(1)
            key = ' -- '.join([exc.exc_type, msg])
            # NOTE(review): assumes ParsedException stores lineno as a string
            # (it is concatenated with '#' below) -- confirm with boltons.
            lineno = exc.frames[-1]['lineno']
            key = ' -- '.join([key, os.path.basename(exc.source_file) + '#' + lineno])
            add_fw_to_category(fw_id, key, categories)
        else:
            match = re.search('{(.*)}', error) # matches dictionary
            if match:
                # repr of a python dict: massage into JSON before decoding
                dstr = '{' + match.group(1) + '}'
                dstr = dstr.replace("u'", '"').replace("'", '"')
                dstr = re.sub('{"handler": (.*), "errors"', '{"handler": "\g<1>", "errors"', dstr)
                try:
                    d = json.loads(dstr)
                except:
                    add_fw_to_category(fw_id, 'looks like dict but could not decode', categories)
                else:
                    if 'handler' in d and 'errors' in d:
                        if '<' in d['handler']:
                            # reduce '<custodian.vasp.handlers.X object ...>' to 'X'
                            match = re.search('custodian\.vasp\.handlers\.(.*) object', d['handler'])
                            if match:
                                d['handler'] = match.group(1)
                            else:
                                raise ValueError('custodian.vasp.handlers not matched')
                        add_fw_to_category(fw_id, d['handler'], categories)
                    elif 'action' in d:
                        add_fw_to_category(fw_id, 'action', categories)
                    else:
                        add_fw_to_category(fw_id, 'found dict but not handler or action error', categories)
            else:
                add_fw_to_category(fw_id, error, categories)
        break # only look at first error
print_categories(categories)
In [ ]:
# Spot-check a single firework's remote_dir and a few priorities/states.
fws_info[1564191]['remote_dir']
In [ ]:
lpad.fireworks.find_one({'fw_id': 1564191}, {'spec._priority': 1, 'state': 1})
In [ ]:
lpad.fireworks.find_one({'fw_id': 1285769}, {'spec._priority': 1, 'state': 1})
In [ ]:
lpad.fireworks.find_one({'fw_id': 1399045}, {'spec._priority': 1, 'state': 1})
In [ ]:
# Load the Kitchaev submission map (cif -> submission info).
f = open('mpcomplete_kitchaev.json', 'r')
In [ ]:
import json
d = json.load(f)
In [ ]:
def find_last_node(wflow):
    """Return the (stringified) fw_id of the workflow node with no children.

    Raises ValueError when every node has outgoing links.
    """
    for node, children in wflow['links'].items():
        if not children:
            return node
    raise ValueError('last node not found!')
In [ ]:
# For each completed Kitchaev submission: walk the workflow root-to-leaf,
# collect task_ids from launches, and look up the resulting materials id.
for cif, info in d.items():
    submission_id = info['submission_id']
    wflow = lpad.workflows.find_one({'metadata.submission_id': submission_id}, wflows_projection)
    if wflow['state'] != 'COMPLETED':
        continue
    fw_id = find_root_node(wflow)
    # seed with None so the $in query below is never empty
    task_ids = [None]
    while 1:
        launch_id = lpad.fireworks.find_one({'fw_id': fw_id}, {'launches': 1, '_id': 0})['launches'][-1]
        launch = lpad.launches.find_one(
            {'launch_id': launch_id, 'action.stored_data.task_id': {'$exists': 1}},
            {'action.stored_data.task_id': 1, '_id': 0}
        )
        if launch:
            task_ids.append(launch['action']['stored_data']['task_id'])
        children = wflow['links'][str(fw_id)]
        if not children:
            break
        # follow the last child at each level
        fw_id = children[-1]
    mat = db_jp.materials.find_one({'task_ids': {'$in': task_ids}}, {'task_id': 1, 'task_ids': 1, '_id': 0})
    info['fw_id'] = fw_id
    info['mp_id'] = mat['task_id']
    print(d[cif])
    #break
print('DONE')
In [ ]:
# Persist the submission -> mp-id mapping; a context manager guarantees the
# file is flushed and closed (the original left fout open).
with open('mpcomplete_kitchaev_mpids.json', 'w') as fout:
    json.dump(d, fout)
In [ ]:
# mp_ids = ['mp-27187','mp-695866','mp-25732','mp-770957','mp-770953','mp-685168','mp-672214','mp-561549','mp-679630',
#           'mp-7323','mp-772665','mp-17895','mp-770566','mp-25772','mp-3009','mp-625837','mp-12797','mp-28588',
#           'mp-770887','mp-776836','mp-5185','mp-24570','mp-723049','mp-657176','mp-25766','mp-19548','mp-625823',
#           'mp-684950','mp-557613','mp-704536','mp-722237','mp-676950']
# For the given materials: collect their workflows' Uniform fireworks to
# rerun, Boltztrap fireworks to defuse, and the task_ids already inserted.
mp_ids = ['mp-5229']
snlgroup_ids = db_jp.materials.find({'task_ids': {'$in': mp_ids}}).distinct('snlgroup_id')
fw_ids_dosfix = lpad.fireworks.find({"spec.snlgroup_id": {'$in': snlgroup_ids}}).distinct('fw_id')
wflows_dosfix = list(lpad.workflows.find({'nodes': {'$in': fw_ids_dosfix}}))
fw_ids_rerun = []
fw_ids_defuse = []
task_ids = set()
for wflow in wflows_dosfix:
    print('wf:', wflow['nodes'][0])
    fw_ids_uniform = []
    for fw in list(lpad.fireworks.find({'fw_id': {'$in': wflow['nodes']}})):
        if 'Uniform' in fw['spec']['task_type']:
            fw_ids_uniform.append(fw['fw_id'])
        elif 'Boltztrap' in fw['spec']['task_type']:
            fw_ids_defuse.append(fw['fw_id'])
        elif 'VASP db' in fw['spec']['task_type']:
            print(fw['fw_id'], fw['launches'][-1])
            launch = lpad.launches.find_one({'launch_id': fw['launches'][-1]}, {'_id': 0, 'action.stored_data': 1})
            task_ids.add(launch['action']['stored_data'].get('task_id'))
    if not fw_ids_uniform:
        continue
    # only the most recent Uniform firework needs the rerun
    fw_ids_rerun.append(max(fw_ids_uniform))
len(fw_ids_rerun)
fw_ids_rerun
task_ids
In [ ]:
# Emit the fw_id lists as space/comma-separated strings for lpad CLI use.
' '.join(map(str, fw_ids_rerun))
In [ ]:
fw_ids_defuse
In [ ]:
# Fireworks in the rerun workflows that are neither completed nor defused.
fw_ids_run = []
for wflow in lpad.workflows.find({'nodes': {'$in': fw_ids_rerun}}):
    for fw_id, fw_state in wflow['fw_states'].items():
        if fw_state != 'COMPLETED' and fw_state != 'DEFUSED':
            fw_ids_run.append(fw_id)
','.join(map(str, fw_ids_run))
In [ ]:
' '.join(map(str, fw_ids_defuse))
In [ ]:
# Collect READY/WAITING fireworks from the GGA workflows.
# NOTE(review): fw_ids_gga is not defined anywhere in this notebook view --
# presumably set in a cell that was deleted; this cell raises NameError as-is.
fw_ids_dos_offset = []
for doc in list(lpad.workflows.find({'nodes': {'$in': fw_ids_gga}}, {'fw_states': 1, '_id': 0})):
    for fw_id, fw_state in doc['fw_states'].items():
        if fw_state == 'READY' or fw_state == 'WAITING':
            fw_ids_dos_offset.append(fw_id)
len(fw_ids_dos_offset)
In [ ]:
# NOTE(review): on Python 3 map() is lazy; wrap in list(...) to display.
map(int, fw_ids_dos_offset)
In [ ]:
# For each firework with an existing launch_dir, find its 'VASP db insertion'
# daughter so the insertion can be rerun with update_duplicates.
fw_ids_vasp_db_rerun = []
for fw_id, fw_info in fws_info.items():
    if fw_info['launch_dir']: # GGA Uniform launch_dir exists
        wf = lpad.workflows.find_one({'nodes': fw_id}, {'_id': 0, 'links': 1})
        for daughter in wf['links'][str(fw_id)]:
            fw = lpad.fireworks.find_one(
                {'fw_id': daughter, 'spec.task_type': 'VASP db insertion'}, {'fw_id': 1, '_id': 0}
            )
            if fw:
                fw_ids_vasp_db_rerun.append(fw['fw_id'])
                break
len(fw_ids_vasp_db_rerun)
In [ ]:
# Reset the insertion fireworks to READY and make them overwrite existing
# task documents (update_duplicates on the first task of the spec).
lpad.fireworks.update_many(
    {'fw_id': {'$in': fw_ids_vasp_db_rerun}},
    {"$set":{"state":"READY", "spec._tasks.0.update_duplicates": True}}
).raw_result
In [ ]:
# Build/extend snl_tasks_atomate.json: for every material with a band
# structure (and DOS), map its task_ids to existing GARDEN launch dirs,
# checkpointing the JSON file every 2000 documents.
with open('snl_tasks_atomate.json', 'r') as f:
    data = json.load(f)
# skip materials already processed in a previous run
# NOTE(review): on Python 3, data.keys() is a view; pymongo's $nin may need
# list(data.keys()) -- confirm the interpreter version.
query = {} if not data else {'task_id': {'$nin': data.keys()}}
has_bs_piezo_dos = {'has_bandstructure': True, 'piezo': {'$exists': 1}, 'dos': {'$exists': 1}}
#query.update(has_bs_piezo_dos)
has_bs_dos = {'has_bandstructure': True, 'dos': {'$exists': 1}}
query.update(has_bs_dos)
docs = db_jp.materials.find(query, {'task_ids': 1, '_id': 0, 'task_id': 1, 'snl.snl_id': 1})
for idx,doc in tqdm(enumerate(docs), total=docs.count()):
    mpid = doc['task_id']
    data[mpid] = {'tasks': {}}
    # tag records according to which filter set was applied to the query
    if set(has_bs_piezo_dos.keys()).issubset(query.keys()):
        data[mpid]['tags'] = ['has_bs_piezo_dos']
    if set(has_bs_dos.keys()).issubset(query.keys()):
        data[mpid]['tags'] = ['has_bs_dos']
    for task_id in doc['task_ids']:
        tasks = list(db_vasp.tasks.find({'task_id': task_id}, {'dir_name': 1, '_id': 0}))
        if len(tasks) > 1:
            data[mpid]['error'] = 'found {} tasks'.format(len(tasks))
            continue
        elif not tasks:
            data[mpid]['error'] = 'no task found'
            continue
        dir_name = tasks[0]['dir_name']
        launch_dir = os.path.join(GARDEN, dir_name)
        if not os.path.exists(launch_dir):
            data[mpid]['error'] = '{} not found'.format(dir_name)
            break
        data[mpid]['tasks'][task_id] = launch_dir
        data[mpid]['snl_id'] = doc['snl']['snl_id']
    # periodic checkpoint so a long scan can be resumed
    if not idx%2000:
        with open('snl_tasks_atomate.json', 'w') as f:
            json.dump(data, f)
        #break
with open('snl_tasks_atomate.json', 'w') as f:
    json.dump(data, f)