These Oxidation Pool notebooks parallelize the oxidation-finding routine for the dataset. However, this one also takes all the files and then creates pickle files for the structures (struct_all.pickle) and the oxidation numbers (val_all.pickle) and saves them in the data folder.
In [1]:
import pymatgen_pars as pymt
import numpy as np
import pymatgen.analysis.bond_valence as bv
from collections import Counter
In [2]:
# Load the full dataset through the project helper module `pymatgen_pars`.
# NOTE(review): later cells read `.composition.elements` and `.species` on each
# entry, so these are presumably pymatgen Structure objects -- confirm.
st=pymt.read_unique_data("unique_data.json")
In [3]:
import tqdm
In [4]:
# Keep only the entries whose every element has an entry in `bv.BV_PARAMS`
# (presumably the elements the bond-valence analyzer has parameters for).
st_masked = [
    structure
    for structure in st
    if all(element in bv.BV_PARAMS for element in structure.composition.elements)
]
In [5]:
# Single bond-valence analyzer, built with its default arguments and reused
# for every structure in the loop that follows.
BV=bv.BVAnalyzer()
In [6]:
# Run the bond-valence analysis on the first 5000 filtered entries, keeping
# the computed valences and the entries they belong to in two parallel lists.
valency1 = []
True_vals = []
for structure in tqdm.tqdm_notebook(st_masked[0:5000]):
    try:
        site_valences = BV.get_valences(structure)
    except Exception:
        # Best-effort: skip entries the analyzer fails on.  Catching
        # `Exception` instead of using a bare `except` lets KeyboardInterrupt
        # and SystemExit through, so this long-running cell can be stopped.
        continue
    # Append to both lists only after success so they stay index-aligned.
    valency1.append(site_valences)
    True_vals.append(structure)
In [10]:
# Start the merged collections as fresh copies of the batch computed in this
# session; the batches loaded from disk are concatenated onto them later.
valency = list(valency1)
st_true = list(True_vals)
In [13]:
import pickle
In [16]:
# Load batch 3 (valences and their matching structures) from disk.
# Context managers close the file handles as soon as loading finishes.
# NOTE: only unpickle files you produced yourself -- pickle can execute
# arbitrary code when loading untrusted data.
with open('val3.pickle', 'rb') as f:
    val3 = pickle.load(f)
with open("True3.pickle", 'rb') as f:
    true3 = pickle.load(f)
In [17]:
# Load batch 4 (valences and their matching structures) from disk.
# Context managers close the file handles as soon as loading finishes.
# NOTE: only unpickle files you produced yourself -- pickle can execute
# arbitrary code when loading untrusted data.
with open('val4.pickle', 'rb') as f:
    val4 = pickle.load(f)
with open("True4.pickle", 'rb') as f:
    true4 = pickle.load(f)
In [18]:
import pickle

# Persist the batch computed in this session (valences + matching structures).
# Pickle writes bytes, so the files are opened in binary mode ("wb"); text
# mode ("w") raises TypeError under Python 3, and "wb" matches the 'rb' mode
# used when the other batches are loaded.
with open("val1.pickle", "wb") as f:
    pickle.dump(valency1, f)
with open("True1.pickle", "wb") as f:
    pickle.dump(True_vals, f)
In [19]:
# Append batch 3 to the merged collections.  Both lists are extended in the
# same order so valency[k] keeps corresponding to st_true[k].
valency=valency+val3
st_true=st_true+true3
In [20]:
# Append batch 4 to the merged collections.  Both lists are extended in the
# same order so valency[k] keeps corresponding to st_true[k].
valency=valency+val4
st_true=st_true+true4
In [22]:
# Collapse each entry's valence list to the set of distinct values it contains.
val_types = list(map(set, valency))
In [24]:
# Number of distinct valence values per entry.
vel_lens = list(map(len, val_types))
In [26]:
# Display how many entries have 1, 2, 3, ... distinct valence values.
Counter(vel_lens)
Out[26]:
In [27]:
# Load batch 5 (valences and their matching structures) from disk.
# Context managers close the file handles as soon as loading finishes.
# NOTE: only unpickle files you produced yourself -- pickle can execute
# arbitrary code when loading untrusted data.
with open('val5.pickle', 'rb') as f:
    val5 = pickle.load(f)
with open("True5.pickle", 'rb') as f:
    true5 = pickle.load(f)
In [28]:
# Append batch 5 to the merged collections.  Both lists are extended in the
# same order so valency[k] keeps corresponding to st_true[k].
valency=valency+val5
st_true=st_true+true5
In [38]:
# For every entry of `st_true`, collect the `.name` of each item in its
# `.species`, preserving the order of `st_true`.
species = [
    [site_species.name for site_species in structure.species]
    for structure in st_true
]
In [42]:
# Per-entry counts: how many valences were assigned and how many species
# names were collected.
no_vals = list(map(len, valency))
no_spec = list(map(len, species))
In [44]:
# Sanity check: every entry must have exactly as many valences as species.
spec_counts = np.array(no_spec)
val_counts = np.array(no_vals)
np.all(spec_counts == val_counts)
Out[44]:
In [45]:
# Load batch 2 (valences and their matching structures) from disk.
# Context managers close the file handles as soon as loading finishes.
# NOTE: only unpickle files you produced yourself -- pickle can execute
# arbitrary code when loading untrusted data.
with open('val2.pickle', 'rb') as f:
    val2 = pickle.load(f)
with open("True2.pickle", 'rb') as f:
    true2 = pickle.load(f)
In [46]:
# Append batch 2 to the merged collections.  Both lists are extended in the
# same order so valency[k] keeps corresponding to st_true[k].
valency=valency+val2
st_true=st_true+true2
In [47]:
# Recompute the distinct valence values per entry now that more batches
# have been merged into `valency`.
val_types = list(map(set, valency))
In [48]:
# Rebuild the per-entry species-name lists for the enlarged `st_true`,
# preserving its order.
species = [
    [site_species.name for site_species in structure.species]
    for structure in st_true
]
In [49]:
# Number of distinct valence values per entry of the merged data.
val_lens = list(map(len, val_types))
In [50]:
# Per-entry counts for the merged data: valences assigned and species names
# collected.
no_vals = list(map(len, valency))
no_spec = list(map(len, species))
In [51]:
# Sanity check on the merged data: every entry must have exactly as many
# valences as species.
spec_counts = np.array(no_spec)
val_counts = np.array(no_vals)
np.all(spec_counts == val_counts)
Out[51]:
In [52]:
# Save the merged valences and their matching structures.
# Pickle writes bytes, so the files are opened in binary mode ("wb"); text
# mode ("w") raises TypeError under Python 3.
with open("val_all.pickle", "wb") as f:
    pickle.dump(valency, f)
with open("struct_all.pickle", "wb") as f:
    pickle.dump(st_true, f)
In [53]:
# Display the total number of entries in the merged valence list.
len(valency)
Out[53]:
In [64]:
# Spot-check the first merged entry: the entry itself, its species, and the
# valences stored at the same index.
first_structure = st_true[0]
first_structure, first_structure.species, valency[0]
Out[64]:
In [57]:
# Display how many merged entries have 1, 2, 3, ... distinct valence values.
Counter(val_lens)
Out[57]:
In [61]:
# Display how many entries passed the BV_PARAMS element filter next to the
# size of the full dataset.
len(st_masked),len(st)
Out[61]:
In [ ]: