These Oxidation Pool notebooks parallelize the oxidation-number finding routine for the dataset. This one, however, also takes all the resulting files and then creates pickle files for the structures (struct_all.pickle) and the oxidation numbers (val_all.pickle) and saves them in the data folder.



In [1]:

    
import pymatgen_pars as pymt
import numpy as np
import pymatgen.analysis.bond_valence as bv
from collections import Counter



In [2]:

    
# Load the structure set from disk; later cells read each entry's
# .composition.elements and .species.
st = pymt.read_unique_data("unique_data.json")



In [3]:

    
import tqdm



In [4]:

    
# Keep only the structures whose every element appears in bv.BV_PARAMS,
# i.e. for which bond-valence parameters are available.
st_masked = [
    structure
    for structure in st
    if all(element in bv.BV_PARAMS for element in structure.composition.elements)
]



In [5]:

    
# Bond-valence analyzer (constructed with its default settings) used below
# to assign valences to each structure.
BV = bv.BVAnalyzer()



In [6]:

    
# Assign valences to the first 5000 masked structures (this notebook's chunk).
# valency1[k] holds the valences returned for True_vals[k]; structures for
# which the analyzer raises are skipped, so the two lists stay aligned.
valency1 = []
True_vals = []
for structure in tqdm.tqdm_notebook(st_masked[0:5000]):
    try:
        site_valences = BV.get_valences(structure)
    except Exception:
        # Best-effort: skip structures the analyzer cannot handle.  Catching
        # Exception instead of using a bare `except:` lets KeyboardInterrupt
        # and SystemExit propagate, so this long loop can still be stopped.
        continue
    valency1.append(site_valences)
    True_vals.append(structure)



In [10]:

    
# Start the combined lists as fresh copies of this notebook's own chunk.
valency = list(valency1)
st_true = list(True_vals)



In [13]:

    
import pickle



In [16]:

    
# Load the valences and matching structures of chunk 3.
# Context managers make sure the file handles are closed after reading.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open('val3.pickle', 'rb') as f:
    val3 = pickle.load(f)
with open("True3.pickle", 'rb') as f:
    true3 = pickle.load(f)



In [17]:

    
# Load the valences and matching structures of chunk 4.
# Context managers make sure the file handles are closed after reading.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open('val4.pickle', 'rb') as f:
    val4 = pickle.load(f)
with open("True4.pickle", 'rb') as f:
    true4 = pickle.load(f)



In [18]:

    
import pickle

# Persist this notebook's chunk: the valences and the matching structures.
# pickle writes bytes, so the files must be opened in binary mode ("wb");
# text mode ("w") fails on Python 3 and risks newline translation corrupting
# the stream on Windows.
with open("val1.pickle", "wb") as f:
    pickle.dump(valency1, f)

with open("True1.pickle", "wb") as f:
    pickle.dump(True_vals, f)



In [19]:

    
# Append chunk 3 to the running lists (each name is rebound to a new list).
# NOTE: re-running this cell appends the chunk a second time.
valency = valency + val3
st_true = st_true + true3



In [20]:

    
# Append chunk 4 to the running lists (each name is rebound to a new list).
# NOTE: re-running this cell appends the chunk a second time.
valency = valency + val4
st_true = st_true + true4



In [22]:

    
# Distinct valence values occurring in each structure.
val_types = list(map(set, valency))



In [24]:

    
# Number of distinct valence values per structure.
vel_lens = list(map(len, val_types))



In [26]:

    
# Tally how many structures have each count of distinct valence values.
Counter(vel_lens)









    Out[26]:





Counter({2: 1674, 3: 7471, 4: 1277, 5: 61, 6: 1})



In [27]:

    
# Load the valences and matching structures of chunk 5.
# Context managers make sure the file handles are closed after reading.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open('val5.pickle', 'rb') as f:
    val5 = pickle.load(f)
with open("True5.pickle", 'rb') as f:
    true5 = pickle.load(f)



In [28]:

    
# Append chunk 5 to the running lists (each name is rebound to a new list).
# NOTE: re-running this cell appends the chunk a second time.
valency = valency + val5
st_true = st_true + true5



In [38]:

    
# For every structure, the names of the entries in its .species list.
species = [
    [element.name for element in structure.species]
    for structure in st_true
]



In [42]:

    
# Per-structure lengths of the valence list and of the species-name list,
# compared in the next cell.
no_vals = list(map(len, valency))

no_spec = list(map(len, species))



In [44]:

    
# Sanity check: each structure has as many valences as species entries.
np.all(np.array(no_spec)==np.array(no_vals))









    Out[44]:





True



In [45]:

    
# Load the valences and matching structures of chunk 2.
# Context managers make sure the file handles are closed after reading.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open('val2.pickle', 'rb') as f:
    val2 = pickle.load(f)
with open("True2.pickle", 'rb') as f:
    true2 = pickle.load(f)



In [46]:

    
# Append chunk 2 to the running lists (each name is rebound to a new list).
# NOTE: re-running this cell appends the chunk a second time.
valency = valency + val2
st_true = st_true + true2



In [47]:

    
# Recompute the distinct valence values per structure on the combined list.
val_types = list(map(set, valency))



In [48]:

    
# Recompute, for every structure in the combined list, the names of the
# entries in its .species list.
species = [
    [element.name for element in structure.species]
    for structure in st_true
]



In [49]:

    
# Number of distinct valence values per structure.
val_lens = list(map(len, val_types))



In [50]:

    
# Per-structure lengths of the valence list and of the species-name list,
# compared in the next cell.
no_vals = list(map(len, valency))

no_spec = list(map(len, species))



In [51]:

    
# Sanity check: each structure has as many valences as species entries.
np.all(np.array(no_spec)==np.array(no_vals))









    Out[51]:





True



In [52]:

    
# Write the combined valences and structures to disk.
# pickle writes bytes, so the files must be opened in binary mode ("wb");
# text mode ("w") fails on Python 3 and risks newline translation corrupting
# the stream on Windows.
with open("val_all.pickle", "wb") as f:
    pickle.dump(valency, f)

with open("struct_all.pickle", "wb") as f:
    pickle.dump(st_true, f)



In [53]:

    
# Number of entries in the combined valence list.
len(valency)









    Out[53]:





17311



In [64]:

    
# Spot-check the first entry: the structure, its species list and the
# valences stored at the same index.
st_true[0],st_true[0].species,valency[0]









    Out[64]:





(Structure Summary
 Lattice
     abc : 4.0239864900000004 4.0239864900000004 4.0239864900000004
  angles : 90.0 90.0 90.0
  volume : 65.158269541098122
       A : 4.0239864900000004 0.0 2.4639810873953469e-16
       B : -2.4639810873953469e-16 4.0239864900000004 2.4639810873953469e-16
       C : 0.0 0.0 4.0239864900000004
 PeriodicSite: Nb (0.0000, 0.0000, 0.0000) [0.0000, 0.0000, 0.0000]
 PeriodicSite: Ag (2.0120, 2.0120, 2.0120) [0.5000, 0.5000, 0.5000]
 PeriodicSite: O (2.0120, 0.0000, 0.0000) [0.5000, 0.0000, 0.0000]
 PeriodicSite: O (-0.0000, 2.0120, 0.0000) [0.0000, 0.5000, 0.0000]
 PeriodicSite: O (0.0000, 0.0000, 2.0120) [0.0000, 0.0000, 0.5000],
 [Element Nb, Element Ag, Element O, Element O, Element O],
 [5, 1, -2, -2, -2])



In [57]:

    
# Tally how many structures have each count of distinct valence values.
Counter(val_lens)









    Out[57]:





Counter({2: 2761, 3: 12499, 4: 1959, 5: 91, 6: 1})



In [61]:

    
# Compare the number of structures that passed the BV_PARAMS mask with the
# total number loaded.
len(st_masked),len(st)









    Out[61]:





(24059, 26230)



In [ ]: