These Oxidation Pool notebooks parallelize the oxidation finding routine for the dataset. However, this one also takes all the files and then creates pickle files for the structures (struct_all.pickle) and the oxidation numbers (val_all.pickle) and saves them in the data folder.


In [1]:
import pymatgen_pars as pymt
import numpy as np
import pymatgen.analysis.bond_valence as bv
from collections import Counter

In [2]:
# Load the dataset through the project helper module pymatgen_pars.
# Presumably this returns a list of pymatgen Structure objects (later cells
# read .composition and .species on each entry) -- confirm against the helper.
st=pymt.read_unique_data("unique_data.json")

In [3]:
import tqdm

In [4]:
# Screen the dataset: keep a structure only if every element in its
# composition has an entry in bv.BV_PARAMS (presumably required by the
# bond-valence analysis run below -- confirm).
st_masked = [
    structure
    for structure in st
    if all(element in bv.BV_PARAMS for element in structure.composition.elements)
]

In [5]:
# Bond-valence analyzer built with its default settings; the same instance is
# reused for every structure in the loop below.
BV=bv.BVAnalyzer()

In [6]:
# Run the bond-valence analysis on this notebook's batch: the first 5000
# screened structures. valency1[k] holds the valences returned for
# True_vals[k]; structures for which the analyzer raises are skipped, so the
# two lists stay index-aligned.
valency1=[]
True_vals=[]
for structure in tqdm.tqdm_notebook(st_masked[0:5000]):
    try:
        site_valences = BV.get_valences(structure)
    except Exception:
        # Best-effort: skip structures the analyzer cannot handle.
        # Catching Exception (not a bare except) keeps KeyboardInterrupt /
        # SystemExit working, so this long loop can still be interrupted.
        continue
    valency1.append(site_valences)
    True_vals.append(structure)




In [10]:
# Start the combined result lists from this notebook's batch. Shallow copies
# are taken so the combined lists are distinct objects from valency1 and
# True_vals.
valency = list(valency1)
st_true = list(True_vals)

In [13]:
import pickle

In [16]:
# Load batch 3: valences (val3) and the matching structures (true3).
# The files are opened with `with` so the handles are closed right after
# loading instead of being left to the garbage collector.
# NOTE(review): pickle.load can execute arbitrary code from the file -- only
# load pickles that were produced by this project.
with open('val3.pickle', 'rb') as f:
    val3 = pickle.load(f)
with open("True3.pickle", 'rb') as f:
    true3 = pickle.load(f)

In [17]:
# Load batch 4: valences (val4) and the matching structures (true4).
# The files are opened with `with` so the handles are closed right after
# loading instead of being left to the garbage collector.
# NOTE(review): pickle.load can execute arbitrary code from the file -- only
# load pickles that were produced by this project.
with open('val4.pickle', 'rb') as f:
    val4 = pickle.load(f)
with open("True4.pickle", 'rb') as f:
    true4 = pickle.load(f)

In [18]:
import pickle

# Persist this notebook's batch: the valences and the structures they belong to.
# Pickle data is bytes, so the files must be opened in binary mode ("wb");
# text mode "w" raises TypeError on Python 3 and can corrupt the data on
# Windows under Python 2. This also matches the 'rb' mode used when loading.
with open("val1.pickle","wb") as f:
    pickle.dump(valency1,f)

with open("True1.pickle","wb") as f:
    pickle.dump(True_vals,f)

In [19]:
# Append batch 3 to the combined lists. Both lists are extended together so
# that valency[k] keeps describing st_true[k].
# Gotcha: re-running this cell appends the batch a second time.
valency, st_true = valency + val3, st_true + true3

In [20]:
# Append batch 4 to the combined lists. Both lists are extended together so
# that valency[k] keeps describing st_true[k].
# Gotcha: re-running this cell appends the batch a second time.
valency, st_true = valency + val4, st_true + true4

In [22]:
# Distinct valence values found in each structure (one set per structure).
val_types = list(map(set, valency))

In [24]:
# Number of distinct valence values per structure.
# NOTE(review): "vel_lens" looks like a typo for "val_lens"; the name is kept
# because the following cell reads it.
vel_lens = list(map(len, val_types))

In [26]:
# Histogram: how many structures have 2, 3, 4, ... distinct valence values
# (for the batches combined so far).
Counter(vel_lens)


Out[26]:
Counter({2: 1674, 3: 7471, 4: 1277, 5: 61, 6: 1})

In [27]:
# Load batch 5: valences (val5) and the matching structures (true5).
# The files are opened with `with` so the handles are closed right after
# loading instead of being left to the garbage collector.
# NOTE(review): pickle.load can execute arbitrary code from the file -- only
# load pickles that were produced by this project.
with open('val5.pickle', 'rb') as f:
    val5 = pickle.load(f)
with open("True5.pickle", 'rb') as f:
    true5 = pickle.load(f)

In [28]:
# Append batch 5 to the combined lists. Both lists are extended together so
# that valency[k] keeps describing st_true[k].
# Gotcha: re-running this cell appends the batch a second time.
valency, st_true = valency + val5, st_true + true5

In [38]:
# For every structure, the name of each entry in its .species list
# (one inner list per structure, in the same order as st_true).
species = [[specie.name for specie in structure.species] for structure in st_true]

In [42]:
# Per-structure lengths: number of assigned valences and number of species
# entries. These are compared in the next cell.
no_vals = list(map(len, valency))
no_spec = list(map(len, species))

In [44]:
# Sanity check: every structure has exactly as many valences as species
# entries, i.e. valency and st_true are still aligned.
np.all(np.array(no_spec)==np.array(no_vals))


Out[44]:
True

In [45]:
# Load batch 2: valences (val2) and the matching structures (true2).
# The files are opened with `with` so the handles are closed right after
# loading instead of being left to the garbage collector.
# NOTE(review): pickle.load can execute arbitrary code from the file -- only
# load pickles that were produced by this project.
with open('val2.pickle', 'rb') as f:
    val2 = pickle.load(f)
with open("True2.pickle", 'rb') as f:
    true2 = pickle.load(f)

In [46]:
# Append batch 2 to the combined lists. Both lists are extended together so
# that valency[k] keeps describing st_true[k].
# Gotcha: re-running this cell appends the batch a second time.
valency, st_true = valency + val2, st_true + true2

In [47]:
# Recompute the distinct valence values per structure now that all batches
# have been concatenated.
val_types = list(map(set, valency))

In [48]:
# Recompute, for every structure in the full list, the name of each entry in
# its .species list (same order as st_true).
species = [[specie.name for specie in structure.species] for structure in st_true]

In [49]:
# Number of distinct valence values per structure, over the full list.
val_lens = list(map(len, val_types))

In [50]:
# Per-structure lengths over the full list: number of assigned valences and
# number of species entries. These are compared in the next cell.
no_vals = list(map(len, valency))
no_spec = list(map(len, species))

In [51]:
# Sanity check before saving: every structure has exactly as many valences as
# species entries, i.e. valency and st_true are aligned.
np.all(np.array(no_spec)==np.array(no_vals))


Out[51]:
True

In [52]:
# Persist the combined results: all valences and all matching structures.
# Pickle data is bytes, so the files must be opened in binary mode ("wb");
# text mode "w" raises TypeError on Python 3 and can corrupt the data on
# Windows under Python 2. This also matches the 'rb' mode used when loading.
with open("val_all.pickle","wb") as f:
    pickle.dump(valency,f)

with open("struct_all.pickle","wb") as f:
    pickle.dump(st_true,f)

In [53]:
# Total number of entries in the combined valence list (all batches).
len(valency)


Out[53]:
17311

In [64]:
# Spot-check the first entry: the structure, its per-site species, and the
# valences assigned to it.
st_true[0],st_true[0].species,valency[0]


Out[64]:
(Structure Summary
 Lattice
     abc : 4.0239864900000004 4.0239864900000004 4.0239864900000004
  angles : 90.0 90.0 90.0
  volume : 65.158269541098122
       A : 4.0239864900000004 0.0 2.4639810873953469e-16
       B : -2.4639810873953469e-16 4.0239864900000004 2.4639810873953469e-16
       C : 0.0 0.0 4.0239864900000004
 PeriodicSite: Nb (0.0000, 0.0000, 0.0000) [0.0000, 0.0000, 0.0000]
 PeriodicSite: Ag (2.0120, 2.0120, 2.0120) [0.5000, 0.5000, 0.5000]
 PeriodicSite: O (2.0120, 0.0000, 0.0000) [0.5000, 0.0000, 0.0000]
 PeriodicSite: O (-0.0000, 2.0120, 0.0000) [0.0000, 0.5000, 0.0000]
 PeriodicSite: O (0.0000, 0.0000, 2.0120) [0.0000, 0.0000, 0.5000],
 [Element Nb, Element Ag, Element O, Element O, Element O],
 [5, 1, -2, -2, -2])

In [57]:
# Histogram over the full list: how many structures have 2, 3, 4, ... distinct
# valence values.
Counter(val_lens)


Out[57]:
Counter({2: 2761, 3: 12499, 4: 1959, 5: 91, 6: 1})

In [61]:
# Number of structures that passed the BV_PARAMS screen vs. number loaded.
len(st_masked),len(st)


Out[61]:
(24059, 26230)

In [ ]: