The Finger_Pool notebooks are the ones used to create the fingerprints (without oxidation states) for structures with <50 atoms in the unit cell. This could have been done in a single cell using the Multiprocess.Pool.map function; however, I wanted to track progress using tqdm, which is harder to do with the Multiprocess module.
Note, however, that in this notebook we also gather the fingerprints created in the other three Finger_Pool notebooks and create the pandas DataFrame, which contains the formulas as well.
In [3]:
import fingerprint as fp
# Load the pickled structures; `struct_all` and `s_all` are two names for the
# same loaded object.
s_all = fp.read_pickle("struct_all.pickle")
struct_all = s_all
# Keep only the structures whose `species` has a length below 50.
structs_lim_50 = [structure for structure in struct_all if len(structure.species) < 50]
In [11]:
import tqdm
import numpy as np
import itertools
def phi_getter(i):
    """Return the combined fingerprint of one structure as a flat list.

    ``fp.get_phi_scaled`` is called on ``i`` once per observable, in the
    order 'ones', 'Z', 'Chi', and the three results are joined end to end.
    """
    observables = ('ones', 'Z', 'Chi')
    # Evaluate all three observables first, then flatten them into one list.
    parts = [fp.get_phi_scaled(i, obser=name) for name in observables]
    return list(itertools.chain.from_iterable(parts))
# Half-open index range [lim1, lim2) of structs_lim_50 fingerprinted in this cell.
lim1, lim2 = 0, 3700
# One fingerprint row per structure; tqdm_notebook wraps the index range so
# that progress is displayed while the rows are computed.
finger_part = np.array(
    [phi_getter(structs_lim_50[idx]) for idx in tqdm.tqdm_notebook(range(lim1, lim2))]
)
finger_part.shape
Out[11]:
In [12]:
# Write this notebook's fingerprint rows to disk. np.savetxt produces a
# plain-text file, so despite the ".npz" suffix it is read back with np.loadtxt.
np.savetxt("finger_part1.npz",finger_part)
In [13]:
# Read the four fingerprint parts back from their text files, in order.
fp1, fp2, fp3, fp4 = map(
    np.loadtxt,
    (
        "finger_part1.npz",
        "finger_part2.npz",
        "finger_part3.npz",
        "finger_part4.npz",
    ),
)
In [14]:
# Stack the four parts row-wise, in part order, into one fingerprint array.
finger_all=np.vstack((fp1,fp2,fp3,fp4))
In [15]:
# Display the dimensions of the stacked fingerprint array.
finger_all.shape
Out[15]:
In [16]:
# Write the stacked fingerprints to disk. np.savetxt produces a plain-text
# file, so the ".npz" suffix does not indicate NumPy's zipped archive format.
np.savetxt("finger_all.npz",finger_all)
In [17]:
import pandas as pd
In [22]:
# Collect `composition.formula` of every structure kept in structs_lim_50,
# preserving the list order.
Formulae = [structure.composition.formula for structure in structs_lim_50]
In [23]:
# Display how many formulas were collected.
len(Formulae)
Out[23]:
In [25]:
# Start the table with a single "Formula" column.
Df=pd.DataFrame({"Formula":Formulae})
In [32]:
# Assemble the fingerprint table in a single DataFrame construction.
# Column order: "Formula", then Ones_1..Ones_100, Z_1..Z_100, Chi_1..Chi_100,
# taken from columns 0-99, 100-199 and 200-299 of finger_all respectively.
# Collecting every column first, instead of inserting 300 columns into an
# existing frame one at a time, avoids pandas' "DataFrame is highly
# fragmented" PerformanceWarning.
# The constructor raises ValueError if the number of rows in finger_all
# differs from len(Formulae).
finger_columns = {"Formula": Formulae}
for block, prefix in enumerate(("Ones_", "Z_", "Chi_")):
    for i in range(100):
        finger_columns[prefix + str(i + 1)] = finger_all[:, 100 * block + i]
Df = pd.DataFrame(finger_columns)
In [33]:
# Display the column labels of the assembled table.
Df.columns
Out[33]:
In [45]:
# Display the shape of the values of the first two rows once the "Formula"
# column has been dropped.
np.shape(Df.iloc[0:2].drop("Formula",axis=1).values)
Out[45]:
In [46]:
# Write the table to a tab-separated file.
Df.to_csv("FingerPrint_lt50.csv",sep='\t')
In [49]:
# Read a tab-separated table back, using the first column of the file as the
# row index.
# NOTE(review): this reads "FingerPrint_lt50_old.csv", not the
# "FingerPrint_lt50.csv" written by the to_csv cell — confirm this is intended.
load_test_csv=pd.read_csv("FingerPrint_lt50_old.csv",sep='\t',index_col=0)
In [51]:
# Preview the first rows of the reloaded table.
load_test_csv.head()
Out[51]:
In [ ]: