Finger_Pool1-checkpoint


The Finger_Pool notebooks are the ones used to create the fingerprints (without oxidation states) for structures with <50 atoms in the unit cell. This could have been done in one cell using the Multiprocess.Pool.map function; however, I wanted to track progress using tqdm, which is harder to do with the Multiprocess module.

Note, however, that this notebook also gathers the fingerprints created in the other 3 Finger_Pool notebooks and creates the pandas DataFrame, which includes the formulas as well.


In [3]:
import fingerprint as fp
# Load the complete collection of structures from disk.
# NOTE(review): presumably fp.read_pickle unpickles this file; only load
# pickles that come from a trusted source.
s_all = fp.read_pickle("struct_all.pickle")
struct_all = s_all  # both names refer to the same loaded object

# Keep only the structures whose `.species` list has fewer than 50 entries.
structs_lim_50 = []
for structure in struct_all:
    if len(structure.species) < 50:
        structs_lim_50.append(structure)


/usr/local/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')

In [11]:
import tqdm
import numpy as np
import itertools
def phi_getter(i):
    """Return the concatenated fingerprint of a single structure as a flat list.

    Calls ``fp.get_phi_scaled`` on ``i`` three times, once for each of the
    observables ``'ones'``, ``'Z'`` and ``'Chi'`` (in that order), and joins
    the three results end to end.

    Parameters
    ----------
    i : object
        The structure to fingerprint; it is passed through unchanged to
        ``fp.get_phi_scaled``.

    Returns
    -------
    list
        All entries of the 'ones' result, followed by those of the 'Z'
        result, followed by those of the 'Chi' result.
    """
    combined = []
    for observable in ('ones', 'Z', 'Chi'):
        combined.extend(fp.get_phi_scaled(i, obser=observable))
    return combined




# Half-open index window [lim1, lim2) of structs_lim_50 fingerprinted by this
# notebook (per the introduction, the other Finger_Pool notebooks produce the
# remaining chunks).
lim1, lim2 = 0, 3700

# Fingerprint each structure in the window, one matrix row per structure,
# showing a notebook progress bar while it runs.
fingerprint_rows = []
for idx in tqdm.tqdm_notebook(range(lim1, lim2)):
    fingerprint_rows.append(phi_getter(structs_lim_50[idx]))
finger_part = np.array(fingerprint_rows)

finger_part.shape



Out[11]:
(3700, 300)

In [12]:
# Save this notebook's chunk of fingerprints. np.savetxt writes plain text, so
# despite the ".npz" suffix this file is not a NumPy zip archive; it is read
# back with np.loadtxt further down.
np.savetxt("finger_part1.npz",finger_part)

In [13]:
# Read the four fingerprint chunks back from their plain-text files
# (np.loadtxt), in chunk order.
part_files = [
    "finger_part1.npz",
    "finger_part2.npz",
    "finger_part3.npz",
    "finger_part4.npz",
]
fp1, fp2, fp3, fp4 = [np.loadtxt(part_file) for part_file in part_files]

In [14]:
# Stack the four chunks row-wise, in chunk order, into one fingerprint matrix.
finger_all=np.vstack((fp1,fp2,fp3,fp4))

In [15]:
# Display the shape of the combined matrix (rows x fingerprint columns).
finger_all.shape


Out[15]:
(14722, 300)

In [16]:
# Persist the combined matrix. As with the per-chunk files, np.savetxt writes
# plain text even though the file name ends in ".npz".
np.savetxt("finger_all.npz",finger_all)

In [17]:
import pandas as pd

In [22]:
# Collect one formula per structure, in the same order as structs_lim_50.
# NOTE(review): the table built later pairs Formulae[k] with row k of
# finger_all, which assumes the stacked chunks follow this same order - confirm.
Formulae = []
for structure in structs_lim_50:
    Formulae.append(structure.composition.formula)

In [23]:
# Number of formulas; this has to match the number of rows in finger_all for
# the table assembled below to be valid.
len(Formulae)


Out[23]:
14722

In [25]:
# Start the table with only the formula column.
# NOTE(review): the next cell re-creates Df from scratch with this same
# statement, so this cell is redundant.
Df=pd.DataFrame({"Formula":Formulae})

In [32]:
# Assemble the fingerprint table: a "Formula" label column followed by
# "Ones_1".."Ones_100", "Z_1".."Z_100" and "Chi_1".."Chi_100", taken from
# columns 0-99, 100-199 and 200-299 of finger_all respectively.
#
# The frame is built from the whole matrix in one step instead of assigning
# 300 columns one at a time; repeated single-column insertion is slow and
# leaves the frame fragmented (newer pandas emits a PerformanceWarning for it).
n_bins = 100                          # fingerprint entries per observable
obs_prefixes = ("Ones", "Z", "Chi")   # same order as the blocks in finger_all
finger_columns = [
    prefix + "_" + str(k + 1)
    for prefix in obs_prefixes
    for k in range(n_bins)
]

# copy=True keeps Df independent of finger_all, as the column-by-column
# version did. Columns beyond the first 300 are ignored, as before.
Df = pd.DataFrame(
    finger_all[:, :len(finger_columns)],
    columns=finger_columns,
    copy=True,
)
# Raises ValueError if the number of formulas differs from the number of rows.
Df.insert(0, "Formula", Formulae)

In [33]:
# Inspect the column labels: "Formula" followed by the fingerprint columns.
Df.columns


Out[33]:
Index([u'Formula', u'Ones_1', u'Ones_2', u'Ones_3', u'Ones_4', u'Ones_5',
       u'Ones_6', u'Ones_7', u'Ones_8', u'Ones_9',
       ...
       u'Chi_91', u'Chi_92', u'Chi_93', u'Chi_94', u'Chi_95', u'Chi_96',
       u'Chi_97', u'Chi_98', u'Chi_99', u'Chi_100'],
      dtype='object', length=301)

In [45]:
# Spot-check on the first two rows: with the "Formula" label column dropped,
# show the shape of the remaining values.
numeric_preview = Df.iloc[:2].drop("Formula", axis=1)
numeric_preview.values.shape


Out[45]:
(2, 300)

In [46]:
# Write the table as a tab-separated file. The row index is written too
# (to_csv default), which is why the read-back below uses index_col=0.
Df.to_csv("FingerPrint_lt50.csv",sep='\t')

In [49]:
# Read a tab-separated fingerprint table, using its first column as the index.
# NOTE(review): this loads "FingerPrint_lt50_old.csv", not the
# "FingerPrint_lt50.csv" written in the previous cell - confirm which file
# this check is meant to cover.
load_test_csv=pd.read_csv("FingerPrint_lt50_old.csv",sep='\t',index_col=0)

In [51]:
# Preview the first rows of the loaded table.
load_test_csv.head()


Out[51]:
Formula Ones_1 Ones_2 Ones_3 Ones_4 Ones_5 Ones_6 Ones_7 Ones_8 Ones_9 ... Chi_91 Chi_92 Chi_93 Chi_94 Chi_95 Chi_96 Chi_97 Chi_98 Chi_99 Chi_100
0 Nb1 Ag1 O3 -1.0 -1.0 -1.0 -1.000000 -1.00000 -1.000000 -1.000000 -1.000000 -1.000000 ... -0.497277 -0.453894 -0.191895 0.064329 0.104619 -0.118050 -0.394161 -0.537855 -0.587967 -0.654697
1 Li2 Ag6 O4 -1.0 -1.0 -1.0 -1.000000 -1.00000 -1.000000 -1.000000 -1.000000 -0.999999 ... -0.120349 -0.206105 -0.217994 -0.183563 -0.075639 0.073079 0.137550 0.062229 -0.112490 -0.359644
2 Cs2 Ag2 Cl4 -1.0 -1.0 -1.0 -1.000000 -1.00000 -1.000000 -1.000000 -1.000000 -1.000000 ... 0.259454 0.218056 0.031980 -0.098497 -0.054451 0.137466 0.285717 0.202032 -0.097068 -0.462973
3 Ag2 Hg1 I4 -1.0 -1.0 -1.0 -1.000000 -1.00000 -1.000000 -1.000000 -1.000000 -1.000000 ... -0.059718 -0.141794 -0.241085 -0.272397 -0.160438 0.054698 0.275458 0.446003 0.474980 0.227803
4 Ag2 C2 O6 -1.0 -1.0 -1.0 -0.999999 -0.99997 -0.999462 -0.993801 -0.954192 -0.782973 ... -0.123078 -0.202793 -0.201147 -0.164696 -0.108094 0.009786 0.153578 0.153906 -0.070010 -0.394319

5 rows × 301 columns


In [ ]: