The Finger_Pool notebooks are the ones used to create the fingerprints (without oxidation states) for structures with <50 atoms in the unit cell. This could have been done in one cell using the Multiprocess.Pool.map function; however, I wanted to track progress using tqdm, which is harder to do with the Multiprocess module.

Note, however, that in this notebook we also gather the fingerprints created in the other 3 Finger_Pool notebooks and create the Pandas dataframe, which also contains the formulas.



In [3]:

    
import fingerprint as fp
# Load the pickled structures; `s_all` is a second name bound to the same object.
struct_all = s_all = fp.read_pickle("struct_all.pickle")

# Keep only the structures whose `.species` sequence has fewer than 50 entries.
structs_lim_50 = [
    structure
    for structure in struct_all
    if len(structure.species) < 50
]









    



/usr/local/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')



In [11]:

    
import tqdm
import numpy as np
import itertools
def phi_getter(i):
    """Build the concatenated fingerprint of a single structure.

    ``fp.get_phi_scaled`` is called on ``i`` once per observable, in the
    order 'ones', 'Z', 'Chi', and the three results are joined end to end.

    Parameters
    ----------
    i : object
        Forwarded unchanged to ``fp.get_phi_scaled``; this notebook passes
        entries of ``structs_lim_50``.

    Returns
    -------
    list
        The elements of the 'ones' result, followed by those of the 'Z'
        result, followed by those of the 'Chi' result.
    """
    fingerprint = []
    for observable in ('ones', 'Z', 'Chi'):
        fingerprint.extend(fp.get_phi_scaled(i, obser=observable))
    return fingerprint




# Half-open index window [lim1, lim2) into structs_lim_50 processed here.
lim1, lim2 = 0, 3700

# One fingerprint row per structure in the window; tqdm_notebook wraps the
# index range so a progress bar is shown while the rows are computed.
finger_part = np.array([
    phi_getter(structs_lim_50[idx])
    for idx in tqdm.tqdm_notebook(range(lim1, lim2))
])

finger_part.shape









    









    Out[11]:





(3700, 300)



In [12]:

    
# Persist this notebook's batch. np.savetxt writes a plain-text file, so the
# ".npz" suffix is only a name and the file must be read back with np.loadtxt.
np.savetxt(fname="finger_part1.npz", X=finger_part)



In [13]:

    
# Read the four partial fingerprint arrays back from their text files,
# in part order 1 to 4.
fp1, fp2, fp3, fp4 = map(
    np.loadtxt,
    ("finger_part1.npz", "finger_part2.npz", "finger_part3.npz", "finger_part4.npz")
)



In [14]:

    
# Stack the four partial arrays row-wise, keeping the part order 1 to 4.
finger_all = np.vstack([fp1, fp2, fp3, fp4])



In [15]:

    
# Dimensions (rows, columns) of the stacked fingerprint array.
np.shape(finger_all)









    Out[15]:





(14722, 300)



In [16]:

    
# Persist the combined array. np.savetxt writes plain text, so the ".npz"
# suffix is only a name and the file must be read back with np.loadtxt.
np.savetxt(fname="finger_all.npz", X=finger_all)



In [17]:

    
import pandas as pd



In [22]:

    
# One formula string per structure, in the same order as structs_lim_50.
Formulae = [
    structure.composition.formula
    for structure in structs_lim_50
]



In [23]:

    
# Number of formulas collected; each one becomes a row of the DataFrame
# that is built from Formulae.
len(Formulae)









    Out[23]:





14722



In [25]:

    
# Start the table with a single "Formula" column holding the formula strings.
Df = pd.DataFrame(Formulae, columns=["Formula"])



In [32]:

    
# Assemble the final table: a "Formula" column followed by 300 fingerprint
# columns, 100 per observable, labelled Ones_1..Ones_100, Z_1..Z_100 and
# Chi_1..Chi_100. This matches the order in which phi_getter concatenates
# the 'ones', 'Z' and 'Chi' results.
#
# The numeric block is created with one DataFrame constructor call instead of
# inserting 300 columns one at a time in three copy-pasted loops.
N_PER_OBSERVABLE = 100
finger_columns = [
    prefix + "_" + str(k + 1)
    for prefix in ("Ones", "Z", "Chi")
    for k in range(N_PER_OBSERVABLE)
]

# Only the first len(finger_columns) columns of finger_all are used, which
# are the same columns the per-column loops addressed.
Df = pd.DataFrame(finger_all[:, :len(finger_columns)], columns=finger_columns)

# Put the formulas first; pandas raises ValueError if the number of formulas
# differs from the number of fingerprint rows.
Df.insert(0, "Formula", Formulae)



In [33]:

    
# Inspect the column labels of Df.
Df.columns









    Out[33]:





Index([u'Formula', u'Ones_1', u'Ones_2', u'Ones_3', u'Ones_4', u'Ones_5',
       u'Ones_6', u'Ones_7', u'Ones_8', u'Ones_9',
       ...
       u'Chi_91', u'Chi_92', u'Chi_93', u'Chi_94', u'Chi_95', u'Chi_96',
       u'Chi_97', u'Chi_98', u'Chi_99', u'Chi_100'],
      dtype='object', length=301)



In [45]:

    
# Shape of the numeric part (every column except "Formula") of the first two rows.
Df.iloc[:2].drop("Formula", axis=1).values.shape









    Out[45]:





(2, 300)



In [46]:

    
# Write the table as tab-separated text; the row index is written as the
# first column.
Df.to_csv(path_or_buf="FingerPrint_lt50.csv", sep='\t')



In [49]:

    
# Read a tab-separated table back, using its first column as the index.
# NOTE(review): this reads "FingerPrint_lt50_old.csv", not the
# "FingerPrint_lt50.csv" written by Df.to_csv in this notebook. Confirm this
# is intentional, since the file is not produced by any cell shown here.
load_test_csv = pd.read_csv(
    filepath_or_buffer="FingerPrint_lt50_old.csv",
    index_col=0,
    sep='\t'
)



In [51]:

    
# Preview the first five rows of the re-loaded table.
load_test_csv.head(n=5)









    Out[51]:






  
    
      
      Formula
      Ones_1
      Ones_2
      Ones_3
      Ones_4
      Ones_5
      Ones_6
      Ones_7
      Ones_8
      Ones_9
      ...
      Chi_91
      Chi_92
      Chi_93
      Chi_94
      Chi_95
      Chi_96
      Chi_97
      Chi_98
      Chi_99
      Chi_100
    
  
  
    
      0
      Nb1 Ag1 O3
      -1.0
      -1.0
      -1.0
      -1.000000
      -1.00000
      -1.000000
      -1.000000
      -1.000000
      -1.000000
      ...
      -0.497277
      -0.453894
      -0.191895
      0.064329
      0.104619
      -0.118050
      -0.394161
      -0.537855
      -0.587967
      -0.654697
    
    
      1
      Li2 Ag6 O4
      -1.0
      -1.0
      -1.0
      -1.000000
      -1.00000
      -1.000000
      -1.000000
      -1.000000
      -0.999999
      ...
      -0.120349
      -0.206105
      -0.217994
      -0.183563
      -0.075639
      0.073079
      0.137550
      0.062229
      -0.112490
      -0.359644
    
    
      2
      Cs2 Ag2 Cl4
      -1.0
      -1.0
      -1.0
      -1.000000
      -1.00000
      -1.000000
      -1.000000
      -1.000000
      -1.000000
      ...
      0.259454
      0.218056
      0.031980
      -0.098497
      -0.054451
      0.137466
      0.285717
      0.202032
      -0.097068
      -0.462973
    
    
      3
      Ag2 Hg1 I4
      -1.0
      -1.0
      -1.0
      -1.000000
      -1.00000
      -1.000000
      -1.000000
      -1.000000
      -1.000000
      ...
      -0.059718
      -0.141794
      -0.241085
      -0.272397
      -0.160438
      0.054698
      0.275458
      0.446003
      0.474980
      0.227803
    
    
      4
      Ag2 C2 O6
      -1.0
      -1.0
      -1.0
      -0.999999
      -0.99997
      -0.999462
      -0.993801
      -0.954192
      -0.782973
      ...
      -0.123078
      -0.202793
      -0.201147
      -0.164696
      -0.108094
      0.009786
      0.153578
      0.153906
      -0.070010
      -0.394319
    
  

5 rows × 301 columns



In [ ]:

	Formula	Ones_1	Ones_2	Ones_3	Ones_4	Ones_5	Ones_6	Ones_7	Ones_8	Ones_9	...	Chi_91	Chi_92	Chi_93	Chi_94	Chi_95	Chi_96	Chi_97	Chi_98	Chi_99	Chi_100
0	Nb1 Ag1 O3	-1.0	-1.0	-1.0	-1.000000	-1.00000	-1.000000	-1.000000	-1.000000	-1.000000	...	-0.497277	-0.453894	-0.191895	0.064329	0.104619	-0.118050	-0.394161	-0.537855	-0.587967	-0.654697
1	Li2 Ag6 O4	-1.0	-1.0	-1.0	-1.000000	-1.00000	-1.000000	-1.000000	-1.000000	-0.999999	...	-0.120349	-0.206105	-0.217994	-0.183563	-0.075639	0.073079	0.137550	0.062229	-0.112490	-0.359644
2	Cs2 Ag2 Cl4	-1.0	-1.0	-1.0	-1.000000	-1.00000	-1.000000	-1.000000	-1.000000	-1.000000	...	0.259454	0.218056	0.031980	-0.098497	-0.054451	0.137466	0.285717	0.202032	-0.097068	-0.462973
3	Ag2 Hg1 I4	-1.0	-1.0	-1.0	-1.000000	-1.00000	-1.000000	-1.000000	-1.000000	-1.000000	...	-0.059718	-0.141794	-0.241085	-0.272397	-0.160438	0.054698	0.275458	0.446003	0.474980	0.227803
4	Ag2 C2 O6	-1.0	-1.0	-1.0	-0.999999	-0.99997	-0.999462	-0.993801	-0.954192	-0.782973	...	-0.123078	-0.202793	-0.201147	-0.164696	-0.108094	0.009786	0.153578	0.153906	-0.070010	-0.394319