Reading turbomole files

(from Basis Set Exchange)

The following data is fetched from https://bse.pnl.gov/bse/portal by by choosing Be in the periodic table, 3-21G in the menu to the left and "Turbomole" from the drop-down menu to the lower left.


In [1]:
"""
#  3-21G  EMSL  Basis Set Exchange Library  5/6/15 1:07 PM
# Elements                             References
# --------                             ----------
#  H - Ne: J.S. Binkley, J.A. Pople, W.J. Hehre, J. Am. Chem. Soc 102 939 (1980)
# Na - Ar: M.S. Gordon, J.S. Binkley, J.A. Pople, W.J. Pietro and W.J. Hehre, 
#          J. Am. Chem. Soc. 104, 2797 (1983).
#  K - Ca: K.D. Dobbs, W.J. Hehre, J. Comput. Chem. 7, 359 (1986). 
# Ga - Kr: K.D. Dobbs, W.J. Hehre, J. Comput. Chem. 7, 359 (1986).
# Sc - Zn: K.D. Dobbs, W.J. Hehre, J. Comput. Chem. 8, 861 (1987). 
#  Y - Cd: K.D. Dobbs, W.J. Hehre, J. Comput. Chem. 8, 880 (1987). 
# Cs     : A 3-21G quality set derived from the Huzinage MIDI basis sets.
#          E.D. Glendening and D. Feller, J. Phys. Chem. 99, 3060 (1995)
#   


$basis
*
he   3-21G
*
    2  s
     13.6267000              0.1752300        
      1.9993500              0.8934830        
    1  s
      0.3829930              1.0000000        
*
$end
"""


Out[1]:
'\n#  3-21G  EMSL  Basis Set Exchange Library  5/6/15 1:07 PM\n# Elements                             References\n# --------                             ----------\n#  H - Ne: J.S. Binkley, J.A. Pople, W.J. Hehre, J. Am. Chem. Soc 102 939 (1980)\n# Na - Ar: M.S. Gordon, J.S. Binkley, J.A. Pople, W.J. Pietro and W.J. Hehre, \n#          J. Am. Chem. Soc. 104, 2797 (1983).\n#  K - Ca: K.D. Dobbs, W.J. Hehre, J. Comput. Chem. 7, 359 (1986). \n# Ga - Kr: K.D. Dobbs, W.J. Hehre, J. Comput. Chem. 7, 359 (1986).\n# Sc - Zn: K.D. Dobbs, W.J. Hehre, J. Comput. Chem. 8, 861 (1987). \n#  Y - Cd: K.D. Dobbs, W.J. Hehre, J. Comput. Chem. 8, 880 (1987). \n# Cs     : A 3-21G quality set derived from the Huzinage MIDI basis sets.\n#          E.D. Glendening and D. Feller, J. Phys. Chem. 99, 3060 (1995)\n#   \n\n\n$basis\n*\nhe   3-21G\n*\n    2  s\n     13.6267000              0.1752300        \n      1.9993500              0.8934830        \n    1  s\n      0.3829930              1.0000000        \n*\n$end\n'

Here is how to read it:

  • Lines beginning with # (hashtag) is comments, and they describe the source for the basis.
  • What you are after when converting this to primitives is the lines enclosed by "basis" and "end" (dollarsign omitted).
  • Star $*$ is just used as a separator.
  • The line "he 3-21G" is just a label for the basis (Helium, 3-21G (type of orbital))
  • The actual data is what follows between the stars:

  • The line consisting of an integer and letter ("2 s") asserts that what follows constitues one (1) s-orbital consisting of two primitives.

    • The s-orbital has $(i,j,k) = (0,0,0)$
    • The p-orbital has $(i,j,k) = (1,0,0), (0,1,0)$ and $(0,0,1)$
    • The d-orbital has $(i,j,k) = (2,0,0), (0,2,0), (0,0,2), (1,1,0), (1,0,1)$ and $(0,1,1)$.
    • Important: For a given orbital, all combinations of i,j,k must be included in the gaussian orbital. This means that:
      • The "1 s" orbital has one primitive.
      • The "1 p" orbital has three primitives.
      • The "2 s" orbitals has one primitives each.
      • The "2 p" orbitals has three primitives each.
      • The "1 d" orbital has six primitives.
  • The lines containing floats consists of the weight ($\omega$) and exponent ($\alpha$), respectively.
    • These lines are used to set up the previously announced ("2 s") primitives. (one orbital with two primitives)
    • The weight is used together with a normalizing function to make up the actual coefficient of the primitive: $$ \chi(x,y,z) = N(\alpha, i,j,k) \omega x^i y^j z^k e^{-\alpha (x^2 + y^2 + z^2)}$$
    • Where, for the turbomole format, only: $$ N(\alpha, i,j,k) = (\frac{2\alpha}{\pi})^{\frac{3}{4}} \sqrt{(8\alpha)^{i+j+k}\frac{i!j!k!}{(2i)!(2j)!(2k)!}} $$

The following snippet is used to set up the 3-21G hydrogen basis in C++:


In [3]:
"""
void basisbank::add_3_21G_h(vec3 corePos){
    bs.add_state();
    Primitive S0A0 = bs.turbomolePrimitive(0.15628500,5.44717800,0,0,0,corePos);
    bs.add_primitive_to_state(bs.Nstates-1, S0A0);
    Primitive S1A0 = bs.turbomolePrimitive(0.90469100,0.82454700,0,0,0,corePos);
    bs.add_primitive_to_state(bs.Nstates-1, S1A0);
    bs.add_state();
    Primitive S0A1 = bs.turbomolePrimitive(1.00000000,0.18319200,0,0,0,corePos);
    bs.add_primitive_to_state(bs.Nstates-1, S0A1);
}


Primitive basis::turbomolePrimitive(double weight, double exponent, double i, double j, double k, vec3 corePos){
    return Primitive(turboNormalization(exponent,i,j,k)*weight,i,j,k,exponent,corePos);

}

double basis::turboNormalization(double x, double i, double j, double k){
    //x = exponent
    return pow((2*x/pi),0.75)*sqrt(pow(8*x,i+j+k)*factorial(i)*factorial(j)*factorial(k)/(factorial(2*i)*factorial(2*j)*factorial(2*k)));
}
"""


Out[3]:
'\nvoid basisbank::add_3_21G_h(vec3 corePos){\n    bs.add_state();\n    Primitive S0A0 = bs.turbomolePrimitive(0.15628500,5.44717800,0,0,0,corePos);\n    bs.add_primitive_to_state(bs.Nstates-1, S0A0);\n    Primitive S1A0 = bs.turbomolePrimitive(0.90469100,0.82454700,0,0,0,corePos);\n    bs.add_primitive_to_state(bs.Nstates-1, S1A0);\n    bs.add_state();\n    Primitive S0A1 = bs.turbomolePrimitive(1.00000000,0.18319200,0,0,0,corePos);\n    bs.add_primitive_to_state(bs.Nstates-1, S0A1);\n}\n\n\nPrimitive basis::turbomolePrimitive(double weight, double exponent, double i, double j, double k, vec3 corePos){\n    return Primitive(turboNormalization(exponent,i,j,k)*weight,i,j,k,exponent,corePos);\n\n}\n\ndouble basis::turboNormalization(double x, double i, double j, double k){\n    //x = exponent\n    return pow((2*x/pi),0.75)*sqrt(pow(8*x,i+j+k)*factorial(i)*factorial(j)*factorial(k)/(factorial(2*i)*factorial(2*j)*factorial(2*k)));\n}\n'

The following code will allow you to produce C++ code as above from any turbomole file up to f-shells. Just download the files from basis set exchange and store them in the same folder as the script as .txt files. Then run the script, and it will output .cpp and .h files for your code. (Remember to uncomment the final line)


In [4]:
#Turbomole converter 
#Converts basis files in turbomole format to C++ files readable by Fermion Mingle

import glob
import os

def extract_m_basisinfo(filename, printing = False):
    #Extract multiple basises from a turbomole file
    swtch = 0
    f = open(filename)
    
    comments = []
    basis_sets = [] #list containing all basis sets
    basis = []
    basisW = []
    basisE = []
    basisT = []
    contractedtype = []
    activecontracted = -1
    for i in f:
        I = i.split()
        if len(I)==0:
            continue
        if I[0] == "*":
            continue #ignore wildcards
        if I[0] == '#':        
            comments.append("//")
            comments[-1] += i
            #for e in I:
            #    comments[-1] += e + " "
            continue
        #print I
        
        if I[0][0] == "$":
            continue
        if I[0] in ["h", "he", "li", "be", "b", "c", "n", "o", "f", "ne", "na" ,"mg", "al", "si", "p", "s", "cl", "ar", "k", "ca", "sc", "ti", "v", "cr", "mn", "fe", "co", "ni", "cu", "zn", "ga", "ge", "as", "se","br", "kr", "rb", "sr", "y", "zr", "nb", "mo", "tc", "ru", "rh", "pd", "ag", "cd", "in", "sn", "sb", "te", "i", "xe"]:
            #If not first time here, put current basis into list
            if swtch == 1:
                basis_sets.append([bT, basisW, basisE, contractedtype, len(contractedtype)])
                basisW = []
                basisE = []
                contractedtype = []
                activecontracted = -1
            swtch = 1

            #Begin collecting data
            bT = I[1]+"_"+I[0]
            bT = bT.replace("-", "_") #basistype
            bT = bT.replace("(", "") #basistype
            bT = bT.replace(")", "") #basistype                        
            bT = bT.replace(",", "") #basistype                                    
            continue
        if I[0] in "123456789":
            if I[1] == "s":
                #print "Identified an s orbital."
                contractedtype.append(0)
            if I[1] == "p":
                #print "Identified an p orbital."
                contractedtype.append(1)
            if I[1] == "d":
                #print "Identified an d orbital."
                contractedtype.append(2)
            if I[1] == "f":
                #print "Identified an f orbital."
                contractedtype.append(3) 
            #Create contracted
            basis.append([])
            basisE.append([])
            basisW.append([])
            activecontracted += 1   
            #print I
        else:
            try:
                w = float(I[0].replace("D", "E"))
                e = float(I[1].replace("D", "E"))
                basisE[activecontracted].append(e)
                basisW[activecontracted].append(w)
                #basisE[activecontracted].append(float(I[1]))
                #basisW[activecontracted].append(float(I[0]))
            except:
                comments.append("//Ignored the following information from file:")
                for e in I:
                    comments[-1] += e + " "
                #comments[-1] += "Ignored the following information:" + I
                #print "Failed to read weigths and exponents."
                #print I
        #print I   
    if printing:
        print comments
        print contractedtype
        print len(contractedtype)
        print basisE
        print basisW
    f.close()
    #return comments, contractedtype, len(contractedtype), basisE, basisW
    return comments, basis_sets

def extract_basisinfo(filename, printing = False):
    swtch = 0
    f = open(filename)
    
    comments = []
    basis = []
    basisW = []
    basisE = []
    basisT = []
    contractedtype = []
    activecontracted = -1
    for i in f:
        I = i.split()
        if len(I)==0:
            continue
        if I[0] == "*":
            continue #ignore wildcards
        if I[0] == '#':        
            comments.append("//")
            for e in I:
                comments[-1] += e + " "
            continue
        #print I
        
        if I[0][0] == "$":
            swtch += 1
            swtch = swtch %2
            #if swtch == 1:
            #Add new function   
            continue
        if I[0] in ["h", "he", "li", "be", "b", "c", "n", "o", "f", "ne", "na" ,"mg", "al", "si", "p", "s", "cl", "ar", "k", "ca", "sc", "ti", "v", "cr", "mn", "fe", "co", "ni", "cu", "zn", "ga", "ge", "as", "se","br", "kr", "rb", "sr", "y", "zr", "nb", "mo", "tc", "ru", "rh", "pd", "ag", "cd", "in", "sn", "sb", "te", "i", "xe"]:
            bT = I[1]+"_"+I[0]
            bT = bT.replace("-", "_")
            basisT.append(bT)
            print basisT[-1]
            continue
        if I[0] in "123456789":
            if I[1] == "s":
                #print "Identified an s orbital."
                contractedtype.append(0)
            if I[1] == "p":
                #print "Identified an p orbital."
                contractedtype.append(1)
            if I[1] == "d":
                #print "Identified an d orbital."
                contractedtype.append(2)
            if I[1] == "f":
                #print "Identified an f orbital."
                contractedtype.append(3) 
            #Create contracted
            basis.append([])
            basisE.append([])
            basisW.append([])
            activecontracted += 1   
            #print I
        else:
            try:
                w = float(I[0].replace("D", "E"))
                e = float(I[1].replace("D", "E"))
                basisE[activecontracted].append(e)
                basisW[activecontracted].append(w)
            except:
                comments.append("//Ignored the following information from file:")
                for e in I:
                    comments[-1] += e + " "
                #comments[-1] += "Ignored the following information:" + I
                #print "Failed to read weigths and exponents."
                #print I
        #print I          
    if printing:
        print comments
        print contractedtype
        print len(contractedtype)
        print basisE
        print basisW
    f.close()
    return comments, contractedtype, len(contractedtype), basisE, basisW



def createfunction(fname, comments, orbital_types, N_orbitals, exponents, weights):
    endline = ["\n"]
    cppclass  = []
    cppheader = []
    for i in comments:
        cppclass.append(i)
        #cppheader.append(i)        
        
    #cppclass.append(endline)
    #cppheader.append(endline)
    
    #writing to header
    cppheader.append("    void add_"+fname+"(vec3 corePos);")
    

    cppclass.append("void basisbank::add_"+fname+"(vec3 corePos){\n")
    for i in range(N_orbitals):
        if orbital_types[i] == 0:
            #create an s-orbital
            cppclass.append("    bs.add_state();")
            for e in range(len(exponents[i])):
                cppclass.append("    Primitive S%iA%i = bs.turbomolePrimitive(%.8f,%.8f,0,0,0,corePos);" % (e, i,exponents[i][e], weights[i][e]))
                cppclass.append("    bs.add_primitive_to_state(bs.Nstates-1, S%iA%i);" % (e,i))
        if orbital_types[i] == 1:
            #create an p-orbital
            cppclass.append("    bs.add_state();")
            for e in range(len(exponents[i])):
                cppclass.append("    Primitive P%iA%i = bs.turbomolePrimitive(%.8f,%.8f,1,0,0,corePos);" % (e,i, exponents[i][e], weights[i][e]))
                cppclass.append("    bs.add_primitive_to_state(bs.Nstates-1, P%iA%i);" %( e,i))
                cppclass.append("    Primitive P%iB%i = bs.turbomolePrimitive(%.8f,%.8f,0,1,0,corePos);" % (e,i, exponents[i][e], weights[i][e]))
                cppclass.append("    bs.add_primitive_to_state(bs.Nstates-1, P%iB%i);" % (e,i))
                cppclass.append("    Primitive P%iC%i = bs.turbomolePrimitive(%.8f,%.8f,0,0,1,corePos);" % (e,i, exponents[i][e], weights[i][e]))
                cppclass.append("    bs.add_primitive_to_state(bs.Nstates-1, P%iC%i);" % (e,i))
                
        if orbital_types[i] == 2:
            #create an d-orbital
            cppclass.append("    bs.add_state();")
            for e in range(len(exponents[i])):
                cppclass.append("    Primitive D%iA%i = bs.turbomolePrimitive(%.8f,%.8f,2,0,0,corePos);" % (e, i,exponents[i][e], weights[i][e]))
                cppclass.append("    bs.add_primitive_to_state(bs.Nstates-1, D%iA%i);" % (e,i))
                cppclass.append("    Primitive D%iB%i = bs.turbomolePrimitive(%.8f,%.8f,0,2,0,corePos);" % (e,i, exponents[i][e], weights[i][e]))
                cppclass.append("    bs.add_primitive_to_state(bs.Nstates-1, D%iB%i);" %(e,i))
                cppclass.append("    Primitive D%iC%i = bs.turbomolePrimitive(%.8f,%.8f,0,0,2,corePos);" % (e,i, exponents[i][e], weights[i][e]))
                cppclass.append("    bs.add_primitive_to_state(bs.Nstates-1, D%iC%i);" %( e,i))
                
                cppclass.append("    Primitive D%iD%i = bs.turbomolePrimitive(%.8f,%.8f,1,1,0,corePos);" % (e, i,exponents[i][e], weights[i][e]))
                cppclass.append("    bs.add_primitive_to_state(bs.Nstates-1, D%iD%i);" % (e,i))
                cppclass.append("    Primitive D%iE%i = bs.turbomolePrimitive(%.8f,%.8f,0,1,1,corePos);" % (e,i, exponents[i][e], weights[i][e]))
                cppclass.append("    bs.add_primitive_to_state(bs.Nstates-1, D%iE%i);" % (e,i))
                cppclass.append("    Primitive D%iF%i = bs.turbomolePrimitive(%.8f,%.8f,1,0,1,corePos);" % (e, i,exponents[i][e], weights[i][e]))
                cppclass.append("    bs.add_primitive_to_state(bs.Nstates-1, D%iF%i);" %( e,i))
        if orbital_types[i] == 3:
            #create an d-orbital
            cppclass.append("    bs.add_state();")
            for e in range(len(exponents[i])):
                cppclass.append("    Primitive E%iA%i = bs.turbomolePrimitive(%.8f,%.8f,3,0,0,corePos);" % (e, i,exponents[i][e], weights[i][e]))
                cppclass.append("    bs.add_primitive_to_state(bs.Nstates-1, E%iA%i);" % (e,i))
                cppclass.append("    Primitive E%iB%i = bs.turbomolePrimitive(%.8f,%.8f,0,3,0,corePos);" % (e,i, exponents[i][e], weights[i][e]))
                cppclass.append("    bs.add_primitive_to_state(bs.Nstates-1, E%iB%i);" %(e,i))
                cppclass.append("    Primitive E%iC%i = bs.turbomolePrimitive(%.8f,%.8f,0,0,3,corePos);" % (e,i, exponents[i][e], weights[i][e]))
                cppclass.append("    bs.add_primitive_to_state(bs.Nstates-1, E%iC%i);" %( e,i))
                
                cppclass.append("    Primitive E%iD%i = bs.turbomolePrimitive(%.8f,%.8f,1,2,0,corePos);" % (e, i,exponents[i][e], weights[i][e]))
                cppclass.append("    bs.add_primitive_to_state(bs.Nstates-1, E%iD%i);" % (e,i))
                cppclass.append("    Primitive E%iE%i = bs.turbomolePrimitive(%.8f,%.8f,0,1,2,corePos);" % (e,i, exponents[i][e], weights[i][e]))
                cppclass.append("    bs.add_primitive_to_state(bs.Nstates-1, E%iE%i);" % (e,i))
                cppclass.append("    Primitive E%iF%i = bs.turbomolePrimitive(%.8f,%.8f,1,0,2,corePos);" % (e, i,exponents[i][e], weights[i][e]))
                cppclass.append("    bs.add_primitive_to_state(bs.Nstates-1, E%iF%i);" %( e,i))
                
                cppclass.append("    Primitive E%iG%i = bs.turbomolePrimitive(%.8f,%.8f,2,1,0,corePos);" % (e, i,exponents[i][e], weights[i][e]))
                cppclass.append("    bs.add_primitive_to_state(bs.Nstates-1, E%iG%i);" % (e,i))
                cppclass.append("    Primitive E%iH%i = bs.turbomolePrimitive(%.8f,%.8f,0,2,1,corePos);" % (e,i, exponents[i][e], weights[i][e]))
                cppclass.append("    bs.add_primitive_to_state(bs.Nstates-1, E%iH%i);" % (e,i))
                cppclass.append("    Primitive E%iI%i = bs.turbomolePrimitive(%.8f,%.8f,2,0,1,corePos);" % (e, i,exponents[i][e], weights[i][e]))
                cppclass.append("    bs.add_primitive_to_state(bs.Nstates-1, E%iI%i);" %( e,i))               
                
                cppclass.append("    Primitive E%iJ%i = bs.turbomolePrimitive(%.8f,%.8f,1,1,1,corePos);" % (e, i,exponents[i][e], weights[i][e]))
                cppclass.append("    bs.add_primitive_to_state(bs.Nstates-1, E%iJ%i);" %( e,i))   
                
    cppclass.append("}\n")
    
    return cppheader, cppclass

def saveclass(cppclasses, cppheaders):
    cppfile = "//This file is maintained by an external python script and should not be edited manually.\n#include <basisbank.h>\n#include <armadillo>\n#include <basis.h>\n#include <primitive.h>\nusing namespace std;\nusing namespace arma;\nbasisbank::basisbank(basis BS){\n    bs = BS;} \nbasisbank::basisbank(){} \n \n"
    
    for i in cppclasses:
        for e in i:
            cppfile += e+ "\n"
        cppfile += "\n"
    #print cppfile
    
    
    cppheader = "//This file is maintained by an external python script and should not be edited manually.\n#ifndef BASISBANK_H\n#define BASISBANK_H\n#include <armadillo>\n#include <basis.h>\n#include <primitive.h>\nusing namespace std;\nusing namespace arma;\n \nclass basisbank{\npublic:\n    basisbank(basis BS);\n    basisbank();\n    basis bs;\n    string basistype;"
    for i in cppheaders:
        for e in i:
            cppheader += e + "\n"
    cppheader += "};\n"
    cppheader += "#endif // BASISBANK_H"
    #print cppheader
    #Write files
    cpp_header = open("basisbank.h", "w")
    cpp_class = open("basisbank.cpp", "w")
    cpp_header.write(cppheader)
    cpp_class.write(cppfile)
    cpp_header.close()
    cpp_class.close()

    
def convertfolder():
    
    cppclasses = []
    cppheaders = []
    folder  = [] #a list containing all files in folder
    #os.chdir("/")
    #print os.getcwd()
    for filename in os.listdir(os.getcwd()):
        if filename.endswith(".txt"):
            try:
                #comments, orbital_types, N_orbitals, exponents, weigths = extract_basisinfo(filename)
                comments, I = extract_m_basisinfo(filename)
                commented = 0
                #print comments

                
                for i in I:
                    #i = [bT, basisW, basisE, contractedtype, len(contractedtype)]
                    if commented == 0:
                        cppheader, cppclass = createfunction(i[0], comments, i[3], i[4], i[2], i[1])
                        cppclasses.append(cppclass)
                        cppheaders.append(cppheader)
                        commented = 1
                    else:
                        cppheader, cppclass = createfunction(i[0], [""], i[3], i[4], i[2], i[1])
                        cppclasses.append(cppclass)
                        cppheaders.append(cppheader)

                        
            except:
                print "Failed to import file:", filename

    saveclass(cppclasses, cppheaders)


#convertfolder() #converts all .txt files in current folder

In [ ]: