# In [32]:
# Approximation data (based on Levenshtein distance).
# Directory prefix for the verification input files. It is joined to file
# names by plain string concatenation, so the trailing slash is required.
path = 'data/verification/'
# Disabled pandas-based loading of the same kind of files (presumably
# superseded by read_data below -- confirm before deleting).
#DB_medium = pandas.read_csv(path + 'DBS_medium_raw2.csv')
#M13_medium = pandas.read_csv(path + 'M13_medium_raw2.csv')
def read_data(filename):
    """Parse a raw verification file into ``(sequence, energies)`` records.

    The file is read line by line:

    * the first non-blank line, and every later line that is neither a
      ``0.0`` line nor an energy line, is a sequence header that starts a
      new record;
    * lines starting with ``'0.0'`` are ignored;
    * lines containing ``'&'`` are energy lines of the form
      ``energy,<field>,<field>``; only the leading energy is kept.

    Blank lines are skipped, so a trailing newline or an empty file no
    longer produces a spurious ``('', [])`` record.

    Args:
        filename: Path of the file to read.

    Returns:
        A list of ``(sequence, energies)`` tuples in file order, where
        ``energies`` is a list of floats (possibly empty). An empty file
        yields an empty list.

    Raises:
        ValueError: If an energy line does not have exactly three
            comma-separated fields or its first field is not a number.
    """
    data = []
    sequence = None  # None until the first header line has been seen
    energies = []
    with open(filename, 'r') as f:
        # Iterate lazily instead of materialising the file with readlines().
        for line in f:
            stripped = line.strip()
            if not stripped:
                continue
            if sequence is None:
                # The first non-blank line is always the first header,
                # whatever it contains.
                sequence = stripped
            elif line.startswith('0.0'):
                continue
            elif '&' in line:
                # Exactly three fields are expected; unpacking enforces it.
                energy, _, _ = stripped.split(',')
                energies.append(float(energy))
            else:
                # A new header closes the record collected so far.
                data.append((sequence, energies))
                sequence = stripped
                energies = []
    if sequence is not None:
        data.append((sequence, energies))
    return data
# Load every (sequence, energies) record from the DBS verification file.
data = read_data(path + 'DBS_medium_raw2.csv')
# Parenthesised so the line prints the record count on Python 2 and 3 alike.
print(len(data))
# In [36]:
import bigfloat
def get_boltzmann_distribution(energies):
    """Return the normalised Boltzmann probabilities for ``energies``.

    Each energy ``E`` is weighted by ``exp(-E * 4184 / (R * T))`` and the
    weights are divided by their sum. The exponentials are evaluated with
    a 1000-bit ``bigfloat`` precision context; the sum and the divisions
    use whatever ``bigfloat`` context is current.

    Args:
        energies: Iterable of numeric energies (presumably in kcal/mol,
            given the joules-per-kcal factor -- confirm with the data
            source).

    Returns:
        A list of Python floats, one per input energy, in input order.
        An empty input gives an empty list.
    """
    gas_constant = 8.3144621  # gas constant
    temperature = 293.15  # room temperature
    joules_per_kcal = 4184.0
    thermal_energy = gas_constant * temperature
    high_precision = bigfloat.precision(1000)

    weights = [
        bigfloat.exp((-energy * joules_per_kcal) / thermal_energy, high_precision)
        for energy in energies
    ]

    # Sum of all weights, used as the normalising constant.
    partition = bigfloat.BigFloat(0)
    for weight in weights:
        partition = bigfloat.add(partition, weight)

    return [float(bigfloat.div(weight, partition)) for weight in weights]
# Write one "sequence,probability" line per record, where the probability
# is the Boltzmann weight of the record's first listed energy.
with open('DBS_verification.csv', 'w') as out:
    for d in data:
        sequence, energies = d
        # NOTE(review): a record with no energies gives an empty
        # distribution, so the [0] lookup raises IndexError.
        first_probability = get_boltzmann_distribution(energies)[0]
        out.write(','.join([str(sequence), str(first_probability)]) + '\n')