In [29]:
# sequence input and output
import Bio.SeqIO
# provides dictionary of codon names
from Bio.SeqUtils.CodonUsage import SynonymousCodons
# for converting 3 letter amino acid code to 1 letter code
from Bio.SeqUtils import seq1
# for fast access of fasta files
import pyfaidx
# for parsing GFF3 files
import HTSeq
# for tab data processing
import pandas as pd
# numeric and matrix library
import numpy as np
# shell utilities
import shutil
# for submitting shell commands
import subprocess as sp
# for string matching
import re
# create a dictionary of codon names and number (arranged alphabetically by aa)
codonnum = 0
codonDict = dict()
for aa in sorted(SynonymousCodons, key=lambda aa3: seq1(aa3)):
if aa == 'STOP':
continue
for codon in sorted(SynonymousCodons[aa]):
# these two codons are numbered out of order for consistent notation
# with Subramaniam et al. Cell 2014
if codon in ['AGC']:
codonDict['AGC'] = 59
elif codon in ['AGT']:
codonDict['AGT'] = 60
else:
codonDict[codon] = codonnum
codonnum += 1
# to convert 3 letter codons to numbers between 0 and 63
def get_numerical_codon_sequence(seq):
numseq = list()
for pos in range(0, len(seq) - 3, 3):
try:
numseq.append(str(codonDict[seq[pos:pos + 3]]))
except KeyError:
numseq.append('-1')
raise
return None
return ' '.join(numseq)
# starting yfp sequence for leucine starvation expts
yfp0 = Bio.SeqIO.read('../annotations/simulations/yfp0.fa', 'fasta')
yfp0 = str(yfp0.seq)
# starting sequence for serine starvation expts
# all ser codons in yfp0 were AGC
yfp_agc = list(yfp0)
for pos in range(0, len(yfp0), 3):
current_codon = yfp0[pos:pos + 3]
if current_codon in SynonymousCodons['SER']:
yfp_agc[pos:pos + 3] = 'AGC'
yfp_agc = ''.join(yfp_agc)
In [19]:
genome = '../annotations/simulations/NC_000913.fna'
annotations = '../annotations/simulations/NC_000913.gff'
# read genome as a python dictionary
genome = pyfaidx.Fasta(genome)
annotationrecords = HTSeq.GFF_Reader(annotations)
annotationdf = dict()
for record in annotationrecords:
if record.type != 'CDS':
continue
if 'pseudo' in record.attr.keys() and record.attr['pseudo'] == 'true':
continue
# fdnG, fdoG, fdhF have TGA encoded selenocysteine codons
if record.attr['gene'] in ['fdnG', 'fdoG', 'fdhF']:
continue
sequence = str(genome[record.iv.chrom][record.iv.start:record.iv.end])
if record.iv.strand == '-':
sequence = str(HTSeq.Sequence(sequence).get_reverse_complement())
annotationdf[record.attr['ID']] = {
'iv': record.iv,
'gene': record.attr['gene'],
'product': record.attr['product'],
'start': record.iv.start,
'end': record.iv.end,
'strand': record.iv.strand,
'sequence': sequence,
}
annotationdf = pd.DataFrame.from_dict(annotationdf, orient='index')
annotationdf['length'] = annotationdf['iv'].apply(lambda x: x.length)
annotationdf['numsequence'] = annotationdf['sequence'].apply(
get_numerical_codon_sequence)
In [22]:
# Number of ribosomes (molecules per cell), 37C, 20 min doubling
# Bremer 2008
nRibo = 73000
# Number of mrna (nt per cell), 37C, 20 min doubling
# Bremer 2008
nMrna = 4.3e6
# Bound fraction of ribosomes, 37C, 20 min doubling
# Bremer 2008
boundRiboFract = 0.85
# Mean elongation rate (s^-1), 37C, 20 min doubling
# Bremer 2008
meanElongationRate = 22
rawdata = pd.read_table(
'../annotations/simulations/ecoli.mrna.concn.and.te.li.2014.csv',
sep=',', )
rawdata = rawdata.dropna().set_index('Gene')
rawdata['mRNA level (RPKM)'] = rawdata['mRNA level (RPKM)'].apply(int)
combined = annotationdf.join(rawdata, on='gene', how='inner')
# the total number of mrnaribonucleotides in the cell should be equal to
# the number of each mrna species multiplied by its length
combined['rpkm_nt'] = combined['mRNA level (RPKM)'] * combined['length']
mrnaNormalization = combined['rpkm_nt'].sum() / nMrna
combined['mrnaCopyNumber'] = combined['mRNA level (RPKM)'].apply(
lambda x: int(np.round(x / mrnaNormalization)))
combined = combined[combined.mrnaCopyNumber > 0]
# basic idea for TE normalization below is the equality:
# total number of mrna bound ribosomes in the cell =
# sum over all mrnas( initation rate * mrna copy number
# * length / elongation rate)
combined['boundribosomes'] = (combined['length'] *
combined['Translation efficiency (AU)'] *
combined['mrnaCopyNumber'])
initiationRateNormalization = (combined['boundribosomes'].sum() /
(nRibo * boundRiboFract * meanElongationRate))
combined['initiationRate'] = (
combined['Translation efficiency (AU)'] /
initiationRateNormalization).apply(lambda x: np.round(x, 4))
# Write initiation rate, mRNA copy number and the codon sequence to a file
# for using in the simulation
temp = combined[['initiationRate', 'mrnaCopyNumber', 'numsequence'
]].sort_values(
by=['mrnaCopyNumber'], ascending=False)
temp.to_csv(
'../annotations/simulations/run1/run1_ecoli_mrnas.csv',
sep='\t',
index=False,
header=None)
In [23]:
max_aminoacylation_rate = 2.0e10
# Read tRNA concentrations, anticodon, cognate codons
# from Table 2 in Dong et al. J. Mol. Biol. 1996.
# Gly1 and Ile2 entries were manually deleted from the table
# since these were not measured in this study.
# Sec tRNA was deleted since we are not considering
# this non-canonical translation.
# A typo for Val1 anticodon was corrected (TAG → TAC).
lines = open('../annotations/simulations/ecoli.trna.abundance.dong1996.txt'
).read().splitlines()
trnas = dict()
# read every 6th line until last but one.
for trnaindex, trna in enumerate(lines[0:-1:6]):
trnas[trna] = dict()
trnas[trna]['anticodon'] = lines[trnaindex * 6 + 1].strip().replace('U',
'T')
trnas[trna]['codons'] = [
codon.strip().replace('U', 'T')
for codon in lines[trnaindex * 6 + 2].split(',')
]
trnas[trna]['mean_concn'] = int(lines[trnaindex * 6 + 4].split('(')[0])
trnas[trna]['std_concn'] = int(
lines[trnaindex * 6 + 4].split('(')[1].split(')')[0].strip())
trnas[trna]['fraction'] = float(lines[trnaindex * 6 + 5].strip())
trnas[trna]['aminoacid'] = trna[:3].lower()
# Ile2 was not measured in Dong 1996. So assigning it to Ile1.
trnas['Ile1']['codons'].append('ATA')
# Calculate the factors for codon-tRNA interaction
# that multiply the kcat/KM for codon-tRNA reading:
# w = 1 for Watson-Crick base pairs,
# w = 0.64 for purine-pyrimidine mismatch,
# w = 0.61 for purine-purine mismatch. (Weights based on Shah 2013).
# If two trnas have the same anticodon
# they are considered as a single trna species in the simulation.
concentrations = dict()
cognate_pairings = dict()
weights = dict()
for trna in trnas:
cognatecodons = trnas[trna]['codons']
if trnas[trna]['anticodon'] not in concentrations:
concentrations[trnas[trna]['anticodon']] = trnas[trna]['mean_concn']
else:
concentrations[trnas[trna]['anticodon']] += trnas[trna]['mean_concn']
for codon in cognatecodons:
# pairing at the wobble position.
pairing = codon[-1:] + trnas[trna]['anticodon'][:1]
watson_crick_pairs = ['GC', 'AT', 'TA', 'CG']
pur_pyr_pairs = ['GT', 'TG', 'CA']
pur_pur_pairs = ['TT', 'AA']
if pairing in watson_crick_pairs:
weight = 1
elif pairing in pur_pyr_pairs:
weight = 0.64
else:
weight = 0.61
if codon not in cognate_pairings:
cognate_pairings[codon] = [trnas[trna]['anticodon']]
weights[codon] = [weight]
elif trnas[trna]['anticodon'] not in cognate_pairings[codon]:
cognate_pairings[codon].append(trnas[trna]['anticodon'])
weights[codon].append(weight)
# trnaindices are sorted alphabetically.
# concentrations file.
outputFile = open(
"../annotations/simulations/wholecell.trna.concentrations.tsv", "write")
outputFile.write('trnaindex\tanticodon\tmolpercell\taminoacid\n')
# trnaindex 0 is a dummy trna without any anticodon.
outputFile.write('0\tNNN\t0\txxx\n')
trnaindices = dict()
for trnaindex, anticodon in enumerate(sorted(concentrations)):
aminoacid = [
trnas[trna]['aminoacid'] for trna in trnas
if trnas[trna]['anticodon'] == anticodon
][0]
trnaindices[anticodon] = trnaindex + 1
outputFile.write(
str(trnaindex + 1) + '\t' + anticodon + '\t' + '%d' %
concentrations[anticodon] + '\t' + aminoacid + '\n')
outputFile.close()
# aminoacylation rates file.
outputFile = open(
"../annotations/simulations/wholecell.trna.aminoacylation.rate.tsv",
"write")
outputFile.write('trnaindex\tanticodon\tkcatBykm\n')
outputFile.write('0\tNNN\t0\n')
trnaindices = dict()
for trnaindex, anticodon in enumerate(sorted(concentrations)):
trnaindices[anticodon] = trnaindex + 1
outputFile.write(
str(trnaindex + 1) + '\t' + anticodon + '\t' + '%d' %
max_aminoacylation_rate + '\n')
outputFile.close()
# cognate pairings file.
outputFile = open("../annotations/simulations/wholecell.cognate.pairs.tsv",
"write")
outputFile.write('codonindex\tcodon\ttrnaindex1\ttrnaindex2\n')
for codon in sorted(codonDict, key=lambda x: codonDict[x]):
if len(cognate_pairings[codon]) == 1:
outputFile.write(
str(codonDict[codon]) + '\t' + codon + '\t' + '%d' %
trnaindices[cognate_pairings[codon][0]] + '\t0\n')
elif len(cognate_pairings[codon]) == 2:
outputFile.write(
str(codonDict[codon]) + '\t' + codon + '\t' + '%d' %
trnaindices[cognate_pairings[codon][0]] + '\t' + '%d' %
trnaindices[cognate_pairings[codon][1]] + '\n')
outputFile.close()
# cognate weights file.
outputFile = open("../annotations/simulations/wholecell.cognate.weights.tsv",
"write")
outputFile.write('codonindex\tcodon\ttrnaweight1\ttrnaweight2\n')
for codon in sorted(codonDict, key=lambda x: codonDict[x]):
if len(weights[codon]) == 1:
outputFile.write(
str(codonDict[codon]) + '\t' + codon + '\t' + '%0.2f' %
weights[codon][0] + '\t0.00\n')
elif len(weights[codon]) == 2:
outputFile.write(
str(codonDict[codon]) + '\t' + codon + '\t' + '%0.2f' %
weights[codon][0] + '\t' + '%0.2f' % weights[codon][1] + '\n')
outputFile.close()
In [24]:
mutation_locations = [
{
15: 'cta'
},
{
10: 'cta',
14: 'cta',
15: 'cta'
},
]
yfpmutants = dict()
for mutant in mutation_locations:
key = '_'.join(['yfp'] + [
codon + str(location) for location, codon in mutant.items()
])
yfpmutants[key] = list(yfp0)
leucodon_number = 0
for position in range(0, len(yfp0), 3):
currentcodon = yfp0[position:position + 3]
# proceed only if the codon is a Leu codon (which are all CTG in yfp0)
if currentcodon not in ['CTG']:
continue
leucodon_number += 1
for location in mutant.keys():
if leucodon_number == location:
yfpmutants[key][position:position + 3] = mutant[
location].upper()
yfpmutants[key] = ''.join(yfpmutants[key])
# mrna copy number is arbitrary, but kept low so that
# ribosomes are not overloaded
defaultMrnaCopyNumber = 10 # per cell
# median initiation rate of all E. coli mRNAs in the simulation, s-1
defaultInitationRate = 0.3 # s-1,
listOfInitiationRates = [defaultInitationRate]
outputFile = '../annotations/simulations/run1/run1_ecoli_and_reporter_mrnas.csv'
shutil.copyfile('../annotations/simulations/run1/run1_ecoli_mrnas.csv',
outputFile)
for initiationRate in listOfInitiationRates:
File = open(outputFile, 'a')
File.write("%0.3f\t%d\t%s\n" % (initiationRate, defaultMrnaCopyNumber,
get_numerical_codon_sequence(yfp0[:-3])))
for mutant in sorted(yfpmutants, reverse=True):
num_seq = ''.join(
get_numerical_codon_sequence(yfpmutants[mutant][:-3]))
File.write("%0.3f\t%d\t%s\n" % (initiationRate, defaultMrnaCopyNumber,
num_seq))
File.close()
In [25]:
%%writefile simulation_run_1.py
#!/usr/bin/env python
# SBATCH --mem=8000
import subprocess as sp
import sys
jobindex = int(sys.argv[1])
currentindex = -1
for starvationfactor in [0.01]:
currentindex += 1
if currentindex != jobindex:
continue
sp.check_output(' '.join([
'./wholecell_simulation',
'--threshold-time', '100',
'--total-time', '150',
'--aminoacid', 'leu',
'starvationfactor', '%0.2f' % starvationfactor,
'--aminoacid', 'leu',
'relativetrnaaminoacylationrates', '1', '2.2', '1.2', '1', '0.1',
'--output-prefix', '../rawdata/simulations/run1/',
'--input-genes', '../annotations/simulations/run1/run1_ecoli_and_reporter_mrnas.csv'
]), shell=True)
In [13]:
import subprocess as sp
# loop submits each simulation to a different node of the cluster
for index in range(1):
sp.check_output([
'sbatch', # for SLURM cluster; this line can be commented out if running locally
'-t',
'15', # for SLURM cluster; this line can be commented out if running locally
'-n',
'1', # for SLURM cluster; this line can be commented out if running locally
'simulation_run_1.py',
str(index)
])
In [26]:
aa = 'SER'
mutation_locations = [{
2: 'tcg',
3: 'tcg',
4: 'tcg',
5: 'tcg',
6: 'tcg',
7: 'tcg',
8: 'tcg'
}, ]
yfpmutants = dict()
for mutant in mutation_locations:
key = '_'.join(['yfp'] + [
codon + str(location) for location, codon in mutant.items()
])
yfpmutants[key] = list(yfp_agc)
codon_number = 0
for position in range(0, len(yfp_agc), 3):
currentcodon = yfp_agc[position:position + 3]
# proceed only if the codon is a Leu codon (which are all CTG in yfp0)
if currentcodon not in SynonymousCodons[aa]:
continue
codon_number += 1
for location in mutant.keys():
if codon_number == location:
yfpmutants[key][position:position + 3] = mutant[
location].upper()
yfpmutants[key] = ''.join(yfpmutants[key])
# mrna copy number is arbitrary, but kept low so that
# ribosomes are not overloaded
defaultMrnaCopyNumber = 10 # per cell
# median initiation rate of all E. coli mRNAs in the simulation, s-1
defaultInitationRate = 0.3 # s-1,
listOfInitiationRates = [defaultInitationRate]
outputFile = '../annotations/simulations/run12/run12_ecoli_and_reporter_mrnas.csv'
shutil.copyfile('../annotations/simulations/run1/run1_ecoli_mrnas.csv',
outputFile)
for initiationRate in listOfInitiationRates:
File = open(outputFile, 'a')
File.write("%0.3f\t%d\t%s\n" %
(initiationRate, defaultMrnaCopyNumber,
get_numerical_codon_sequence(yfp_agc[:-3])))
for mutant in sorted(yfpmutants, reverse=True):
num_seq = ''.join(
get_numerical_codon_sequence(yfpmutants[mutant][:-3]))
File.write("%0.3f\t%d\t%s\n" % (initiationRate, defaultMrnaCopyNumber,
num_seq))
File.close()
In [33]:
%%writefile simulation_run_12.py
#!/usr/bin/env python
# SBATCH --mem=8000
import subprocess as sp
import sys
jobindex = int(sys.argv[1])
currentindex = -1
for starvationfactor in [0.01]:
currentindex += 1
if currentindex != jobindex:
continue
sp.check_output(' '.join([
'./wholecell_simulation',
'--threshold-time', '100',
'--total-time', '150',
'--aminoacid', 'ser',
'starvationfactor', '%0.2f' % starvationfactor,
'--aminoacid', 'ser',
'relativetrnaaminoacylationrates', '0.1', '1.5', '1.5', '1',
'--output-prefix', '../rawdata/simulations/run12/',
'--input-genes', '../annotations/simulations/run12/run12_ecoli_and_reporter_mrnas.csv'
]), shell=True)
In [34]:
import subprocess as sp
# loop submits each simulation to a different node of the cluster
for index in range(1):
sp.check_output([
'sbatch', # for SLURM cluster; this line can be commented out if running locally
'-t',
'15', # for SLURM cluster; this line can be commented out if running locally
'-n',
'1', # for SLURM cluster; this line can be commented out if running locally
'simulation_run_12.py',
str(index)
])
In [30]:
inputfile = '../rawdata/simulations/run1/mrnafile_run1_ecoli_and_reporter_mrnas_thresholdtime_100_totaltime_150_leu_starvationfactor_0.01_leu_relativetrnaaminoacylationrates_1_2.2_1.2_1_0.1/avg_ribo_tRNA.out'
trnaconcentrationfile = pd.read_table(
'../annotations/simulations/wholecell.trna.concentrations.tsv')
trnaconcnlist = open(inputfile).readlines()
trnaConcn = list()
for trnatype in ['acylated']:
temp = map(
lambda x: re.findall('^' + trnatype + '_trna_(\d+)_([ACTG]{3})\t([\w\.]+)', x),
trnaconcnlist)
temp = filter(lambda x: len(x), temp)
temp = pd.DataFrame(
[x[0] for x in temp], columns=['trnaindex', 'anticodon', 'molpercell'])
trnaConcn.append(temp)
trnaConcn = pd.concat(trnaConcn, join='inner', axis=1)
trnaConcn = trnaConcn.merge(
trnaconcentrationfile[['anticodon', 'aminoacid']], how='right').fillna(0)
trnaConcn['trnaindex'] = trnaConcn['trnaindex'].apply(int)
trnaConcn['molpercell'] = trnaConcn['molpercell'].apply(
lambda x: int(float(x)))
trnaConcn.sort_values(by='trnaindex').to_csv(
'../annotations/simulations/leucine.starvation.average.trna.concentrations.tsv',
index=False,
sep='\t')
In [35]:
inputfile = (
'../rawdata/simulations/run12/mrnafile_run12_ecoli_and_reporter_mrnas_'
'thresholdtime_100_totaltime_150_ser_starvationfactor_0.01_'
'ser_relativetrnaaminoacylationrates_0.1_1.5_1.5_1/avg_ribo_tRNA.out')
trnaconcentrationfile = pd.read_table(
'../annotations/simulations/wholecell.trna.concentrations.tsv')
trnaconcnlist = open(inputfile).readlines()
trnaConcn = list()
for trnatype in ['acylated']:
temp = map(
lambda x: re.findall('^' + trnatype + '_trna_(\d+)_([ACTG]{3})\t([\w\.]+)', x),
trnaconcnlist)
temp = filter(lambda x: len(x), temp)
temp = pd.DataFrame(
[x[0] for x in temp], columns=['trnaindex', 'anticodon', 'molpercell'])
trnaConcn.append(temp)
trnaConcn = pd.concat(trnaConcn, join='inner', axis=1)
trnaConcn = trnaConcn.merge(
trnaconcentrationfile[['anticodon', 'aminoacid']], how='right').fillna(0)
trnaConcn['trnaindex'] = trnaConcn['trnaindex'].apply(int)
trnaConcn['molpercell'] = trnaConcn['molpercell'].apply(
lambda x: int(float(x)))
trnaConcn.sort_values(by='trnaindex').to_csv(
'../annotations/simulations/serine.starvation.average.trna.concentrations.tsv',
index=False,
sep='\t')
In [37]:
mutation_locations = [
{
'cta': 2
},
{
'cta': 6
},
{
'cta': 8
},
{
'cta': 9
},
{
'cta': 10
},
{
'cta': 11
},
{
'cta': 12
},
{
'cta': 13
},
{
'cta': 14
},
{
'cta': 18
},
{
'ctc': 2
},
{
'ctc': 6
},
{
'ctc': 8
},
{
'ctc': 9
},
{
'ctc': 10
},
{
'ctc': 11
},
{
'ctc': 12
},
{
'ctc': 13
},
{
'ctc': 14
},
{
'ctc': 18
},
{
'ctt': 2
},
{
'ctt': 6
},
{
'ctt': 10
},
{
'ctt': 14
},
{
'ctt': 18
},
]
yfpmutants = dict()
for mutant in mutation_locations:
key = '_'.join(['yfp'] + [
codon + str(location) for codon, location in mutant.items()
])
yfpmutants[key] = list(yfp0)
leucodon_number = 0
for position in range(0, len(yfp0), 3):
currentcodon = yfp0[position:position + 3]
# proceed only if the codon is a Leu codon (which are all CTG in yfp0)
if currentcodon not in ['CTG']:
continue
leucodon_number += 1
for codon in mutant.keys():
if leucodon_number == mutant[codon]:
yfpmutants[key][position:position + 3] = codon.upper()
yfpmutants[key] = ''.join(yfpmutants[key])
defaultMrnaCopyNumber = 1 # per cell
defaultInitationRate = 0.3 # s-1, This is the median initiation rate
listOfInitiationRates = [defaultInitationRate]
for initiationRate in listOfInitiationRates:
for mutant in yfpmutants:
outputFile = '../annotations/simulations/run2/' + \
'%s_initiationrate_%0.4f.csv' % (mutant, initiationRate)
num_seq = ''.join(
get_numerical_codon_sequence(yfpmutants[mutant][:-3]))
File = open(outputFile, 'w')
File.write("%0.4f\t%d\t%s\n" %
(initiationRate, defaultMrnaCopyNumber,
get_numerical_codon_sequence(yfp0[:-3])))
File.write("%0.4f\t%d\t%s\n" % (initiationRate, defaultMrnaCopyNumber,
num_seq))
File.close()
In [38]:
stallstrengthranges = {
'trafficjam': {
'cta': np.linspace(
0.01, 0.2, num=20),
'ctc': np.linspace(
0.02, 0.4, num=20),
'ctt': np.linspace(
0.02, 0.4, num=20),
},
'5primepreterm': {
'cta': np.linspace(
0.02, 0.4, num=20),
'ctc': np.linspace(
0.05, 1, num=20),
'ctt': np.linspace(
0.05, 1, num=20),
},
'selpreterm': {
'cta': np.linspace(
0.05, 1, num=20),
'ctc': np.linspace(
0.5, 10, num=20),
'ctt': np.linspace(
0.25, 5, num=20),
}
}
# find the location of all leucine codons to convert leu codon serial number
# to absolute position along yfp in codon units for simulation
leupositions = dict()
leucodon_number = 1
for position in range(0, len(yfp0), 3):
currentcodon = yfp0[position:position + 3]
if currentcodon == 'CTG':
leupositions[leucodon_number] = position / 3
leucodon_number += 1
for model in stallstrengthranges:
for index in range(20):
fitdata = list()
for mutant in mutation_locations:
if len(mutant) > 1:
raise "Only single mutants allowed"
codon = mutant.keys()[0]
pos = mutant.values()[0]
fitdata.append({
'codon': codonDict[codon.upper()],
'pos': leupositions[pos],
'stallstrength': stallstrengthranges[model][codon][index]
})
fitdata = pd.DataFrame(fitdata)
fitdata.to_csv(
'../annotations/simulations/run2/{0}_stallstrengthindex_{1}.tsv'.
format(model, index),
sep='\t',
index=False)
In [44]:
%%writefile simulation_run_2.py
#!/usr/bin/env python
# SBATCH --mem=8000
import subprocess as sp
import os
import sys
import itertools
import numpy as np
jobindex = int(sys.argv[1])
currentindex = -1
allfiles = os.listdir('../annotations/simulations/run2/')
mrnafiles = list(filter(lambda x: x.startswith('yfp'), allfiles))
mrnafiles = ['../annotations/simulations/run2/' + File for File in mrnafiles]
terminationRates = {
'trafficjam': {'--5prime-preterm-rate': 0},
'5primepreterm': {'--5prime-preterm-rate': 1},
'selpreterm': {'--selective-preterm-rate': 1},
}
for (mrnafile,
typeOfTermination
) in list(itertools.product(mrnafiles, terminationRates)):
currentindex += 1
if currentindex != jobindex:
continue
termoption = terminationRates[typeOfTermination].keys()[0]
termvalue = terminationRates[typeOfTermination].values()[0]
stallstrengthfiles = list(
filter(lambda x: x.startswith(typeOfTermination), allfiles))
stallstrengthfiles = [
'../annotations/simulations/run2/' + File for File in stallstrengthfiles]
for stallstrengthfile in stallstrengthfiles:
cmd = ' '.join([
'./reporter_simulation',
'--trna-concn', '../annotations/simulations/leucine.starvation.average.trna.concentrations.tsv',
termoption, '%0.2f' % termvalue,
'--threshold-accommodation-rate', '22',
'--output-prefix', '../rawdata/simulations/run2/',
'--stall-strength-file', stallstrengthfile,
'--input-genes', mrnafile
])
sp.check_output(cmd, shell=True)
In [45]:
import subprocess as sp
# loop submits each simulation to a different node of the cluster
for index in range(75):
sp.check_output([
'sbatch', # for SLURM cluster; this line can be commented out if running locally
'-t',
'300', # for SLURM cluster; this line can be commented out if running locally
'-n',
'1', # for SLURM cluster; this line can be commented out if running locally
'simulation_run_2.py',
str(index)
])
In [41]:
mutation_locations = [
{
'tcg': 2
},
{
'tcg': 3
},
{
'tcg': 4
},
{
'tcg': 5
},
{
'tcg': 6
},
{
'tcg': 7
},
]
yfpmutants = dict()
for mutant in mutation_locations:
key = '_'.join(['yfp'] + [
codon + str(location) for codon, location in mutant.items()
])
yfpmutants[key] = list(yfp_agc)
codon_number = 0
for position in range(0, len(yfp_agc), 3):
currentcodon = yfp_agc[position:position + 3]
# proceed only if the codon is a Leu codon (which are all CTG in yfp0)
if currentcodon not in ['AGC']:
continue
codon_number += 1
for codon in mutant.keys():
if codon_number == mutant[codon]:
yfpmutants[key][position:position + 3] = codon.upper()
yfpmutants[key] = ''.join(yfpmutants[key])
defaultMrnaCopyNumber = 1 # per cell
defaultInitationRate = 0.3 # s-1, This is the median initiation rate
listOfInitiationRates = [defaultInitationRate]
for initiationRate in listOfInitiationRates:
for mutant in yfpmutants:
outputFile = '../annotations/simulations/run13/' + \
'%s_initiationrate_%0.4f.csv' % (mutant, initiationRate)
num_seq = ''.join(
get_numerical_codon_sequence(yfpmutants[mutant][:-3]))
File = open(outputFile, 'w')
File.write("%0.4f\t%d\t%s\n" %
(initiationRate, defaultMrnaCopyNumber,
get_numerical_codon_sequence(yfp_agc[:-3])))
File.write("%0.4f\t%d\t%s\n" % (initiationRate, defaultMrnaCopyNumber,
num_seq))
File.close()
In [42]:
stallstrengthranges = {
'trafficjam': {
'tcg': np.linspace(
0.02, 0.4, num=20),
},
'5primepreterm': {
'tcg': np.linspace(
0.05, 1, num=20),
},
'selpreterm': {
'tcg': np.linspace(
0.5, 10, num=20),
}
}
# find the location of all leucine codons to convert leu codon serial number
# to absolute position along yfp in codon units for simulation
serpositions = dict()
sercodon_number = 1
for position in range(0, len(yfp_agc), 3):
currentcodon = yfp_agc[position:position + 3]
if currentcodon == 'AGC':
serpositions[sercodon_number] = position / 3
sercodon_number += 1
for model in stallstrengthranges:
for index in range(20):
fitdata = list()
for mutant in mutation_locations:
if len(mutant) > 1:
raise "Only single mutants allowed"
codon = mutant.keys()[0]
pos = mutant.values()[0]
fitdata.append({
'codon': codonDict[codon.upper()],
'pos': serpositions[pos],
'stallstrength': stallstrengthranges[model][codon][index]
})
fitdata = pd.DataFrame(fitdata)
fitdata.to_csv(
'../annotations/simulations/run13/{0}_stallstrengthindex_{1}.tsv'.
format(model, index),
sep='\t',
index=False)
In [46]:
%%writefile simulation_run_13.py
#!/usr/bin/env python
# SBATCH --mem=8000
import subprocess as sp
import os
import sys
import itertools
import numpy as np
jobindex = int(sys.argv[1])
currentindex = -1
allfiles = os.listdir('../annotations/simulations/run13/')
mrnafiles = list(filter(lambda x: x.startswith('yfp'), allfiles))
mrnafiles = ['../annotations/simulations/run13/' + File for File in mrnafiles]
terminationRates = {
'trafficjam': {'--5prime-preterm-rate': 0},
'5primepreterm': {'--5prime-preterm-rate': 1},
'selpreterm': {'--selective-preterm-rate': 1},
}
for (mrnafile,
typeOfTermination
) in list(itertools.product(mrnafiles, terminationRates)):
currentindex += 1
if currentindex != jobindex:
continue
termoption = terminationRates[typeOfTermination].keys()[0]
termvalue = terminationRates[typeOfTermination].values()[0]
stallstrengthfiles = list(
filter(lambda x: x.startswith(typeOfTermination), allfiles))
stallstrengthfiles = [
'../annotations/simulations/run13/' + File for File in stallstrengthfiles]
for stallstrengthfile in stallstrengthfiles:
cmd = ' '.join([
'./reporter_simulation',
'--trna-concn', '../annotations/simulations/serine.starvation.average.trna.concentrations.tsv',
termoption, '%0.2f' % termvalue,
'--threshold-accommodation-rate', '22',
'--output-prefix', '../rawdata/simulations/run13/',
'--stall-strength-file', stallstrengthfile,
'--input-genes', mrnafile
])
sp.check_output(cmd, shell=True)
In [47]:
import subprocess as sp
# loop submits each simulation to a different node of the cluster
for index in range(21):
sp.check_output([
'sbatch', # for SLURM cluster; this line can be commented out if running locally
'-t',
'300', # for SLURM cluster; this line can be commented out if running locally
'-n',
'1', # for SLURM cluster; this line can be commented out if running locally
'simulation_run_13.py',
str(index)
])