Create input files for simulations based on experimental fits

Table of Contents

  1. Globals
  2. Create stall strength file for Run 3 initiation mutant prediction based on fit of Run 2 to single mutant data in 3 different models (for fig. 4A, fig. 4 supplement 1A-G)
  3. Create mRNA sequence files for Run 3 to predict initiation mutant YFP synthesis rates in 3 different models (for fig. 4A, fig. 4 supplement 1A-G)
  4. Create stall strength file for Run 14 serine initiation mutant prediction based on fit of Run 13 to single mutant data in 3 different models (for fig. 4 supplement 1H)
  5. Create mRNA sequence files for Run 14 to predict serine initiation mutant YFP synthesis rates in 3 different models (for fig. 4 supplement 1H)
  6. Create stall strength file for Run 4 CTC, CTT double mutant prediction based on fit of Run 2 to single mutant data (for fig. 5 supplement 1A-B)
  7. Create mRNA sequence files for Run 4 to predict CTC CTT double mutant YFP synthesis rates in different models (for fig. 5 supplement 1A-B)
  8. Create stall strength file for Run 16 CTA multiple mutant prediction based on fit of Run 2 to single mutant data (for fig. 5)
  9. Create mRNA sequence files for Run 16 to predict multiple mutant YFP synthesis rates in different models (for fig. 5)
  10. Create stall strength file for Run 15 serine double mutant prediction based on fit of Run 13 to single mutant data (for fig. 5 supplement 1C)
  11. Create mRNA sequence files for Run 15 to predict serine double mutant YFP synthesis rates in different models (for fig. 5 supplement 1C)
  12. Create stall strength file for Run 5 CTC distance mutant prediction based on fit of Run 2 to single mutant data (for fig. 6 supplement 1)
  13. Create mRNA sequence files for Run 5 to predict CTC distance mutant YFP synthesis rates in different models (for fig. 6 supplement 1)
  14. Create stall strength file for Runs 6,7,8 to systematically vary initiation rate, number of stall sites and distance between stall sites in different models (for Fig. 3)
  15. Create mRNA sequence files for Run 6 to vary initiation rate in different models (for Fig. 3A)
  16. Create mRNA sequence files for Run 7 to vary number of stall sites in different models (for Fig. 3B)
  17. Create mRNA sequence files for Run 8 to vary distance between stall sites in different models (for Fig. 3C)
  18. Create stall strength file for Run 11 CTA distance mutant prediction based on fit of Run 2 to single mutant data (for Fig. 6)
  19. Create mRNA sequence files for Run 11 to predict CTA distance mutant YFP synthesis rates in different models (for Fig. 6)

These input files are for simulation runs 3 – 16.

Globals


In [7]:
# os commands
import os
# sequence input and output
import Bio.SeqIO
# provides dictionary of codon names
from Bio.SeqUtils.CodonUsage import SynonymousCodons
# for converting 3 letter amino acid code to 1 letter code
from Bio.SeqUtils import seq1
# for tab data processing
import pandas as pd
# numeric and matrix library
import numpy as np
# shell utilities
import shutil
# for submitting shell commands
import subprocess as sp

# Number every sense codon. Amino acids are visited in alphabetical order of
# their 1-letter code, and the codons of each amino acid alphabetically.
codonnum = 0
codonDict = {}
for aa in sorted((aa3 for aa3 in SynonymousCodons if aa3 != 'STOP'),
                 key=seq1):
    for codon in sorted(SynonymousCodons[aa]):
        # AGC and AGT get fixed numbers outside the running count, for
        # consistent notation with Subramaniam et al. Cell 2014
        if codon == 'AGC':
            codonDict[codon] = 59
        elif codon == 'AGT':
            codonDict[codon] = 60
        else:
            codonDict[codon] = codonnum
            codonnum += 1


# to convert 3 letter codons to their codon numbers (see codonDict)
def get_numerical_codon_sequence(seq):
    """Return the codons of `seq` as a space-separated string of numbers.

    `seq` is read in consecutive 3-nt steps starting at position 0 and each
    codon is translated through the module-level `codonDict`.

    NOTE(review): the scan stops before `len(seq) - 3`, so when the length
    of `seq` is a multiple of 3 its final codon is not emitted. This is
    kept exactly as before so that generated simulation inputs do not
    change -- confirm whether dropping that codon is intended.

    Raises:
        KeyError: if a 3-nt window of `seq` is not a key of `codonDict`.
    """
    # A missing codon propagates as KeyError, exactly as the earlier
    # try/except + bare `raise` did.
    return ' '.join(
        str(codonDict[seq[pos:pos + 3]])
        for pos in range(0, len(seq) - 3, 3))


# starting yfp sequence for leucine starvation expts (read as a plain string)
yfp0 = str(Bio.SeqIO.read('../annotations/simulations/yfp0.fa', 'fasta').seq)

# starting sequence for serine starvation expts:
# yfp0 with every Ser codon replaced by AGC, all other codons untouched
yfp_agc = ''.join(
    'AGC' if yfp0[start:start + 3] in SynonymousCodons['SER']
    else yfp0[start:start + 3]
    for start in range(0, len(yfp0), 3))

In [10]:
%matplotlib inline
import numpy as np
import pandas as pd
import pyfaidx
import os
import shutil
import HTSeq
import matplotlib.pyplot as plt
import cPickle as pickle  # python native format storage library.
import Bio.SeqIO  # dna sequence input and output.
from Bio.SeqUtils import seq1  # for converting 3 letter amino acid code to 1 letter code
from Bio.SeqUtils import CodonUsage  # provides dictionary of codon names
from Bio.SeqUtils.CodonUsage import SynonymousCodons  # provides dictionary of codon names
from Bio.SeqUtils import CodonUsageIndices  # provides the Ecoli Codon 
# Adaptation Index class.
from Bio.SeqRecord import SeqRecord  # to read .gb and .fasta files.
import re  # regular expression 
import numpy.random  # for creating random numbers
from Bio import Entrez  # to retrieve records from NCBI
import itertools  # iterator tools
from IPython.core.display import clear_output, HTML  # for refreshing and displaying output
import copy

# Number every sense codon. Amino acids are visited in alphabetical order of
# their 1-letter code, and the codons of each amino acid alphabetically.
codonnum = 0
codonDict = {}
for aa in sorted((aa3 for aa3 in SynonymousCodons if aa3 != 'STOP'),
                 key=seq1):
    for codon in sorted(SynonymousCodons[aa]):
        # AGC and AGT get fixed numbers outside the running count, for
        # consistent notation with Subramaniam et al. Cell 2014
        if codon == 'AGC':
            codonDict[codon] = 59
        elif codon == 'AGT':
            codonDict[codon] = 60
        else:
            codonDict[codon] = codonnum
            codonnum += 1


# to convert 3 letter codons to their codon numbers (see codonDict)
def get_numerical_codon_sequence(seq):
    """Return the codons of `seq` as a space-separated string of numbers.

    `seq` is read in consecutive 3-nt steps starting at position 0 and each
    codon is translated through the module-level `codonDict`.

    NOTE(review): the scan stops before `len(seq) - 3`, so when the length
    of `seq` is a multiple of 3 its final codon is not emitted. This is
    kept exactly as before so that generated simulation inputs do not
    change -- confirm whether dropping that codon is intended.

    Raises:
        KeyError: if a 3-nt window of `seq` is not a key of `codonDict`.
    """
    # A missing codon propagates as KeyError, exactly as the earlier
    # try/except + bare `raise` did.
    return ' '.join(
        str(codonDict[seq[pos:pos + 3]])
        for pos in range(0, len(seq) - 3, 3))


# starting yfp sequence for leucine starvation expts (read as a plain string)
yfp0 = str(Bio.SeqIO.read('../annotations/simulations/yfp0.fa', 'fasta').seq)

# starting sequence for serine starvation expts:
# yfp0 with every Ser codon replaced by AGC, all other codons untouched
yfp_agc = ''.join(
    'AGC' if yfp0[start:start + 3] in SynonymousCodons['SER']
    else yfp0[start:start + 3]
    for start in range(0, len(yfp0), 3))

Create stall strength file for Run 3 initiation mutant prediction based on fit of Run 2 to single mutant data in 3 different models (for fig. 4A, fig. 4 supplement 1A-G)

Also print elongation rate fit shown in Table S2


In [8]:
# Turn the Run 2 stall strength fits (one file per model) into the stall
# strength input files of Run 3, and print the fitted rates.
fitdatafolder = '../processeddata/simulations/'
fitdatafiles = [
    fitdatafolder + File for File in os.listdir(fitdatafolder)
    if File.startswith('run2_fit_stallstrength_for_initiation_')
]

# find the location of all leucine codons (all CTG in yfp0) to convert
# leu codon serial number (1-based) to absolute position along yfp in
# codon units for simulation
leupositions = dict()
leucodon_number = 1
for position in range(0, len(yfp0), 3):
    currentcodon = yfp0[position:position + 3]
    if currentcodon == 'CTG':
        # explicit floor division: the codon index must stay an integer
        # whether `/` means integer or true division
        leupositions[leucodon_number] = position // 3
        leucodon_number += 1

# codon number -> codon name, used only for the printed summary
codonnames = {23: 'CTA', 24: 'CTC', 26: 'CTT'}
# model tag taken from the fit file name -> abbreviation used in the printout
modelnames = {
    'trafficjam': 'TJ',
    'selpreterm': 'SAT',
    '5primepreterm': 'CSAT',
}

for fitdatafile in fitdatafiles:
    fitdata = pd.read_table(fitdatafile)
    # 'mutant' entries start with the 3-letter codon, followed by the
    # leu codon serial number
    mutants = fitdata['mutant']
    fitdata['codon'] = mutants.apply(
        lambda name: codonDict[name[:3].upper()])
    fitdata['pos'] = mutants.apply(
        lambda name: leupositions[int(name[3:])])
    fitdata = fitdata[['codon', 'pos', 'stallstrength']]
    # the model tag is the last '_'-separated field of the file name,
    # without its extension
    model = fitdatafile.split('_')[-1].split('.')[0]
    fitdata.to_csv(
        '../processeddata/simulations/run3_stallstrengthfits_' + model +
        '.tsv',
        sep='\t',
        index=False)
    for _, row in fitdata.iterrows():
        codon = row['codon']
        pos = row['pos']
        rate = row['stallstrength']
        # single-argument print(...) behaves the same as a statement or a
        # function call
        print('tRNA accommodation rate at {0}{1} ({2} model)\t{3:.3}s-1'.format(
            codonnames[codon], int(pos), modelnames[model], rate))


tRNA accommodation rate at CTA67 (TJ model)	0.07s-1
tRNA accommodation rate at CTA140 (TJ model)	0.12s-1
tRNA accommodation rate at CTA200 (TJ model)	0.107s-1
tRNA accommodation rate at CTA45 (TJ model)	0.091s-1
tRNA accommodation rate at CTC67 (TJ model)	0.184s-1
tRNA accommodation rate at CTC140 (TJ model)	0.211s-1
tRNA accommodation rate at CTT67 (TJ model)	0.0949s-1
tRNA accommodation rate at CTT45 (TJ model)	0.133s-1
tRNA accommodation rate at CTA67 (CSAT model)	0.0896s-1
tRNA accommodation rate at CTA140 (CSAT model)	0.173s-1
tRNA accommodation rate at CTA200 (CSAT model)	0.154s-1
tRNA accommodation rate at CTA45 (CSAT model)	0.129s-1
tRNA accommodation rate at CTC67 (CSAT model)	0.344s-1
tRNA accommodation rate at CTC140 (CSAT model)	0.444s-1
tRNA accommodation rate at CTT67 (CSAT model)	0.135s-1
tRNA accommodation rate at CTT45 (CSAT model)	0.205s-1
tRNA accommodation rate at CTA67 (SAT model)	0.292s-1
tRNA accommodation rate at CTA140 (SAT model)	0.637s-1
tRNA accommodation rate at CTA200 (SAT model)	0.55s-1
tRNA accommodation rate at CTA45 (SAT model)	0.444s-1
tRNA accommodation rate at CTC67 (SAT model)	1.45s-1
tRNA accommodation rate at CTC140 (SAT model)	1.83s-1
tRNA accommodation rate at CTT67 (SAT model)	0.469s-1
tRNA accommodation rate at CTT45 (SAT model)	0.713s-1

Create mRNA sequence files for Run 3 to predict initiation mutant YFP synthesis rates in 3 different models (for fig. 4A, fig. 4 supplement 1A-G)

Also print initiation rate fit shown in Table S2


In [10]:
# table of initiation rates inferred from the plate reader data
initiationRateFile = ('../processeddata/platereader/'
                      'inferred_initiationrates_for_initiation_simulations.tsv')

# one dict per YFP mutant: {Leu codon serial number: replacement codon}
mutation_locations = [
    {6: 'cta'},
    {10: 'cta'},
    {14: 'cta'},
    {18: 'cta'},
    {10: 'ctc'},
    {14: 'ctc'},
    {6: 'ctt'},
    {10: 'ctt'},
]

# Build one mutant YFP sequence per entry of mutation_locations.
# Keys look like yfp_<codon><serial>[_<codon><serial>...].
yfpmutants = dict()
for mutant in mutation_locations:
    key = '_'.join(['yfp'] + [
        codon + str(location) for location, codon in mutant.items()
    ])
    yfpmutants[key] = list(yfp0)
    leucodon_number = 0
    for position in range(0, len(yfp0), 3):
        # proceed only if the codon is a Leu codon (which are all CTG in yfp0)
        if yfp0[position:position + 3] != 'CTG':
            continue
        leucodon_number += 1
        # replace this Leu codon if its serial number is listed for the mutant
        if leucodon_number in mutant:
            yfpmutants[key][position:position + 3] = mutant[
                leucodon_number].upper()
    yfpmutants[key] = ''.join(yfpmutants[key])

defaultMrnaCopyNumber = 1  # per cell
defaultInitationRate = 0.3  # s-1, This is the median initiation rate

experimentInitiationRates = pd.read_table(initiationRateFile, index_col=0)

# scale the inferred rates by the default initiation rate
experimentInitiationRates['initiationRateForSimulation'] = (
    experimentInitiationRates['inferredInitiationRate'] *
    defaultInitationRate)

# the starting (yfp0) sequence is identical in every output file, so it is
# converted once; [:-3] drops the last 3 nt before conversion
wildtype_num_seq = get_numerical_codon_sequence(yfp0[:-3])

print('Inferred initiation rates (s-1)')
for initiationRate in experimentInitiationRates['initiationRateForSimulation']:
    for mutant in yfpmutants:
        outputFile = ('../annotations/simulations/run3/' +
                      '%s_initiationrate_%0.4g.csv' % (mutant, initiationRate))
        num_seq = get_numerical_codon_sequence(yfpmutants[mutant][:-3])
        # `with` closes every file; previously only the last file of each
        # inner loop was closed explicitly
        with open(outputFile, 'w') as outfile:
            # first line: starting sequence, second line: mutant sequence
            outfile.write("%0.4g\t%d\t%s\n" %
                          (initiationRate, defaultMrnaCopyNumber,
                           wildtype_num_seq))
            outfile.write("%0.4g\t%d\t%s\n" %
                          (initiationRate, defaultMrnaCopyNumber, num_seq))
    print('%0.3f' % initiationRate)


Inferred initiation rates (s-1)
0.143
0.219
0.334
0.101
0.300

Create stall strength file for Run 14 serine initiation mutant prediction based on fit of Run 13 to single mutant data in 3 different models (for fig. 4 supplement 1H)

Also print elongation rate fit shown in Table S2


In [12]:
# Turn the Run 13 serine stall strength fits (one file per model) into the
# stall strength input files of Run 14, and print the fitted rates.
fitdatafolder = '../processeddata/simulations/'
fitdatafiles = [
    fitdatafolder + File for File in os.listdir(fitdatafolder)
    if File.startswith('run13_serine_fit_stallstrength_for_initiation_')
]

# find the location of all serine codons (all AGC in yfp_agc) to convert
# ser codon serial number (1-based) to absolute position along yfp in
# codon units for simulation
serpositions = dict()
sercodon_number = 1
for position in range(0, len(yfp_agc), 3):
    currentcodon = yfp_agc[position:position + 3]
    if currentcodon == 'AGC':
        # explicit floor division: the codon index must stay an integer
        # whether `/` means integer or true division
        serpositions[sercodon_number] = position // 3
        sercodon_number += 1

# codon number -> codon name, used only for the printed summary
codonnames = {46: 'TCG'}
# model tag taken from the fit file name -> abbreviation used in the printout
modelnames = {
    'trafficjam': 'TJ',
    'selpreterm': 'SAT',
    '5primepreterm': 'CSAT',
}

for fitdatafile in fitdatafiles:
    fitdata = pd.read_table(fitdatafile)
    # 'mutant' entries start with the 3-letter codon, followed by the
    # ser codon serial number
    mutants = fitdata['mutant']
    fitdata['codon'] = mutants.apply(
        lambda name: codonDict[name[:3].upper()])
    fitdata['pos'] = mutants.apply(
        lambda name: serpositions[int(name[3:])])
    fitdata = fitdata[['codon', 'pos', 'stallstrength']]
    # the model tag is the last '_'-separated field of the file name,
    # without its extension
    model = fitdatafile.split('_')[-1].split('.')[0]
    fitdata.to_csv(
        '../processeddata/simulations/run14_stallstrengthfits_' + model +
        '.tsv',
        sep='\t',
        index=False)
    for _, row in fitdata.iterrows():
        codon = row['codon']
        pos = row['pos']
        rate = row['stallstrength']
        # single-argument print(...) behaves the same as a statement or a
        # function call
        print('tRNA accommodation rate at {0}{1} ({2} model)\t{3:.3}s-1'.format(
            codonnames[codon], int(pos), modelnames[model], rate))


tRNA accommodation rate at TCG146 (TJ model)	0.273s-1
tRNA accommodation rate at TCG146 (CSAT model)	0.767s-1
tRNA accommodation rate at TCG146 (SAT model)	4.59s-1

Create mRNA sequence files for Run 14 to predict serine initiation mutant YFP synthesis rates in 3 different models (for fig. 4 supplement 1H)

Also print initiation rate fit shown in Table S2


In [15]:
# table of initiation rates inferred from the plate reader data
initiationRateFile = ('../processeddata/platereader/'
                      'inferred_initiationrates_for_initiation_simulations.tsv')

# one dict per YFP mutant: {Ser codon serial number: replacement codon}
mutation_locations = [
    {4: 'tcg'},
    {5: 'tcg'},
    {6: 'tcg'},
]

# Build one mutant YFP sequence per entry of mutation_locations.
# Keys look like yfp_<codon><serial>[_<codon><serial>...].
yfpmutants = dict()
for mutant in mutation_locations:
    key = '_'.join(['yfp'] + [
        codon + str(location) for location, codon in mutant.items()
    ])
    yfpmutants[key] = list(yfp_agc)
    sercodon_number = 0
    for position in range(0, len(yfp_agc), 3):
        # proceed only if the codon is a Ser codon (which are all AGC in yfp_agc)
        if yfp_agc[position:position + 3] != 'AGC':
            continue
        sercodon_number += 1
        # replace this Ser codon if its serial number is listed for the mutant
        if sercodon_number in mutant:
            yfpmutants[key][position:position + 3] = mutant[
                sercodon_number].upper()
    yfpmutants[key] = ''.join(yfpmutants[key])

defaultMrnaCopyNumber = 1  # per cell
defaultInitationRate = 0.3  # s-1, This is the median initiation rate

experimentInitiationRates = pd.read_table(initiationRateFile, index_col=0)

# scale the inferred rates by the default initiation rate
experimentInitiationRates['initiationRateForSimulation'] = (
    experimentInitiationRates['inferredInitiationRate'] *
    defaultInitationRate)

# the starting (yfp_agc) sequence is identical in every output file, so it
# is converted once; [:-3] drops the last 3 nt before conversion
wildtype_num_seq = get_numerical_codon_sequence(yfp_agc[:-3])

print('Inferred initiation rates (s-1)')
for initiationRate in experimentInitiationRates['initiationRateForSimulation']:
    for mutant in yfpmutants:
        outputFile = ('../annotations/simulations/run14/' +
                      '%s_initiationrate_%0.4g.csv' % (mutant, initiationRate))
        num_seq = get_numerical_codon_sequence(yfpmutants[mutant][:-3])
        # `with` closes every file; previously only the last file of each
        # inner loop was closed explicitly
        with open(outputFile, 'w') as outfile:
            # first line: starting sequence, second line: mutant sequence
            outfile.write("%0.4g\t%d\t%s\n" %
                          (initiationRate, defaultMrnaCopyNumber,
                           wildtype_num_seq))
            outfile.write("%0.4g\t%d\t%s\n" %
                          (initiationRate, defaultMrnaCopyNumber, num_seq))
    print('%0.3f' % initiationRate)


Inferred initiation rates (s-1)
0.143
0.219
0.334
0.101
0.300

Create stall strength file for Run 4 CTC, CTT double mutant prediction based on fit of Run 2 to single mutant data (for fig. 5 supplement 1A-B)

Also print elongation rate fit shown in Table S3


In [16]:
# Turn the Run 2 stall strength fits for double mutants (one file per model)
# into the stall strength input files of Run 4, and print the fitted rates.
fitdatafolder = '../processeddata/simulations/'
fitdatafiles = [
    fitdatafolder + File for File in os.listdir(fitdatafolder)
    if File.startswith('run2_fit_stallstrength_for_double_')
]

# find the location of all leucine codons (all CTG in yfp0) to convert
# leu codon serial number (1-based) to absolute position along yfp in
# codon units for simulation
leupositions = dict()
leucodon_number = 1
for position in range(0, len(yfp0), 3):
    currentcodon = yfp0[position:position + 3]
    if currentcodon == 'CTG':
        # explicit floor division: the codon index must stay an integer
        # whether `/` means integer or true division
        leupositions[leucodon_number] = position // 3
        leucodon_number += 1

# codon number -> codon name, used only for the printed summary
codonnames = {23: 'CTA', 24: 'CTC', 26: 'CTT'}
# model tag taken from the fit file name -> abbreviation used in the printout
modelnames = {
    'trafficjam': 'TJ',
    'selpreterm': 'SAT',
    '5primepreterm': 'CSAT',
}

for fitdatafile in fitdatafiles:
    fitdata = pd.read_table(fitdatafile)
    # 'mutant' entries start with the 3-letter codon, followed by the
    # leu codon serial number
    mutants = fitdata['mutant']
    fitdata['codon'] = mutants.apply(
        lambda name: codonDict[name[:3].upper()])
    fitdata['pos'] = mutants.apply(
        lambda name: leupositions[int(name[3:])])
    fitdata = fitdata[['codon', 'pos', 'stallstrength']]
    # the model tag is the last '_'-separated field of the file name,
    # without its extension
    model = fitdatafile.split('_')[-1].split('.')[0]
    fitdata.to_csv(
        '../processeddata/simulations/run4_stallstrengthfits_' + model +
        '.tsv',
        sep='\t',
        index=False)
    for _, row in fitdata.iterrows():
        codon = row['codon']
        pos = row['pos']
        rate = row['stallstrength']
        # single-argument print(...) behaves the same as a statement or a
        # function call
        print('tRNA accommodation rate at {0}{1} ({2} model)\t{3:.3}s-1'.format(
            codonnames[codon], int(pos), modelnames[model], rate))


tRNA accommodation rate at CTA67 (TJ model)	0.046s-1
tRNA accommodation rate at CTA140 (TJ model)	0.0962s-1
tRNA accommodation rate at CTA200 (TJ model)	0.0914s-1
tRNA accommodation rate at CTA14 (TJ model)	0.0515s-1
tRNA accommodation rate at CTA45 (TJ model)	0.0667s-1
tRNA accommodation rate at CTC67 (TJ model)	0.267s-1
tRNA accommodation rate at CTC140 (TJ model)	0.286s-1
tRNA accommodation rate at CTC200 (TJ model)	0.284s-1
tRNA accommodation rate at CTC14 (TJ model)	0.137s-1
tRNA accommodation rate at CTC45 (TJ model)	0.205s-1
tRNA accommodation rate at CTT67 (TJ model)	0.139s-1
tRNA accommodation rate at CTT140 (TJ model)	0.27s-1
tRNA accommodation rate at CTT200 (TJ model)	0.246s-1
tRNA accommodation rate at CTT14 (TJ model)	0.125s-1
tRNA accommodation rate at CTT45 (TJ model)	0.172s-1
tRNA accommodation rate at CTA67 (SAT model)	0.186s-1
tRNA accommodation rate at CTA140 (SAT model)	0.464s-1
tRNA accommodation rate at CTA200 (SAT model)	0.425s-1
tRNA accommodation rate at CTA14 (SAT model)	0.231s-1
tRNA accommodation rate at CTA45 (SAT model)	0.287s-1
tRNA accommodation rate at CTC67 (SAT model)	3.06s-1
tRNA accommodation rate at CTC140 (SAT model)	4.4s-1
tRNA accommodation rate at CTC200 (SAT model)	4.6s-1
tRNA accommodation rate at CTC14 (SAT model)	0.764s-1
tRNA accommodation rate at CTC45 (SAT model)	1.59s-1
tRNA accommodation rate at CTT67 (SAT model)	0.777s-1
tRNA accommodation rate at CTT140 (SAT model)	3.94s-1
tRNA accommodation rate at CTT200 (SAT model)	2.85s-1
tRNA accommodation rate at CTT14 (SAT model)	0.649s-1
tRNA accommodation rate at CTT45 (SAT model)	1.09s-1
tRNA accommodation rate at CTA67 (CSAT model)	0.0585s-1
tRNA accommodation rate at CTA140 (CSAT model)	0.139s-1
tRNA accommodation rate at CTA200 (CSAT model)	0.13s-1
tRNA accommodation rate at CTA14 (CSAT model)	0.0649s-1
tRNA accommodation rate at CTA45 (CSAT model)	0.0875s-1
tRNA accommodation rate at CTC67 (CSAT model)	0.6s-1
tRNA accommodation rate at CTC140 (CSAT model)	0.817s-1
tRNA accommodation rate at CTC200 (CSAT model)	0.754s-1
tRNA accommodation rate at CTC14 (CSAT model)	0.193s-1
tRNA accommodation rate at CTC45 (CSAT model)	0.379s-1
tRNA accommodation rate at CTT67 (CSAT model)	0.213s-1
tRNA accommodation rate at CTT140 (CSAT model)	0.69s-1
tRNA accommodation rate at CTT200 (CSAT model)	0.582s-1
tRNA accommodation rate at CTT14 (CSAT model)	0.171s-1
tRNA accommodation rate at CTT45 (CSAT model)	0.285s-1

Create mRNA sequence files for Run 4 to predict CTC CTT double mutant YFP synthesis rates in different models (for fig. 5 supplement 1A-B)


In [17]:
# one dict per YFP mutant: {Leu codon serial number: replacement codon}
mutation_locations = [
    # CTC single mutants
    {2: 'ctc'},
    {6: 'ctc'},
    {10: 'ctc'},
    {14: 'ctc'},
    {18: 'ctc'},
    # CTC double mutants
    {2: 'ctc', 6: 'ctc'},
    {2: 'ctc', 10: 'ctc'},
    {2: 'ctc', 14: 'ctc'},
    {2: 'ctc', 18: 'ctc'},
    {6: 'ctc', 10: 'ctc'},
    {6: 'ctc', 14: 'ctc'},
    {6: 'ctc', 18: 'ctc'},
    {10: 'ctc', 14: 'ctc'},
    {10: 'ctc', 18: 'ctc'},
    {14: 'ctc', 18: 'ctc'},
    # CTT single mutants
    {2: 'ctt'},
    {6: 'ctt'},
    {10: 'ctt'},
    {14: 'ctt'},
    {18: 'ctt'},
    # CTT double mutants
    {2: 'ctt', 6: 'ctt'},
    {2: 'ctt', 10: 'ctt'},
    {2: 'ctt', 14: 'ctt'},
    {2: 'ctt', 18: 'ctt'},
    {6: 'ctt', 10: 'ctt'},
    {6: 'ctt', 14: 'ctt'},
    {6: 'ctt', 18: 'ctt'},
    {10: 'ctt', 14: 'ctt'},
    {10: 'ctt', 18: 'ctt'},
    {14: 'ctt', 18: 'ctt'},
]

# Build one mutant YFP sequence per entry of mutation_locations.
# Keys look like yfp_<codon><serial>[_<codon><serial>...].
yfpmutants = dict()
for mutant in mutation_locations:
    key = '_'.join(['yfp'] + [
        codon + str(location) for location, codon in mutant.items()
    ])
    yfpmutants[key] = list(yfp0)
    leucodon_number = 0
    for position in range(0, len(yfp0), 3):
        # proceed only if the codon is a Leu codon (which are all CTG in yfp0)
        if yfp0[position:position + 3] != 'CTG':
            continue
        leucodon_number += 1
        # replace this Leu codon if its serial number is listed for the mutant
        if leucodon_number in mutant:
            yfpmutants[key][position:position + 3] = mutant[
                leucodon_number].upper()
    yfpmutants[key] = ''.join(yfpmutants[key])

defaultMrnaCopyNumber = 1  # per cell
defaultInitationRate = 0.3  # s-1, This is the median initiation rate

listOfInitiationRates = [defaultInitationRate]

# the starting (yfp0) sequence is identical in every output file, so it is
# converted once; [:-3] drops the last 3 nt before conversion
wildtype_num_seq = get_numerical_codon_sequence(yfp0[:-3])

for initiationRate in listOfInitiationRates:
    for mutant in yfpmutants:
        outputFile = ('../annotations/simulations/run4/' +
                      '%s_initiationrate_%0.4g.csv' % (mutant, initiationRate))
        num_seq = get_numerical_codon_sequence(yfpmutants[mutant][:-3])
        # `with` closes every file; previously only the last file of each
        # inner loop was closed explicitly
        with open(outputFile, 'w') as outfile:
            # first line: starting sequence, second line: mutant sequence
            outfile.write("%0.4g\t%d\t%s\n" %
                          (initiationRate, defaultMrnaCopyNumber,
                           wildtype_num_seq))
            outfile.write("%0.4g\t%d\t%s\n" %
                          (initiationRate, defaultMrnaCopyNumber, num_seq))

Create stall strength file for Run 16 CTA multiple mutant prediction based on fit of Run 2 to single mutant data (for fig. 5)

Also print elongation rate fit shown in Table S3


In [19]:
# Turn the Run 2 stall strength fits for leucine multiple mutants (one file
# per model) into the stall strength input files of Run 16, and print the
# fitted rates.
fitdatafolder = '../processeddata/simulations/'
fitdatafiles = [
    fitdatafolder + File for File in os.listdir(fitdatafolder)
    if File.startswith('run2_fit_stallstrength_for_leucine_multiple_')
]

# find the location of all leucine codons (all CTG in yfp0) to convert
# leu codon serial number (1-based) to absolute position along yfp in
# codon units for simulation
leupositions = dict()
leucodon_number = 1
for position in range(0, len(yfp0), 3):
    currentcodon = yfp0[position:position + 3]
    if currentcodon == 'CTG':
        # explicit floor division: the codon index must stay an integer
        # whether `/` means integer or true division
        leupositions[leucodon_number] = position // 3
        leucodon_number += 1

# codon number -> codon name, used only for the printed summary
codonnames = {23: 'CTA', 24: 'CTC', 26: 'CTT'}
# model tag taken from the fit file name -> abbreviation used in the printout
modelnames = {
    'trafficjam': 'TJ',
    'selpreterm': 'SAT',
    '5primepreterm': 'CSAT',
}

for fitdatafile in fitdatafiles:
    fitdata = pd.read_table(fitdatafile)
    # 'mutant' entries start with the 3-letter codon, followed by the
    # leu codon serial number
    mutants = fitdata['mutant']
    fitdata['codon'] = mutants.apply(
        lambda name: codonDict[name[:3].upper()])
    fitdata['pos'] = mutants.apply(
        lambda name: leupositions[int(name[3:])])
    fitdata = fitdata[['codon', 'pos', 'stallstrength']]
    # the model tag is the last '_'-separated field of the file name,
    # without its extension
    model = fitdatafile.split('_')[-1].split('.')[0]
    fitdata.to_csv(
        '../processeddata/simulations/run16_stallstrengthfits_' + model +
        '.tsv',
        sep='\t',
        index=False)
    for _, row in fitdata.iterrows():
        codon = row['codon']
        pos = row['pos']
        rate = row['stallstrength']
        # single-argument print(...) behaves the same as a statement or a
        # function call
        print('tRNA accommodation rate at {0}{1} ({2} model)\t{3:.3}s-1'.format(
            codonnames[codon], int(pos), modelnames[model], rate))


tRNA accommodation rate at CTA67 (SAT model)	0.26s-1
tRNA accommodation rate at CTA140 (SAT model)	0.513s-1
tRNA accommodation rate at CTA200 (SAT model)	0.404s-1
tRNA accommodation rate at CTA14 (SAT model)	0.266s-1
tRNA accommodation rate at CTA45 (SAT model)	0.376s-1
tRNA accommodation rate at CTA67 (TJ model)	0.0617s-1
tRNA accommodation rate at CTA140 (TJ model)	0.1s-1
tRNA accommodation rate at CTA200 (TJ model)	0.0876s-1
tRNA accommodation rate at CTA14 (TJ model)	0.0567s-1
tRNA accommodation rate at CTA45 (TJ model)	0.0837s-1
tRNA accommodation rate at CTA67 (CSAT model)	0.0769s-1
tRNA accommodation rate at CTA140 (CSAT model)	0.147s-1
tRNA accommodation rate at CTA200 (CSAT model)	0.123s-1
tRNA accommodation rate at CTA14 (CSAT model)	0.0726s-1
tRNA accommodation rate at CTA45 (CSAT model)	0.113s-1

Create mRNA sequence files for Run 16 to predict multiple mutant YFP synthesis rates in different models (for fig. 5)


In [21]:
# one dict per YFP mutant: {Leu codon serial number: replacement codon}
mutation_locations = [
    # CTA single mutants
    {2: 'cta'},
    {6: 'cta'},
    {10: 'cta'},
    {14: 'cta'},
    {18: 'cta'},
    # CTA double mutants
    {2: 'cta', 6: 'cta'},
    {2: 'cta', 10: 'cta'},
    {2: 'cta', 14: 'cta'},
    {2: 'cta', 18: 'cta'},
    {6: 'cta', 10: 'cta'},
    {6: 'cta', 14: 'cta'},
    {6: 'cta', 18: 'cta'},
    {10: 'cta', 14: 'cta'},
    {10: 'cta', 18: 'cta'},
    {14: 'cta', 18: 'cta'},
    # CTA triple mutants
    {6: 'cta', 10: 'cta', 14: 'cta'},
    {6: 'cta', 10: 'cta', 18: 'cta'},
    # CTA quadruple mutant
    {6: 'cta', 10: 'cta', 14: 'cta', 18: 'cta'},
]

# Build one mutant YFP sequence per entry of mutation_locations.
# Keys look like yfp_<codon><serial>[_<codon><serial>...].
yfpmutants = dict()
for mutant in mutation_locations:
    key = '_'.join(['yfp'] + [
        codon + str(location) for location, codon in mutant.items()
    ])
    yfpmutants[key] = list(yfp0)
    leucodon_number = 0
    for position in range(0, len(yfp0), 3):
        # proceed only if the codon is a Leu codon (which are all CTG in yfp0)
        if yfp0[position:position + 3] != 'CTG':
            continue
        leucodon_number += 1
        # replace this Leu codon if its serial number is listed for the mutant
        if leucodon_number in mutant:
            yfpmutants[key][position:position + 3] = mutant[
                leucodon_number].upper()
    yfpmutants[key] = ''.join(yfpmutants[key])

defaultMrnaCopyNumber = 1  # per cell
defaultInitationRate = 0.3  # s-1, This is the median initiation rate

listOfInitiationRates = [defaultInitationRate]

# the starting (yfp0) sequence is identical in every output file, so it is
# converted once; [:-3] drops the last 3 nt before conversion
wildtype_num_seq = get_numerical_codon_sequence(yfp0[:-3])

for initiationRate in listOfInitiationRates:
    for mutant in yfpmutants:
        outputFile = ('../annotations/simulations/run16/' +
                      '%s_initiationrate_%0.4g.csv' % (mutant, initiationRate))
        num_seq = get_numerical_codon_sequence(yfpmutants[mutant][:-3])
        # `with` closes every file; previously only the last file of each
        # inner loop was closed explicitly
        with open(outputFile, 'w') as outfile:
            # first line: starting sequence, second line: mutant sequence
            outfile.write("%0.4g\t%d\t%s\n" %
                          (initiationRate, defaultMrnaCopyNumber,
                           wildtype_num_seq))
            outfile.write("%0.4g\t%d\t%s\n" %
                          (initiationRate, defaultMrnaCopyNumber, num_seq))

Create stall strength file for Run 15 serine double mutant prediction based on fit of Run 13 to single mutant data (for fig. 5 supplement 1C)

Also print elongation rate fit shown in Table S3


In [22]:
# Turn the Run 13 serine stall strength fits for double mutants (one file
# per model) into the stall strength input files of Run 15, and print the
# fitted rates.
fitdatafolder = '../processeddata/simulations/'
fitdatafiles = [
    fitdatafolder + File for File in os.listdir(fitdatafolder)
    if File.startswith('run13_serine_fit_stallstrength_for_double_')
]

# find the location of all serine codons (all AGC in yfp_agc) to convert
# ser codon serial number (1-based) to absolute position along yfp in
# codon units for simulation
serpositions = dict()
sercodon_number = 1
for position in range(0, len(yfp_agc), 3):
    currentcodon = yfp_agc[position:position + 3]
    if currentcodon == 'AGC':
        # explicit floor division: the codon index must stay an integer
        # whether `/` means integer or true division
        serpositions[sercodon_number] = position // 3
        sercodon_number += 1

# codon number -> codon name, used only for the printed summary
codonnames = {46: 'TCG'}
# model tag taken from the fit file name -> abbreviation used in the printout
modelnames = {
    'trafficjam': 'TJ',
    'selpreterm': 'SAT',
    '5primepreterm': 'CSAT',
}

for fitdatafile in fitdatafiles:
    fitdata = pd.read_table(fitdatafile)
    # 'mutant' entries start with the 3-letter codon, followed by the
    # ser codon serial number
    mutants = fitdata['mutant']
    fitdata['codon'] = mutants.apply(
        lambda name: codonDict[name[:3].upper()])
    fitdata['pos'] = mutants.apply(
        lambda name: serpositions[int(name[3:])])
    fitdata = fitdata[['codon', 'pos', 'stallstrength']]
    # the model tag is the last '_'-separated field of the file name,
    # without its extension
    model = fitdatafile.split('_')[-1].split('.')[0]
    fitdata.to_csv(
        '../processeddata/simulations/run15_stallstrengthfits_' + model +
        '.tsv',
        sep='\t',
        index=False)
    for _, row in fitdata.iterrows():
        codon = row['codon']
        pos = row['pos']
        rate = row['stallstrength']
        # single-argument print(...) behaves the same as a statement or a
        # function call
        print('tRNA accommodation rate at {0}{1} ({2} model)\t{3:.3}s-1'.format(
            codonnames[codon], int(pos), modelnames[model], rate))


tRNA accommodation rate at TCG27 (TJ model)	0.263s-1
tRNA accommodation rate at TCG29 (TJ model)	0.266s-1
tRNA accommodation rate at TCG85 (TJ model)	0.281s-1
tRNA accommodation rate at TCG146 (TJ model)	0.273s-1
tRNA accommodation rate at TCG201 (TJ model)	0.252s-1
tRNA accommodation rate at TCG204 (TJ model)	0.29s-1
tRNA accommodation rate at TCG27 (CSAT model)	0.488s-1
tRNA accommodation rate at TCG29 (CSAT model)	0.509s-1
tRNA accommodation rate at TCG85 (CSAT model)	0.725s-1
tRNA accommodation rate at TCG146 (CSAT model)	0.767s-1
tRNA accommodation rate at TCG201 (CSAT model)	0.601s-1
tRNA accommodation rate at TCG204 (CSAT model)	0.935s-1
tRNA accommodation rate at TCG27 (SAT model)	2.19s-1
tRNA accommodation rate at TCG29 (SAT model)	2.13s-1
tRNA accommodation rate at TCG85 (SAT model)	3.82s-1
tRNA accommodation rate at TCG146 (SAT model)	4.59s-1
tRNA accommodation rate at TCG201 (SAT model)	3.22s-1
tRNA accommodation rate at TCG204 (SAT model)	5.15s-1

Create mRNA sequence files for Run 15 to predict serine double mutant YFP synthesis rates in different models (for fig. 5 supplement 1C)


In [23]:
# Each entry maps serine codon serial numbers to the codon substituted there.
# Single mutants at Ser 2-7 come first, followed by every double mutant that
# pairs one of Ser 2, 3, 4 with one of Ser 5, 6, 7.
mutation_locations = [{site: 'tcg'} for site in range(2, 8)]
mutation_locations += [
    {upstream: 'tcg', downstream: 'tcg'}
    for upstream in (2, 3, 4)
    for downstream in (5, 6, 7)
]

# Build one mutated copy of yfp_agc per entry of mutation_locations, keyed by
# 'yfp' followed by one '<codon><serial number>' tag per mutated site.
yfpmutants = {}
for mutant in mutation_locations:
    tags = [codon + str(location) for location, codon in mutant.items()]
    key = '_'.join(['yfp'] + tags)
    bases = list(yfp_agc)
    sercodon_number = 0
    for position in range(0, len(yfp_agc), 3):
        # only Ser codons are counted (they are all AGC in yfp_agc)
        if yfp_agc[position:position + 3] != 'AGC':
            continue
        sercodon_number += 1
        if sercodon_number in mutant:
            bases[position:position + 3] = mutant[sercodon_number].upper()
    yfpmutants[key] = ''.join(bases)

defaultMrnaCopyNumber = 1  # per cell
defaultInitationRate = 0.3  # s-1, This is the median initiation rate

listOfInitiationRates = [defaultInitationRate]

# Write one file per (initiation rate, mutant). Each file holds two
# tab-separated lines of: initiation rate, mRNA copy number, numerical codon
# sequence -- first for the unmutated yfp_agc template, then for the mutant.
# The last three nucleotides (presumably the stop codon) are left out.
for initiationRate in listOfInitiationRates:
    for mutant in yfpmutants:
        outputFile = '../annotations/simulations/run15/' + \
            '%s_initiationrate_%0.4g.csv' % (mutant, initiationRate)
        num_seq = ''.join(
            get_numerical_codon_sequence(yfpmutants[mutant][:-3]))
        # The context manager closes every file as soon as it is written; the
        # previous code only closed the last handle of each outer iteration.
        with open(outputFile, 'w') as File:
            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber,
                        get_numerical_codon_sequence(yfp_agc[:-3])))

            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber, num_seq))

Create stall strength file for Run 5 CTC distance mutant prediction based on fit of Run 2 to single mutant data (for fig. 6 supplement 1)

Also print elongation rate fit shown in Table S5


In [27]:
# Collect the paths of the Run 2 fit files for the CTC distance mutants.
fitdatafolder = '../processeddata/simulations/'
fitdatafiles = [
    fitdatafolder + filename
    for filename in os.listdir(fitdatafolder)
    if filename.startswith('run2_fit_stallstrength_for_ctc_distance_')
]

# Map each leucine codon's serial number (1 = first CTG codon found in yfp0)
# to its 0-based position along yfp in codon units, which is the coordinate
# written to the stall strength files for the simulation.
leupositions = dict()
leucodon_number = 1
for position in range(0, len(yfp0), 3):
    currentcodon = yfp0[position:position + 3]
    if currentcodon == 'CTG':
        # Floor division keeps the codon index an integer even when true
        # division is in effect (Python 3 or `from __future__ import division`).
        leupositions[leucodon_number] = position // 3
        leucodon_number += 1

# Lookup tables used only for the human-readable report printed below.
codonnames = {23: 'CTA', 24: 'CTC', 26: 'CTT'}
modelnames = {'trafficjam': 'TJ', 'selpreterm': 'SAT', '5primepreterm': 'CSAT'}

for fitdatafile in fitdatafiles:
    fitdata = pd.read_table(fitdatafile)
    # 'mutant' labels are '<3-letter codon><leucine serial number>'; translate
    # them into the numerical codon and the codon position along yfp.
    mutantlabels = fitdata['mutant']
    fitdata['codon'] = mutantlabels.apply(
        lambda label: codonDict[label[:3].upper()])
    fitdata['pos'] = mutantlabels.apply(
        lambda label: leupositions[int(label[3:])])
    fitdata = fitdata[['codon', 'pos', 'stallstrength']]
    # the model name is the last '_'-separated token of the file name,
    # without its extension
    model = fitdatafile.rsplit('_', 1)[-1].partition('.')[0]
    fitdata.to_csv(
        '../processeddata/simulations/run5_stallstrengthfits_' + model +
        '.tsv',
        sep='\t',
        index=False)
    for _, row in fitdata.iterrows():
        print('tRNA accommodation rate at {0}{1} ({2} model)\t{3:.3}s-1'.format(
            codonnames[row['codon']], int(row['pos']), modelnames[model],
            row['stallstrength']))


tRNA accommodation rate at CTC67 (TJ model)	0.226s-1
tRNA accommodation rate at CTC118 (TJ model)	0.199s-1
tRNA accommodation rate at CTC124 (TJ model)	0.205s-1
tRNA accommodation rate at CTC136 (TJ model)	0.236s-1
tRNA accommodation rate at CTC140 (TJ model)	0.273s-1
tRNA accommodation rate at CTC59 (TJ model)	0.228s-1
tRNA accommodation rate at CTC63 (TJ model)	0.214s-1
tRNA accommodation rate at CTC67 (CSAT model)	0.468s-1
tRNA accommodation rate at CTC118 (CSAT model)	0.387s-1
tRNA accommodation rate at CTC124 (CSAT model)	0.394s-1
tRNA accommodation rate at CTC136 (CSAT model)	0.512s-1
tRNA accommodation rate at CTC140 (CSAT model)	0.777s-1
tRNA accommodation rate at CTC59 (CSAT model)	0.483s-1
tRNA accommodation rate at CTC63 (CSAT model)	0.433s-1
tRNA accommodation rate at CTC67 (SAT model)	2.17s-1
tRNA accommodation rate at CTC118 (SAT model)	1.71s-1
tRNA accommodation rate at CTC124 (SAT model)	1.67s-1
tRNA accommodation rate at CTC136 (SAT model)	2.51s-1
tRNA accommodation rate at CTC140 (SAT model)	4.26s-1
tRNA accommodation rate at CTC59 (SAT model)	2.15s-1
tRNA accommodation rate at CTC63 (SAT model)	1.85s-1

Create mRNA sequence files for Run 5 to predict CTC distance mutant YFP synthesis rates in different models (for fig. 6 supplement 1)


In [28]:
# Each entry maps leucine codon serial numbers to the codon substituted there.
# Order: single mutants at Leu 8-14, Leu 8 paired with each of Leu 9-14,
# Leu 9 paired with each of Leu 10-14, then the two adjacent pairs.
mutation_locations = [{site: 'ctc'} for site in range(8, 15)]
mutation_locations += [{8: 'ctc', partner: 'ctc'} for partner in range(9, 15)]
mutation_locations += [{9: 'ctc', partner: 'ctc'} for partner in range(10, 15)]
mutation_locations += [{11: 'ctc', 12: 'ctc'}, {13: 'ctc', 14: 'ctc'}]

# Build one mutated copy of yfp0 per entry of mutation_locations, keyed by
# 'yfp' followed by one '<codon><serial number>' tag per mutated site.
yfpmutants = {}
for mutant in mutation_locations:
    tags = [codon + str(location) for location, codon in mutant.items()]
    key = '_'.join(['yfp'] + tags)
    bases = list(yfp0)
    leucodon_number = 0
    for position in range(0, len(yfp0), 3):
        # only Leu codons are counted (they are all CTG in yfp0)
        if yfp0[position:position + 3] != 'CTG':
            continue
        leucodon_number += 1
        if leucodon_number in mutant:
            bases[position:position + 3] = mutant[leucodon_number].upper()
    yfpmutants[key] = ''.join(bases)

defaultMrnaCopyNumber = 1  # per cell
defaultInitationRate = 0.3  # s-1, This is the median initiation rate

listOfInitiationRates = [defaultInitationRate]

# Write one file per (initiation rate, mutant). Each file holds two
# tab-separated lines of: initiation rate, mRNA copy number, numerical codon
# sequence -- first for the unmutated yfp0 template, then for the mutant.
# The last three nucleotides (presumably the stop codon) are left out.
for initiationRate in listOfInitiationRates:
    for mutant in yfpmutants:
        outputFile = '../annotations/simulations/run5/' + \
            '%s_initiationrate_%0.4g.csv' % (mutant, initiationRate)
        num_seq = ''.join(
            get_numerical_codon_sequence(yfpmutants[mutant][:-3]))
        # The context manager closes every file as soon as it is written; the
        # previous code only closed the last handle of each outer iteration.
        with open(outputFile, 'w') as File:
            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber,
                        get_numerical_codon_sequence(yfp0[:-3])))

            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber, num_seq))

Create stall strength file for Runs 6,7,8 to systematically vary initiation rate, number of stall sites and distance between stall sites in different models (for Fig. 3)


In [29]:
# Derive the Runs 6, 7, 8 stall strength files from the Run 4 fits: for each
# model, the CTA stall strength fitted at codon position 45 is applied to
# every position from 1 to 237.
fitdatafolder = '../processeddata/simulations/'
fitdatafiles = os.listdir(fitdatafolder)
fitdatafiles = [
    fitdatafolder + File for File in fitdatafiles
    if File.startswith('run4_stallstrengthfits_')
]

for fitdatafile in fitdatafiles:
    fitdata = pd.read_table(fitdatafile)
    fitdata = fitdata[['codon', 'pos', 'stallstrength']]
    fitdata = fitdata[fitdata['codon'] == 23]  # CTA codon
    # Use CTA 6 (Leu45) values for all codons.
    # .loc/.iloc replace the deprecated .ix indexer (removed in pandas 1.0);
    # the first matching row is taken, exactly as before.
    commonstallstrength = fitdata.loc[fitdata['pos'] == 45,
                                      'stallstrength'].iloc[0]
    newdataframe = pd.DataFrame(
        [[23, loop, commonstallstrength] for loop in range(1, 238)],
        columns=fitdata.columns)
    # the model name is the last '_'-separated token of the file name,
    # without its extension
    model = fitdatafile.split('_')[-1].split('.')[0]
    newdataframe.to_csv(
        '../processeddata/simulations/runs678_stallstrengthfits_' + model +
        '.tsv',
        sep='\t',
        index=False)

Create mRNA sequence files for Run 6 to vary initiation rate in different models (for Fig. 3A)


In [31]:
# Run 6 uses a single construct: the 6th Leu codon of yfp0 replaced by CTA.
mutation_locations = [{6: 'cta'}]

# Build one mutated copy of yfp0 per entry of mutation_locations, keyed by
# 'yfp' followed by one '<codon><serial number>' tag per mutated site.
yfpmutants = {}
for mutant in mutation_locations:
    tags = [codon + str(location) for location, codon in mutant.items()]
    key = '_'.join(['yfp'] + tags)
    bases = list(yfp0)
    leucodon_number = 0
    for position in range(0, len(yfp0), 3):
        # only Leu codons are counted (they are all CTG in yfp0)
        if yfp0[position:position + 3] != 'CTG':
            continue
        leucodon_number += 1
        if leucodon_number in mutant:
            bases[position:position + 3] = mutant[leucodon_number].upper()
    yfpmutants[key] = ''.join(bases)

defaultMrnaCopyNumber = 1  # per cell
defaultInitationRate = 0.3  # s-1, This is the median initiation rate

# Eight initiation rates: the default scaled by 2**-4 up to 2**3.
listOfInitiationRates = defaultInitationRate * 2**np.array(
    range(-4, 4, 1), dtype=np.float64)

# Write one file per (initiation rate, mutant). Each file holds two
# tab-separated lines of: initiation rate, mRNA copy number, numerical codon
# sequence -- first for the unmutated yfp0 template, then for the mutant.
# The last three nucleotides (presumably the stop codon) are left out.
for initiationRate in listOfInitiationRates:
    for mutant in yfpmutants:
        outputFile = '../annotations/simulations/run6/' + \
            '%s_initiationrate_%0.4g.csv' % (mutant, initiationRate)
        num_seq = ''.join(
            get_numerical_codon_sequence(yfpmutants[mutant][:-3]))
        # The context manager closes every file as soon as it is written; the
        # previous code only closed the last handle of each outer iteration.
        with open(outputFile, 'w') as File:
            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber,
                        get_numerical_codon_sequence(yfp0[:-3])))

            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber, num_seq))

Create mRNA sequence files for Run 7 to vary number of stall sites in different models (for Fig. 3B)


In [32]:
# Each entry maps leucine codon serial numbers to the codon substituted there.
# Single mutants at each stall site come first, followed by constructs that
# accumulate the first 2, 3, 4 and 5 stall sites.
stall_sites = [2, 6, 10, 14, 18]
mutation_locations = [{site: 'cta'} for site in stall_sites]
for nsites in range(2, len(stall_sites) + 1):
    mutation_locations.append({site: 'cta' for site in stall_sites[:nsites]})

# Build one mutated copy of yfp0 per entry of mutation_locations, keyed by
# 'yfp' followed by one '<codon><serial number>' tag per mutated site.
yfpmutants = {}
for mutant in mutation_locations:
    tags = [codon + str(location) for location, codon in mutant.items()]
    key = '_'.join(['yfp'] + tags)
    bases = list(yfp0)
    leucodon_number = 0
    for position in range(0, len(yfp0), 3):
        # only Leu codons are counted (they are all CTG in yfp0)
        if yfp0[position:position + 3] != 'CTG':
            continue
        leucodon_number += 1
        if leucodon_number in mutant:
            bases[position:position + 3] = mutant[leucodon_number].upper()
    yfpmutants[key] = ''.join(bases)

defaultMrnaCopyNumber = 1  # per cell
defaultInitationRate = 0.3  # s-1, This is the median initiation rate

listOfInitiationRates = [defaultInitationRate]

# Write one file per (initiation rate, mutant). Each file holds two
# tab-separated lines of: initiation rate, mRNA copy number, numerical codon
# sequence -- first for the unmutated yfp0 template, then for the mutant.
# The last three nucleotides (presumably the stop codon) are left out.
for initiationRate in listOfInitiationRates:
    for mutant in yfpmutants:
        outputFile = '../annotations/simulations/run7/' + \
            '%s_initiationrate_%0.4g.csv' % (mutant, initiationRate)
        num_seq = ''.join(
            get_numerical_codon_sequence(yfpmutants[mutant][:-3]))
        # The context manager closes every file as soon as it is written; the
        # previous code only closed the last handle of each outer iteration.
        with open(outputFile, 'w') as File:
            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber,
                        get_numerical_codon_sequence(yfp0[:-3])))

            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber, num_seq))

Create mRNA sequence files for Run 8 to vary distance between stall sites in different models (for Fig. 3C)


In [33]:
# One construct per codon number 1..237 (counting every codon of yfp0, not
# only Leu codons), each with that codon replaced by CTA.
mutation_locations = [{loop: 'cta'} for loop in range(1, 238)]

# Build one mutated copy of yfp0 per entry of mutation_locations, keyed by
# 'yfp' followed by one '<codon><codon number>' tag per mutated site.
yfpmutants = dict()
for mutant in mutation_locations:
    key = '_'.join(['yfp'] + [
        codon + str(location) for location, codon in mutant.items()
    ])
    yfpmutants[key] = list(yfp0)
    codon_number = 0
    for position in range(0, len(yfp0), 3):
        codon_number += 1
        if codon_number in mutant:
            yfpmutants[key][position:position + 3] = mutant[
                codon_number].upper()
        # Every construct additionally carries a fixed CTA at nucleotide
        # offset 177 (0-based codon 59), so each file contains this reference
        # stall site plus the variable one.
        if position == 177:
            yfpmutants[key][position:position + 3] = 'CTA'
    yfpmutants[key] = ''.join(yfpmutants[key])

defaultMrnaCopyNumber = 1  # per cell
defaultInitationRate = 0.3  # s-1, This is the median initiation rate

listOfInitiationRates = [defaultInitationRate]

# Write one file per (initiation rate, mutant). Each file holds two
# tab-separated lines of: initiation rate, mRNA copy number, numerical codon
# sequence -- first for the unmutated yfp0 template, then for the mutant.
# The last three nucleotides (presumably the stop codon) are left out.
for initiationRate in listOfInitiationRates:
    for mutant in yfpmutants:
        outputFile = '../annotations/simulations/run8/' + \
            '%s_initiationrate_%0.4g.csv' % (mutant, initiationRate)
        num_seq = ''.join(
            get_numerical_codon_sequence(yfpmutants[mutant][:-3]))
        # The context manager closes every file as soon as it is written; the
        # previous code only closed the last handle of each outer iteration.
        with open(outputFile, 'w') as File:
            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber,
                        get_numerical_codon_sequence(yfp0[:-3])))

            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber, num_seq))

Create stall strength file for Run 11 CTA distance mutant prediction based on fit of Run 2 to single mutant data (for Fig. 6)

Also print elongation rate fit shown in Table S5


In [35]:
# Collect the paths of the Run 2 fit files for the CTA distance mutants.
fitdatafolder = '../processeddata/simulations/'
fitdatafiles = [
    fitdatafolder + filename
    for filename in os.listdir(fitdatafolder)
    if filename.startswith('run2_fit_stallstrength_for_cta_distance_')
]

# Map each leucine codon's serial number (1 = first CTG codon found in yfp0)
# to its 0-based position along yfp in codon units, which is the coordinate
# written to the stall strength files for the simulation.
leupositions = dict()
leucodon_number = 1
for position in range(0, len(yfp0), 3):
    currentcodon = yfp0[position:position + 3]
    if currentcodon == 'CTG':
        # Floor division keeps the codon index an integer even when true
        # division is in effect (Python 3 or `from __future__ import division`).
        leupositions[leucodon_number] = position // 3
        leucodon_number += 1

# Per-codon baseline rates, extracted from simulation run 1; the fitted stall
# strength scales these to give the rate reported below.
trnaaccommodationrates = {23: 0.132824, 24: 0.398473, 26: 0.255022}
# Lookup tables used only for the human-readable report printed below.
codonnames = {23: 'CTA', 24: 'CTC', 26: 'CTT'}
modelnames = {'trafficjam': 'TJ', 'selpreterm': 'SAT', '5primepreterm': 'CSAT'}

for fitdatafile in fitdatafiles:
    fitdata = pd.read_table(fitdatafile)
    # 'mutant' labels are '<3-letter codon><leucine serial number>'; translate
    # them into the numerical codon and the codon position along yfp.
    mutantlabels = fitdata['mutant']
    fitdata['codon'] = mutantlabels.apply(
        lambda label: codonDict[label[:3].upper()])
    fitdata['pos'] = mutantlabels.apply(
        lambda label: leupositions[int(label[3:])])
    fitdata = fitdata[['codon', 'pos', 'stallstrength']]
    # the model name is the last '_'-separated token of the file name,
    # without its extension
    model = fitdatafile.rsplit('_', 1)[-1].partition('.')[0]
    fitdata.to_csv(
        '../processeddata/simulations/run11_stallstrengthfits_' + model +
        '.tsv',
        sep='\t',
        index=False)
    # added after the file is written, so this column is for reporting only
    baselinerates = fitdata['codon'].apply(
        lambda x: trnaaccommodationrates[x])
    fitdata['trnaaccommodationrate'] = baselinerates * fitdata['stallstrength']
    for _, row in fitdata.iterrows():
        print('tRNA accommodation rate at {0}{1} ({2} model)\t{3:.3}s-1'.format(
            codonnames[row['codon']], int(row['pos']), modelnames[model],
            row['trnaaccommodationrate']))


tRNA accommodation rate at CTA67 (CSAT model)	0.0103s-1
tRNA accommodation rate at CTA118 (CSAT model)	0.0146s-1
tRNA accommodation rate at CTA124 (CSAT model)	0.0158s-1
tRNA accommodation rate at CTA136 (CSAT model)	0.0159s-1
tRNA accommodation rate at CTA140 (CSAT model)	0.0183s-1
tRNA accommodation rate at CTA59 (CSAT model)	0.0183s-1
tRNA accommodation rate at CTA63 (CSAT model)	0.012s-1
tRNA accommodation rate at CTA67 (TJ model)	0.00825s-1
tRNA accommodation rate at CTA118 (TJ model)	0.0112s-1
tRNA accommodation rate at CTA124 (TJ model)	0.0113s-1
tRNA accommodation rate at CTA136 (TJ model)	0.0116s-1
tRNA accommodation rate at CTA140 (TJ model)	0.0127s-1
tRNA accommodation rate at CTA59 (TJ model)	0.013s-1
tRNA accommodation rate at CTA63 (TJ model)	0.00923s-1
tRNA accommodation rate at CTA67 (SAT model)	0.0348s-1
tRNA accommodation rate at CTA118 (SAT model)	0.0463s-1
tRNA accommodation rate at CTA124 (SAT model)	0.0529s-1
tRNA accommodation rate at CTA136 (SAT model)	0.0517s-1
tRNA accommodation rate at CTA140 (SAT model)	0.0607s-1
tRNA accommodation rate at CTA59 (SAT model)	0.0672s-1
tRNA accommodation rate at CTA63 (SAT model)	0.0408s-1

Create mRNA sequence files for Run 11 to predict CTA distance mutant YFP synthesis rates in different models (for Fig. 6)


In [36]:
# Each entry maps leucine codon serial numbers to the codon substituted there.
# Order: single mutants at Leu 8-14, Leu 8 paired with each of Leu 9-14,
# Leu 9 paired with each of Leu 10-14, then the two adjacent pairs.
mutation_locations = [{site: 'cta'} for site in range(8, 15)]
mutation_locations += [{8: 'cta', partner: 'cta'} for partner in range(9, 15)]
mutation_locations += [{9: 'cta', partner: 'cta'} for partner in range(10, 15)]
mutation_locations += [{11: 'cta', 12: 'cta'}, {13: 'cta', 14: 'cta'}]

# Build one mutated copy of yfp0 per entry of mutation_locations, keyed by
# 'yfp' followed by one '<codon><serial number>' tag per mutated site.
yfpmutants = {}
for mutant in mutation_locations:
    tags = [codon + str(location) for location, codon in mutant.items()]
    key = '_'.join(['yfp'] + tags)
    bases = list(yfp0)
    leucodon_number = 0
    for position in range(0, len(yfp0), 3):
        # only Leu codons are counted (they are all CTG in yfp0)
        if yfp0[position:position + 3] != 'CTG':
            continue
        leucodon_number += 1
        if leucodon_number in mutant:
            bases[position:position + 3] = mutant[leucodon_number].upper()
    yfpmutants[key] = ''.join(bases)

defaultMrnaCopyNumber = 1  # per cell
defaultInitationRate = 0.3  # s-1, This is the median initiation rate

listOfInitiationRates = [defaultInitationRate]

# Write one file per (initiation rate, mutant). Each file holds two
# tab-separated lines of: initiation rate, mRNA copy number, numerical codon
# sequence -- first for the unmutated yfp0 template, then for the mutant.
# The last three nucleotides (presumably the stop codon) are left out.
for initiationRate in listOfInitiationRates:
    for mutant in yfpmutants:
        outputFile = '../annotations/simulations/run11/' + \
            '%s_initiationrate_%0.4g.csv' % (mutant, initiationRate)
        num_seq = ''.join(
            get_numerical_codon_sequence(yfpmutants[mutant][:-3]))
        # The context manager closes every file as soon as it is written; the
        # previous code only closed the last handle of each outer iteration.
        with open(outputFile, 'w') as File:
            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber,
                        get_numerical_codon_sequence(yfp0[:-3])))

            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber, num_seq))