Globals


In [29]:
# sequence input and output
import Bio.SeqIO
# provides dictionary of codon names
from Bio.SeqUtils.CodonUsage import SynonymousCodons
# for converting 3 letter amino acid code to 1 letter code
from Bio.SeqUtils import seq1
# for fast access of fasta files
import pyfaidx
# for parsing GFF3 files
import HTSeq
# for tab data processing
import pandas as pd
# numeric and matrix library
import numpy as np
# shell utilities
import shutil
# for submitting shell commands
import subprocess as sp
# for string matching
import re

# create a dictionary of codon names and number (arranged alphabetically by aa)
codonnum = 0
codonDict = dict()
for aa in sorted(SynonymousCodons, key=lambda aa3: seq1(aa3)):
    if aa == 'STOP':
        continue
    for codon in sorted(SynonymousCodons[aa]):
        # these two codons are numbered out of order for consistent notation
        # with Subramaniam et al. Cell 2014
        if codon in ['AGC']:
            codonDict['AGC'] = 59
        elif codon in ['AGT']:
            codonDict['AGT'] = 60
        else:
            codonDict[codon] = codonnum
            codonnum += 1

# to convert 3 letter codons to numbers between 0 and 63


def get_numerical_codon_sequence(seq):
    numseq = list()
    for pos in range(0, len(seq) - 3, 3):
        try:
            numseq.append(str(codonDict[seq[pos:pos + 3]]))
        except KeyError:
            numseq.append('-1')
            raise
            return None
    return ' '.join(numseq)


# starting yfp sequence for leucine starvation expts
yfp0 = Bio.SeqIO.read('../annotations/simulations/yfp0.fa', 'fasta')
yfp0 = str(yfp0.seq)

# starting sequence for serine starvation expts
# all ser codons in yfp0 were AGC
yfp_agc = list(yfp0)
for pos in range(0, len(yfp0), 3):
    current_codon = yfp0[pos:pos + 3]
    if current_codon in SynonymousCodons['SER']:
        yfp_agc[pos:pos + 3] = 'AGC'
yfp_agc = ''.join(yfp_agc)

Get codon sequence of all E. coli genes


In [19]:
genome = '../annotations/simulations/NC_000913.fna'
annotations = '../annotations/simulations/NC_000913.gff'

# read genome as a python dictionary
genome = pyfaidx.Fasta(genome)

annotationrecords = HTSeq.GFF_Reader(annotations)

annotationdf = dict()
for record in annotationrecords:
    if record.type != 'CDS':
        continue
    if 'pseudo' in record.attr.keys() and record.attr['pseudo'] == 'true':
        continue
    # fdnG, fdoG, fdhF have TGA encoded selenocysteine codons
    if record.attr['gene'] in ['fdnG', 'fdoG', 'fdhF']:
        continue
    sequence = str(genome[record.iv.chrom][record.iv.start:record.iv.end])
    if record.iv.strand == '-':
        sequence = str(HTSeq.Sequence(sequence).get_reverse_complement())

    annotationdf[record.attr['ID']] = {
        'iv': record.iv,
        'gene': record.attr['gene'],
        'product': record.attr['product'],
        'start': record.iv.start,
        'end': record.iv.end,
        'strand': record.iv.strand,
        'sequence': sequence,
    }
annotationdf = pd.DataFrame.from_dict(annotationdf, orient='index')
annotationdf['length'] = annotationdf['iv'].apply(lambda x: x.length)

annotationdf['numsequence'] = annotationdf['sequence'].apply(
    get_numerical_codon_sequence)

Calculate mRNA copy numbers and translation initiation rate for all E. coli mRNAs


In [22]:
# Number of ribosomes (molecules per cell), 37C, 20 min doubling
# Bremer 2008
nRibo = 73000

# Number of mrna (nt per cell), 37C, 20 min doubling
# Bremer 2008
nMrna = 4.3e6

# Bound fraction of ribosomes, 37C, 20 min doubling
# Bremer 2008
boundRiboFract = 0.85

# Mean elongation rate (s^-1), 37C, 20 min doubling
# Bremer 2008
meanElongationRate = 22

rawdata = pd.read_table(
    '../annotations/simulations/ecoli.mrna.concn.and.te.li.2014.csv',
    sep=',', )

rawdata = rawdata.dropna().set_index('Gene')
rawdata['mRNA level (RPKM)'] = rawdata['mRNA level (RPKM)'].apply(int)
combined = annotationdf.join(rawdata, on='gene', how='inner')

# the total number of mrnaribonucleotides in the cell should be equal to
# the number of each mrna species multiplied by its length

combined['rpkm_nt'] = combined['mRNA level (RPKM)'] * combined['length']

mrnaNormalization = combined['rpkm_nt'].sum() / nMrna

combined['mrnaCopyNumber'] = combined['mRNA level (RPKM)'].apply(
    lambda x: int(np.round(x / mrnaNormalization)))

combined = combined[combined.mrnaCopyNumber > 0]

# basic idea for TE normalization below is the equality:
# total number of mrna bound ribosomes in the cell =
# sum over all mrnas( initation rate * mrna copy number
#                                       * length / elongation rate)

combined['boundribosomes'] = (combined['length'] *
                              combined['Translation efficiency (AU)'] *
                              combined['mrnaCopyNumber'])

initiationRateNormalization = (combined['boundribosomes'].sum() /
                               (nRibo * boundRiboFract * meanElongationRate))

combined['initiationRate'] = (
    combined['Translation efficiency (AU)'] /
    initiationRateNormalization).apply(lambda x: np.round(x, 4))

# Write initiation rate, mRNA copy number and the codon sequence to a file
# for using in the simulation

temp = combined[['initiationRate', 'mrnaCopyNumber', 'numsequence'
                 ]].sort_values(
                     by=['mrnaCopyNumber'], ascending=False)
temp.to_csv(
    '../annotations/simulations/run1/run1_ecoli_mrnas.csv',
    sep='\t',
    index=False,
    header=None)

Prepare tRNA input files

  1. tRNA concentration
  2. tRNA-codon cognate pairs
  3. tRNA-codon cognate weights
  4. tRNA aminoacylation rate constants

In [23]:
max_aminoacylation_rate = 2.0e10

# Read tRNA concentrations, anticodon, cognate codons
# from Table 2 in Dong et al. J. Mol. Biol. 1996.
# Gly1 and Ile2 entries were manually deleted from the table
# since these were not measured in this study.
# Sec tRNA was deleted since we are not considering
# this non-canonical translation.
# A typo for Val1 anticodon was corrected (TAG → TAC).
lines = open('../annotations/simulations/ecoli.trna.abundance.dong1996.txt'
             ).read().splitlines()
trnas = dict()
# read every 6th line until last but one.
for trnaindex, trna in enumerate(lines[0:-1:6]):
    trnas[trna] = dict()
    trnas[trna]['anticodon'] = lines[trnaindex * 6 + 1].strip().replace('U',
                                                                        'T')
    trnas[trna]['codons'] = [
        codon.strip().replace('U', 'T')
        for codon in lines[trnaindex * 6 + 2].split(',')
    ]
    trnas[trna]['mean_concn'] = int(lines[trnaindex * 6 + 4].split('(')[0])
    trnas[trna]['std_concn'] = int(
        lines[trnaindex * 6 + 4].split('(')[1].split(')')[0].strip())
    trnas[trna]['fraction'] = float(lines[trnaindex * 6 + 5].strip())
    trnas[trna]['aminoacid'] = trna[:3].lower()

# Ile2 was not measured in Dong 1996. So assigning it to Ile1.
trnas['Ile1']['codons'].append('ATA')

# Calculate the factors for codon-tRNA interaction
# that multiply the kcat/KM for codon-tRNA reading:
# w = 1 for Watson-Crick base pairs,
# w = 0.64 for purine-pyrimidine mismatch,
# w = 0.61 for purine-purine mismatch. (Weights based on Shah 2013).

# If two trnas have the same anticodon
# they are considered as a single trna species in the simulation.
concentrations = dict()
cognate_pairings = dict()
weights = dict()

for trna in trnas:
    cognatecodons = trnas[trna]['codons']
    if trnas[trna]['anticodon'] not in concentrations:
        concentrations[trnas[trna]['anticodon']] = trnas[trna]['mean_concn']
    else:
        concentrations[trnas[trna]['anticodon']] += trnas[trna]['mean_concn']

    for codon in cognatecodons:
        # pairing at the wobble position.
        pairing = codon[-1:] + trnas[trna]['anticodon'][:1]

        watson_crick_pairs = ['GC', 'AT', 'TA', 'CG']
        pur_pyr_pairs = ['GT', 'TG', 'CA']
        pur_pur_pairs = ['TT', 'AA']

        if pairing in watson_crick_pairs:
            weight = 1
        elif pairing in pur_pyr_pairs:
            weight = 0.64
        else:
            weight = 0.61

        if codon not in cognate_pairings:
            cognate_pairings[codon] = [trnas[trna]['anticodon']]
            weights[codon] = [weight]
        elif trnas[trna]['anticodon'] not in cognate_pairings[codon]:
            cognate_pairings[codon].append(trnas[trna]['anticodon'])
            weights[codon].append(weight)

# trnaindices are sorted alphabetically.

# concentrations file.
outputFile = open(
    "../annotations/simulations/wholecell.trna.concentrations.tsv", "write")
outputFile.write('trnaindex\tanticodon\tmolpercell\taminoacid\n')
# trnaindex 0 is a dummy trna without any anticodon.
outputFile.write('0\tNNN\t0\txxx\n')
trnaindices = dict()
for trnaindex, anticodon in enumerate(sorted(concentrations)):
    aminoacid = [
        trnas[trna]['aminoacid'] for trna in trnas
        if trnas[trna]['anticodon'] == anticodon
    ][0]
    trnaindices[anticodon] = trnaindex + 1
    outputFile.write(
        str(trnaindex + 1) + '\t' + anticodon + '\t' + '%d' %
        concentrations[anticodon] + '\t' + aminoacid + '\n')
outputFile.close()

# aminoacylation rates file.
outputFile = open(
    "../annotations/simulations/wholecell.trna.aminoacylation.rate.tsv",
    "write")
outputFile.write('trnaindex\tanticodon\tkcatBykm\n')
outputFile.write('0\tNNN\t0\n')
trnaindices = dict()
for trnaindex, anticodon in enumerate(sorted(concentrations)):
    trnaindices[anticodon] = trnaindex + 1
    outputFile.write(
        str(trnaindex + 1) + '\t' + anticodon + '\t' + '%d' %
        max_aminoacylation_rate + '\n')
outputFile.close()

# cognate pairings file.
outputFile = open("../annotations/simulations/wholecell.cognate.pairs.tsv",
                  "write")
outputFile.write('codonindex\tcodon\ttrnaindex1\ttrnaindex2\n')
for codon in sorted(codonDict, key=lambda x: codonDict[x]):
    if len(cognate_pairings[codon]) == 1:
        outputFile.write(
            str(codonDict[codon]) + '\t' + codon + '\t' + '%d' %
            trnaindices[cognate_pairings[codon][0]] + '\t0\n')
    elif len(cognate_pairings[codon]) == 2:
        outputFile.write(
            str(codonDict[codon]) + '\t' + codon + '\t' + '%d' %
            trnaindices[cognate_pairings[codon][0]] + '\t' + '%d' %
            trnaindices[cognate_pairings[codon][1]] + '\n')
outputFile.close()

# cognate weights file.
outputFile = open("../annotations/simulations/wholecell.cognate.weights.tsv",
                  "write")
outputFile.write('codonindex\tcodon\ttrnaweight1\ttrnaweight2\n')
for codon in sorted(codonDict, key=lambda x: codonDict[x]):
    if len(weights[codon]) == 1:
        outputFile.write(
            str(codonDict[codon]) + '\t' + codon + '\t' + '%0.2f' %
            weights[codon][0] + '\t0.00\n')
    elif len(weights[codon]) == 2:
        outputFile.write(
            str(codonDict[codon]) + '\t' + codon + '\t' + '%0.2f' %
            weights[codon][0] + '\t' + '%0.2f' % weights[codon][1] + '\n')
outputFile.close()

Create mRNA sequence file for leucine starvation whole-cell simulation (Run 1)


In [24]:
mutation_locations = [
    {
        15: 'cta'
    },
    {
        10: 'cta',
        14: 'cta',
        15: 'cta'
    },
]

yfpmutants = dict()
for mutant in mutation_locations:
    key = '_'.join(['yfp'] + [
        codon + str(location) for location, codon in mutant.items()
    ])
    yfpmutants[key] = list(yfp0)
    leucodon_number = 0
    for position in range(0, len(yfp0), 3):
        currentcodon = yfp0[position:position + 3]
        # proceed only if the codon is a Leu codon (which are all CTG in yfp0)
        if currentcodon not in ['CTG']:
            continue
        leucodon_number += 1
        for location in mutant.keys():
            if leucodon_number == location:
                yfpmutants[key][position:position + 3] = mutant[
                    location].upper()
    yfpmutants[key] = ''.join(yfpmutants[key])

# mrna copy number is arbitrary, but kept low so that
# ribosomes are not overloaded
defaultMrnaCopyNumber = 10  # per cell
# median initiation rate of all E. coli mRNAs in the simulation, s-1
defaultInitationRate = 0.3  # s-1,

listOfInitiationRates = [defaultInitationRate]

outputFile = '../annotations/simulations/run1/run1_ecoli_and_reporter_mrnas.csv'
shutil.copyfile('../annotations/simulations/run1/run1_ecoli_mrnas.csv',
                outputFile)

for initiationRate in listOfInitiationRates:
    File = open(outputFile, 'a')
    File.write("%0.3f\t%d\t%s\n" % (initiationRate, defaultMrnaCopyNumber,
                                    get_numerical_codon_sequence(yfp0[:-3])))
    for mutant in sorted(yfpmutants, reverse=True):
        num_seq = ''.join(
            get_numerical_codon_sequence(yfpmutants[mutant][:-3]))
        File.write("%0.3f\t%d\t%s\n" % (initiationRate, defaultMrnaCopyNumber,
                                        num_seq))
    File.close()

Simulation run 1: Single whole-cell simulation to calculate steady state charged tRNA fraction during Leu starvation


In [25]:
%%writefile simulation_run_1.py
#!/usr/bin/env python
# SBATCH --mem=8000

import subprocess as sp
import sys

jobindex = int(sys.argv[1])
currentindex = -1

for starvationfactor in [0.01]:
    currentindex += 1
    if currentindex != jobindex:
        continue
    sp.check_output(' '.join([
        './wholecell_simulation',
        '--threshold-time', '100',
        '--total-time', '150',
        '--aminoacid', 'leu',
        'starvationfactor', '%0.2f' % starvationfactor,
        '--aminoacid', 'leu',
        'relativetrnaaminoacylationrates', '1', '2.2', '1.2', '1', '0.1',
        '--output-prefix', '../rawdata/simulations/run1/',
        '--input-genes', '../annotations/simulations/run1/run1_ecoli_and_reporter_mrnas.csv'
    ]), shell=True)


Overwriting simulation_run_1.py

In [13]:
import subprocess as sp
# loop submits each simulation to a different node of the cluster
for index in range(1):
    sp.check_output([
        'sbatch',  # for SLURM cluster; this line can be commented out if running locally
        '-t',
        '15',  # for SLURM cluster; this line can be commented out if running locally
        '-n',
        '1',  # for SLURM cluster; this line can be commented out if running locally
        'simulation_run_1.py',
        str(index)
    ])

Create mRNA sequence for serine starvation whole cell simulation (Run 12)


In [26]:
aa = 'SER'

mutation_locations = [{
    2: 'tcg',
    3: 'tcg',
    4: 'tcg',
    5: 'tcg',
    6: 'tcg',
    7: 'tcg',
    8: 'tcg'
}, ]

yfpmutants = dict()
for mutant in mutation_locations:
    key = '_'.join(['yfp'] + [
        codon + str(location) for location, codon in mutant.items()
    ])
    yfpmutants[key] = list(yfp_agc)
    codon_number = 0
    for position in range(0, len(yfp_agc), 3):
        currentcodon = yfp_agc[position:position + 3]
        # proceed only if the codon is a Leu codon (which are all CTG in yfp0)
        if currentcodon not in SynonymousCodons[aa]:
            continue
        codon_number += 1
        for location in mutant.keys():
            if codon_number == location:
                yfpmutants[key][position:position + 3] = mutant[
                    location].upper()
    yfpmutants[key] = ''.join(yfpmutants[key])

# mrna copy number is arbitrary, but kept low so that
# ribosomes are not overloaded
defaultMrnaCopyNumber = 10  # per cell
# median initiation rate of all E. coli mRNAs in the simulation, s-1
defaultInitationRate = 0.3  # s-1,

listOfInitiationRates = [defaultInitationRate]

outputFile = '../annotations/simulations/run12/run12_ecoli_and_reporter_mrnas.csv'
shutil.copyfile('../annotations/simulations/run1/run1_ecoli_mrnas.csv',
                outputFile)

for initiationRate in listOfInitiationRates:
    File = open(outputFile, 'a')
    File.write("%0.3f\t%d\t%s\n" %
               (initiationRate, defaultMrnaCopyNumber,
                get_numerical_codon_sequence(yfp_agc[:-3])))
    for mutant in sorted(yfpmutants, reverse=True):
        num_seq = ''.join(
            get_numerical_codon_sequence(yfpmutants[mutant][:-3]))
        File.write("%0.3f\t%d\t%s\n" % (initiationRate, defaultMrnaCopyNumber,
                                        num_seq))
    File.close()

Simulation run 12: Single whole-cell simulation to calculate steady state charged tRNA fraction during Ser starvation


In [33]:
%%writefile simulation_run_12.py
#!/usr/bin/env python
# SBATCH --mem=8000

import subprocess as sp
import sys

jobindex = int(sys.argv[1])
currentindex = -1

for starvationfactor in [0.01]:
    currentindex += 1
    if currentindex != jobindex:
        continue
    sp.check_output(' '.join([
        './wholecell_simulation',
        '--threshold-time', '100',
        '--total-time', '150',
        '--aminoacid', 'ser',
        'starvationfactor', '%0.2f' % starvationfactor,
        '--aminoacid', 'ser',
        'relativetrnaaminoacylationrates', '0.1', '1.5', '1.5', '1',
        '--output-prefix', '../rawdata/simulations/run12/',
        '--input-genes', '../annotations/simulations/run12/run12_ecoli_and_reporter_mrnas.csv'
    ]), shell=True)


Overwriting simulation_run_12.py

In [34]:
import subprocess as sp
# loop submits each simulation to a different node of the cluster
for index in range(1):
    sp.check_output([
        'sbatch',  # for SLURM cluster; this line can be commented out if running locally
        '-t',
        '15',  # for SLURM cluster; this line can be commented out if running locally
        '-n',
        '1',  # for SLURM cluster; this line can be commented out if running locally
        'simulation_run_12.py',
        str(index)
    ])

Create average tRNA concentration file during leucine starvation based on whole-cell simulation Run 1


In [30]:
inputfile = '../rawdata/simulations/run1/mrnafile_run1_ecoli_and_reporter_mrnas_thresholdtime_100_totaltime_150_leu_starvationfactor_0.01_leu_relativetrnaaminoacylationrates_1_2.2_1.2_1_0.1/avg_ribo_tRNA.out'
trnaconcentrationfile = pd.read_table(
    '../annotations/simulations/wholecell.trna.concentrations.tsv')
trnaconcnlist = open(inputfile).readlines()
trnaConcn = list()
for trnatype in ['acylated']:
    temp = map(
        lambda x: re.findall('^' + trnatype + '_trna_(\d+)_([ACTG]{3})\t([\w\.]+)', x),
        trnaconcnlist)
    temp = filter(lambda x: len(x), temp)
    temp = pd.DataFrame(
        [x[0] for x in temp], columns=['trnaindex', 'anticodon', 'molpercell'])
    trnaConcn.append(temp)
trnaConcn = pd.concat(trnaConcn, join='inner', axis=1)
trnaConcn = trnaConcn.merge(
    trnaconcentrationfile[['anticodon', 'aminoacid']], how='right').fillna(0)
trnaConcn['trnaindex'] = trnaConcn['trnaindex'].apply(int)
trnaConcn['molpercell'] = trnaConcn['molpercell'].apply(
    lambda x: int(float(x)))
trnaConcn.sort_values(by='trnaindex').to_csv(
    '../annotations/simulations/leucine.starvation.average.trna.concentrations.tsv',
    index=False,
    sep='\t')

Create average tRNA concentration file during serine starvation based on whole-cell simulation Run 12


In [35]:
inputfile = (
    '../rawdata/simulations/run12/mrnafile_run12_ecoli_and_reporter_mrnas_'
    'thresholdtime_100_totaltime_150_ser_starvationfactor_0.01_'
    'ser_relativetrnaaminoacylationrates_0.1_1.5_1.5_1/avg_ribo_tRNA.out')
trnaconcentrationfile = pd.read_table(
    '../annotations/simulations/wholecell.trna.concentrations.tsv')
trnaconcnlist = open(inputfile).readlines()
trnaConcn = list()
for trnatype in ['acylated']:
    temp = map(
        lambda x: re.findall('^' + trnatype + '_trna_(\d+)_([ACTG]{3})\t([\w\.]+)', x),
        trnaconcnlist)
    temp = filter(lambda x: len(x), temp)
    temp = pd.DataFrame(
        [x[0] for x in temp], columns=['trnaindex', 'anticodon', 'molpercell'])
    trnaConcn.append(temp)
trnaConcn = pd.concat(trnaConcn, join='inner', axis=1)
trnaConcn = trnaConcn.merge(
    trnaconcentrationfile[['anticodon', 'aminoacid']], how='right').fillna(0)
trnaConcn['trnaindex'] = trnaConcn['trnaindex'].apply(int)
trnaConcn['molpercell'] = trnaConcn['molpercell'].apply(
    lambda x: int(float(x)))
trnaConcn.sort_values(by='trnaindex').to_csv(
    '../annotations/simulations/serine.starvation.average.trna.concentrations.tsv',
    index=False,
    sep='\t')

Create mRNA sequences for simulation run 2 with systematically varying tRNA accommodation rate (stall duration) at Leu codons


In [37]:
mutation_locations = [
    {
        'cta': 2
    },
    {
        'cta': 6
    },
    {
        'cta': 8
    },
    {
        'cta': 9
    },
    {
        'cta': 10
    },
    {
        'cta': 11
    },
    {
        'cta': 12
    },
    {
        'cta': 13
    },
    {
        'cta': 14
    },
    {
        'cta': 18
    },
    {
        'ctc': 2
    },
    {
        'ctc': 6
    },
    {
        'ctc': 8
    },
    {
        'ctc': 9
    },
    {
        'ctc': 10
    },
    {
        'ctc': 11
    },
    {
        'ctc': 12
    },
    {
        'ctc': 13
    },
    {
        'ctc': 14
    },
    {
        'ctc': 18
    },
    {
        'ctt': 2
    },
    {
        'ctt': 6
    },
    {
        'ctt': 10
    },
    {
        'ctt': 14
    },
    {
        'ctt': 18
    },
]

yfpmutants = dict()
for mutant in mutation_locations:
    key = '_'.join(['yfp'] + [
        codon + str(location) for codon, location in mutant.items()
    ])
    yfpmutants[key] = list(yfp0)
    leucodon_number = 0
    for position in range(0, len(yfp0), 3):
        currentcodon = yfp0[position:position + 3]
        # proceed only if the codon is a Leu codon (which are all CTG in yfp0)
        if currentcodon not in ['CTG']:
            continue
        leucodon_number += 1
        for codon in mutant.keys():
            if leucodon_number == mutant[codon]:
                yfpmutants[key][position:position + 3] = codon.upper()
    yfpmutants[key] = ''.join(yfpmutants[key])

defaultMrnaCopyNumber = 1  # per cell
defaultInitationRate = 0.3  # s-1, This is the median initiation rate

listOfInitiationRates = [defaultInitationRate]

for initiationRate in listOfInitiationRates:
    for mutant in yfpmutants:
        outputFile = '../annotations/simulations/run2/' + \
            '%s_initiationrate_%0.4f.csv' % (mutant, initiationRate)
        num_seq = ''.join(
            get_numerical_codon_sequence(yfpmutants[mutant][:-3]))
        File = open(outputFile, 'w')

        File.write("%0.4f\t%d\t%s\n" %
                   (initiationRate, defaultMrnaCopyNumber,
                    get_numerical_codon_sequence(yfp0[:-3])))

        File.write("%0.4f\t%d\t%s\n" % (initiationRate, defaultMrnaCopyNumber,
                                        num_seq))
    File.close()

Create varying tRNA accommodation rate (stall duration) at Leu codons for simulation run 2


In [38]:
stallstrengthranges = {
    'trafficjam': {
        'cta': np.linspace(
            0.01, 0.2, num=20),
        'ctc': np.linspace(
            0.02, 0.4, num=20),
        'ctt': np.linspace(
            0.02, 0.4, num=20),
    },
    '5primepreterm': {
        'cta': np.linspace(
            0.02, 0.4, num=20),
        'ctc': np.linspace(
            0.05, 1, num=20),
        'ctt': np.linspace(
            0.05, 1, num=20),
    },
    'selpreterm': {
        'cta': np.linspace(
            0.05, 1, num=20),
        'ctc': np.linspace(
            0.5, 10, num=20),
        'ctt': np.linspace(
            0.25, 5, num=20),
    }
}

# find the location of all leucine codons to convert leu codon serial number
# to absolute position along yfp in codon units for simulation
leupositions = dict()
leucodon_number = 1
for position in range(0, len(yfp0), 3):
    currentcodon = yfp0[position:position + 3]
    if currentcodon == 'CTG':
        leupositions[leucodon_number] = position / 3
        leucodon_number += 1

for model in stallstrengthranges:
    for index in range(20):
        fitdata = list()
        for mutant in mutation_locations:
            if len(mutant) > 1:
                raise "Only single mutants allowed"
            codon = mutant.keys()[0]
            pos = mutant.values()[0]
            fitdata.append({
                'codon': codonDict[codon.upper()],
                'pos': leupositions[pos],
                'stallstrength': stallstrengthranges[model][codon][index]
            })
        fitdata = pd.DataFrame(fitdata)
        fitdata.to_csv(
            '../annotations/simulations/run2/{0}_stallstrengthindex_{1}.tsv'.
            format(model, index),
            sep='\t',
            index=False)

Simulation run 2: Reporter simulation with systematically varying duration of ribosome stalling during leucine starvation


In [44]:
%%writefile simulation_run_2.py
#!/usr/bin/env python
# SBATCH --mem=8000

import subprocess as sp
import os
import sys
import itertools
import numpy as np

jobindex = int(sys.argv[1])
currentindex = -1

allfiles = os.listdir('../annotations/simulations/run2/')

mrnafiles = list(filter(lambda x: x.startswith('yfp'), allfiles))
mrnafiles = ['../annotations/simulations/run2/' + File for File in mrnafiles]

terminationRates = {
    'trafficjam': {'--5prime-preterm-rate': 0},
    '5primepreterm': {'--5prime-preterm-rate': 1},
    'selpreterm': {'--selective-preterm-rate': 1},
}

for (mrnafile,
     typeOfTermination
     ) in list(itertools.product(mrnafiles, terminationRates)):
    currentindex += 1
    if currentindex != jobindex:
        continue
    termoption = terminationRates[typeOfTermination].keys()[0]
    termvalue = terminationRates[typeOfTermination].values()[0]
    stallstrengthfiles = list(
        filter(lambda x: x.startswith(typeOfTermination), allfiles))
    stallstrengthfiles = [
        '../annotations/simulations/run2/' + File for File in stallstrengthfiles]
    for stallstrengthfile in stallstrengthfiles:
        cmd = ' '.join([
            './reporter_simulation',
            '--trna-concn', '../annotations/simulations/leucine.starvation.average.trna.concentrations.tsv',
            termoption, '%0.2f' % termvalue,
            '--threshold-accommodation-rate', '22',
            '--output-prefix', '../rawdata/simulations/run2/',
            '--stall-strength-file', stallstrengthfile,
            '--input-genes', mrnafile
        ])
        sp.check_output(cmd, shell=True)


Overwriting simulation_run_2.py

In [45]:
import subprocess as sp
# loop submits each simulation to a different node of the cluster
for index in range(75):
    sp.check_output([
        'sbatch',  # for SLURM cluster; this line can be commented out if running locally
        '-t',
        '300',  # for SLURM cluster; this line can be commented out if running locally
        '-n',
        '1',  # for SLURM cluster; this line can be commented out if running locally
        'simulation_run_2.py',
        str(index)
    ])

Create mRNA sequences for simulation run 13 with systematically varying tRNA accommodation rate (stall duration) at Ser codons


In [41]:
mutation_locations = [
    {
        'tcg': 2
    },
    {
        'tcg': 3
    },
    {
        'tcg': 4
    },
    {
        'tcg': 5
    },
    {
        'tcg': 6
    },
    {
        'tcg': 7
    },
]

yfpmutants = dict()
for mutant in mutation_locations:
    key = '_'.join(['yfp'] + [
        codon + str(location) for codon, location in mutant.items()
    ])
    yfpmutants[key] = list(yfp_agc)
    codon_number = 0
    for position in range(0, len(yfp_agc), 3):
        currentcodon = yfp_agc[position:position + 3]
        # proceed only if the codon is a Leu codon (which are all CTG in yfp0)
        if currentcodon not in ['AGC']:
            continue
        codon_number += 1
        for codon in mutant.keys():
            if codon_number == mutant[codon]:
                yfpmutants[key][position:position + 3] = codon.upper()
    yfpmutants[key] = ''.join(yfpmutants[key])

defaultMrnaCopyNumber = 1  # per cell
defaultInitationRate = 0.3  # s-1, This is the median initiation rate

listOfInitiationRates = [defaultInitationRate]

for initiationRate in listOfInitiationRates:
    for mutant in yfpmutants:
        outputFile = '../annotations/simulations/run13/' + \
            '%s_initiationrate_%0.4f.csv' % (mutant, initiationRate)
        num_seq = ''.join(
            get_numerical_codon_sequence(yfpmutants[mutant][:-3]))
        File = open(outputFile, 'w')

        File.write("%0.4f\t%d\t%s\n" %
                   (initiationRate, defaultMrnaCopyNumber,
                    get_numerical_codon_sequence(yfp_agc[:-3])))

        File.write("%0.4f\t%d\t%s\n" % (initiationRate, defaultMrnaCopyNumber,
                                        num_seq))
    File.close()

Create varying tRNA accommodation rate (stall duration) at Ser codons for simulation run 13


In [42]:
stallstrengthranges = {
    'trafficjam': {
        'tcg': np.linspace(
            0.02, 0.4, num=20),
    },
    '5primepreterm': {
        'tcg': np.linspace(
            0.05, 1, num=20),
    },
    'selpreterm': {
        'tcg': np.linspace(
            0.5, 10, num=20),
    }
}

# find the location of all leucine codons to convert leu codon serial number
# to absolute position along yfp in codon units for simulation
serpositions = dict()
sercodon_number = 1
for position in range(0, len(yfp_agc), 3):
    currentcodon = yfp_agc[position:position + 3]
    if currentcodon == 'AGC':
        serpositions[sercodon_number] = position / 3
        sercodon_number += 1

for model in stallstrengthranges:
    for index in range(20):
        fitdata = list()
        for mutant in mutation_locations:
            if len(mutant) > 1:
                raise "Only single mutants allowed"
            codon = mutant.keys()[0]
            pos = mutant.values()[0]
            fitdata.append({
                'codon': codonDict[codon.upper()],
                'pos': serpositions[pos],
                'stallstrength': stallstrengthranges[model][codon][index]
            })
        fitdata = pd.DataFrame(fitdata)
        fitdata.to_csv(
            '../annotations/simulations/run13/{0}_stallstrengthindex_{1}.tsv'.
            format(model, index),
            sep='\t',
            index=False)

Simulation run 13: Reporter simulation with systematically varying duration of ribosome stalling during serine starvation


In [46]:
%%writefile simulation_run_13.py
#!/usr/bin/env python
# SBATCH --mem=8000

import subprocess as sp
import os
import sys
import itertools
import numpy as np

jobindex = int(sys.argv[1])
currentindex = -1

allfiles = os.listdir('../annotations/simulations/run13/')

mrnafiles = list(filter(lambda x: x.startswith('yfp'), allfiles))
mrnafiles = ['../annotations/simulations/run13/' + File for File in mrnafiles]

terminationRates = {
    'trafficjam': {'--5prime-preterm-rate': 0},
    '5primepreterm': {'--5prime-preterm-rate': 1},
    'selpreterm': {'--selective-preterm-rate': 1},
}

for (mrnafile,
     typeOfTermination
     ) in list(itertools.product(mrnafiles, terminationRates)):
    currentindex += 1
    if currentindex != jobindex:
        continue
    termoption = terminationRates[typeOfTermination].keys()[0]
    termvalue = terminationRates[typeOfTermination].values()[0]
    stallstrengthfiles = list(
        filter(lambda x: x.startswith(typeOfTermination), allfiles))
    stallstrengthfiles = [
        '../annotations/simulations/run13/' + File for File in stallstrengthfiles]
    for stallstrengthfile in stallstrengthfiles:
        cmd = ' '.join([
            './reporter_simulation',
            '--trna-concn', '../annotations/simulations/serine.starvation.average.trna.concentrations.tsv',
            termoption, '%0.2f' % termvalue,
            '--threshold-accommodation-rate', '22',
            '--output-prefix', '../rawdata/simulations/run13/',
            '--stall-strength-file', stallstrengthfile,
            '--input-genes', mrnafile
        ])
        sp.check_output(cmd, shell=True)


Writing simulation_run_13.py

In [47]:
import subprocess as sp
# loop submits each simulation to a different node of the cluster
for index in range(21):
    sp.check_output([
        'sbatch',  # for SLURM cluster; this line can be commented out if running locally
        '-t',
        '300',  # for SLURM cluster; this line can be commented out if running locally
        '-n',
        '1',  # for SLURM cluster; this line can be commented out if running locally
        'simulation_run_13.py',
        str(index)
    ])