These input files are for simulation runs 3 – 16.
In [7]:
# os commands
import os
# sequence input and output
import Bio.SeqIO
# provides dictionary of codon names
from Bio.SeqUtils.CodonUsage import SynonymousCodons
# for converting 3 letter amino acid code to 1 letter code
from Bio.SeqUtils import seq1
# for tab data processing
import pandas as pd
# numeric and matrix library
import numpy as np
# shell utilities
import shutil
# for submitting shell commands
import subprocess as sp
# Give every sense codon an integer id: amino acids are visited in
# alphabetical order of their one-letter code, and the codons of each amino
# acid in alphabetical order.
# AGC and AGT are the exception: they get the fixed ids 59 and 60 and do not
# consume a running id, for consistent notation with
# Subramaniam et al. Cell 2014.
codonnum = 0
codonDict = dict()
for aa in sorted(SynonymousCodons, key=seq1):
    if aa != 'STOP':
        for codon in sorted(SynonymousCodons[aa]):
            if codon == 'AGC':
                codonDict[codon] = 59
            elif codon == 'AGT':
                codonDict[codon] = 60
            else:
                codonDict[codon] = codonnum
                codonnum = codonnum + 1
# to convert 3 letter codons to their codon numbers (see codonDict)
def get_numerical_codon_sequence(seq):
    """Return the codon numbers of a nucleotide sequence as one string.

    The sequence is read in frame starting at position 0, and every
    3-letter codon is looked up in the module-level ``codonDict``.

    NOTE(review): the loop bound is ``len(seq) - 3``, so when ``len(seq)``
    is a multiple of 3 the final codon is not emitted.  This is left
    unchanged because the cells in this file write the result into
    simulation input files; confirm whether the truncation is intended.

    Args:
        seq: nucleotide string whose codons are keys of ``codonDict``.

    Returns:
        The codon numbers joined by single spaces ('' if no codon is read).

    Raises:
        KeyError: if a codon is not present in ``codonDict``.
    """
    # A failed lookup propagates as KeyError, exactly as the previous
    # try/except (which re-raised immediately) did.
    return ' '.join(
        str(codonDict[seq[pos:pos + 3]])
        for pos in range(0, len(seq) - 3, 3))
# starting yfp sequence for leucine starvation expts
yfp0 = str(Bio.SeqIO.read('../annotations/simulations/yfp0.fa', 'fasta').seq)
# starting sequence for serine starvation expts:
# every in-frame Ser codon of yfp0 is replaced by AGC
yfp_agc = ''.join(
    'AGC' if yfp0[pos:pos + 3] in SynonymousCodons['SER']
    else yfp0[pos:pos + 3]
    for pos in range(0, len(yfp0), 3))
In [10]:
%matplotlib inline
import numpy as np
import pandas as pd
import pyfaidx
import os
import shutil
import HTSeq
import matplotlib.pyplot as plt
import cPickle as pickle # python native format storage library.
import Bio.SeqIO # dna sequence input and output.
from Bio.SeqUtils import seq1 # for converting 3 letter amino acid code to 1 letter code
from Bio.SeqUtils import CodonUsage # provides dictionary of codon names
from Bio.SeqUtils.CodonUsage import SynonymousCodons # provides dictionary of codon names
from Bio.SeqUtils import CodonUsageIndices # provides the Ecoli Codon
# Adaptation Index class.
from Bio.SeqRecord import SeqRecord # to read .gb and .fasta files.
import re # regular expression
import numpy.random # for creating random numbers
from Bio import Entrez # to retrieve records from NCBI
import itertools # iterator tools
from IPython.core.display import clear_output, HTML # for refreshing and displaying output
import copy
# Give every sense codon an integer id: amino acids are visited in
# alphabetical order of their one-letter code, and the codons of each amino
# acid in alphabetical order.
# AGC and AGT are the exception: they get the fixed ids 59 and 60 and do not
# consume a running id, for consistent notation with
# Subramaniam et al. Cell 2014.
codonnum = 0
codonDict = dict()
for aa in sorted(SynonymousCodons, key=seq1):
    if aa != 'STOP':
        for codon in sorted(SynonymousCodons[aa]):
            if codon == 'AGC':
                codonDict[codon] = 59
            elif codon == 'AGT':
                codonDict[codon] = 60
            else:
                codonDict[codon] = codonnum
                codonnum = codonnum + 1
# to convert 3 letter codons to their codon numbers (see codonDict)
def get_numerical_codon_sequence(seq):
    """Return the codon numbers of a nucleotide sequence as one string.

    The sequence is read in frame starting at position 0, and every
    3-letter codon is looked up in the module-level ``codonDict``.

    NOTE(review): the loop bound is ``len(seq) - 3``, so when ``len(seq)``
    is a multiple of 3 the final codon is not emitted.  This is left
    unchanged because the cells in this file write the result into
    simulation input files; confirm whether the truncation is intended.

    Args:
        seq: nucleotide string whose codons are keys of ``codonDict``.

    Returns:
        The codon numbers joined by single spaces ('' if no codon is read).

    Raises:
        KeyError: if a codon is not present in ``codonDict``.
    """
    # A failed lookup propagates as KeyError, exactly as the previous
    # try/except (which re-raised immediately) did.
    return ' '.join(
        str(codonDict[seq[pos:pos + 3]])
        for pos in range(0, len(seq) - 3, 3))
# starting yfp sequence for leucine starvation expts
yfp0 = str(Bio.SeqIO.read('../annotations/simulations/yfp0.fa', 'fasta').seq)
# starting sequence for serine starvation expts:
# every in-frame Ser codon of yfp0 is replaced by AGC
yfp_agc = ''.join(
    'AGC' if yfp0[pos:pos + 3] in SynonymousCodons['SER']
    else yfp0[pos:pos + 3]
    for pos in range(0, len(yfp0), 3))
In [8]:
# Reformat the 'run2_fit_stallstrength_for_initiation_*' fit results into the
# (codon number, codon position, stall strength) table written for
# simulation run 3, and print the fitted value of every mutant.
fitdatafolder = '../processeddata/simulations/'
fitdatafiles = [
    fitdatafolder + File for File in os.listdir(fitdatafolder)
    if File.startswith('run2_fit_stallstrength_for_initiation_')
]
# find the location of all leucine codons (CTG in yfp0) to convert leu codon
# serial number (counted from 1) to absolute position along yfp in codon
# units (counted from 0) for simulation
leupositions = dict()
leucodon_number = 1
for position in range(0, len(yfp0), 3):
    if yfp0[position:position + 3] == 'CTG':
        # floor division keeps the position an integer on Python 2 and 3
        leupositions[leucodon_number] = position // 3
        leucodon_number += 1
# codon number (see codonDict) -> codon name, used only for printing
codonnames = {23: 'CTA', 24: 'CTC', 26: 'CTT'}
# model tag at the end of the fit file name -> short label for printing
modelnames = {
    'trafficjam': 'TJ',
    'selpreterm': 'SAT',
    '5primepreterm': 'CSAT',
}
for fitdatafile in fitdatafiles:
    fitdata = pd.read_table(fitdatafile)
    # 'mutant' holds a 3-letter codon followed by its serial number
    fitdata['codon'] = fitdata['mutant'].apply(lambda string: string[:3])
    fitdata['pos'] = fitdata['mutant'].apply(lambda string: int(string[3:]))
    fitdata = fitdata.drop(['mutant'], axis=1)
    fitdata['pos'] = fitdata['pos'].apply(lambda pos: leupositions[pos])
    fitdata['codon'] = fitdata['codon'].apply(
        lambda codon: codonDict[codon.upper()])
    fitdata = fitdata[['codon', 'pos', 'stallstrength']]
    # the model tag is the last '_'-separated field, without the extension
    model = fitdatafile.split('_')[-1].split('.')[0]
    fitdata.to_csv(
        '../processeddata/simulations/run3_stallstrengthfits_' + model +
        '.tsv',
        sep='\t',
        index=False)
    for row in fitdata.iterrows():
        codon = row[1]['codon']
        pos = row[1]['pos']
        rate = row[1]['stallstrength']
        # print() with a single argument behaves the same on Python 2 and 3
        print('tRNA accommodation rate at {0}{1} ({2} model)\t{3:.3}s-1'.format(
            codonnames[codon], int(pos), modelnames[model], rate))
In [10]:
# Write the simulation input files for run 3: one file per
# (yfp mutant, initiation rate) pair, each holding a line for the unmutated
# yfp0 sequence followed by a line for the mutant sequence.
initiationRateFile = '../processeddata/platereader/inferred_initiationrates_for_initiation_simulations.tsv'
# each dict maps a Leu codon serial number (counted from 1 along yfp0) to the
# codon that replaces the CTG found there
mutation_locations = [
    {6: 'cta'}, {10: 'cta'}, {14: 'cta'}, {18: 'cta'},
    {10: 'ctc'}, {14: 'ctc'},
    {6: 'ctt'}, {10: 'ctt'},
]
yfpmutants = dict()
for mutant in mutation_locations:
    # NOTE: the key follows the dict's own iteration order; kept as is
    # because the key becomes part of the output file name
    key = '_'.join(['yfp'] + [
        codon + str(location) for location, codon in mutant.items()
    ])
    yfpmutants[key] = list(yfp0)
    leucodon_number = 0
    for position in range(0, len(yfp0), 3):
        # proceed only if the codon is a Leu codon (which are all CTG in yfp0)
        if yfp0[position:position + 3] != 'CTG':
            continue
        leucodon_number += 1
        if leucodon_number in mutant:
            yfpmutants[key][position:position + 3] = mutant[
                leucodon_number].upper()
    yfpmutants[key] = ''.join(yfpmutants[key])
defaultMrnaCopyNumber = 1  # per cell
defaultInitationRate = 0.3  # s-1, This is the median initiation rate
experimentInitiationRates = pd.read_table(initiationRateFile, index_col=0)
# scale the inferred rates by the default initiation rate
experimentInitiationRates['initiationRateForSimulation'] = (
    experimentInitiationRates['inferredInitiationRate'] *
    defaultInitationRate)
print('Inferred initiation rates (s-1)')
for initiationRate in experimentInitiationRates['initiationRateForSimulation']:
    for mutant in yfpmutants:
        outputFile = '../annotations/simulations/run3/' + \
            '%s_initiationrate_%0.4g.csv' % (mutant, initiationRate)
        # [:-3] drops the last three nucleotides before conversion
        # (presumably the stop codon -- confirm against yfp0.fa)
        num_seq = get_numerical_codon_sequence(yfpmutants[mutant][:-3])
        # the context manager closes the file even if a write fails
        with open(outputFile, 'w') as File:
            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber,
                        get_numerical_codon_sequence(yfp0[:-3])))
            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber, num_seq))
    print('%0.3f' % initiationRate)
In [12]:
# Reformat the 'run13_serine_fit_stallstrength_for_initiation_*' fit results
# into the (codon number, codon position, stall strength) table written for
# simulation run 14, and print the fitted value of every mutant.
fitdatafolder = '../processeddata/simulations/'
fitdatafiles = [
    fitdatafolder + File for File in os.listdir(fitdatafolder)
    if File.startswith('run13_serine_fit_stallstrength_for_initiation_')
]
# find the location of all serine codons (AGC in yfp_agc) to convert ser
# codon serial number (counted from 1) to absolute position along yfp in
# codon units (counted from 0) for simulation
serpositions = dict()
sercodon_number = 1
for position in range(0, len(yfp_agc), 3):
    if yfp_agc[position:position + 3] == 'AGC':
        # floor division keeps the position an integer on Python 2 and 3
        serpositions[sercodon_number] = position // 3
        sercodon_number += 1
# codon number (see codonDict) -> codon name, used only for printing
codonnames = {46: 'TCG'}
# model tag at the end of the fit file name -> short label for printing
modelnames = {
    'trafficjam': 'TJ',
    'selpreterm': 'SAT',
    '5primepreterm': 'CSAT',
}
for fitdatafile in fitdatafiles:
    fitdata = pd.read_table(fitdatafile)
    # 'mutant' holds a 3-letter codon followed by its serial number
    fitdata['codon'] = fitdata['mutant'].apply(lambda string: string[:3])
    fitdata['pos'] = fitdata['mutant'].apply(lambda string: int(string[3:]))
    fitdata = fitdata.drop(['mutant'], axis=1)
    fitdata['pos'] = fitdata['pos'].apply(lambda pos: serpositions[pos])
    fitdata['codon'] = fitdata['codon'].apply(
        lambda codon: codonDict[codon.upper()])
    fitdata = fitdata[['codon', 'pos', 'stallstrength']]
    # the model tag is the last '_'-separated field, without the extension
    model = fitdatafile.split('_')[-1].split('.')[0]
    fitdata.to_csv(
        '../processeddata/simulations/run14_stallstrengthfits_' + model +
        '.tsv',
        sep='\t',
        index=False)
    for row in fitdata.iterrows():
        codon = row[1]['codon']
        pos = row[1]['pos']
        rate = row[1]['stallstrength']
        # print() with a single argument behaves the same on Python 2 and 3
        print('tRNA accommodation rate at {0}{1} ({2} model)\t{3:.3}s-1'.format(
            codonnames[codon], int(pos), modelnames[model], rate))
In [15]:
# Write the simulation input files for run 14: one file per
# (yfp_agc mutant, initiation rate) pair, each holding a line for the
# unmutated yfp_agc sequence followed by a line for the mutant sequence.
initiationRateFile = '../processeddata/platereader/inferred_initiationrates_for_initiation_simulations.tsv'
# each dict maps a Ser codon serial number (counted from 1 along yfp_agc) to
# the codon that replaces the AGC found there
mutation_locations = [{4: 'tcg'}, {5: 'tcg'}, {6: 'tcg'}]
yfpmutants = dict()
for mutant in mutation_locations:
    # NOTE: the key follows the dict's own iteration order; kept as is
    # because the key becomes part of the output file name
    key = '_'.join(['yfp'] + [
        codon + str(location) for location, codon in mutant.items()
    ])
    yfpmutants[key] = list(yfp_agc)
    sercodon_number = 0
    for position in range(0, len(yfp_agc), 3):
        # proceed only if the codon is a Ser codon (which are all AGC in yfp_agc)
        if yfp_agc[position:position + 3] != 'AGC':
            continue
        sercodon_number += 1
        if sercodon_number in mutant:
            yfpmutants[key][position:position + 3] = mutant[
                sercodon_number].upper()
    yfpmutants[key] = ''.join(yfpmutants[key])
defaultMrnaCopyNumber = 1  # per cell
defaultInitationRate = 0.3  # s-1, This is the median initiation rate
experimentInitiationRates = pd.read_table(initiationRateFile, index_col=0)
# scale the inferred rates by the default initiation rate
experimentInitiationRates['initiationRateForSimulation'] = (
    experimentInitiationRates['inferredInitiationRate'] *
    defaultInitationRate)
print('Inferred initiation rates (s-1)')
for initiationRate in experimentInitiationRates['initiationRateForSimulation']:
    for mutant in yfpmutants:
        outputFile = '../annotations/simulations/run14/' + \
            '%s_initiationrate_%0.4g.csv' % (mutant, initiationRate)
        # [:-3] drops the last three nucleotides before conversion
        # (presumably the stop codon -- confirm against yfp0.fa)
        num_seq = get_numerical_codon_sequence(yfpmutants[mutant][:-3])
        # the context manager closes the file even if a write fails
        with open(outputFile, 'w') as File:
            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber,
                        get_numerical_codon_sequence(yfp_agc[:-3])))
            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber, num_seq))
    print('%0.3f' % initiationRate)
In [16]:
# Reformat the 'run2_fit_stallstrength_for_double_*' fit results into the
# (codon number, codon position, stall strength) table written for
# simulation run 4, and print the fitted value of every mutant.
fitdatafolder = '../processeddata/simulations/'
fitdatafiles = [
    fitdatafolder + File for File in os.listdir(fitdatafolder)
    if File.startswith('run2_fit_stallstrength_for_double_')
]
# find the location of all leucine codons (CTG in yfp0) to convert leu codon
# serial number (counted from 1) to absolute position along yfp in codon
# units (counted from 0) for simulation
leupositions = dict()
leucodon_number = 1
for position in range(0, len(yfp0), 3):
    if yfp0[position:position + 3] == 'CTG':
        # floor division keeps the position an integer on Python 2 and 3
        leupositions[leucodon_number] = position // 3
        leucodon_number += 1
# codon number (see codonDict) -> codon name, used only for printing
codonnames = {23: 'CTA', 24: 'CTC', 26: 'CTT'}
# model tag at the end of the fit file name -> short label for printing
modelnames = {
    'trafficjam': 'TJ',
    'selpreterm': 'SAT',
    '5primepreterm': 'CSAT',
}
for fitdatafile in fitdatafiles:
    fitdata = pd.read_table(fitdatafile)
    # 'mutant' holds a 3-letter codon followed by its serial number
    fitdata['codon'] = fitdata['mutant'].apply(lambda string: string[:3])
    fitdata['pos'] = fitdata['mutant'].apply(lambda string: int(string[3:]))
    fitdata = fitdata.drop(['mutant'], axis=1)
    fitdata['pos'] = fitdata['pos'].apply(lambda pos: leupositions[pos])
    fitdata['codon'] = fitdata['codon'].apply(
        lambda codon: codonDict[codon.upper()])
    fitdata = fitdata[['codon', 'pos', 'stallstrength']]
    # the model tag is the last '_'-separated field, without the extension
    model = fitdatafile.split('_')[-1].split('.')[0]
    fitdata.to_csv(
        '../processeddata/simulations/run4_stallstrengthfits_' + model +
        '.tsv',
        sep='\t',
        index=False)
    for row in fitdata.iterrows():
        codon = row[1]['codon']
        pos = row[1]['pos']
        rate = row[1]['stallstrength']
        # print() with a single argument behaves the same on Python 2 and 3
        print('tRNA accommodation rate at {0}{1} ({2} model)\t{3:.3}s-1'.format(
            codonnames[codon], int(pos), modelnames[model], rate))
In [17]:
# Write the simulation input files for run 4 (single and double CTC / CTT
# mutants of yfp0): one file per mutant, each holding a line for the
# unmutated yfp0 sequence followed by a line for the mutant sequence.
# Each dict maps a Leu codon serial number (counted from 1 along yfp0) to the
# codon that replaces the CTG found there.
mutation_locations = [
    {2: 'ctc'}, {6: 'ctc'}, {10: 'ctc'}, {14: 'ctc'}, {18: 'ctc'},
    {2: 'ctc', 6: 'ctc'}, {2: 'ctc', 10: 'ctc'},
    {2: 'ctc', 14: 'ctc'}, {2: 'ctc', 18: 'ctc'},
    {6: 'ctc', 10: 'ctc'}, {6: 'ctc', 14: 'ctc'}, {6: 'ctc', 18: 'ctc'},
    {10: 'ctc', 14: 'ctc'}, {10: 'ctc', 18: 'ctc'},
    {14: 'ctc', 18: 'ctc'},
    {2: 'ctt'}, {6: 'ctt'}, {10: 'ctt'}, {14: 'ctt'}, {18: 'ctt'},
    {2: 'ctt', 6: 'ctt'}, {2: 'ctt', 10: 'ctt'},
    {2: 'ctt', 14: 'ctt'}, {2: 'ctt', 18: 'ctt'},
    {6: 'ctt', 10: 'ctt'}, {6: 'ctt', 14: 'ctt'}, {6: 'ctt', 18: 'ctt'},
    {10: 'ctt', 14: 'ctt'}, {10: 'ctt', 18: 'ctt'},
    {14: 'ctt', 18: 'ctt'},
]
yfpmutants = dict()
for mutant in mutation_locations:
    # NOTE: the key follows the dict's own iteration order; kept as is
    # because the key becomes part of the output file name
    key = '_'.join(['yfp'] + [
        codon + str(location) for location, codon in mutant.items()
    ])
    yfpmutants[key] = list(yfp0)
    leucodon_number = 0
    for position in range(0, len(yfp0), 3):
        # proceed only if the codon is a Leu codon (which are all CTG in yfp0)
        if yfp0[position:position + 3] != 'CTG':
            continue
        leucodon_number += 1
        if leucodon_number in mutant:
            yfpmutants[key][position:position + 3] = mutant[
                leucodon_number].upper()
    yfpmutants[key] = ''.join(yfpmutants[key])
defaultMrnaCopyNumber = 1  # per cell
defaultInitationRate = 0.3  # s-1, This is the median initiation rate
listOfInitiationRates = [defaultInitationRate]
for initiationRate in listOfInitiationRates:
    for mutant in yfpmutants:
        outputFile = '../annotations/simulations/run4/' + \
            '%s_initiationrate_%0.4g.csv' % (mutant, initiationRate)
        # [:-3] drops the last three nucleotides before conversion
        # (presumably the stop codon -- confirm against yfp0.fa)
        num_seq = get_numerical_codon_sequence(yfpmutants[mutant][:-3])
        # the context manager closes the file even if a write fails
        with open(outputFile, 'w') as File:
            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber,
                        get_numerical_codon_sequence(yfp0[:-3])))
            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber, num_seq))
In [19]:
# Reformat the 'run2_fit_stallstrength_for_leucine_multiple_*' fit results
# into the (codon number, codon position, stall strength) table written for
# simulation run 16, and print the fitted value of every mutant.
fitdatafolder = '../processeddata/simulations/'
fitdatafiles = [
    fitdatafolder + File for File in os.listdir(fitdatafolder)
    if File.startswith('run2_fit_stallstrength_for_leucine_multiple_')
]
# find the location of all leucine codons (CTG in yfp0) to convert leu codon
# serial number (counted from 1) to absolute position along yfp in codon
# units (counted from 0) for simulation
leupositions = dict()
leucodon_number = 1
for position in range(0, len(yfp0), 3):
    if yfp0[position:position + 3] == 'CTG':
        # floor division keeps the position an integer on Python 2 and 3
        leupositions[leucodon_number] = position // 3
        leucodon_number += 1
# codon number (see codonDict) -> codon name, used only for printing
codonnames = {23: 'CTA', 24: 'CTC', 26: 'CTT'}
# model tag at the end of the fit file name -> short label for printing
modelnames = {
    'trafficjam': 'TJ',
    'selpreterm': 'SAT',
    '5primepreterm': 'CSAT',
}
for fitdatafile in fitdatafiles:
    fitdata = pd.read_table(fitdatafile)
    # 'mutant' holds a 3-letter codon followed by its serial number
    fitdata['codon'] = fitdata['mutant'].apply(lambda string: string[:3])
    fitdata['pos'] = fitdata['mutant'].apply(lambda string: int(string[3:]))
    fitdata = fitdata.drop(['mutant'], axis=1)
    fitdata['pos'] = fitdata['pos'].apply(lambda pos: leupositions[pos])
    fitdata['codon'] = fitdata['codon'].apply(
        lambda codon: codonDict[codon.upper()])
    fitdata = fitdata[['codon', 'pos', 'stallstrength']]
    # the model tag is the last '_'-separated field, without the extension
    model = fitdatafile.split('_')[-1].split('.')[0]
    fitdata.to_csv(
        '../processeddata/simulations/run16_stallstrengthfits_' + model +
        '.tsv',
        sep='\t',
        index=False)
    for row in fitdata.iterrows():
        codon = row[1]['codon']
        pos = row[1]['pos']
        rate = row[1]['stallstrength']
        # print() with a single argument behaves the same on Python 2 and 3
        print('tRNA accommodation rate at {0}{1} ({2} model)\t{3:.3}s-1'.format(
            codonnames[codon], int(pos), modelnames[model], rate))
In [21]:
# Write the simulation input files for run 16 (single to quadruple CTA
# mutants of yfp0): one file per mutant, each holding a line for the
# unmutated yfp0 sequence followed by a line for the mutant sequence.
# Each dict maps a Leu codon serial number (counted from 1 along yfp0) to the
# codon that replaces the CTG found there.
mutation_locations = [
    {2: 'cta'}, {6: 'cta'}, {10: 'cta'}, {14: 'cta'}, {18: 'cta'},
    {2: 'cta', 6: 'cta'}, {2: 'cta', 10: 'cta'},
    {2: 'cta', 14: 'cta'}, {2: 'cta', 18: 'cta'},
    {6: 'cta', 10: 'cta'}, {6: 'cta', 14: 'cta'}, {6: 'cta', 18: 'cta'},
    {10: 'cta', 14: 'cta'}, {10: 'cta', 18: 'cta'},
    {14: 'cta', 18: 'cta'},
    {6: 'cta', 10: 'cta', 14: 'cta'},
    {6: 'cta', 10: 'cta', 18: 'cta'},
    {6: 'cta', 10: 'cta', 14: 'cta', 18: 'cta'},
]
yfpmutants = dict()
for mutant in mutation_locations:
    # NOTE: the key follows the dict's own iteration order; kept as is
    # because the key becomes part of the output file name
    key = '_'.join(['yfp'] + [
        codon + str(location) for location, codon in mutant.items()
    ])
    yfpmutants[key] = list(yfp0)
    leucodon_number = 0
    for position in range(0, len(yfp0), 3):
        # proceed only if the codon is a Leu codon (which are all CTG in yfp0)
        if yfp0[position:position + 3] != 'CTG':
            continue
        leucodon_number += 1
        if leucodon_number in mutant:
            yfpmutants[key][position:position + 3] = mutant[
                leucodon_number].upper()
    yfpmutants[key] = ''.join(yfpmutants[key])
defaultMrnaCopyNumber = 1  # per cell
defaultInitationRate = 0.3  # s-1, This is the median initiation rate
listOfInitiationRates = [defaultInitationRate]
for initiationRate in listOfInitiationRates:
    for mutant in yfpmutants:
        outputFile = '../annotations/simulations/run16/' + \
            '%s_initiationrate_%0.4g.csv' % (mutant, initiationRate)
        # [:-3] drops the last three nucleotides before conversion
        # (presumably the stop codon -- confirm against yfp0.fa)
        num_seq = get_numerical_codon_sequence(yfpmutants[mutant][:-3])
        # the context manager closes the file even if a write fails
        with open(outputFile, 'w') as File:
            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber,
                        get_numerical_codon_sequence(yfp0[:-3])))
            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber, num_seq))
In [22]:
# Reformat the 'run13_serine_fit_stallstrength_for_double_*' fit results into
# the (codon number, codon position, stall strength) table written for
# simulation run 15, and print the fitted value of every mutant.
fitdatafolder = '../processeddata/simulations/'
fitdatafiles = [
    fitdatafolder + File for File in os.listdir(fitdatafolder)
    if File.startswith('run13_serine_fit_stallstrength_for_double_')
]
# find the location of all serine codons (AGC in yfp_agc) to convert ser
# codon serial number (counted from 1) to absolute position along yfp in
# codon units (counted from 0) for simulation
serpositions = dict()
sercodon_number = 1
for position in range(0, len(yfp_agc), 3):
    if yfp_agc[position:position + 3] == 'AGC':
        # floor division keeps the position an integer on Python 2 and 3
        serpositions[sercodon_number] = position // 3
        sercodon_number += 1
# codon number (see codonDict) -> codon name, used only for printing
codonnames = {46: 'TCG'}
# model tag at the end of the fit file name -> short label for printing
modelnames = {
    'trafficjam': 'TJ',
    'selpreterm': 'SAT',
    '5primepreterm': 'CSAT',
}
for fitdatafile in fitdatafiles:
    fitdata = pd.read_table(fitdatafile)
    # 'mutant' holds a 3-letter codon followed by its serial number
    fitdata['codon'] = fitdata['mutant'].apply(lambda string: string[:3])
    fitdata['pos'] = fitdata['mutant'].apply(lambda string: int(string[3:]))
    fitdata = fitdata.drop(['mutant'], axis=1)
    fitdata['pos'] = fitdata['pos'].apply(lambda pos: serpositions[pos])
    fitdata['codon'] = fitdata['codon'].apply(
        lambda codon: codonDict[codon.upper()])
    fitdata = fitdata[['codon', 'pos', 'stallstrength']]
    # the model tag is the last '_'-separated field, without the extension
    model = fitdatafile.split('_')[-1].split('.')[0]
    fitdata.to_csv(
        '../processeddata/simulations/run15_stallstrengthfits_' + model +
        '.tsv',
        sep='\t',
        index=False)
    for row in fitdata.iterrows():
        codon = row[1]['codon']
        pos = row[1]['pos']
        rate = row[1]['stallstrength']
        # print() with a single argument behaves the same on Python 2 and 3
        print('tRNA accommodation rate at {0}{1} ({2} model)\t{3:.3}s-1'.format(
            codonnames[codon], int(pos), modelnames[model], rate))
In [23]:
# Write the simulation input files for run 15 (single and double TCG mutants
# of yfp_agc): one file per mutant, each holding a line for the unmutated
# yfp_agc sequence followed by a line for the mutant sequence.
# Each dict maps a Ser codon serial number (counted from 1 along yfp_agc) to
# the codon that replaces the AGC found there.
mutation_locations = [
    {2: 'tcg'}, {3: 'tcg'}, {4: 'tcg'}, {5: 'tcg'}, {6: 'tcg'}, {7: 'tcg'},
    {2: 'tcg', 5: 'tcg'}, {2: 'tcg', 6: 'tcg'}, {2: 'tcg', 7: 'tcg'},
    {3: 'tcg', 5: 'tcg'}, {3: 'tcg', 6: 'tcg'}, {3: 'tcg', 7: 'tcg'},
    {4: 'tcg', 5: 'tcg'}, {4: 'tcg', 6: 'tcg'}, {4: 'tcg', 7: 'tcg'},
]
yfpmutants = dict()
for mutant in mutation_locations:
    # NOTE: the key follows the dict's own iteration order; kept as is
    # because the key becomes part of the output file name
    key = '_'.join(['yfp'] + [
        codon + str(location) for location, codon in mutant.items()
    ])
    yfpmutants[key] = list(yfp_agc)
    sercodon_number = 0
    for position in range(0, len(yfp_agc), 3):
        # proceed only if the codon is a Ser codon (which are all AGC in yfp_agc)
        if yfp_agc[position:position + 3] != 'AGC':
            continue
        sercodon_number += 1
        if sercodon_number in mutant:
            yfpmutants[key][position:position + 3] = mutant[
                sercodon_number].upper()
    yfpmutants[key] = ''.join(yfpmutants[key])
defaultMrnaCopyNumber = 1  # per cell
defaultInitationRate = 0.3  # s-1, This is the median initiation rate
listOfInitiationRates = [defaultInitationRate]
for initiationRate in listOfInitiationRates:
    for mutant in yfpmutants:
        outputFile = '../annotations/simulations/run15/' + \
            '%s_initiationrate_%0.4g.csv' % (mutant, initiationRate)
        # [:-3] drops the last three nucleotides before conversion
        # (presumably the stop codon -- confirm against yfp0.fa)
        num_seq = get_numerical_codon_sequence(yfpmutants[mutant][:-3])
        # the context manager closes the file even if a write fails
        with open(outputFile, 'w') as File:
            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber,
                        get_numerical_codon_sequence(yfp_agc[:-3])))
            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber, num_seq))
In [27]:
# Reformat the 'run2_fit_stallstrength_for_ctc_distance_*' fit results into
# the (codon number, codon position, stall strength) table written for
# simulation run 5, and print the fitted value of every mutant.
fitdatafolder = '../processeddata/simulations/'
fitdatafiles = [
    fitdatafolder + File for File in os.listdir(fitdatafolder)
    if File.startswith('run2_fit_stallstrength_for_ctc_distance_')
]
# find the location of all leucine codons (CTG in yfp0) to convert leu codon
# serial number (counted from 1) to absolute position along yfp in codon
# units (counted from 0) for simulation
leupositions = dict()
leucodon_number = 1
for position in range(0, len(yfp0), 3):
    if yfp0[position:position + 3] == 'CTG':
        # floor division keeps the position an integer on Python 2 and 3
        leupositions[leucodon_number] = position // 3
        leucodon_number += 1
# codon number (see codonDict) -> codon name, used only for printing
codonnames = {23: 'CTA', 24: 'CTC', 26: 'CTT'}
# model tag at the end of the fit file name -> short label for printing
modelnames = {
    'trafficjam': 'TJ',
    'selpreterm': 'SAT',
    '5primepreterm': 'CSAT',
}
for fitdatafile in fitdatafiles:
    fitdata = pd.read_table(fitdatafile)
    # 'mutant' holds a 3-letter codon followed by its serial number
    fitdata['codon'] = fitdata['mutant'].apply(lambda string: string[:3])
    fitdata['pos'] = fitdata['mutant'].apply(lambda string: int(string[3:]))
    fitdata = fitdata.drop(['mutant'], axis=1)
    fitdata['pos'] = fitdata['pos'].apply(lambda pos: leupositions[pos])
    fitdata['codon'] = fitdata['codon'].apply(
        lambda codon: codonDict[codon.upper()])
    fitdata = fitdata[['codon', 'pos', 'stallstrength']]
    # the model tag is the last '_'-separated field, without the extension
    model = fitdatafile.split('_')[-1].split('.')[0]
    fitdata.to_csv(
        '../processeddata/simulations/run5_stallstrengthfits_' + model +
        '.tsv',
        sep='\t',
        index=False)
    for row in fitdata.iterrows():
        codon = row[1]['codon']
        pos = row[1]['pos']
        rate = row[1]['stallstrength']
        # print() with a single argument behaves the same on Python 2 and 3
        print('tRNA accommodation rate at {0}{1} ({2} model)\t{3:.3}s-1'.format(
            codonnames[codon], int(pos), modelnames[model], rate))
In [28]:
# Write the simulation input files for run 5 (single and double CTC mutants
# at Leu codons 8-14 of yfp0): one file per mutant, each holding a line for
# the unmutated yfp0 sequence followed by a line for the mutant sequence.
# Each dict maps a Leu codon serial number (counted from 1 along yfp0) to the
# codon that replaces the CTG found there.
mutation_locations = [
    {8: 'ctc'}, {9: 'ctc'}, {10: 'ctc'}, {11: 'ctc'},
    {12: 'ctc'}, {13: 'ctc'}, {14: 'ctc'},
    {8: 'ctc', 9: 'ctc'}, {8: 'ctc', 10: 'ctc'}, {8: 'ctc', 11: 'ctc'},
    {8: 'ctc', 12: 'ctc'}, {8: 'ctc', 13: 'ctc'}, {8: 'ctc', 14: 'ctc'},
    {9: 'ctc', 10: 'ctc'}, {9: 'ctc', 11: 'ctc'}, {9: 'ctc', 12: 'ctc'},
    {9: 'ctc', 13: 'ctc'}, {9: 'ctc', 14: 'ctc'},
    {11: 'ctc', 12: 'ctc'},
    {13: 'ctc', 14: 'ctc'},
]
yfpmutants = dict()
for mutant in mutation_locations:
    # NOTE: the key follows the dict's own iteration order; kept as is
    # because the key becomes part of the output file name
    key = '_'.join(['yfp'] + [
        codon + str(location) for location, codon in mutant.items()
    ])
    yfpmutants[key] = list(yfp0)
    leucodon_number = 0
    for position in range(0, len(yfp0), 3):
        # proceed only if the codon is a Leu codon (which are all CTG in yfp0)
        if yfp0[position:position + 3] != 'CTG':
            continue
        leucodon_number += 1
        if leucodon_number in mutant:
            yfpmutants[key][position:position + 3] = mutant[
                leucodon_number].upper()
    yfpmutants[key] = ''.join(yfpmutants[key])
defaultMrnaCopyNumber = 1  # per cell
defaultInitationRate = 0.3  # s-1, This is the median initiation rate
listOfInitiationRates = [defaultInitationRate]
for initiationRate in listOfInitiationRates:
    for mutant in yfpmutants:
        outputFile = '../annotations/simulations/run5/' + \
            '%s_initiationrate_%0.4g.csv' % (mutant, initiationRate)
        # [:-3] drops the last three nucleotides before conversion
        # (presumably the stop codon -- confirm against yfp0.fa)
        num_seq = get_numerical_codon_sequence(yfpmutants[mutant][:-3])
        # the context manager closes the file even if a write fails
        with open(outputFile, 'w') as File:
            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber,
                        get_numerical_codon_sequence(yfp0[:-3])))
            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber, num_seq))
In [29]:
# Build the 'runs678_stallstrengthfits_*' tables from the run4 fits: the
# stall strength fitted for codon 23 (CTA) at position 45 is assigned to
# codon 23 at every position from 1 to 237.
fitdatafolder = '../processeddata/simulations/'
fitdatafiles = [
    fitdatafolder + File for File in os.listdir(fitdatafolder)
    if File.startswith('run4_stallstrengthfits_')
]
for fitdatafile in fitdatafiles:
    fitdata = pd.read_table(fitdatafile)
    fitdata = fitdata[['codon', 'pos', 'stallstrength']]
    fitdata = fitdata[fitdata['codon'] == 23]  # CTA codon
    # Use CTA 6 (Leu45) values for all codons.
    # After reset_index() the first matching row has label 0, so .loc[0]
    # selects the same row the removed pandas .ix[0] accessor did.
    commonstallstrength = fitdata[fitdata['pos'] == 45].reset_index().loc[0][
        'stallstrength']
    newdataframe = pd.DataFrame(
        [[23, loop, commonstallstrength] for loop in range(1, 238)],
        columns=fitdata.columns)
    # the model tag is the last '_'-separated field, without the extension
    model = fitdatafile.split('_')[-1].split('.')[0]
    newdataframe.to_csv(
        '../processeddata/simulations/runs678_stallstrengthfits_' + model +
        '.tsv',
        sep='\t',
        index=False)
In [31]:
# Write the simulation input files for run 6: the CTA mutant at Leu codon 6
# of yfp0, at eight initiation rates (0.3 * 2**k for k = -4 .. 3).  Each file
# holds a line for the unmutated yfp0 sequence followed by a line for the
# mutant sequence.
# Each dict maps a Leu codon serial number (counted from 1 along yfp0) to the
# codon that replaces the CTG found there.
mutation_locations = [{6: 'cta'}]
yfpmutants = dict()
for mutant in mutation_locations:
    # NOTE: the key follows the dict's own iteration order; kept as is
    # because the key becomes part of the output file name
    key = '_'.join(['yfp'] + [
        codon + str(location) for location, codon in mutant.items()
    ])
    yfpmutants[key] = list(yfp0)
    leucodon_number = 0
    for position in range(0, len(yfp0), 3):
        # proceed only if the codon is a Leu codon (which are all CTG in yfp0)
        if yfp0[position:position + 3] != 'CTG':
            continue
        leucodon_number += 1
        if leucodon_number in mutant:
            yfpmutants[key][position:position + 3] = mutant[
                leucodon_number].upper()
    yfpmutants[key] = ''.join(yfpmutants[key])
defaultMrnaCopyNumber = 1  # per cell
defaultInitationRate = 0.3  # s-1, This is the median initiation rate
listOfInitiationRates = defaultInitationRate * 2**np.array(
    range(-4, 4, 1), dtype=np.float64)
for initiationRate in listOfInitiationRates:
    for mutant in yfpmutants:
        outputFile = '../annotations/simulations/run6/' + \
            '%s_initiationrate_%0.4g.csv' % (mutant, initiationRate)
        # [:-3] drops the last three nucleotides before conversion
        # (presumably the stop codon -- confirm against yfp0.fa)
        num_seq = get_numerical_codon_sequence(yfpmutants[mutant][:-3])
        # the context manager closes the file even if a write fails
        with open(outputFile, 'w') as File:
            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber,
                        get_numerical_codon_sequence(yfp0[:-3])))
            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber, num_seq))
In [32]:
# Write the simulation input files for run 7 (single CTA mutants and
# cumulative CTA mutants at Leu codons 2, 6, 10, 14, 18 of yfp0): one file
# per mutant, each holding a line for the unmutated yfp0 sequence followed by
# a line for the mutant sequence.
# Each dict maps a Leu codon serial number (counted from 1 along yfp0) to the
# codon that replaces the CTG found there.
mutation_locations = [
    {2: 'cta'}, {6: 'cta'}, {10: 'cta'}, {14: 'cta'}, {18: 'cta'},
    {2: 'cta', 6: 'cta'},
    {2: 'cta', 6: 'cta', 10: 'cta'},
    {2: 'cta', 6: 'cta', 10: 'cta', 14: 'cta'},
    {2: 'cta', 6: 'cta', 10: 'cta', 14: 'cta', 18: 'cta'},
]
yfpmutants = dict()
for mutant in mutation_locations:
    # NOTE: the key follows the dict's own iteration order; kept as is
    # because the key becomes part of the output file name
    key = '_'.join(['yfp'] + [
        codon + str(location) for location, codon in mutant.items()
    ])
    yfpmutants[key] = list(yfp0)
    leucodon_number = 0
    for position in range(0, len(yfp0), 3):
        # proceed only if the codon is a Leu codon (which are all CTG in yfp0)
        if yfp0[position:position + 3] != 'CTG':
            continue
        leucodon_number += 1
        if leucodon_number in mutant:
            yfpmutants[key][position:position + 3] = mutant[
                leucodon_number].upper()
    yfpmutants[key] = ''.join(yfpmutants[key])
defaultMrnaCopyNumber = 1  # per cell
defaultInitationRate = 0.3  # s-1, This is the median initiation rate
listOfInitiationRates = [defaultInitationRate]
for initiationRate in listOfInitiationRates:
    for mutant in yfpmutants:
        outputFile = '../annotations/simulations/run7/' + \
            '%s_initiationrate_%0.4g.csv' % (mutant, initiationRate)
        # [:-3] drops the last three nucleotides before conversion
        # (presumably the stop codon -- confirm against yfp0.fa)
        num_seq = get_numerical_codon_sequence(yfpmutants[mutant][:-3])
        # the context manager closes the file even if a write fails
        with open(outputFile, 'w') as File:
            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber,
                        get_numerical_codon_sequence(yfp0[:-3])))
            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber, num_seq))
In [33]:
# Write the simulation input files for run 8: one mutant per codon serial
# number 1..237 of yfp0, with CTA placed at that codon.  Each file holds a
# line for the unmutated yfp0 sequence followed by a line for the mutant
# sequence.
# Each dict maps a codon serial number (counted from 1 along yfp0, all codons
# counted, not only Leu) to the codon placed there.
mutation_locations = [{loop: 'cta'} for loop in range(1, 238)]
yfpmutants = dict()
for mutant in mutation_locations:
    key = '_'.join(['yfp'] + [
        codon + str(location) for location, codon in mutant.items()
    ])
    yfpmutants[key] = list(yfp0)
    codon_number = 0
    for position in range(0, len(yfp0), 3):
        codon_number += 1
        if codon_number in mutant:
            yfpmutants[key][position:position + 3] = mutant[
                codon_number].upper()
        # every mutant additionally gets CTA at nucleotide position 177;
        # this runs after the substitution above, so it wins when both
        # target the same codon
        if position == 177:
            yfpmutants[key][position:position + 3] = 'CTA'
    yfpmutants[key] = ''.join(yfpmutants[key])
defaultMrnaCopyNumber = 1  # per cell
defaultInitationRate = 0.3  # s-1, This is the median initiation rate
listOfInitiationRates = [defaultInitationRate]
for initiationRate in listOfInitiationRates:
    for mutant in yfpmutants:
        outputFile = '../annotations/simulations/run8/' + \
            '%s_initiationrate_%0.4g.csv' % (mutant, initiationRate)
        # [:-3] drops the last three nucleotides before conversion
        # (presumably the stop codon -- confirm against yfp0.fa)
        num_seq = get_numerical_codon_sequence(yfpmutants[mutant][:-3])
        # the context manager closes the file even if a write fails
        with open(outputFile, 'w') as File:
            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber,
                        get_numerical_codon_sequence(yfp0[:-3])))
            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber, num_seq))
In [35]:
# Reformat the 'run2_fit_stallstrength_for_cta_distance_*' fit results into
# the (codon number, codon position, stall strength) table written for
# simulation run 11, then print, for every mutant, the per-codon rate below
# multiplied by the fitted stall strength.
fitdatafolder = '../processeddata/simulations/'
fitdatafiles = [
    fitdatafolder + File for File in os.listdir(fitdatafolder)
    if File.startswith('run2_fit_stallstrength_for_cta_distance_')
]
# find the location of all leucine codons (CTG in yfp0) to convert leu codon
# serial number (counted from 1) to absolute position along yfp in codon
# units (counted from 0) for simulation
leupositions = dict()
leucodon_number = 1
for position in range(0, len(yfp0), 3):
    if yfp0[position:position + 3] == 'CTG':
        # floor division keeps the position an integer on Python 2 and 3
        leupositions[leucodon_number] = position // 3
        leucodon_number += 1
# extracted from simulation run 1
trnaaccommodationrates = {23: 0.132824, 24: 0.398473, 26: 0.255022}
# codon number (see codonDict) -> codon name, used only for printing
codonnames = {23: 'CTA', 24: 'CTC', 26: 'CTT'}
# model tag at the end of the fit file name -> short label for printing
modelnames = {
    'trafficjam': 'TJ',
    'selpreterm': 'SAT',
    '5primepreterm': 'CSAT',
}
for fitdatafile in fitdatafiles:
    fitdata = pd.read_table(fitdatafile)
    # 'mutant' holds a 3-letter codon followed by its serial number
    fitdata['codon'] = fitdata['mutant'].apply(lambda string: string[:3])
    fitdata['pos'] = fitdata['mutant'].apply(lambda string: int(string[3:]))
    fitdata = fitdata.drop(['mutant'], axis=1)
    fitdata['pos'] = fitdata['pos'].apply(lambda pos: leupositions[pos])
    fitdata['codon'] = fitdata['codon'].apply(
        lambda codon: codonDict[codon.upper()])
    fitdata = fitdata[['codon', 'pos', 'stallstrength']]
    # the model tag is the last '_'-separated field, without the extension
    model = fitdatafile.split('_')[-1].split('.')[0]
    # the table is written before the extra column below is added
    fitdata.to_csv(
        '../processeddata/simulations/run11_stallstrengthfits_' + model +
        '.tsv',
        sep='\t',
        index=False)
    fitdata['trnaaccommodationrate'] = fitdata['codon'].apply(
        lambda x: trnaaccommodationrates[x])
    fitdata['trnaaccommodationrate'] = fitdata[
        'trnaaccommodationrate'] * fitdata['stallstrength']
    for row in fitdata.iterrows():
        codon = row[1]['codon']
        pos = row[1]['pos']
        rate = row[1]['trnaaccommodationrate']
        # print() with a single argument behaves the same on Python 2 and 3
        print('tRNA accommodation rate at {0}{1} ({2} model)\t{3:.3}s-1'.format(
            codonnames[codon], int(pos), modelnames[model], rate))
In [36]:
# Write the simulation input files for run 11 (single and double CTA mutants
# at Leu codons 8-14 of yfp0): one file per mutant, each holding a line for
# the unmutated yfp0 sequence followed by a line for the mutant sequence.
# Each dict maps a Leu codon serial number (counted from 1 along yfp0) to the
# codon that replaces the CTG found there.
mutation_locations = [
    {8: 'cta'}, {9: 'cta'}, {10: 'cta'}, {11: 'cta'},
    {12: 'cta'}, {13: 'cta'}, {14: 'cta'},
    {8: 'cta', 9: 'cta'}, {8: 'cta', 10: 'cta'}, {8: 'cta', 11: 'cta'},
    {8: 'cta', 12: 'cta'}, {8: 'cta', 13: 'cta'}, {8: 'cta', 14: 'cta'},
    {9: 'cta', 10: 'cta'}, {9: 'cta', 11: 'cta'}, {9: 'cta', 12: 'cta'},
    {9: 'cta', 13: 'cta'}, {9: 'cta', 14: 'cta'},
    {11: 'cta', 12: 'cta'},
    {13: 'cta', 14: 'cta'},
]
yfpmutants = dict()
for mutant in mutation_locations:
    # NOTE: the key follows the dict's own iteration order; kept as is
    # because the key becomes part of the output file name
    key = '_'.join(['yfp'] + [
        codon + str(location) for location, codon in mutant.items()
    ])
    yfpmutants[key] = list(yfp0)
    leucodon_number = 0
    for position in range(0, len(yfp0), 3):
        # proceed only if the codon is a Leu codon (which are all CTG in yfp0)
        if yfp0[position:position + 3] != 'CTG':
            continue
        leucodon_number += 1
        if leucodon_number in mutant:
            yfpmutants[key][position:position + 3] = mutant[
                leucodon_number].upper()
    yfpmutants[key] = ''.join(yfpmutants[key])
defaultMrnaCopyNumber = 1  # per cell
defaultInitationRate = 0.3  # s-1, This is the median initiation rate
listOfInitiationRates = [defaultInitationRate]
for initiationRate in listOfInitiationRates:
    for mutant in yfpmutants:
        outputFile = '../annotations/simulations/run11/' + \
            '%s_initiationrate_%0.4g.csv' % (mutant, initiationRate)
        # [:-3] drops the last three nucleotides before conversion
        # (presumably the stop codon -- confirm against yfp0.fa)
        num_seq = get_numerical_codon_sequence(yfpmutants[mutant][:-3])
        # the context manager closes the file even if a write fails
        with open(outputFile, 'w') as File:
            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber,
                        get_numerical_codon_sequence(yfp0[:-3])))
            File.write("%0.4g\t%d\t%s\n" %
                       (initiationRate, defaultMrnaCopyNumber, num_seq))