The run numbers above refer to the simulation runs in scripts/run_simulation.ipynb.
In [1]:
%matplotlib inline
import pandas as pd
from scipy.optimize import curve_fit
import numpy as np
from scipy.stats import tsem
from IPython.display import display, Markdown
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('ticks')
# Background readings taken from a well containing only growth medium.
od_background = 0.047
yfp_background = 69

# Time windows used for fitting, looked up as fitpoints[growth][date].
# Most dates map directly to a {'start', 'end'} window; '2017-02-16' is
# nested one level deeper by limiting amino acid ('ser' / 'leu').
# 'start' and 'end' are compared against the 'time' column of the readings.
fitpoints = {
    'rich': {
        '2016-01-14': dict(start=120, end=300),
        '2016-01-27': dict(start=120, end=300),
        '2016-02-04': dict(start=120, end=300),
        '2016-03-14': dict(start=120, end=300),
        '2016-04-19': dict(start=120, end=300),
        '2017-02-01': dict(start=120, end=300),
        '2017-02-16': {
            'ser': dict(start=120, end=300),
            'leu': dict(start=120, end=300),
        },
    },
    'starvation': {
        '2016-01-14': dict(start=250, end=600),
        '2016-01-27': dict(start=250, end=600),
        '2016-02-04': dict(start=250, end=600),
        '2016-03-14': dict(start=250, end=600),
        '2016-04-19': dict(start=250, end=600),
        '2017-02-01': dict(start=250, end=600),
        '2017-02-16': {
            'ser': dict(start=200, end=400),
            'leu': dict(start=250, end=450),
        },
    },
}
# find points for fitting synthesis rate during rich and starvation
def check_fit(row, growth=['rich', 'starvation']):
    """Tell whether one reading lies inside the fitting window for its date.

    Parameters
    ----------
    row : mapping-like (e.g. one DataFrame row)
        Must provide 'date' and 'time'. 'limited_aa' is read only when the
        window table for that date is nested by amino acid.
    growth : str
        Key of ``fitpoints`` to use, i.e. 'rich' or 'starvation'. The default
        merely lists the accepted values; it is a list and therefore not a
        usable dictionary key, so callers must pass one of the strings.

    Returns
    -------
    bool
        True when ``start <= time <= end`` for the selected window.

    Raises
    ------
    KeyError
        If no window is defined for the row's date, or the window is nested
        by amino acid and the row's 'limited_aa' value has no entry.
    """
    date = row['date']
    time = row['time']
    windows = fitpoints[growth]
    if date not in windows:
        raise KeyError("no '{}' fit window defined for date {!r}".format(
            growth, date))
    window = windows[date]
    # Choose between the flat and the amino-acid-nested layout from the table
    # itself rather than from the presence of a 'limited_aa' column, so that
    # an annotated experiment with a flat window still resolves.
    if 'start' not in window:
        window = window[row['limited_aa']]
    return (time >= window['start'] and time <= window['end'])
# straight line used when fitting fluorescence readings during starvation
def Poly1(x, a, b):
    """Evaluate ``b * x / 60 - a / 60``.

    ``x`` is the independent variable, ``b`` scales it and ``a`` is the
    offset that is subtracted; both terms are divided by 60.
    """
    scaled = b * x / 60
    offset = a / 60
    return scaled - offset
# find synthesis rate for single sample during rich and starvation conditions
def find_synthesis_rate(df):
    """Compute the rich and starvation YFP synthesis rates for one well.

    Parameters
    ----------
    df : pandas.DataFrame
        Readings of a single well. Must contain 'time', 'od', 'yfp',
        'yfp/od', the boolean masks 'richfit' and 'starvefit', and the
        columns 'date', 'plate' and 'well'; any other columns are carried
        through as annotations.

    Returns
    -------
    pandas.Series
        The annotation values of the earliest reading, followed by
        'richrate' (mean yfp/od over the up-to-three readings ending at the
        minimum inside the rich window) and 'starverate' (the ``b`` parameter
        of the ``Poly1`` fit of yfp against time inside the starvation window).
    """
    # sort by time and reset index to serial integers, so that index labels
    # and row positions coincide
    df = df.sort_values(by='time').reset_index()
    # label of the minimum yfp/od inside the rich window; idxmin always
    # returns an index label, whereas Series.argmin returns a position in
    # pandas >= 1.0 and would select the wrong rows below
    minidx = df.loc[df['richfit'], 'yfp/od'].idxmin()
    # average of yfp/od over the readings up to and including the minimum
    # (label slices are inclusive)
    richrate = df.loc[minidx - 2:minidx, 'yfp/od'].mean()
    # fit straight line to starvation phase; the covariance is not used
    starvation = df[df['starvefit']]
    BestFitParam, _ = curve_fit(Poly1,
                                list(starvation['time']),
                                list(starvation['yfp']))
    # slope is post rate
    starverate = BestFitParam[1]
    # keep only the annotation columns of the first row
    annotations = df.drop([
        'time', 'od', 'yfp', 'richfit', 'starvefit', 'yfp/od', 'index',
        'date', 'plate', 'well'
    ], axis=1).iloc[0]
    rates = pd.Series({'richrate': richrate, 'starverate': starverate})
    return pd.concat([annotations, rates])
# get synthesis rate for all samples
def get_synthesis_rate(df):
    """Compute per-well rich and starvation YFP synthesis rates.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw readings merged with annotations. Must contain 'od', 'yfp',
        'date', 'time', 'plate' and 'well', plus whatever ``check_fit`` and
        ``find_synthesis_rate`` read.

    Returns
    -------
    pandas.DataFrame
        One row per (date, plate, well) group, as produced by
        ``find_synthesis_rate``, with the group keys restored as columns.
    """
    # Work on a copy: the caller's frame stays untouched, and calling this
    # function twice on the same frame no longer subtracts the background twice.
    df = df.copy()
    df['od'] = df['od'] - od_background
    df['yfp'] = df['yfp'] - yfp_background
    df['yfp/od'] = df['yfp'] / df['od']
    # mark the time points used for rich and starvation YFP synthesis rates
    df['richfit'] = df.apply(lambda row: check_fit(row, 'rich'), axis=1)
    df['starvefit'] = df.apply(
        lambda row: check_fit(row, 'starvation'), axis=1)
    # group all data points for each well
    wells = df.groupby(['date', 'plate', 'well'])
    # find rich and starvation synthesis rates for each well
    welldata = wells.apply(find_synthesis_rate).reset_index()
    return welldata
# average synthesis rate over replicate wells
def get_av_synthesis_rate(df):
    """Average the per-well rates over replicate wells of each sample.

    Parameters
    ----------
    df : pandas.DataFrame
        Per-well data with a 'sample' column, the rate columns 'richrate'
        and 'starverate', and any number of annotation columns.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'sample', with 'richrate_mean', 'starverate_mean',
        'richrate_err', 'starverate_err' (``tsem`` of the replicates) and,
        for each annotation column, the first non-null value of the group.
    """
    rate_columns = ['richrate', 'starverate']
    # group by sample name and calculate mean and standard error
    groupedReplicates = df.groupby('sample')
    average = groupedReplicates[rate_columns].mean().rename(
        columns={'richrate': 'richrate_mean',
                 'starverate': 'starverate_mean'})
    error = groupedReplicates[rate_columns].agg(tsem).rename(
        columns={'richrate': 'richrate_err',
                 'starverate': 'starverate_err'})
    # axis must be passed by keyword: positional axis was removed in pandas 2
    wellannotations = df.drop(rate_columns,
                              axis=1).groupby('sample').first()
    averagedData = pd.concat(
        [average, error, wellannotations], join='inner', axis=1)
    return averagedData
In [2]:
# read in raw data for each well and corresponding annotations and merge
rawdata1 = pd.read_table('../rawdata/platereader/initiation_cta_data.tsv')
annotations1 = pd.read_table(
    '../rawdata/platereader/initiation_cta_annotations.tsv')
rawdata1 = rawdata1.merge(annotations1, on=['plate', 'well'], how='inner')
rawdata2 = pd.read_table('../rawdata/platereader/initiation_ctc_ctt_data.tsv')
annotations2 = pd.read_table(
    '../rawdata/platereader/initiation_ctc_ctt_annotations.tsv')
rawdata2 = rawdata2.merge(annotations2, on=['plate', 'well'], how='inner')
rawdata = pd.concat([rawdata1, rawdata2])
# calculate the yfp synthesis rate for each well
welldata = get_synthesis_rate(rawdata)
# group by all variants having the same initiation region
groupedReplicates = welldata[['richrate', 'initiation']].groupby('initiation')
# infer mean initiation rate for each set of initiation rate mutants
# normalize by the 'wt' mutant
inferredInitRate = groupedReplicates.mean()
# .loc replaces the .ix indexer, which no longer exists in current pandas
inferredInitRate = inferredInitRate / inferredInitRate.loc['wt', 'richrate']
inferredInitRate = inferredInitRate.rename(
    columns={'richrate': 'inferredInitiationRate'})
inferredInitRate[['inferredInitiationRate']].to_csv(
    '../processeddata/platereader/inferred_initiationrates_for_initiation_simulations.tsv',
    sep='\t')
inferredInitRate
Out[2]:
In [3]:
# read in raw data for each well and corresponding annotations and merge
rawdata1 = pd.read_table('../rawdata/platereader/initiation_cta_data.tsv')
annotations1 = pd.read_table(
    '../rawdata/platereader/initiation_cta_annotations.tsv')
rawdata1 = rawdata1.merge(annotations1, on=['plate', 'well'], how='inner')
rawdata2 = pd.read_table('../rawdata/platereader/initiation_ctc_ctt_data.tsv')
annotations2 = pd.read_table(
    '../rawdata/platereader/initiation_ctc_ctt_annotations.tsv')
rawdata2 = rawdata2.merge(annotations2, on=['plate', 'well'], how='inner')
rawdata = pd.concat([rawdata1, rawdata2])
# calculate the yfp synthesis rate for each well
welldata = get_synthesis_rate(rawdata)
# define sample column for averaging over replicates
# this is specific for each experiment
welldata['sample'] = welldata['strain_id'].apply(lambda x: x[:-2])
# average synthesis rate over replicates
sampledata = get_av_synthesis_rate(welldata).reset_index()
# select only wt RBS samples
subsetdata = sampledata[sampledata['initiation'] == 'wt']
# normalize by wt RBS variant without any pause site
normindex = subsetdata['pausecodon'].isnull()
# .item() requires exactly one control row; calling float() directly on a
# one-element Series is deprecated in current pandas
normalizationdata = float(
    subsetdata.loc[normindex, 'starverate_mean'].item())
# select only single stall site samples
subsetdata = subsetdata[~normindex]
# copy so the columns added below go to an independent frame instead of a
# slice of sampledata (avoids SettingWithCopyWarning)
subsetdata = subsetdata[subsetdata['pauselocation'].apply(
    lambda x: len(x.split(',')) == 1)].copy()
# normalization
subsetdata['measuredRateNormalized'] = (
    subsetdata['starverate_mean'] / normalizationdata)
# define label
subsetdata['label'] = subsetdata.apply(
    lambda row: row['pausecodon'].lower() + row['pauselocation'], axis=1)
subsetdata['pauselocation'] = subsetdata['pauselocation'].apply(int)
subsetdata = subsetdata.sort_values(by=['pausecodon', 'pauselocation'])
summarydata = subsetdata[['label', 'measuredRateNormalized']]
summarydata = summarydata.set_index('label')
summarydata.to_csv(
    '../processeddata/platereader/measured_yfprates_for_initiation_simulations.tsv',
    sep='\t')
summarydata
Out[3]:
In [4]:
# read in raw data for each well and corresponding annotations and merge
rawdata = pd.read_table('../rawdata/platereader/doubles_data.tsv')
annotations = pd.read_table('../rawdata/platereader/doubles_annotations.tsv')
rawdata = rawdata.merge(annotations, on=['plate', 'well'], how='inner')
# calculate the yfp synthesis rate for each well
welldata = get_synthesis_rate(rawdata)
# define sample column for averaging over replicates
# this is specific for each experiment
welldata['sample'] = welldata.apply(
    lambda row: row['strain_id'][:-2] + '_' + str(row['plate']), axis=1)
# average synthesis rate over replicates
sampledata = get_av_synthesis_rate(welldata).reset_index()
# normalize by max value within each plate (corresponding to no pause variant)
sampledata['measuredRateNormalized'] = sampledata.groupby(
    'plate')['starverate_mean'].transform(lambda x: x / x.max())
subsetdata = sampledata
# select only single stall site samples
normindex = subsetdata['pausecodon'].isnull()
subsetdata = subsetdata[~normindex]
# copy so the columns added below go to an independent frame instead of a
# slice of sampledata (avoids SettingWithCopyWarning)
subsetdata = subsetdata[subsetdata['pauselocation'].apply(
    lambda x: len(x.split(',')) == 1)].copy()
# define label
subsetdata['label'] = subsetdata.apply(
    lambda row: row['pausecodon'].lower() + row['pauselocation'], axis=1)
subsetdata['pauselocation'] = subsetdata['pauselocation'].apply(int)
subsetdata = subsetdata.sort_values(by=['pausecodon', 'pauselocation'])
summarydata = subsetdata[['label', 'measuredRateNormalized']]
summarydata = summarydata.set_index('label')
summarydata.to_csv(
    '../processeddata/platereader/measured_yfprates_for_double_simulations.tsv',
    sep='\t')
summarydata
Out[4]:
In [5]:
# read in raw data for each well and corresponding annotations and merge
rawdata = pd.read_table('../rawdata/platereader/serine_multiple_cta_data.tsv')
annotations = pd.read_table(
    '../rawdata/platereader/serine_multiple_cta_annotations.tsv')
rawdata = rawdata.merge(annotations, on=['plate', 'well'], how='inner')
# calculate the yfp synthesis rate for each well
welldata = get_synthesis_rate(rawdata)
# define sample column for averaging over replicates
# this is specific for each experiment
welldata['sample'] = welldata['strain_id'].apply(lambda x: x[:-2])
# average synthesis rate over replicates
sampledata = get_av_synthesis_rate(welldata).reset_index()
# select only wt RBS samples
subsetdata = sampledata[sampledata['initiation_variant'] == 'wt']
# normalize by wt RBS variant without any pause site
normindex = subsetdata['pausecodon'].isnull()
# .item() requires exactly one control row; calling float() directly on a
# one-element Series is deprecated in current pandas
normalizationdata = float(
    subsetdata.loc[normindex, 'starverate_mean'].item())
# select only the samples whose single stall site is at location '5'
subsetdata = subsetdata[~normindex]
# copy so the columns added below go to an independent frame instead of a
# slice of sampledata (avoids SettingWithCopyWarning)
subsetdata = subsetdata[subsetdata['pauselocation'] == '5'].copy()
# normalization
subsetdata['measuredRateNormalized'] = (
    subsetdata['starverate_mean'] / normalizationdata)
# define label
subsetdata['label'] = subsetdata.apply(
    lambda row: row['pausecodon'].lower() + row['pauselocation'], axis=1)
subsetdata['pauselocation'] = subsetdata['pauselocation'].apply(int)
subsetdata = subsetdata.sort_values(by=['pausecodon', 'pauselocation'])
summarydata = subsetdata[['label', 'measuredRateNormalized']]
summarydata = summarydata.set_index('label')
summarydata.to_csv(
    '../processeddata/platereader/measured_yfprates_for_serine_initiation_simulations.tsv',
    sep='\t',
    index_label='')
summarydata
Out[5]:
In [6]:
# read in raw data for each well and corresponding annotations and merge
rawdata = pd.read_table('../rawdata/platereader/serine_multiple_cta_data.tsv')
annotations = pd.read_table(
    '../rawdata/platereader/serine_multiple_cta_annotations.tsv')
rawdata = rawdata.merge(annotations, on=['plate', 'well'], how='inner')
# calculate the yfp synthesis rate for each well
welldata = get_synthesis_rate(rawdata)
# define sample column for averaging over replicates
# this is specific for each experiment
welldata['sample'] = welldata['strain_id'].apply(lambda x: x[:-2])
# average synthesis rate over replicates
sampledata = get_av_synthesis_rate(welldata).reset_index()
# select only serine mutants used for double mutant expt
sampledata = sampledata[sampledata['initiation_variant'].isnull()
                        | (sampledata['initiation_variant'] == 'wt')]
# copy so the column added below goes to an independent frame instead of a
# filtered slice (avoids SettingWithCopyWarning)
sampledata = sampledata[sampledata['limited_aa'] == 'ser'].copy()
# normalize by max value within each plate (corresponding to no pause variant)
sampledata['measuredRateNormalized'] = sampledata.groupby(
    'plate')['starverate_mean'].transform(lambda x: x / x.max())
subsetdata = sampledata
# select only single stall site samples
normindex = subsetdata['pausecodon'].isnull()
subsetdata = subsetdata[~normindex]
subsetdata = subsetdata[subsetdata['pauselocation'].apply(
    lambda x: len(x.split(',')) == 1)].copy()
# define label
subsetdata['label'] = subsetdata.apply(
    lambda row: row['pausecodon'].lower() + row['pauselocation'], axis=1)
subsetdata['pauselocation'] = subsetdata['pauselocation'].apply(int)
subsetdata = subsetdata.sort_values(by=['pausecodon', 'pauselocation'])
summarydata = subsetdata[['label', 'measuredRateNormalized']]
summarydata = summarydata.set_index('label')
summarydata.to_csv(
    '../processeddata/platereader/measured_yfprates_for_serine_double_simulations.tsv',
    sep='\t',
    index_label='')
summarydata
Out[6]:
In [7]:
# read in raw data for each well and corresponding annotations and merge
rawdata = pd.read_table('../rawdata/platereader/serine_multiple_cta_data.tsv')
annotations = pd.read_table(
    '../rawdata/platereader/serine_multiple_cta_annotations.tsv')
rawdata = rawdata.merge(annotations, on=['plate', 'well'], how='inner')
# calculate the yfp synthesis rate for each well
welldata = get_synthesis_rate(rawdata)
# define sample column for averaging over replicates
# this is specific for each experiment
welldata['sample'] = welldata['strain_id'].apply(lambda x: x[:-2])
# average synthesis rate over replicates
sampledata = get_av_synthesis_rate(welldata).reset_index()
# select only the samples annotated with limited_aa == 'leu'
# copy so the column added below goes to an independent frame instead of a
# filtered slice (avoids SettingWithCopyWarning)
sampledata = sampledata[sampledata['limited_aa'] == 'leu'].copy()
# normalize by max value within each plate (corresponding to no pause variant)
sampledata['measuredRateNormalized'] = sampledata.groupby(
    'plate')['starverate_mean'].transform(lambda x: x / x.max())
subsetdata = sampledata
# select only single stall site samples
normindex = subsetdata['pausecodon'].isnull()
subsetdata = subsetdata[~normindex]
subsetdata = subsetdata[subsetdata['pauselocation'].apply(
    lambda x: len(x.split(',')) == 1)].copy()
# define label
subsetdata['label'] = subsetdata.apply(
    lambda row: row['pausecodon'].lower() + row['pauselocation'], axis=1)
subsetdata['pauselocation'] = subsetdata['pauselocation'].apply(int)
subsetdata = subsetdata.sort_values(by=['pausecodon', 'pauselocation'])
summarydata = subsetdata[['label', 'measuredRateNormalized']]
summarydata = summarydata.set_index('label')
summarydata.to_csv(
    '../processeddata/platereader/measured_yfprates_for_leucine_multiple_simulations.tsv',
    sep='\t',
    index_label='')
summarydata
Out[7]:
In [8]:
# read in raw data for each well and corresponding annotations and merge
rawdata = pd.read_table('../rawdata/platereader/distance_ctc_data.tsv')
annotations = pd.read_table(
    '../rawdata/platereader/distance_ctc_annotations.tsv')
rawdata = rawdata.merge(annotations, on=['plate', 'well'], how='inner')
# calculate the yfp synthesis rate for each well
welldata = get_synthesis_rate(rawdata)
# define sample column for averaging over replicates
# this is specific for each experiment
welldata['sample'] = welldata['strain_id'].apply(lambda x: x[:-2])
# average synthesis rate over replicates
sampledata = get_av_synthesis_rate(welldata).reset_index()
# normalize by max value (corresponding to no pause variant)
sampledata['measuredRateNormalized'] = (
    sampledata['starverate_mean'] / sampledata['starverate_mean'].max())
subsetdata = sampledata
# select only single stall site samples
normindex = subsetdata['pausecodon'].isnull()
subsetdata = subsetdata[~normindex]
# copy so the columns added below go to an independent frame instead of a
# slice of sampledata (avoids SettingWithCopyWarning)
subsetdata = subsetdata[subsetdata['pauselocation'].apply(
    lambda x: len(x.split(',')) == 1)].copy()
# define label
subsetdata['label'] = subsetdata.apply(
    lambda row: row['pausecodon'].lower() + row['pauselocation'], axis=1)
subsetdata['pauselocation'] = subsetdata['pauselocation'].apply(int)
subsetdata = subsetdata.sort_values(by=['pausecodon', 'pauselocation'])
summarydata = subsetdata[['label', 'measuredRateNormalized']]
summarydata = summarydata.set_index('label')
summarydata.to_csv(
    '../processeddata/platereader/measured_yfprates_for_distance_simulations.tsv',
    sep='\t')
summarydata
Out[8]:
In [9]:
# read in raw data for each well and corresponding annotations and merge
rawdata = pd.read_table('../rawdata/platereader/distance_cta_data.tsv')
annotations = pd.read_table(
    '../rawdata/platereader/distance_cta_annotations.tsv')
rawdata = rawdata.merge(annotations, on=['plate', 'well'], how='inner')
# calculate the yfp synthesis rate for each well
welldata = get_synthesis_rate(rawdata)
# define sample column for averaging over replicates
# this is specific for each experiment
welldata['sample'] = welldata['strain_id'].apply(lambda x: x[:-2])
# average synthesis rate over replicates
sampledata = get_av_synthesis_rate(welldata).reset_index()
# normalize by max value (corresponding to no pause variant)
sampledata['measuredRateNormalized'] = (
    sampledata['starverate_mean'] / sampledata['starverate_mean'].max())
subsetdata = sampledata
# select only single stall site samples
normindex = subsetdata['pausecodon'].isnull()
subsetdata = subsetdata[~normindex]
# copy so the columns added below go to an independent frame instead of a
# slice of sampledata (avoids SettingWithCopyWarning)
subsetdata = subsetdata[subsetdata['pauselocation'].apply(
    lambda x: len(x.split(',')) == 1)].copy()
# define label
subsetdata['label'] = subsetdata.apply(
    lambda row: row['pausecodon'].lower() + row['pauselocation'], axis=1)
subsetdata['pauselocation'] = subsetdata['pauselocation'].apply(int)
subsetdata = subsetdata.sort_values(by=['pausecodon', 'pauselocation'])
summarydata = subsetdata[['label', 'measuredRateNormalized']]
summarydata = summarydata.set_index('label')
summarydata.to_csv(
    '../processeddata/platereader/measured_yfprates_for_cta_distance_simulations.tsv',
    sep='\t')
summarydata
Out[9]:
In [2]:
# read in raw data for each well and corresponding annotations and merge
rawdata1 = pd.read_table('../rawdata/platereader/initiation_cta_data.tsv')
annotations1 = pd.read_table(
    '../rawdata/platereader/initiation_cta_annotations.tsv')
rawdata1 = rawdata1.merge(annotations1, on=['plate', 'well'], how='inner')
rawdata2 = pd.read_table('../rawdata/platereader/initiation_ctc_ctt_data.tsv')
annotations2 = pd.read_table(
    '../rawdata/platereader/initiation_ctc_ctt_annotations.tsv')
rawdata2 = rawdata2.merge(annotations2, on=['plate', 'well'], how='inner')
rawdata = pd.concat([rawdata1, rawdata2])
# calculate the yfp synthesis rate for each well
welldata = get_synthesis_rate(rawdata)
# define sample column for averaging over replicates
# this is specific for each experiment
welldata['sample'] = welldata['strain_id'].apply(lambda x: x[:-2])
# average synthesis rate over replicates
sampledata = get_av_synthesis_rate(welldata).reset_index()
# keep only samples with an annotated initiation variant
subsetdata = sampledata[sampledata['initiation'].notnull()]
# no-pause controls, one per initiation variant, used for normalization
normalization = subsetdata.loc[
    subsetdata['pausecodon'].isnull(),
    ['starverate_mean', 'starverate_err', 'initiation']].set_index(
        'initiation')
for row in subsetdata[['pausecodon', 'pauselocation']].drop_duplicates(
        keep='first').iterrows():
    pausecodon = row[1]['pausecodon']
    pauselocation = row[1]['pauselocation']
    if pd.isnull(pausecodon):  # exclude no pause control
        continue
    if len(pauselocation.split(',')) != 1:  # exclude many pause control
        continue
    summarydata = subsetdata.loc[
        ((subsetdata['pauselocation'] == pauselocation) &
         (subsetdata['pausecodon'] == pausecodon)), [
             'starverate_mean', 'starverate_err', 'initiation',
             'richrate_mean'
         ]].set_index('initiation')
    # align the controls with the samples; reindex stands in for the removed
    # .ix indexer and yields NaN rows for variants that have no control
    normalizationdata = normalization.reindex(summarydata.index)
    # relative errors of sample and control are added ...
    summarydata.loc[:, 'starverate_err'] = (
        summarydata.loc[:, 'starverate_err'].values /
        summarydata.loc[:, 'starverate_mean'].values +
        normalizationdata.loc[:, 'starverate_err'].values /
        normalizationdata.loc[:, 'starverate_mean'].values)
    summarydata.loc[:, 'starverate_mean'] = (
        summarydata.loc[:, 'starverate_mean'] /
        normalizationdata.loc[:, 'starverate_mean'])
    # ... and converted back to an absolute error on the normalized mean
    summarydata.loc[:, 'starverate_err'] *= summarydata.loc[
        :, 'starverate_mean'].values
    summarydata.to_csv(
        '../processeddata/platereader/measured_yfprates_' + pausecodon.lower()
        + pauselocation + '_initiation_mutants.tsv',
        sep='\t')
    display(Markdown('**{}{}**'.format(pausecodon, pauselocation)))
    display(summarydata)
In [11]:
# read in raw data for each well and corresponding annotations and merge
rawdata = pd.read_table('../rawdata/platereader/serine_multiple_cta_data.tsv')
annotations = pd.read_table(
    '../rawdata/platereader/serine_multiple_cta_annotations.tsv')
rawdata = rawdata.merge(annotations, on=['plate', 'well'], how='inner')
# calculate the yfp synthesis rate for each well
welldata = get_synthesis_rate(rawdata)
# define sample column for averaging over replicates
# this is specific for each experiment
welldata['sample'] = welldata['strain_id'].apply(lambda x: x[:-2])
# average synthesis rate over replicates
sampledata = get_av_synthesis_rate(welldata).reset_index()
# keep samples that have both an initiation variant and a limiting amino acid
subsetdata = sampledata[sampledata['initiation_variant'].notnull()
                        & sampledata['limited_aa'].notnull()]
# no-pause controls, one per initiation variant, used for normalization
normalizationdata = subsetdata.loc[
    subsetdata['pausecodon'].isnull(),
    ['starverate_mean', 'starverate_err', 'initiation_variant']].set_index(
        'initiation_variant')
# The no-pause controls carry a null pausecodon; drop nulls before taking the
# first codon so that .lower() below never receives NaN.
pausecodons = subsetdata['pausecodon'].dropna().unique()
pausecodon = pausecodons[0]
for pauselocation in subsetdata['pauselocation'].unique():
    if pauselocation != '5':
        continue
    summarydata = subsetdata.loc[
        subsetdata['pauselocation'] == pauselocation,
        ['starverate_mean', 'starverate_err', 'initiation_variant']].set_index(
            'initiation_variant')
    # relative errors of sample and control are added (combined by position,
    # so both frames are assumed to list the variants in the same order) ...
    summarydata.loc[:, 'starverate_err'] = (
        summarydata.loc[:, 'starverate_err'].values /
        summarydata.loc[:, 'starverate_mean'].values +
        normalizationdata.loc[:, 'starverate_err'].values /
        normalizationdata.loc[:, 'starverate_mean'].values)
    summarydata.loc[:, 'starverate_mean'] = (
        summarydata.loc[:, 'starverate_mean'] /
        normalizationdata.loc[:, 'starverate_mean'])
    # ... and converted back to an absolute error on the normalized mean
    summarydata.loc[:, 'starverate_err'] *= summarydata.loc[
        :, 'starverate_mean'].values
    summarydata.to_csv(
        '../processeddata/platereader/measured_yfprates_' + pausecodon.lower()
        + pauselocation + '_initiation_mutants.tsv',
        sep='\t')
    display(Markdown('**{}{}**'.format(pausecodon, pauselocation)))
    display(summarydata)
In [12]:
# read in raw data for each well and corresponding annotations and merge
rawdata = pd.read_table('../rawdata/platereader/serine_multiple_cta_data.tsv')
annotations = pd.read_table(
    '../rawdata/platereader/serine_multiple_cta_annotations.tsv')
rawdata = rawdata.merge(annotations, on=['plate', 'well'], how='inner')
# calculate the yfp synthesis rate for each well
welldata = get_synthesis_rate(rawdata)
# define sample column for averaging over replicates
# this is specific for each experiment
welldata['sample'] = welldata['strain_id'].apply(lambda x: x[:-2])
# average synthesis rate over replicates
sampledata = get_av_synthesis_rate(welldata).reset_index()
# select only the samples annotated with limited_aa == 'leu'
# copy so the columns rewritten below go to an independent frame instead of
# a filtered slice (avoids SettingWithCopyWarning)
sampledata = sampledata[sampledata['limited_aa'] == 'leu'].copy()
# normalize by the max value over the selected samples
# (corresponding to no pause variant)
norm = sampledata['starverate_mean'].max()
sampledata['starverate_err'] = sampledata['starverate_err'] / norm
sampledata['starverate_mean'] = sampledata['starverate_mean'] / norm
subsetdata = sampledata
# select only samples with 1-4 stall sites
normindex = subsetdata['pausecodon'].isnull()
subsetdata = subsetdata[~normindex]
subsetdata = subsetdata[~subsetdata['pauselocation'].apply(
    lambda x: len(x.split(',')) > 4)]
# columns written to the output table
summarydata = subsetdata[
    ['pausecodon', 'pauselocation', 'starverate_mean', 'starverate_err']]
summarydata.to_csv(
    '../processeddata/platereader/measured_yfprates_cta_multiple_mutants.tsv',
    sep='\t',
    index=False)
summarydata
Out[12]:
In [13]:
# Load the plate-reader readings and attach the per-well annotations.
rawdata = pd.read_table('../rawdata/platereader/doubles_data.tsv')
annotations = pd.read_table('../rawdata/platereader/doubles_annotations.tsv')
rawdata = rawdata.merge(annotations, on=['plate', 'well'], how='inner')
# Per-well YFP synthesis rates.
welldata = get_synthesis_rate(rawdata)
# Replicates share a strain id prefix and a plate; that pair names the sample
# (experiment specific).
welldata['sample'] = welldata.apply(
    lambda row: '_'.join([row['strain_id'][:-2], str(row['plate'])]), axis=1)
# Mean and standard error over replicate wells.
sampledata = get_av_synthesis_rate(welldata).reset_index()
# Scale by the largest starvation rate on each plate
# (corresponding to no pause variant).
norm = sampledata.groupby('plate')['starverate_mean'].transform('max')
sampledata['starverate_err'] = sampledata['starverate_err'] / norm
sampledata['starverate_mean'] = sampledata['starverate_mean'] / norm
# One output table per leucine codon.
for pausecodon in sampledata['pausecodon'].unique():
    if pausecodon in ['CTC', 'CTT', 'CTA']:
        subsetdata = sampledata[sampledata['pausecodon'] == pausecodon]
        normindex = subsetdata['pausecodon'].isnull()
        subsetdata = subsetdata[~normindex]
        # keep samples with at most four stall sites
        site_counts = subsetdata['pauselocation'].apply(
            lambda x: len(x.split(',')))
        subsetdata = subsetdata[site_counts <= 4]
        summarydata = subsetdata[
            ['pausecodon', 'pauselocation', 'starverate_mean',
             'starverate_err']]
        summarydata.to_csv(
            '../processeddata/platereader/measured_yfprates_{}_double_mutants.tsv'.
            format(pausecodon.lower()),
            sep='\t',
            index=False)
        display(Markdown('**{}**'.format(pausecodon)))
        display(summarydata)
In [6]:
# read in raw data for each well and corresponding annotations and merge
rawdata = pd.read_table('../rawdata/platereader/serine_multiple_cta_data.tsv')
annotations = pd.read_table(
    '../rawdata/platereader/serine_multiple_cta_annotations.tsv')
rawdata = rawdata.merge(annotations, on=['plate', 'well'], how='inner')
# calculate the yfp synthesis rate for each well
welldata = get_synthesis_rate(rawdata)
# define sample column for averaging over replicates
# this is specific for each experiment
welldata['sample'] = welldata['strain_id'].apply(lambda x: x[:-2])
# average synthesis rate over replicates
sampledata = get_av_synthesis_rate(welldata).reset_index()
# select only serine mutants used for double mutant expt
sampledata = sampledata[sampledata['initiation_variant'].isnull()
                        | (sampledata['initiation_variant'] == 'wt')]
# copy so the columns rewritten below go to an independent frame instead of
# a filtered slice (avoids SettingWithCopyWarning)
sampledata = sampledata[sampledata['limited_aa'] == 'ser'].copy()
# normalize by the max value over the selected samples
# (corresponding to no pause variant)
norm = sampledata['starverate_mean'].max()
sampledata['starverate_err'] = sampledata['starverate_err'] / norm
sampledata['starverate_mean'] = sampledata['starverate_mean'] / norm
for pausecodon in sampledata['pausecodon'].unique():
    if pausecodon not in ['TCG']:
        continue
    subsetdata = sampledata
    # select the samples of this codon with at most four stall sites
    subsetdata = subsetdata[subsetdata['pausecodon'] == pausecodon]
    normindex = subsetdata['pausecodon'].isnull()
    subsetdata = subsetdata[~normindex]
    subsetdata = subsetdata[~subsetdata['pauselocation'].apply(
        lambda x: len(x.split(',')) > 4)]
    # TCG8 was not used for double mutant expt
    subsetdata = subsetdata[subsetdata['pauselocation'] != '8']
    summarydata = subsetdata[
        ['pausecodon', 'pauselocation', 'starverate_mean', 'starverate_err']]
    summarydata.to_csv(
        '../processeddata/platereader/measured_yfprates_{}_double_mutants.tsv'.
        format(pausecodon.lower()),
        sep='\t',
        index=False)
    display(Markdown('**{}**'.format(pausecodon)))
    display(summarydata)
In [15]:
# Load the plate-reader readings and attach the per-well annotations.
rawdata = pd.read_table('../rawdata/platereader/distance_cta_data.tsv')
annotations = pd.read_table(
    '../rawdata/platereader/distance_cta_annotations.tsv')
rawdata = rawdata.merge(annotations, on=['plate', 'well'], how='inner')
# Per-well YFP synthesis rates.
welldata = get_synthesis_rate(rawdata)
# Replicates share the strain id minus its last two characters
# (experiment specific).
welldata['sample'] = welldata['strain_id'].apply(lambda x: x[:-2])
# Mean and standard error over replicate wells.
sampledata = get_av_synthesis_rate(welldata).reset_index()
# Scale by the largest starvation rate of the experiment
# (corresponding to no pause variant).
norm = sampledata['starverate_mean'].max()
sampledata['starverate_err'] = sampledata['starverate_err'] / norm
sampledata['starverate_mean'] = sampledata['starverate_mean'] / norm
for pausecodon in sampledata['pausecodon'].unique():
    if pausecodon in ['CTA']:
        subsetdata = sampledata[sampledata['pausecodon'] == pausecodon]
        normindex = subsetdata['pausecodon'].isnull()
        subsetdata = subsetdata[~normindex]
        # keep samples with at most four stall sites
        site_counts = subsetdata['pauselocation'].apply(
            lambda x: len(x.split(',')))
        subsetdata = subsetdata[site_counts <= 4]
        # drop the combinations whose location string contains '9,'
        has_nine = subsetdata['pauselocation'].apply(lambda x: '9,' in x)
        subsetdata = subsetdata[~has_nine]
        summarydata = subsetdata[
            ['pausecodon', 'pauselocation', 'starverate_mean',
             'starverate_err']]
        summarydata.to_csv(
            '../processeddata/platereader/measured_yfprates_{}_distance_mutants.tsv'.
            format(pausecodon.lower()),
            sep='\t',
            index=False)
        display(Markdown('**{}**'.format(pausecodon)))
        display(summarydata)
In [16]:
# Load the plate-reader readings and attach the per-well annotations.
rawdata = pd.read_table('../rawdata/platereader/distance_ctc_data.tsv')
annotations = pd.read_table(
    '../rawdata/platereader/distance_ctc_annotations.tsv')
rawdata = rawdata.merge(annotations, on=['plate', 'well'], how='inner')
# Per-well YFP synthesis rates.
welldata = get_synthesis_rate(rawdata)
# Replicates share the strain id minus its last two characters
# (experiment specific).
welldata['sample'] = welldata['strain_id'].apply(lambda x: x[:-2])
# Mean and standard error over replicate wells.
sampledata = get_av_synthesis_rate(welldata).reset_index()
# Scale by the largest starvation rate of the experiment
# (corresponding to no pause variant).
norm = sampledata['starverate_mean'].max()
sampledata['starverate_err'] = sampledata['starverate_err'] / norm
sampledata['starverate_mean'] = sampledata['starverate_mean'] / norm
for pausecodon in sampledata['pausecodon'].unique():
    if pausecodon in ['CTC']:
        subsetdata = sampledata[sampledata['pausecodon'] == pausecodon]
        normindex = subsetdata['pausecodon'].isnull()
        subsetdata = subsetdata[~normindex]
        # keep samples with at most four stall sites
        site_counts = subsetdata['pauselocation'].apply(
            lambda x: len(x.split(',')))
        subsetdata = subsetdata[site_counts <= 4]
        # drop samples whose location string contains '13' or '14'
        # (substring match on the raw text)
        has_13_or_14 = subsetdata['pauselocation'].apply(
            lambda x: '13' in x or '14' in x)
        subsetdata = subsetdata[~has_13_or_14]
        summarydata = subsetdata[
            ['pausecodon', 'pauselocation', 'starverate_mean',
             'starverate_err']]
        summarydata.to_csv(
            '../processeddata/platereader/measured_yfprates_{}_distance_mutants.tsv'.
            format(pausecodon.lower()),
            sep='\t',
            index=False)
        display(Markdown('**{}**'.format(pausecodon)))
        display(summarydata)
In [87]:
# read in raw data for each well and corresponding annotations and merge
rawdata = pd.read_table('../rawdata/platereader/chrom_vs_plasmid_data.tsv')
annotations = pd.read_table(
    '../rawdata/platereader/chrom_vs_plasmid_annotations.tsv')
rawdata = rawdata.merge(annotations, on=['plate', 'well'], how='inner')
# calculate the yfp synthesis rate for each well
welldata = get_synthesis_rate(rawdata)
# define sample column for averaging over replicates
# this is specific for each experiment
welldata['sample'] = welldata['strain_id'].apply(lambda x: x[:-2])
# average synthesis rate over replicates
sampledata = get_av_synthesis_rate(welldata).reset_index()
# copy so the normalized columns go to an independent frame instead of a
# filtered slice of sampledata (avoids SettingWithCopyWarning)
chromdata = sampledata[sampledata['gene_location'] == 'chromosome'].copy()
# normalize
norm = float(chromdata['starverate_mean'].max())
chromdata['starverate_mean'] = chromdata['starverate_mean'] / norm
chromdata['starverate_err'] = chromdata['starverate_err'] / norm
# read in raw data for each well and corresponding annotations and merge
rawdata = pd.read_table('../rawdata/platereader/doubles_data.tsv')
annotations = pd.read_table('../rawdata/platereader/doubles_annotations.tsv')
rawdata = rawdata.merge(annotations, on=['plate', 'well'], how='inner')
# calculate the yfp synthesis rate for each well
welldata = get_synthesis_rate(rawdata)
# define sample column for averaging over replicates
# this is specific for each experiment
welldata['sample'] = welldata.apply(
    lambda row: row['strain_id'][:-2] + '_' + str(row['plate']), axis=1)
# average synthesis rate over replicates
sampledata = get_av_synthesis_rate(welldata).reset_index()
# NOTE(review): a preceding filter on pausecodon == 'CTA' was immediately
# overwritten by this plate filter and never took effect, so it has been
# dropped. Confirm that selecting plate 1 alone is the intended choice.
plasmiddata = sampledata[sampledata['plate'] == 1]
# keep one plasmid row for every pause location present in chromdata
plasmiddata = plasmiddata.merge(
    chromdata[['pauselocation']], how='right', on='pauselocation')
# normalize
norm = float(plasmiddata['starverate_mean'].max())
plasmiddata['starverate_mean'] = plasmiddata['starverate_mean'] / norm
plasmiddata['starverate_err'] = plasmiddata['starverate_err'] / norm
# normalize rich for easy comparison; both datasets are divided by the
# plasmid maximum
norm = float(plasmiddata['richrate_mean'].max())
chromdata['richrate_mean'] = chromdata['richrate_mean'] / norm
chromdata['richrate_err'] = chromdata['richrate_err'] / norm
plasmiddata['richrate_mean'] = plasmiddata['richrate_mean'] / norm
plasmiddata['richrate_err'] = plasmiddata['richrate_err'] / norm
plasmiddata = plasmiddata.set_index('pauselocation')
chromdata = chromdata.set_index('pauselocation')
# order the plasmid rows like the chromosome rows; .loc replaces the removed
# .ix indexer (every label is present because of the right merge above)
plasmiddata = plasmiddata.loc[chromdata.index]
In [88]:
# YFP synthesis rate in rich medium: plasmid vs chromosomal reporters
fig = plt.figure()
fig.set_size_inches([3, 2])
ax = fig.add_subplot(1, 1, 1)
sns.despine(offset=5)
# the two series are shifted by -0.1 / +0.1 so their markers do not overlap
subset = plasmiddata
ax.errorbar(
    np.arange(len(subset)) - 0.1,
    subset['richrate_mean'],
    subset['richrate_err'],
    color='black',
    capsize=10,
    elinewidth=1,
    linestyle='None',
    marker='^',
    markersize=4,
    label='plasmid')
subset = chromdata
ax.errorbar(
    np.arange(len(subset)) + 0.1,
    subset['richrate_mean'],
    subset['richrate_err'],
    color='green',
    capsize=10,
    elinewidth=1,
    linestyle='None',
    marker='o',
    markersize=4,
    label='chromosome')
# materialize the labels as a list: on Python 3 map() returns an iterator,
# which cannot be indexed or assigned to
xticklabels = list(map(
    lambda x: str(x).replace(',', ' + ').replace('nan', 'no-stall'),
    subset.index))
# the second entry is relabelled by hand
xticklabels[1] = '7-stalls'
ax.set_ylim(bottom=-0.1, top=1.1)
ax.set_xticks(np.arange(len(subset)))
ax.set_xticklabels(xticklabels, rotation=45, ha='right')
ax.set(xlabel='Location of stall sites',
       ylabel='YFP synthesis rate (a.u.)\n(Leu rich)')
ax.legend(loc=2, bbox_to_anchor=(1.2, 1))
fig.savefig('../figures/reviewer_response_fig1.svg')
In [90]:
# YFP synthesis rate during starvation: plasmid vs chromosomal reporters
fig = plt.figure()
fig.set_size_inches([3, 2])
ax = fig.add_subplot(1, 1, 1)
sns.despine(offset=5)
# the two series are shifted by -0.1 / +0.1 so their markers do not overlap
subset = plasmiddata
ax.errorbar(
    np.arange(len(subset)) - 0.1,
    subset['starverate_mean'],
    subset['starverate_err'],
    color='black',
    capsize=10,
    elinewidth=1,
    linestyle='None',
    marker='^',
    markersize=4,
    label='plasmid')
subset = chromdata
ax.errorbar(
    np.arange(len(subset)) + 0.1,
    subset['starverate_mean'],
    subset['starverate_err'],
    color='green',
    capsize=10,
    elinewidth=1,
    linestyle='None',
    marker='o',
    markersize=4,
    label='chromosome')
# materialize the labels as a list: on Python 3 map() returns an iterator,
# which cannot be indexed or assigned to
xticklabels = list(map(
    lambda x: str(x).replace(',', ' + ').replace('nan', 'no-stall'),
    subset.index))
# the second entry is relabelled by hand
xticklabels[1] = '7-stalls'
ax.set_ylim(bottom=-0.1, top=1.1)
ax.set_xticks(np.arange(len(subset)))
ax.set_xticklabels(xticklabels, rotation=45, ha='right')
ax.set(xlabel='Location of stall sites',
       ylabel='YFP synthesis rate (w.r.t no-stall)\n(Leu starvation)')
ax.legend(loc=2, bbox_to_anchor=(1.2, 1))
fig.savefig('../figures/reviewer_response_fig2.svg')