In [76]:
import cobra
from cobra.manipulation import remove_genes
from cobra import Model, Reaction, Metabolite
from cobra.flux_analysis import \
single_gene_deletion, single_reaction_deletion, \
double_gene_deletion, double_reaction_deletion
import math
import pandas as pd
import sys
import matplotlib.pyplot as plt
# sys.path.append('/home/user/notebook/erol/TB Project/Modules')
# from model_troubleshooting import *
from cobra.flux_analysis import production_envelope
from cobra.flux_analysis import sample
import matplotlib
import numpy as np
This notebook was run using cobra version 0.5.11
In [77]:
print cobra.__version__
In [78]:
model_iEK = cobra.io.load_json_model("iEK1020.json")
In [79]:
print "# of reactions", len(model_iEK.reactions)
print "# of metabolites", len(model_iEK.metabolites)
print "# of genes", len(model_iEK.genes)
In [80]:
# for react in model_iEK_before.reactions:
# if react not in model_iEK.reactions:
# print react, react.reaction, react.gene_reaction_rule
In [81]:
model_iEK.id
Out[81]:
In [82]:
model_iEK.medium
Out[82]:
In [83]:
model_iEK.optimize().f
Out[83]:
In [84]:
# print len(cobra.manipulation.validate.check_mass_balance(model_iEK).keys())
mass_bal_metabs, mass_bal_reacts = [], []
for react_id, mass_bal in cobra.manipulation.validate.check_mass_balance(model_iEK).iteritems():
# ------ Don't count exchange reactions and biomass functions -----
if str(react_id)[:3]!="EX_" and "biomass" not in str(react_id):
rxn_metabs = [str(x.id) for x in model_iEK.reactions.get_by_id(react_id.id).metabolites]
mass_bal_metabs.extend(rxn_metabs)
mass_bal_reacts.append(str(react_id))
# print str(react_id), mass_bal, react_id.reaction, react_id.gene_reaction_rule
print len(mass_bal_reacts)
In [85]:
print mass_bal_reacts
Most of these are in the two FAD pathways which are independent of the rest of the model. Furthermore, these reactions do contain metabolites that lack both names, IDs, and formulas (I just need to spend a couple hours adding formulas to the metabolites). Not looking at these, only 9 reactions are unbalanced in the model.
In [86]:
# Number of reactions not mass balanced. 4 of these have artifical metabolites (i.e. such as 0.0001 cobalamin) and
# thus cannot be mass balanced - perhaps I should remove said artifical instances.
print len([x for x in mass_bal_reacts if "FAD_" not in x])
In [87]:
blocked_reacts = cobra.flux_analysis.variability.find_blocked_reactions(model_iEK,
reaction_list=None,
zero_cutoff=1e-09,
open_exchanges=True)
print len(blocked_reacts)
In [88]:
blocked_reacts
Out[88]:
In [89]:
griffin_file = "../../Dataframes/Table_S2.xlsx"
griffin_excel = pd.read_excel(griffin_file, sheetname='supp table 2',skiprows = 9,keep_default_na=False)
In [90]:
griffin_excel.head()
Out[90]:
In [91]:
def griffin_essen(model_tb, dic_return):
model = model_tb.copy()
fal_pos_dic, fal_neg_dic = {}, {}
true_neg_dic, true_pos_dic = {}, {}
growth_rates = single_gene_deletion(model)
print "Optimal growth", model.optimize().f
print model.summary()
orig_growth_thres = 0.25*model.optimize().f
print "Threshold growth", orig_growth_thres
true_pos, true_neg, fal_pos, fal_neg = 0, 0, 0, 0
# set grif essen threshold -- iSM810 paper uses 0.1 as "confident essential"
grif_thres = 0.1
print model.reactions.get_by_id("Kt3r").reaction
for index, row in griffin_excel.iterrows():
gene = str(row["Locus"])
try:
growth = growth_rates.loc[gene, "flux"]
# True negative - predicts that it grows (not essential) and is correct.
if float(row["p value"]) > grif_thres and growth > orig_growth_thres:
true_neg = true_neg + 1
true_neg_dic.update({gene: [growth, float(row["p value"])]})
# False negative - predicts that it grows (not essential) when it actually essential
if float(row["p value"]) < grif_thres and growth > orig_growth_thres:
fal_neg = fal_neg + 1
fal_neg_dic.update({gene: [growth, float(row["p value"])]})
if float(row["p value"]) < grif_thres and growth < orig_growth_thres:
true_pos = true_pos + 1
true_pos_dic.update({gene: [growth, float(row["p value"])]})
if float(row["p value"]) > grif_thres and growth < orig_growth_thres:
fal_pos = fal_pos + 1
fal_pos_dic.update({gene: [growth, float(row["p value"])]})
except:
pass
# ---Analyze and Print results ---
print "TP - TN - FP - FN"
print true_pos, true_neg, fal_pos, fal_neg
# percent of correct predictions
perc_correct = (true_pos+true_neg)/(true_pos+true_neg+fal_pos+float(fal_neg))
print "percent correct: ", perc_correct
# mcc calculation
MCC_root = math.sqrt((true_pos + fal_pos)*(true_pos + fal_neg)*(true_neg + fal_pos)*(true_neg + fal_neg))
MCC = (true_pos*true_neg - fal_pos*fal_neg)/MCC_root
print "Matthew Correlation Coefficient", MCC
if dic_return == "Yes":
return fal_neg_dic, fal_pos_dic
elif dic_return == "Yes both":
return fal_neg_dic, fal_pos_dic, true_neg_dic, true_pos_dic
Change media
In [92]:
griffin_media = {
"EX_h": 1000, # hydrogen
"EX_h2o": 1000, # water
"EX_o2": 20.0, # oxygen
"EX_asn_L": 1.0, # asparagine
"EX_nh4": 10.0, # ammonium
"EX_cit":5.0, # citrate
"EX_etoh": 5.0, # ethanol
"EX_ca2": 1000.0, # calcium for CaCl2
"EX_cl": 1000.0, # chloride for CaCl2
"EX_mg2": 1000.0, # mg for MgSO4
"EX_so4": 1000, # so4 for MgSO4
"EX_fe3":5.0, # fe3 for ferric
"EX_glyc":5.0, # glycerol
"EX_pi":1.0, # phosphate
"EX_chsterol":5.0, # cholesterol
}
In [93]:
model_iEK.medium = griffin_media
Calculate the Matthews correlation coefficient. It should be around 0.532.
In [94]:
FN_dic, FP_dic, TN_dic, TP_dic = griffin_essen(model_iEK, "Yes both")
In [95]:
%matplotlib inline
matplotlib.style.use('seaborn-darkgrid')
mcc = (0.27, 0.51, 0.52, 0.52, 0.53)
N = 5
ind = np.arange(N) # the x locations for the groups
width = .45
fig, ax = plt.subplots(figsize=(8, 5))
colors = ['yellowgreen','yellowgreen','yellowgreen','yellowgreen','lightskyblue']
rects1 = ax.bar(ind, mcc, width,align='center', color=colors)
# ---- Add some text for labels, title and axes ticks
ax.set_ylabel('Matthews Correlation Coefficient')
ax.set_xlabel('Metabolic Model')
ax.set_title('Gene Essentiality Prediction Comparisons')
ax.set_xticks(ind)
ax.set_xticklabels(('iNJ661', 'GSMN-TB1.1', 'sMtb', 'iSM810', 'iEK1020'))
# ----- Add values on top of barchart
rects = ax.patches
# Now make some labels
# labels = ["label%d" % i for i in xrange(len(rects))]
labels = mcc
for rect, label in zip(rects, labels):
height = rect.get_height()
ax.text(rect.get_x() + rect.get_width()/2, height+0.01, label, ha='center',
va='bottom',fontsize=13)
fig.savefig("../../Figures/gene_essentiality_comparisons.svg", bbox_inches='tight')
In [96]:
inVitro_drugTesting = {
'EX_asn_L': 1,
'EX_ca2': 1000.0,
'EX_cit': 1.0,
'EX_cl': 1000.0,
'EX_etoh': 1,
'EX_fe3': 5.0,
'EX_glyc': 1.0,
'EX_h': 1000,
'EX_mg2': 1000.0,
'EX_mobd': 1000,
'EX_nh4': 20.0,
'EX_o2': 20.0,
'EX_pi': 1000,
'EX_so4': 1000,
}
inVivo_conditions = {
'EX_ala_L': 1,
'EX_asn_L': 5,
'EX_asp_L': 5,
'EX_urea': .1,
'EX_glu_L': 1,
'EX_gln_L': 1,
'EX_ca2': 1000,
'EX_cl': 1000,
'EX_co2': 1000,
'EX_cobalt2': 1000,
'EX_cu2': 1000,
'EX_fe3': 5,
'EX_h': 1000,
'EX_hdca': 10,
'EX_k': 1000,
'EX_mg2': 1000,
'EX_mobd': 1000,
'EX_na1': 1000,
'EX_no3': 1.5,
'EX_o2': .5, # - hypoxia
'EX_pi': 1000,
'EX_ppa': 20,
'EX_so4': 1000,
'EX_ocdca': 10,
'EX_ttdca': 10,
"EX_nodcoa": 10,
'EX_chsterol': 10,
"EX_octscoa": 10,
}
In [97]:
model_iEK.medium = inVitro_drugTesting
solution = model_iEK.optimize()
pfba_norm_solution = cobra.flux_analysis.pfba(model_iEK)
print "Optimal growth: ", pfba_norm_solution.fluxes["biomass"]
print "ICL FBA flux:", solution.fluxes["ICL"]
print "ICL pFBA flux:", pfba_norm_solution.fluxes["ICL"]
print "Total model flux:", pfba_norm_solution.fluxes.sum()
fva_norm = cobra.flux_analysis.flux_variability_analysis(model_iEK,
fraction_of_optimum=.95,
solver=None)
fva_norm = pd.DataFrame.from_dict(fva_norm).T.round(5)
print "----"
print model_iEK.summary()
print "----"
print "Max ICL flux variability:", fva_norm["ICL"]["maximum"]
In [98]:
model_iEK.medium = inVivo_conditions
solution = model_iEK.optimize()
pfba_hypox_solution = cobra.flux_analysis.pfba(model_iEK)
print "Optimal growth: ", pfba_hypox_solution.fluxes["biomass"]
print "ICL FBA flux:", solution.fluxes["ICL"]
print "ICL pFBA flux:", pfba_hypox_solution.fluxes["ICL"]
print "Total model flux:", pfba_hypox_solution.fluxes.sum()
fva_hypoxia = cobra.flux_analysis.flux_variability_analysis(model_iEK,
fraction_of_optimum=.95,
solver=None)
fva_hypox = pd.DataFrame.from_dict(fva_hypoxia).T.round(5)
print "----"
print model_iEK.summary()
print "----"
print "Max ICL flux variability:", fva_hypox["ICL"]["maximum"]
In [99]:
reacts_of_interest = ['EX_succ', 'EX_ac', 'EX_nh4', 'ICL', '2MCS', 'ICDHy','KGD2', 'ENO', 'PEPCK_re', 'ATPS4r']
for react in reacts_of_interest:
print react
print "pFBA:", pfba_norm_solution.fluxes[react] ,pfba_hypox_solution.fluxes[react]
print "FVA max:", fva_norm[react]["maximum"], fva_hypox[react]["maximum"]
print "FVA min:", fva_norm[react]["minimum"], fva_hypox[react]["minimum"]
print "-----"
in vitro conditions
In [111]:
model_iEK.medium = inVitro_drugTesting
pfba_solution = cobra.flux_analysis.pfba(model_iEK)
opt_value = pfba_solution["biomass"]
# -- constrain solution space to 0.95 of optimum --
with model_iEK:
print model_iEK.reactions.get_by_id("biomass").lower_bound
model_iEK.reactions.get_by_id("biomass").lower_bound = opt_value*.95
print model_iEK.reactions.get_by_id("biomass").lower_bound
%time inVitro_samples = sample(model_iEK, 10000, processes=4)
print inVitro_samples.head()
in vivo conditions
In [112]:
model_iEK.medium = inVivo_conditions
pfba_solution = cobra.flux_analysis.pfba(model_iEK)
opt_value = pfba_solution["biomass"]
# -- constrain solution space to 0.95 of optimum --
with model_iEK:
print model_iEK.reactions.get_by_id("biomass").lower_bound
model_iEK.reactions.get_by_id("biomass").lower_bound = opt_value*.95
print model_iEK.reactions.get_by_id("biomass").lower_bound
%time inVivo_samples = sample(model_iEK, 10000, processes=4)
print inVivo_samples.head()
In [113]:
%matplotlib inline
plt.style.use('seaborn-white')
In [114]:
for react in reacts_of_interest:
inVitro_samples[react].hist(bins=80)
inVivo_samples[react].hist(bins=80)
plt.title(react)
plt.ylabel("sample frequency")
plt.xlabel("flux")
plt.show()
In [115]:
def plot_box_plot(samples1, samples2, name1, name2, rxn, title):
box_1 = samples1[rxn].copy()
box_1.name = name1
box_2 = samples2[rxn].copy()
box_2.name = name2
box_df = pd.concat([box_1, box_2], axis=1)
color = dict(boxes='k', whiskers='k', medians='r', caps='Gray')
box_df.plot.box(sym='', title=title, color=color)
plt.ylabel("Flux")
# save_fig_name = rxn+"_sampling_boxplot.svg"
# plt.savefig(save_fig_name)
In [116]:
for react in reacts_of_interest:
plot_box_plot(inVitro_samples, inVivo_samples,
"in vitro media", "in vivo media",
react,
react)
Prepare iEK1020 reaction and metabolite sheets. The gene sheet with references was manually generated.
In [117]:
model_iEK = cobra.io.load_json_model("iEK1020.json")
print model_iEK.medium
react_dict = []
for react in model_iEK.reactions:
react_dict.append({'Reaction ID': str(react.id),
'Reaction Name': str(react.name),
'Subsystem': str(react.subsystem),
'Reaction Formula': str(react.reaction),
'Lower Bound': str(react.lower_bound),
'Upper Bound': str(react.upper_bound),
'Gene Reaction Rule': str(react.gene_reaction_rule)
})
react_df = pd.DataFrame(react_dict)
react_df = react_df[["Reaction ID", 'Reaction Name', 'Subsystem', 'Reaction Formula',
'Lower Bound', 'Upper Bound', 'Gene Reaction Rule' ]]
metab_dict = []
for metab in model_iEK.metabolites:
metab_dict.append({'Metabolite ID': str(metab.id),
'Metabolite Name': str(metab.name),
'Metabolite Formula': str(metab.formula),
})
metab_df = pd.DataFrame(metab_dict)
metab_df = metab_df[["Metabolite ID", 'Metabolite Name', 'Metabolite Formula']]
In [118]:
model_iEK.id
Out[118]:
Prepare Griffin Essentiality sheet.
In [119]:
gene_essential_dic = []
for gene in model_iEK.genes:
g = str(gene)
if g in FN_dic.keys():
ko_growth_rate = FN_dic[g][0]
griff_p_val = FN_dic[g][1]
error_type = "False Negative"
if g in FP_dic.keys():
ko_growth_rate = FP_dic[g][0]
griff_p_val = FP_dic[g][1]
error_type = "False Positive"
if g in TN_dic.keys():
ko_growth_rate = TN_dic[g][0]
griff_p_val = TN_dic[g][1]
error_type = "True Negative"
if g in TP_dic.keys():
ko_growth_rate = TP_dic[g][0]
griff_p_val = TP_dic[g][1]
error_type = "True Positive"
gene_essential_dic.append({"Gene": g,
"KO Growth Rate": ko_growth_rate,
"Griffin Essentiality P-Value": griff_p_val,
"Error Type": error_type
})
gene_essen_df = pd.DataFrame(gene_essential_dic)
gene_essen_df = gene_essen_df[["Gene", "KO Growth Rate", "Griffin Essentiality P-Value", "Error Type"]]
gene_essen_df.head()
Out[119]:
Prepare media conditions as JSON files.
In [120]:
media_conditions = [model_iEK.medium, griffin_media, inVitro_drugTesting, inVivo_conditions]
media_df = pd.DataFrame(media_conditions)
media_df = media_df.transpose()
media_df.columns = ["Middlebrook m7H10 (Acetate C-source)",
"Griffin Media (Gene Essentiality)",
"Lowenstein-Jensen Media (Drug-Testing)",
"Physiological Media (in vivo modeling)"]
media_df.head()
Out[120]:
Write everything to excel file.
In [121]:
writer = pd.ExcelWriter('iEK1020_supplementary.xlsx')
react_df.to_excel(writer, sheet_name='Reactions', index=False)
metab_df.to_excel(writer, sheet_name='Metabolites', index=False)
gene_essen_df.to_excel(writer, sheet_name="Gene Essentiality", index=False)
media_df.to_excel(writer, sheet_name="Media Conditions", index=False)
writer.save()