In [76]:

    
import cobra
from cobra.manipulation import remove_genes
from cobra import Model, Reaction, Metabolite
from cobra.flux_analysis import \
    single_gene_deletion, single_reaction_deletion, \
    double_gene_deletion, double_reaction_deletion
import math
import pandas as pd
import sys
import matplotlib.pyplot as plt
# sys.path.append('/home/user/notebook/erol/TB Project/Modules')
# from model_troubleshooting import *
from cobra.flux_analysis import production_envelope
from cobra.flux_analysis import sample
import matplotlib
import numpy as np

This notebook was run using cobra version 0.5.11



In [77]:

    
print cobra.__version__

What you need to run this notebook...

iEK1019 in json format and located in the same folder as this notebook.
Griffin Essentiality data

Load iEK1020

Although the model is far from perfect, we believe that iEK1019 is a step in the right direction.



In [78]:

    
model_iEK = cobra.io.load_json_model("iEK1020.json")



In [79]:

    
print "# of reactions", len(model_iEK.reactions)
print "# of metabolites", len(model_iEK.metabolites)
print "# of genes", len(model_iEK.genes)









    



# of reactions 1211
# of metabolites 998
# of genes 1020



In [80]:

    
# for react in model_iEK_before.reactions:
#     if react not in model_iEK.reactions:
#         print react, react.reaction, react.gene_reaction_rule



In [81]:

    
model_iEK.id









    Out[81]:





u'iEK1020'

Check default media condition

Should be initalized to approximate Middlebrook 7H10 with Acetate as the carbon source as described in Eoh and Rhee; PNAS 2014. Although not used in this study, m7H10 with acetate has been used in numerous papers and thus provides a good media intialization.



In [82]:

    
model_iEK.medium









    Out[82]:





{u'EX_ac': 10,
 u'EX_btn': 1,
 u'EX_ca2': 1000.0,
 u'EX_cit': 1.0,
 u'EX_cl': 1000.0,
 u'EX_co2': 1000,
 u'EX_cu2': 1000.0,
 u'EX_fe2': 1000.0,
 u'EX_fe3': 1000.0,
 u'EX_glu_L': 1.0,
 u'EX_h': 1000,
 u'EX_h2co3': 1000,
 u'EX_k': 1000,
 u'EX_mg2': 1000.0,
 u'EX_mobd': 1000,
 u'EX_na1': 1000,
 u'EX_nh4': 20,
 u'EX_o2': 20,
 u'EX_pi': 1000,
 u'EX_pydxn': 1,
 u'EX_so4': 1000}

Check FBA solution

Growth should be about 0.017 mmol/g DW/ hr



In [83]:

    
model_iEK.optimize().f









    Out[83]:





0.01692413778583187

Check Mass Balance

Exchange reactions are taken out.
Biomass reactions are taken out.



In [84]:

    
# print len(cobra.manipulation.validate.check_mass_balance(model_iEK).keys())
mass_bal_metabs, mass_bal_reacts = [], []
for react_id, mass_bal in cobra.manipulation.validate.check_mass_balance(model_iEK).iteritems():
    
    # ------ Don't count exchange reactions and biomass functions -----
    if str(react_id)[:3]!="EX_" and "biomass" not in str(react_id):
        rxn_metabs = [str(x.id) for x in model_iEK.reactions.get_by_id(react_id.id).metabolites]
        mass_bal_metabs.extend(rxn_metabs)
        mass_bal_reacts.append(str(react_id))
        # print str(react_id), mass_bal, react_id.reaction, react_id.gene_reaction_rule

print len(mass_bal_reacts)



In [85]:

    
print mass_bal_reacts









    



['FAD_9', 'FAD_10', 'FAD_11', 'FAD_12', 'FAD_13', 'NMO', 'FAD_14', 'FAD_15', 'FAD_16', 'MBTA1', 'FAD_19', 'FAD_20', 'FAD_21', 'FAD_22', 'FAD_24', 'FAD_25', 'FAD_26', 'FAD_27', 'MME', 'FAD_1', 'FAD_2', 'FAD_3', 'MMM2r', 'FAD_4', 'PRCOA1', 'FAD_5', 'VIUB', 'FAD_6', 'FADD3', 'FAD_7', 'PKS121', 'FAD_23', 'FAD_8', 'FADD17']

Most of these are in the two FAD pathways which are independent of the rest of the model. Furthermore, these reactions do contain metabolites that lack both names, IDs, and formulas (I just need to spend a couple hours adding formulas to the metabolites). Not looking at these, only 9 reactions are unbalanced in the model.



In [86]:

    
# Number of reactions not mass balanced. 4 of these have artifical metabolites (i.e. such as 0.0001 cobalamin) and

# thus cannot be mass balanced - perhaps I should remove said artifical instances.

print len([x for x in mass_bal_reacts if "FAD_" not in x])

Look at Blocked Reactions

Note that it's ok for many reactions to be blocked because the pathway may be incomplete or the ending metabolite has no way to leave the system.



In [87]:

    
blocked_reacts = cobra.flux_analysis.variability.find_blocked_reactions(model_iEK, 
                                                                        reaction_list=None, 
                                                                        zero_cutoff=1e-09, 
                                                                        open_exchanges=True)
print len(blocked_reacts)



In [88]:

    
blocked_reacts









    Out[88]:





['2O4EH',
 '2PLS',
 '5DGLCNR',
 'ACPS1',
 'ADCPS2',
 'ADMDCr',
 'AGMT',
 'AH7PK',
 'AHCYSNS',
 'ALATRS',
 'AMAOTr',
 'AOXSr',
 'ARABT',
 'ARGDC',
 'ARGTRS',
 'ASNTRS',
 'ASPTRS',
 'BACCL',
 'BETALDHx',
 'BH7PK',
 'BTNt',
 'CBIAT',
 'CBL2OR',
 'CBLAT',
 'CHOLD',
 'CMLDC',
 'COB2',
 'CPC6R',
 'CPPPGO2',
 'CRNabc',
 'CYANST',
 'CYSK2_I',
 'CYSTRS',
 'Clt',
 'DBTSr',
 'DCPDP',
 'DCYTD',
 'DGC3D',
 'DGK1',
 'DMHDRFS',
 'DURIK1',
 'DURIPP',
 'ERGBSYN',
 'EX_arab_L',
 'EX_atp',
 'EX_btn',
 'EX_ca2',
 'EX_chol',
 'EX_cl',
 'EX_crn',
 'EX_cu2',
 'EX_fe2',
 'EX_glyb',
 'EX_gua',
 'EX_k',
 'EX_malthp',
 'EX_mg2',
 'EX_mn2',
 'EX_mobd',
 'EX_na1',
 'EX_ni2',
 'EX_no',
 'EX_pydxn',
 'EX_tsul',
 'FAH4',
 'FASm3001',
 'FCLPA',
 'FMETTRS',
 'G6PDH',
 'GF4GL_0',
 'GF4GL_1',
 'GLNSP1',
 'GLNSP2',
 'GLNSP3',
 'GLNTRS',
 'GLUTRR',
 'GLYALDDr',
 'GLYBabc',
 'GLYCK',
 'GLYTRS',
 'HISTRS',
 'HOAS',
 'HOPNTAL',
 'ILETRS',
 'LAT',
 'LEUTRS',
 'LPFPLT',
 'LPLIPAL2E180',
 'LPPGS',
 'LYSTRS',
 'MALTHPabc',
 'MANT1',
 'MBTA3',
 'MDFDH',
 'METTRS',
 'MGt5',
 'MHPGLUT',
 'MNDNS1',
 'MNDNS2',
 'MOBA',
 'MOEA1',
 'MPML',
 'MPTAT',
 'MSHOXH',
 'MTAN',
 'MTAP',
 'NNAM',
 'NO',
 'NOt',
 'NTD1',
 'NTD3',
 'NTD6',
 'NTD8',
 'NTPTP1',
 'Ni2t',
 'ORNDC',
 'PDBL_1',
 'PDBL_2',
 'PDBL_3',
 'PDBL_4',
 'PDX5PO',
 'PGLYCP',
 'PHBADA',
 'PHBADB',
 'PHBADC',
 'PHBADD',
 'PHETRS',
 'PNCDC',
 'PPTGS',
 'PROTRS',
 'PUNP4',
 'PYAM5PO',
 'PYDXNO',
 'PYDXNtr',
 'PYDXO',
 'ROSFE',
 'S7PI',
 'SELCYSS',
 'SERTRS',
 'SERTRS2',
 'SHCHD3',
 'SPMS',
 'SPODM',
 'SR1',
 'TBSNLS1',
 'TBSNLS2',
 'TBSNLS3',
 'TBSYNLDPS',
 'THFGLUS',
 'THRTRS',
 'TMDPP',
 'TMHAS5',
 'TMHAS6',
 'TRIATS',
 'TRPTRS',
 'TSULabc',
 'TYRTRS',
 'UDPGD',
 'UPPDC2',
 'VALTRS',
 'YUMPS']

Gene Essentiality

In order to run the following gene essentiality predictions,



In [89]:

    
griffin_file = "../../Dataframes/Table_S2.xlsx"
griffin_excel = pd.read_excel(griffin_file,  sheetname='supp table 2',skiprows = 9,keep_default_na=False)



In [90]:

    
griffin_excel.head()









    Out[90]:







  
    
      
      Locus
      Gene Name
      Totals TA's
      Total Insertions
      Unoccupied Sites
      Unoccupied Sites.1
      p value
      (Sassetti et al 2003)
    
  
  
    
      0
      Rv0001
      dnaA
      32
      1
      4.365
      31
      0
      essential
    
    
      1
      Rv0002
      dnaN
      31
      0
      4.319
      31
      0
      no-data
    
    
      2
      Rv0003
      recF
      35
      11
      4.495
      7
      0.13067
      non-essential
    
    
      3
      Rv0004
      -
      8
      2
      2.353
      6
      0.06177
      non-essential
    
    
      4
      Rv0005
      gyrB
      43
      3
      4.794
      40
      0
      essential



In [91]:

    
def griffin_essen(model_tb, dic_return):
    model = model_tb.copy()
    
    fal_pos_dic, fal_neg_dic = {}, {}
    true_neg_dic, true_pos_dic = {}, {}
    
    growth_rates = single_gene_deletion(model)
    print "Optimal growth", model.optimize().f
    print model.summary()
    
    orig_growth_thres = 0.25*model.optimize().f
    print "Threshold growth", orig_growth_thres
    
    true_pos, true_neg, fal_pos, fal_neg = 0, 0, 0, 0
    
    # set grif essen threshold -- iSM810 paper uses 0.1 as "confident essential"
    grif_thres = 0.1
    print model.reactions.get_by_id("Kt3r").reaction
    for index, row in griffin_excel.iterrows():
        gene = str(row["Locus"])
        
        try:
            growth = growth_rates.loc[gene, "flux"]
            
            # True negative - predicts that it grows (not essential) and is correct.
            if float(row["p value"]) > grif_thres and growth > orig_growth_thres:
                    true_neg = true_neg + 1
                    true_neg_dic.update({gene: [growth, float(row["p value"])]})
                    
            # False negative - predicts that it grows (not essential) when it actually essential
            if float(row["p value"]) < grif_thres and growth > orig_growth_thres:
                    fal_neg = fal_neg + 1
                    fal_neg_dic.update({gene: [growth, float(row["p value"])]})
                    
            if float(row["p value"]) < grif_thres and growth < orig_growth_thres:
                    true_pos = true_pos + 1
                    true_pos_dic.update({gene: [growth, float(row["p value"])]})
                    
            if float(row["p value"]) > grif_thres and growth < orig_growth_thres:
                    fal_pos = fal_pos + 1
                    fal_pos_dic.update({gene: [growth, float(row["p value"])]})
                    
        except:
            pass            
            
    # ---Analyze and Print results ---
    print "TP - TN - FP - FN"
    print true_pos, true_neg, fal_pos, fal_neg
    
    # percent of correct predictions
    perc_correct = (true_pos+true_neg)/(true_pos+true_neg+fal_pos+float(fal_neg))
    print "percent correct: ", perc_correct
    
    # mcc calculation
    MCC_root = math.sqrt((true_pos + fal_pos)*(true_pos + fal_neg)*(true_neg + fal_pos)*(true_neg + fal_neg))
    MCC = (true_pos*true_neg - fal_pos*fal_neg)/MCC_root
    print "Matthew Correlation Coefficient", MCC
    if dic_return == "Yes":
        return fal_neg_dic, fal_pos_dic
    elif dic_return == "Yes both":
        return fal_neg_dic, fal_pos_dic, true_neg_dic, true_pos_dic

Change media



In [92]:

    
griffin_media = {
    "EX_h": 1000, # hydrogen
    "EX_h2o": 1000, # water
    "EX_o2": 20.0, # oxygen
    "EX_asn_L": 1.0, # asparagine
    "EX_nh4": 10.0, # ammonium
    "EX_cit":5.0, # citrate
    "EX_etoh": 5.0, # ethanol
    "EX_ca2": 1000.0, # calcium for CaCl2
    "EX_cl": 1000.0, # chloride for CaCl2
    "EX_mg2": 1000.0, # mg for MgSO4
    "EX_so4": 1000, # so4 for MgSO4
    "EX_fe3":5.0, # fe3 for ferric
    "EX_glyc":5.0, # glycerol
    "EX_pi":1.0, # phosphate
    "EX_chsterol":5.0, # cholesterol
}



In [93]:

    
model_iEK.medium = griffin_media

Calculate the Matthews correlation coefficient. It should be around 0.532.



In [94]:

    
FN_dic, FP_dic, TN_dic, TP_dic = griffin_essen(model_iEK, "Yes both")









    



Optimal growth 0.322258257973
IN FLUXES             OUT FLUXES         OBJECTIVES
--------------------  -----------------  --------------
o2_e        18.6      h2o_e   31.2       biomass  0.322
h_e         14.6      co2_e   13.6
cit_e        5        succ_e   7.8
etoh_e       5        hia_e    0.0315
glyc_e       5        4hba_e   0.000258
asn__L_e     1
pi_e         0.436
nh4_e        0.261
so4_e        0.0438
chsterol_e   0.0315
fe3_e        0.00229
None
Threshold growth 0.0805645644932
k_c <-- k_e
TP - TN - FP - FN
245 539 54 176
percent correct:  0.773175542406
Matthew Correlation Coefficient 0.530468805079



In [95]:

    
%matplotlib inline
matplotlib.style.use('seaborn-darkgrid')

mcc = (0.27, 0.51, 0.52, 0.52, 0.53)
N = 5
ind = np.arange(N)  # the x locations for the groups
width = .45
fig, ax = plt.subplots(figsize=(8, 5))
colors = ['yellowgreen','yellowgreen','yellowgreen','yellowgreen','lightskyblue']
rects1 = ax.bar(ind, mcc, width,align='center', color=colors)

# ---- Add some text for labels, title and axes ticks
ax.set_ylabel('Matthews Correlation Coefficient')
ax.set_xlabel('Metabolic Model')
ax.set_title('Gene Essentiality Prediction Comparisons')
ax.set_xticks(ind)
ax.set_xticklabels(('iNJ661', 'GSMN-TB1.1', 'sMtb', 'iSM810', 'iEK1020'))
# ----- Add values on top of barchart
rects = ax.patches
# Now make some labels
# labels = ["label%d" % i for i in xrange(len(rects))]
labels = mcc
for rect, label in zip(rects, labels):
    height = rect.get_height()
    ax.text(rect.get_x() + rect.get_width()/2, height+0.01, label, ha='center', 
            va='bottom',fontsize=13)

fig.savefig("../../Figures/gene_essentiality_comparisons.svg", bbox_inches='tight')

in Vitro vs in Vivo simulations

Two media conditions contrasting petri dish and host environment...
- inVitro_drugTesting
- inVivo_conditions

Definte in vitro and in vivo media conditions



In [96]:

    
inVitro_drugTesting = {
 'EX_asn_L': 1,
 'EX_ca2': 1000.0,
 'EX_cit': 1.0,
 'EX_cl': 1000.0,
 'EX_etoh': 1,
 'EX_fe3': 5.0,
 'EX_glyc': 1.0,
 'EX_h': 1000,
 'EX_mg2': 1000.0,
 'EX_mobd': 1000,
 'EX_nh4': 20.0,
 'EX_o2': 20.0,
 'EX_pi': 1000,
 'EX_so4': 1000,
}

inVivo_conditions = {
 'EX_ala_L': 1,
 'EX_asn_L': 5,
 'EX_asp_L': 5,
 'EX_urea': .1, 
 'EX_glu_L': 1,
 'EX_gln_L': 1, 
 'EX_ca2': 1000,
 'EX_cl': 1000,
 'EX_co2': 1000,
 'EX_cobalt2': 1000,
 'EX_cu2': 1000,
 'EX_fe3': 5,
 'EX_h': 1000,
 'EX_hdca': 10,
 'EX_k': 1000,
 'EX_mg2': 1000,
 'EX_mobd': 1000,
 'EX_na1': 1000,
 'EX_no3': 1.5,
 'EX_o2': .5, # - hypoxia
 'EX_pi': 1000,
 'EX_ppa': 20,
 'EX_so4': 1000,
 'EX_ocdca': 10,
 'EX_ttdca': 10,
 "EX_nodcoa": 10,
 'EX_chsterol': 10,
 "EX_octscoa": 10,

}

Compute FVA for in vitro model

Stated as 0.059 in the paper.



In [97]:

    
model_iEK.medium = inVitro_drugTesting
solution = model_iEK.optimize()
pfba_norm_solution = cobra.flux_analysis.pfba(model_iEK)

print "Optimal growth: ", pfba_norm_solution.fluxes["biomass"]
print "ICL FBA flux:", solution.fluxes["ICL"]
print "ICL pFBA flux:", pfba_norm_solution.fluxes["ICL"]
print "Total model flux:", pfba_norm_solution.fluxes.sum()

fva_norm = cobra.flux_analysis.flux_variability_analysis(model_iEK,
                                                        fraction_of_optimum=.95, 
                                                        solver=None)
fva_norm = pd.DataFrame.from_dict(fva_norm).T.round(5)
print "----"
print model_iEK.summary()
print "----"
print "Max ICL flux variability:", fva_norm["ICL"]["maximum"]









    



Optimal growth:  0.0593715098976
ICL FBA flux: 0.0
ICL pFBA flux: 0.0
Total model flux: 45.7341567095
----
IN FLUXES          OUT FLUXES        OBJECTIVES
-----------------  ----------------  ---------------
o2_e      5.07     h2co3_e  4.43     biomass  0.0564
h_e       4.56     succ_e   2
asn__L_e  1        h2o_e    1.87
cit_e     1        nh4_e    1.6
etoh_e    1        4hba_e   4.5e-05
glyc_e    1
pi_e      0.0764
so4_e     0.00767
fe3_e     0.0004
None
----
Max ICL flux variability: 0.1959

Compute FVA for in vivo model

The hypoxic growth condition is slower with a value of 0.039



In [98]:

    
model_iEK.medium = inVivo_conditions
solution = model_iEK.optimize()
pfba_hypox_solution = cobra.flux_analysis.pfba(model_iEK)

print "Optimal growth: ", pfba_hypox_solution.fluxes["biomass"]
print "ICL FBA flux:", solution.fluxes["ICL"]
print "ICL pFBA flux:", pfba_hypox_solution.fluxes["ICL"]
print "Total model flux:", pfba_hypox_solution.fluxes.sum()

fva_hypoxia = cobra.flux_analysis.flux_variability_analysis(model_iEK,
                                                            fraction_of_optimum=.95, 
                                                            solver=None)
fva_hypox = pd.DataFrame.from_dict(fva_hypoxia).T.round(5)
print "----"
print model_iEK.summary()
print "----"
print "Max ICL flux variability:", fva_hypox["ICL"]["maximum"]









    



Optimal growth:  0.0390441361382
ICL FBA flux: 0.029929360645
ICL pFBA flux: 0.029929360645
Total model flux: 17.3705449626
----
IN FLUXES             OUT FLUXES         OBJECTIVES
--------------------  -----------------  ---------------
h_e        12.7       co2_e     7.56     biomass  0.0371
asp__L_e    5         nh4_e     6.13
no3_e       1.5       ala__B_e  4.14
asn__L_e    1.06      succ_e    2.75
ala__L_e    1         val__L_e  0.451
glu__L_e    1         coa_e     0.081
o2_e        0.5       ac_e      0.00446
urea_e      0.1       4hba_e    3e-05
gln__L_e    0.0797
octscoa_e   0.0708
pi_e        0.048
nodcoa_e    0.0109
so4_e       0.00431
fe3_e       0.000263
None
----
Max ICL flux variability: 0.3024

Compare model simulations



In [99]:

    
reacts_of_interest = ['EX_succ', 'EX_ac', 'EX_nh4', 'ICL', '2MCS', 'ICDHy','KGD2', 'ENO', 'PEPCK_re', 'ATPS4r']

for react in reacts_of_interest:
    print react
    print "pFBA:", pfba_norm_solution.fluxes[react] ,pfba_hypox_solution.fluxes[react]
    print "FVA max:", fva_norm[react]["maximum"], fva_hypox[react]["maximum"]
    print "FVA min:", fva_norm[react]["minimum"], fva_hypox[react]["minimum"]
    print "-----"









    



EX_succ
pFBA: 1.9734071621 2.75967481006
FVA max: 2.07476 5.34676
FVA min: 1.39097 2.12661
-----
EX_ac
pFBA: 3.06703164376e-09 0.00469267567831
FVA max: 0.24255 0.19729
FVA min: 0.0 0.0
-----
EX_nh4
pFBA: 1.58347412574 5.7046432925
FVA max: 1.6043 8.55932
FVA min: 1.27904 4.91902
-----
ICL
pFBA: 0.0 0.029929360645
FVA max: 0.1959 0.3024
FVA min: 0.0 0.0
-----
2MCS
pFBA: 0.0 0.0
FVA max: 0.07923 0.19617
FVA min: 0.0 0.0
-----
ICDHy
pFBA: 1.98461721563 0.794742945001
FVA max: 2.02343 2.09008
FVA min: 1.3164 0.47001
-----
KGD2
pFBA: -1.95611556607 -1.85988277615
FVA max: -1.25949 -1.53189
FVA min: -1.99636 -3.15196
-----
ENO
pFBA: 0.651929080837 -0.196150178411
FVA max: 0.71368 -0.18634
FVA min: -0.48628 -0.35336
-----
PEPCK_re
pFBA: -0.0557797292758 0.207607133542
FVA max: 0.64592 4.91173
FVA min: -1.03903 0.03444
-----
ATPS4r
pFBA: 6.61484813403 4.59009968068
FVA max: 8.75653 5.07951
FVA min: 6.29049 3.13302
-----

Compare sampled spaces

in vitro conditions



In [111]:

    
model_iEK.medium = inVitro_drugTesting
pfba_solution = cobra.flux_analysis.pfba(model_iEK)
opt_value = pfba_solution["biomass"]

# -- constrain solution space to 0.95 of optimum --
with model_iEK:
    print model_iEK.reactions.get_by_id("biomass").lower_bound
    model_iEK.reactions.get_by_id("biomass").lower_bound = opt_value*.95
    print model_iEK.reactions.get_by_id("biomass").lower_bound
    %time inVitro_samples = sample(model_iEK, 10000, processes=4)
    print inVitro_samples.head()









    



0
0.0564029344167
CPU times: user 45.8 s, sys: 1min 6s, total: 1min 52s
Wall time: 1min 7s
   2AGPEAT160  2AGPEAT180  5DGLCNR   AACPS10   AACPS11    AACPS3      ABTA  \
0    0.000003    0.000006      0.0  0.003467  0.000465  0.001119  0.000006   
1    0.000004    0.000009      0.0  0.003474  0.000465  0.001163  0.000007   
2    0.000128    0.000006      0.0  0.003468  0.000534  0.001101  0.000005   
3    0.000128    0.000006      0.0  0.003467  0.000534  0.001490  0.000004   
4    0.000128    0.000006      0.0  0.003467  0.000534  0.002004  0.000004   

    ACACT1r    ACALDi  ACBIPGT     ...        ACYP_2   GTPDPDP   PPGPPDP  \
0  0.000181  1.016693      0.0     ...      0.000006  0.000002  0.000002   
1  0.000181  1.028069      0.0     ...      0.000007  0.000002  0.000002   
2  0.000181  1.023392      0.0     ...      0.000027  0.000002  0.000002   
3  0.000181  1.022842      0.0     ...      0.001156  0.000001  0.000001   
4  0.000181  1.030049      0.0     ...      0.001157  0.000001  0.000001   

      AKGDH  EX_nodcoa  NODCOAt  EX_octscoa  OCTSCOAt      ATPM      VPAMTr  
0  0.000024        0.0      0.0         0.0       0.0  3.152459 -781.283369  
1  0.000027        0.0      0.0         0.0       0.0  3.152189 -728.239791  
2  0.000128        0.0      0.0         0.0       0.0  3.154124 -663.340169  
3  0.000007        0.0      0.0         0.0       0.0  3.153643 -659.214218  
4  0.000222        0.0      0.0         0.0       0.0  3.153657 -679.151701  

[5 rows x 1211 columns]

in vivo conditions



In [112]:

    
model_iEK.medium = inVivo_conditions
pfba_solution = cobra.flux_analysis.pfba(model_iEK)
opt_value = pfba_solution["biomass"]

# -- constrain solution space to 0.95 of optimum --
with model_iEK:
    print model_iEK.reactions.get_by_id("biomass").lower_bound
    model_iEK.reactions.get_by_id("biomass").lower_bound = opt_value*.95
    print model_iEK.reactions.get_by_id("biomass").lower_bound
    %time inVivo_samples = sample(model_iEK, 10000, processes=4)
    print inVivo_samples.head()









    



0
0.0370919293313
CPU times: user 41 s, sys: 59.8 s, total: 1min 40s
Wall time: 1min 9s
   2AGPEAT160  2AGPEAT180  5DGLCNR  AACPS10   AACPS11    AACPS3      ABTA  \
0    0.000002    0.000005      0.0  0.00228  0.000300  0.000702  0.000005   
1    0.000002    0.000005      0.0  0.00228  0.000300  0.000701  0.000005   
2    0.000002    0.000005      0.0  0.00228  0.000301  0.000705  0.000005   
3    0.000002    0.000005      0.0  0.00228  0.000298  0.000726  0.000005   
4    0.000002    0.000005      0.0  0.00228  0.000302  0.000723  0.000011   

    ACACT1r    ACALDi       ACBIPGT     ...        ACYP_2   GTPDPDP   PPGPPDP  \
0  0.000119  0.000007  1.274845e-04     ...      0.000005  0.000002  0.000002   
1  0.000119  0.000007  1.272570e-04     ...      0.000005  0.000002  0.000002   
2  0.000119  0.000007  1.216423e-04     ...      0.000005  0.000002  0.000002   
3  0.000119  0.000007  9.483731e-08     ...      0.000035  0.000002  0.000002   
4  0.000119  0.000007  5.422707e-07     ...      0.000035  0.000002  0.000002   

      AKGDH  EX_nodcoa   NODCOAt  EX_octscoa  OCTSCOAt      ATPM      VPAMTr  
0  0.000026  -0.010953  0.010953   -0.070843  0.070843  3.150010 -371.888381  
1  0.000100  -0.010956  0.010956   -0.070760  0.070760  3.150009 -370.420790  
2  0.000274  -0.010933  0.010933   -0.070779  0.070779  3.150086 -369.996482  
3  0.000275  -0.010977  0.010977   -0.070802  0.070802  3.150063 -388.998057  
4  0.000139  -0.010976  0.010976   -0.070827  0.070827  3.150062 -391.929905  

[5 rows x 1211 columns]

Comparison Plots



In [113]:

    
%matplotlib inline
plt.style.use('seaborn-white')



In [114]:

    
for react in reacts_of_interest:
    inVitro_samples[react].hist(bins=80)
    inVivo_samples[react].hist(bins=80)
    plt.title(react)
    plt.ylabel("sample frequency")
    plt.xlabel("flux")
    plt.show()

Box plots



In [115]:

    
def plot_box_plot(samples1, samples2, name1, name2, rxn, title):
    box_1 = samples1[rxn].copy()
    box_1.name = name1
    box_2 = samples2[rxn].copy()
    box_2.name = name2
    box_df = pd.concat([box_1, box_2], axis=1)
    color = dict(boxes='k', whiskers='k', medians='r', caps='Gray')
    box_df.plot.box(sym='', title=title, color=color)
    plt.ylabel("Flux")
    # save_fig_name = rxn+"_sampling_boxplot.svg"
    # plt.savefig(save_fig_name)



In [116]:

    
for react in reacts_of_interest:
    plot_box_plot(inVitro_samples, inVivo_samples, 
               "in vitro media", "in vivo media", 
               react, 
               react)

Output to excel sheet.

Prepare iEK1020 reaction and metabolite sheets. The gene sheet with references was manually generated.



In [117]:

    
model_iEK = cobra.io.load_json_model("iEK1020.json")
print model_iEK.medium
react_dict = []
for react in model_iEK.reactions:
    react_dict.append({'Reaction ID': str(react.id), 
                       'Reaction Name': str(react.name), 
                       'Subsystem': str(react.subsystem), 
                       'Reaction Formula': str(react.reaction),
                       'Lower Bound': str(react.lower_bound),
                       'Upper Bound': str(react.upper_bound),
                       'Gene Reaction Rule': str(react.gene_reaction_rule)
                 })
react_df = pd.DataFrame(react_dict)
react_df = react_df[["Reaction ID", 'Reaction Name', 'Subsystem', 'Reaction Formula', 
                     'Lower Bound', 'Upper Bound', 'Gene Reaction Rule' ]]

metab_dict = []
for metab in model_iEK.metabolites:
    metab_dict.append({'Metabolite ID': str(metab.id), 
                       'Metabolite Name': str(metab.name), 
                       'Metabolite Formula': str(metab.formula), 
                 })
metab_df = pd.DataFrame(metab_dict)
metab_df = metab_df[["Metabolite ID", 'Metabolite Name', 'Metabolite Formula']]









    



{u'EX_k': 1000, u'EX_cu2': 1000.0, u'EX_fe2': 1000.0, u'EX_ca2': 1000.0, u'EX_glu_L': 1.0, u'EX_nh4': 20, u'EX_ac': 10, u'EX_co2': 1000, u'EX_fe3': 1000.0, u'EX_cl': 1000.0, u'EX_h': 1000, u'EX_btn': 1, u'EX_mobd': 1000, u'EX_na1': 1000, u'EX_pi': 1000, u'EX_h2co3': 1000, u'EX_cit': 1.0, u'EX_o2': 20, u'EX_so4': 1000, u'EX_pydxn': 1, u'EX_mg2': 1000.0}



In [118]:

    
model_iEK.id









    Out[118]:





u'iEK1020'

Prepare Griffin Essentiality sheet.



In [119]:

    
gene_essential_dic = []
for gene in model_iEK.genes:
    g = str(gene)
    if g in FN_dic.keys():
        ko_growth_rate = FN_dic[g][0]
        griff_p_val = FN_dic[g][1]
        error_type = "False Negative"
    if g in FP_dic.keys():
        ko_growth_rate = FP_dic[g][0]
        griff_p_val = FP_dic[g][1]
        error_type = "False Positive"
    if g in TN_dic.keys():
        ko_growth_rate = TN_dic[g][0]
        griff_p_val = TN_dic[g][1]
        error_type = "True Negative"
    if g in TP_dic.keys():
        ko_growth_rate = TP_dic[g][0]
        griff_p_val = TP_dic[g][1]
        error_type = "True Positive"
    gene_essential_dic.append({"Gene": g, 
                               "KO Growth Rate": ko_growth_rate, 
                               "Griffin Essentiality P-Value": griff_p_val,
                               "Error Type": error_type
                              })
    
gene_essen_df = pd.DataFrame(gene_essential_dic)
gene_essen_df = gene_essen_df[["Gene", "KO Growth Rate", "Griffin Essentiality P-Value", "Error Type"]]
gene_essen_df.head()









    Out[119]:







  
    
      
      Gene
      KO Growth Rate
      Griffin Essentiality P-Value
      Error Type
    
  
  
    
      0
      Rv1295
      -1.461829e-16
      0.00001
      True Positive
    
    
      1
      Rv2573
      1.071122e-14
      0.71349
      False Positive
    
    
      2
      Rv3815c
      -9.302549e-17
      0.97648
      False Positive
    
    
      3
      Rv3290c
      3.222583e-01
      0.96172
      True Negative
    
    
      4
      Rv2589
      3.222583e-01
      0.93677
      True Negative

Prepare media conditions as JSON files.



In [120]:

    
media_conditions = [model_iEK.medium, griffin_media, inVitro_drugTesting, inVivo_conditions]
media_df = pd.DataFrame(media_conditions)
media_df = media_df.transpose()
media_df.columns = ["Middlebrook m7H10 (Acetate C-source)", 
                    "Griffin Media (Gene Essentiality)", 
                    "Lowenstein-Jensen Media (Drug-Testing)", 
                    "Physiological Media (in vivo modeling)"]
media_df.head()









    Out[120]:







  
    
      
      Middlebrook m7H10 (Acetate C-source)
      Griffin Media (Gene Essentiality)
      Lowenstein-Jensen Media (Drug-Testing)
      Physiological Media (in vivo modeling)
    
  
  
    
      EX_ac
      10.0
      NaN
      NaN
      NaN
    
    
      EX_ala_L
      NaN
      NaN
      NaN
      1.0
    
    
      EX_asn_L
      NaN
      1.0
      1.0
      5.0
    
    
      EX_asp_L
      NaN
      NaN
      NaN
      5.0
    
    
      EX_btn
      1.0
      NaN
      NaN
      NaN

Write everything to excel file.



In [121]:

    
writer = pd.ExcelWriter('iEK1020_supplementary.xlsx')
react_df.to_excel(writer, sheet_name='Reactions', index=False)
metab_df.to_excel(writer, sheet_name='Metabolites', index=False)
gene_essen_df.to_excel(writer, sheet_name="Gene Essentiality", index=False)
media_df.to_excel(writer, sheet_name="Media Conditions", index=False)
writer.save()

	Locus	Gene Name	Totals TA's	Total Insertions	Unoccupied Sites	Unoccupied Sites.1	p value	(Sassetti et al 2003)
0	Rv0001	dnaA	32	1	4.365	31	0	essential
1	Rv0002	dnaN	31	0	4.319	31	0	no-data
2	Rv0003	recF	35	11	4.495	7	0.13067	non-essential
3	Rv0004	-	8	2	2.353	6	0.06177	non-essential
4	Rv0005	gyrB	43	3	4.794	40	0	essential

	Gene	KO Growth Rate	Griffin Essentiality P-Value	Error Type
0	Rv1295	-1.461829e-16	0.00001	True Positive
1	Rv2573	1.071122e-14	0.71349	False Positive
2	Rv3815c	-9.302549e-17	0.97648	False Positive
3	Rv3290c	3.222583e-01	0.96172	True Negative
4	Rv2589	3.222583e-01	0.93677	True Negative

	Middlebrook m7H10 (Acetate C-source)	Griffin Media (Gene Essentiality)	Lowenstein-Jensen Media (Drug-Testing)	Physiological Media (in vivo modeling)
EX_ac	10.0	NaN	NaN	NaN
EX_ala_L	NaN	NaN	NaN	1.0
EX_asn_L	NaN	1.0	1.0	5.0
EX_asp_L	NaN	NaN	NaN	5.0
EX_btn	1.0	NaN	NaN	NaN