Engineering SKOs (in silico)


In [1]:
import cobra
from cobra.io.mat import load_matlab_model
import pandas as pd
from cobra.io.sbml import create_cobra_model_from_sbml_file
from cobra.manipulation.delete import delete_model_genes, undelete_model_genes
from cobra.flux_analysis.variability import flux_variability_analysis

import os

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

In [2]:
# ROOT_DIR - root directory
ROOT_DIR = os.path.split(os.getcwd())[0]

#GEM Model file - can be a .xml or .mat file
EC_model =  ROOT_DIR + '/model_files/iJO1366.xml'
Engineered_model = ROOT_DIR + '/model_files/iJO1366_ENGINEERED.xml'

# Data dir
EC_data = ROOT_DIR + '/data/'

# MODEL_ORGANISM - the BRENDA organism string
EC_MODEL_ORGANISM = 'Escherichia coli'

Load cobra model and flux constraints from WT data (using phenotypic data)


In [3]:
model = create_cobra_model_from_sbml_file(Engineered_model)
flux_constraints = pd.read_csv(EC_data+'SKO_analysis_flux_bounds.csv')

In [4]:
for r in model.reactions:
    model.reactions.get_by_id(r.id).upper_bound = flux_constraints[flux_constraints.reaction == str(r)].upper.values[0]
    model.reactions.get_by_id(r.id).lower_bound = flux_constraints[flux_constraints.reaction == str(r)].lower.values[0]
model.reactions.Ec_biomass_iJO1366_core_53p95M.objective_coefficient = 1
model.reactions.Ec_biomass_iJO1366_core_53p95M.upper_bound = 1000
model.reactions.Ec_biomass_iJO1366_core_53p95M.lower_bound = 0  

for r in model.reactions:
    if model.reactions.get_by_id(r.id).lower_bound > 0:
        model.reactions.get_by_id(r.id).lower_bound = 0

check model uptake rates


In [7]:
for r in model.reactions:
    if r.startswith('EX_'):
       print r, r.lower_bound


EX_12ppd__R_e 0.0
EX_12ppd__S_e 0.0
EX_14glucan_e 0.0
EX_15dap_e 0.0
EX_23camp_e 0.0
EX_23ccmp_e 0.0
EX_23cgmp_e 0.0
EX_23cump_e 0.0
EX_23dappa_e 0.0
EX_26dap__M_e 0.0
EX_2ddglcn_e 0.0
EX_34dhpac_e 0.0
EX_3amp_e 0.0
EX_3cmp_e 0.0
EX_3gmp_e 0.0
EX_3hcinnm_e 0.0
EX_3hpp_e 0.0
EX_3hpppn_e 0.0
EX_3ump_e 0.0
EX_4abut_e 0.0
EX_4hoxpacd_e 0.0
EX_5dglcn_e 0.0
EX_5mtr_e 0.0
EX_LalaDglu_e 0.0
EX_LalaDgluMdap_e 0.0
EX_LalaDgluMdapDala_e 0.0
EX_LalaLglu_e 0.0
EX_ac_e 0
EX_acac_e 0.0
EX_acald_e 0.0
EX_acgal_e 0.0
EX_acgal1p_e 0.0
EX_acgam_e 0.0
EX_acgam1p_e 0.0
EX_acmana_e 0.0
EX_acmum_e 0.0
EX_acnam_e 0.0
EX_acolipa_e 0.0
EX_acser_e 0.0
EX_ade_e 0.0
EX_adn_e 0.0
EX_adocbl_e 0.0
EX_ag_e 0.0
EX_agm_e 0.0
EX_akg_e 0.0
EX_ala__B_e 0.0
EX_ala__D_e 0.0
EX_ala__L_e -0.0211937123186
EX_alaala_e 0.0
EX_all__D_e 0.0
EX_alltn_e 0.0
EX_amp_e 0.0
EX_anhgm_e 0.0
EX_arab__L_e 0.0
EX_arbt_e 0.0
EX_arbtn_e 0.0
EX_arbtn__fe3_e 0.0
EX_arg__L_e -0.752706166197
EX_ascb__L_e 0.0
EX_asn__L_e -0.374034352121
EX_aso3_e 0.0
EX_asp__L_e -0.0705781708342
EX_btn_e 0.0
EX_but_e 0.0
EX_butso3_e 0.0
EX_ca2_e -1000.0
EX_cbi_e 0.0
EX_cbl1_e 0.0
EX_cd2_e 0.0
EX_cgly_e 0.0
EX_chol_e 0.0
EX_chtbs_e 0.0
EX_cit_e 0
EX_cl_e -1000.0
EX_cm_e 0.0
EX_cmp_e 0.0
EX_co2_e 0.0
EX_cobalt2_e -1000.0
EX_colipa_e 0.0
EX_colipap_e 0.0
EX_cpgn_e 0.0
EX_cpgn__un_e 0.0
EX_crn_e 0.0
EX_crn__D_e 0.0
EX_csn_e 0.0
EX_cu_e 0.0
EX_cu2_e -1000.0
EX_cyan_e 0.0
EX_cynt_e 0.0
EX_cys__D_e 0.0
EX_cys__L_e -0.000288477903844
EX_cytd_e 0.0
EX_dad__2_e 0.0
EX_damp_e 0.0
EX_dca_e 0.0
EX_dcmp_e 0.0
EX_dcyt_e 0.0
EX_ddca_e 0.0
EX_dgmp_e 0.0
EX_dgsn_e 0.0
EX_dha_e 0.0
EX_dimp_e 0.0
EX_din_e 0.0
EX_dms_e 0.0
EX_dmso_e 0.0
EX_dopa_e 0.0
EX_doxrbcn_e 0.0
EX_dtmp_e 0.0
EX_dump_e 0.0
EX_duri_e 0.0
EX_eca4colipa_e 0.0
EX_enlipa_e 0.0
EX_enter_e 0.0
EX_etha_e 0.0
EX_ethso3_e 0.0
EX_etoh_e 0.0
EX_f6p_e 0.0
EX_fald_e 0.0
EX_fe2_e -1000.0
EX_fe3_e -1000.0
EX_fe3dcit_e 0.0
EX_fe3dhbzs_e 0.0
EX_fe3hox_e 0.0
EX_fe3hox__un_e 0.0
EX_fecrm_e 0.0
EX_fecrm__un_e 0.0
EX_feenter_e 0.0
EX_feoxam_e 0.0
EX_feoxam__un_e 0.0
EX_for_e 0
EX_fru_e 0.0
EX_frulys_e 0.0
EX_fruur_e 0.0
EX_fuc__L_e 0.0
EX_fum_e 0.0
EX_fusa_e 0.0
EX_g1p_e 0.0
EX_g3pc_e 0.0
EX_g3pe_e 0.0
EX_g3pg_e 0.0
EX_g3pi_e 0.0
EX_g3ps_e 0.0
EX_g6p_e 0.0
EX_gal_e 0.0
EX_gal__bD_e 0.0
EX_gal1p_e 0.0
EX_galct__D_e 0.0
EX_galctn__D_e 0.0
EX_galctn__L_e 0.0
EX_galt_e 0.0
EX_galur_e 0.0
EX_gam_e 0.0
EX_gam6p_e 0.0
EX_gbbtn_e 0.0
EX_gdp_e 0.0
EX_glc_e -4.64586890688
EX_glcn_e 0.0
EX_glcr_e 0.0
EX_glcur_e 0.0
EX_glcur1p_e 0.0
EX_gln__L_e -0.119488999539
EX_glu__L_e -0.235547629816
EX_gly_e -0.0893814033054
EX_glyald_e 0.0
EX_glyb_e 0.0
EX_glyc_e 0.0
EX_glyc__R_e 0.0
EX_glyc2p_e 0.0
EX_glyc3p_e 0.0
EX_glyclt_e 0.0
EX_gmp_e 0.0
EX_gsn_e 0.0
EX_gthox_e 0.0
EX_gthrd_e 0.0
EX_gtp_e 0.0
EX_gua_e 0.0
EX_h_e -1000.0
EX_h2_e 0.0
EX_h2o_e -1000.0
EX_h2o2_e 0.0
EX_h2s_e 0.0
EX_hacolipa_e 0.0
EX_halipa_e 0.0
EX_hdca_e 0.0
EX_hdcea_e 0.0
EX_hg2_e 0.0
EX_his__L_e -0.0456700361648
EX_hom__L_e 0.0
EX_hxa_e 0.0
EX_hxan_e 0.0
EX_idon__L_e 0.0
EX_ile__L_e -0.147153247666
EX_imp_e 0.0
EX_indole_e 0.0
EX_inost_e 0.0
EX_ins_e 0.0
EX_isetac_e 0.0
EX_k_e -1000.0
EX_kdo2lipid4_e 0.0
EX_lac__D_e 0
EX_lac__L_e 0.0
EX_lcts_e 0.0
EX_leu__L_e -0.0875119631295
EX_lipa_e 0.0
EX_lipa_cold_e 0.0
EX_lipoate_e 0.0
EX_lys__L_e 0.0
EX_lyx__L_e 0.0
EX_mal__D_e 0.0
EX_mal__L_e -0.0104201489675
EX_malt_e 0.0
EX_malthx_e 0.0
EX_maltpt_e 0.0
EX_malttr_e 0.0
EX_maltttr_e 0.0
EX_man_e 0.0
EX_man6p_e 0.0
EX_manglyc_e 0.0
EX_melib_e 0.0
EX_meoh_e 0.0
EX_met__D_e 0.0
EX_met__L_e -0.052760399503
EX_metsox__R__L_e 0.0
EX_metsox__S__L_e 0.0
EX_mg2_e -1000.0
EX_mincyc_e 0.0
EX_minohp_e 0.0
EX_mmet_e 0.0
EX_mn2_e -1000.0
EX_mnl_e 0.0
EX_mobd_e -1000.0
EX_mso3_e 0.0
EX_n2o_e 0.0
EX_na1_e -1000.0
EX_nac_e 0.0
EX_nh4_e -1000.0
EX_ni2_e -1000.0
EX_nmn_e 0.0
EX_no_e 0.0
EX_no2_e 0.0
EX_no3_e 0.0
EX_novbcn_e 0.0
EX_o16a4colipa_e 0.0
EX_o2_e -1000.0
EX_o2s_e 0.0
EX_ocdca_e 0.0
EX_ocdcea_e 0.0
EX_octa_e 0.0
EX_orn_e 0.0
EX_orot_e 0.0
EX_pacald_e 0.0
EX_peamn_e 0.0
EX_phe__L_e -0.0947976437259
EX_pheme_e 0.0
EX_pi_e -1000.0
EX_pnto__R_e 0.0
EX_ppa_e 0.0
EX_ppal_e 0.0
EX_pppn_e 0.0
EX_ppt_e 0.0
EX_pro__L_e -0.0821758238848
EX_progly_e 0.0
EX_psclys_e 0.0
EX_pser__L_e 0.0
EX_ptrc_e 0.0
EX_pydam_e 0.0
EX_pydx_e 0.0
EX_pydxn_e 0.0
EX_pyr_e -0.668904889476
EX_quin_e 0.0
EX_r5p_e 0.0
EX_rfamp_e 0.0
EX_rib__D_e 0.0
EX_rmn_e 0.0
EX_sbt__D_e 0.0
EX_sel_e -1000.0
EX_ser__D_e 0.0
EX_ser__L_e -1.50541233239
EX_skm_e 0.0
EX_slnt_e -1000.0
EX_so2_e 0.0
EX_so3_e 0.0
EX_so4_e -1000.0
EX_spmd_e 0.0
EX_succ_e 0
EX_sucr_e 0.0
EX_sulfac_e 0.0
EX_tartr__D_e 0.0
EX_tartr__L_e 0.0
EX_taur_e 0.0
EX_tcynt_e 0.0
EX_thm_e 0.0
EX_thr__L_e -0.119237910745
EX_thrp_e 0.0
EX_thym_e 0.0
EX_thymd_e 0.0
EX_tma_e 0.0
EX_tmao_e 0.0
EX_tre_e 0.0
EX_trp__L_e -0.0164850996344
EX_tsul_e 0.0
EX_ttdca_e 0.0
EX_ttdcea_e 0.0
EX_ttrcyc_e 0.0
EX_tungs_e -1000.0
EX_tym_e 0.0
EX_tyr__L_e -0.0316819195923
EX_tyrp_e 0.0
EX_uacgam_e 0.0
EX_udpacgal_e 0.0
EX_udpg_e 0.0
EX_udpgal_e 0.0
EX_udpglcur_e 0.0
EX_ump_e 0.0
EX_ura_e 0.0
EX_urea_e 0.0
EX_uri_e 0.0
EX_val__L_e -0.0648393472129
EX_xan_e 0.0
EX_xmp_e 0.0
EX_xtsn_e 0.0
EX_xyl__D_e 0.0
EX_xylu__L_e 0.0
EX_zn2_e -1000.0
EX_grdp_e 0.0
EX_atp_e 0.0
EX_ipdp_e 0.0
EX_adp_e 0.0
EX_coa_e 0.0
EX_ip_e 0.0
EX_mev_e 0.0
EX_aacoa_e 0.0
EX_nadp_e 0.0
EX_bis_e 0.0
EX_2mecdp_e 0.0
EX_hmgcoa_e 0.0
EX_4c2me_e 0.0
EX_lim_e 0.0
EX_DMPP_e 0.0
EX_ipoh_e 0.0
EX_GGDP_e 0.0
EX_5dpmev_e 0.0
EX_frdp_e 0.0
EX_h2mb4p_e 0.0
EX_accoa_e 0.0
EX_5pmev_e 0.0
EX_nadph_e 0.0
EX_dxyl5p_e 0.0
EX_nad_e 0.0
EX_nadh_e 0.0
EX_acon__C_e 0
EX_2me4p_e 0.0
EX_icit_e 0.0

solve cobra model


In [8]:
model.optimize(solver='gurobi')


Out[8]:
<Solution 0.752657708038 at 0x7f8b97ea2ad0>

Flux variability analysis (on WT)


In [9]:
# Get wt FVA
flux_var_wt = flux_variability_analysis(model)
flux_var_wt = pd.DataFrame(flux_var_wt).transpose()
flux_var_wt.rename(columns={'minimum': 'WT_min', 'maximum':'WT_max'}, inplace=True)
wt_grs = []
model.optimize()
gr = model.solution.f
wt_grs.append({'gene':'WT','gr':gr})

In [10]:
wt_grs


Out[10]:
[{'gene': 'WT', 'gr': 0.7526577080384427}]

run SKOs and FVA


In [ ]:
name ='DH1_no_secretion'
min_flux_vars = flux_var_wt
max_flux_vars = flux_var_wt
for g in model.genes:
    delete_model_genes(model, g)
    model.optimize()
    gr = model.solution.f
    wt_grs.append({'gene':g.id,'gr':gr})
    if gr > 0.01:
        flux_var_ko = flux_variability_analysis(model)
        flux_var_ko = pd.DataFrame(flux_var_ko).transpose()
        min_flux_vars = pd.merge(min_flux_vars, pd.DataFrame(flux_var_ko.minimum), left_index=True, right_index=True)
        min_flux_vars.rename(columns={'minimum': g.id}, inplace=True)
        max_flux_vars = pd.merge(max_flux_vars, pd.DataFrame(flux_var_ko.maximum), left_index=True, right_index=True)
        max_flux_vars.rename(columns={'maximum': g.id}, inplace=True)
        #print g, model.solution.f
    undelete_model_genes(model)
min_flux_vars.to_csv(name+'_min_fva.csv')
max_flux_vars.to_csv(name+'_max_fva.csv')
pd.DataFrame(wt_grs).to_csv(name+'_grs.csv')

In [12]:
min_flux_vars[:10]


Out[12]:
WT_max WT_min b2215 b1377 b0241 b0929 b4034 b4033 b4035 b4032 b4036 b4213 b2835 b2836 b3553 b0446 b1134 b1009 b0347 b3580
12DGR120tipp -0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 ...
12DGR140tipp -0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 ...
12DGR141tipp 4.487077e-12 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 ...
12DGR160tipp 2.181721e-11 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 ...
12DGR161tipp 1.289635e-11 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 ...
12DGR180tipp 1.570299e-11 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 ...
12DGR181tipp 1.600853e-11 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 ...
12PPDRtex -0.000000e+00 -1.477929e-12 -1.477929e-12 -1.477929e-12 -1.477929e-12 -1.477929e-12 -4.206413e-12 -4.206413e-12 -4.206413e-12 -4.206413e-12 -9.094947e-13 -1.250555e-12 9.094947e-13 -2.273737e-12 -5.684342e-13 -1.477929e-12 -2.387424e-12 -9.438376e-13 -3.296918e-12 -1.250555e-12 ...
12PPDRtpp -0.000000e+00 -6.821210e-13 -6.821210e-13 -6.821210e-13 -6.821210e-13 -6.821210e-13 4.320100e-12 4.320100e-12 4.320100e-12 4.320100e-12 -1.477929e-12 -2.160050e-12 2.273737e-12 -2.614797e-12 -1.023182e-12 -6.821210e-13 -1.250555e-12 7.958079e-13 4.547474e-13 -3.865352e-12 ...
12PPDStex -0.000000e+00 -1.364242e-12 -1.364242e-12 -1.364242e-12 -1.364242e-12 -1.364242e-12 -1.136868e-13 -1.136868e-13 -1.136868e-13 -1.136868e-13 -1.023182e-12 -7.958079e-13 3.224384e-12 -3.296918e-12 -1.250555e-12 -1.364242e-12 -1.818989e-12 0.000000e+00 -1.818989e-12 -2.273737e-13 ...

10 rows × 1202 columns


In [17]:
DF_GR = pd.DataFrame(wt_grs)

In [14]:
m=model
tmp = []
for r in m.reactions:
    if m.reactions.get_by_id(r.id).reversibility == False:
        for mets in m.reactions.get_by_id(r.id).reactants:
            if 'nadph_c' in str(mets):
                tmp.append({'reaction':r.id, 'name':m.reactions.get_by_id(r.id).name, 'stoich':m.reactions.get_by_id(r.id).reaction}) 
DF_NADPH_consummers = pd.DataFrame(tmp)

nadph_producers = []
for r in m.reactions:
    if m.reactions.get_by_id(r.id).reversibility == False:
        for mets in m.reactions.get_by_id(r.id).products:
            if 'nadph_c' in str(mets):
                nadph_producers.append({'reaction':r.id, 'name':m.reactions.get_by_id(r.id).name, 'stoich':m.reactions.get_by_id(r.id).reaction}) 
    else:
        for mets in m.reactions.get_by_id(r.id).metabolites:
            if 'nadph_c' in str(mets):
                nadph_producers.append({'reaction':r.id, 'name':m.reactions.get_by_id(r.id).name, 'stoich':m.reactions.get_by_id(r.id).reaction})
                
DF_NADPH_producers = pd.DataFrame(nadph_producers)

In [166]:
read_to_struct=[]
for i in min_flux_vars.columns:
    read_to_struct.append({'gene':i, 'min_flux':np.sum(min_flux_vars[min_flux_vars.index.isin(DF_NADPH_producers.reaction.tolist())][i]),'max_flux':np.sum(max_flux_vars[max_flux_vars.index.isin(DF_NADPH_producers.reaction.tolist())][i])}) 
    
DF_flux_nadph_producers = pd.DataFrame(read_to_struct)
DF_flux_nadph_producers[0:3]


Out[166]:
gene max_flux min_flux
0 WT_max 5.463324 5.463324
1 WT_min 4.613313 4.613313
2 b2215 5.463324 4.613313

3 rows × 3 columns


In [167]:
read_to_struct=[]
for i in DF_flux_nadph_producers.index:
    rxn_list = []
    if DF_flux_nadph_producers.gene[i] == 'WT_max':
        read_to_struct.append({'diff_gr':np.nan,'gene':DF_flux_nadph_producers.gene[i], 'max_flux':5.463324,'rxn':np.nan})
    elif DF_flux_nadph_producers.max_flux[i]-5.463324 > 1 and DF_flux_nadph_producers.max_flux[i]-3.669478 < 20:
        for r in model.genes.get_by_id(str(DF_flux_nadph_producers.gene[i])).reactions:
            rxn_list.append(r)
        gr = float(DF_GR[DF_GR.gene == str(DF_flux_nadph_producers.gene[i])].gr.values[0])-0.752658   
        read_to_struct.append({'diff_gr':gr,'rxn':rxn_list,'gene':DF_flux_nadph_producers.gene[i], 'max_flux':DF_flux_nadph_producers.max_flux[i]})
DF_max_nadph_prod = pd.DataFrame(read_to_struct)

In [168]:
read_to_struct=[]
for i in DF_flux_nadph_producers.index:
    rxn_list = []
    if DF_flux_nadph_producers.min_flux[i]-4.613313 > 0.1 and abs(DF_flux_nadph_producers.max_flux[i])-0.255485 < 20:
        if DF_flux_nadph_producers.gene[i] == 'WT_max':
            read_to_struct.append({'diff_gr':np.nan,'gene':DF_flux_nadph_producers.gene[i], 'min_flux':4.613313,'rxn':np.nan})
        elif DF_flux_nadph_producers.gene[i] == 'WT_min':
            pass
        else:    
            for r in model.genes.get_by_id(str(DF_flux_nadph_producers.gene[i])).reactions:
                rxn_list.append(r)
            # gr is the growth rate, here I take the difference from WT (0.752658)
            gr = float(DF_GR[DF_GR.gene == str(DF_flux_nadph_producers.gene[i])].gr.values[0])-0.752658
            read_to_struct.append({'diff_gr':gr,'rxn':rxn_list,'gene':DF_flux_nadph_producers.gene[i], 'min_flux':DF_flux_nadph_producers.min_flux[i]})
DF_min_nadph_prod = pd.DataFrame(read_to_struct)

Max NADPH producing reations


In [169]:
DF_max_nadph_prod


Out[169]:
diff_gr gene max_flux rxn
0 NaN WT_max 5.463324 NaN
1 -0.015858 b0727 11.051913 [AKGDH]
2 -0.015858 b0726 11.051913 [AKGDH]
3 -0.015762 b2551 6.717538 [GHMT2r, THRA2i, ALATA_D2, ALATA_L2, THRAi, TH...
4 -0.053434 b1779 12.956576 [E4PD, GAPD]
5 -0.053434 b2779 16.716725 [ENO]
6 -0.013992 b0529 6.576255 [MTHFC, MTHFD]
7 -0.053434 b2926 12.956576 [PGK]
8 -0.013142 b0729 9.093797 [SUCOAS]
9 -0.013142 b0728 9.093797 [SUCOAS]
10 -0.016575 b3919 8.340062 [TPI]

11 rows × 4 columns

Min NADPH producing reactions


In [170]:
DF_min_nadph_prod


Out[170]:
diff_gr gene min_flux rxn
0 NaN WT_max 4.613313 NaN
1 -0.000000 b1539 4.971562 [LSERDHr, ATHRDHr, DSERDHr, MSAR]
2 -0.015516 b1136 4.782464 [ICDHyr]
3 -0.007662 b0197 4.849704 [METabcpp, METDabcpp]
4 -0.007662 b0198 4.849704 [METabcpp, METDabcpp]
5 -0.007662 b0199 4.849704 [METabcpp, METDabcpp]
6 -0.000082 b4238 4.772280 [RNTR4c2, RNTR2c2, RNTR1c2, RNTR3c2]
7 -0.000000 b2366 4.971562 [SERD_D]
8 -0.000000 b0888 4.746827 [TRDR]

9 rows × 4 columns


In [162]:
read_to_struct=[]
for i in min_flux_vars.columns:
    read_to_struct.append({'gene':i, 'min_flux':np.sum(min_flux_vars[min_flux_vars.index.isin(DF_NADPH_consummers.reaction.tolist())][i]),'max_flux':np.sum(max_flux_vars[max_flux_vars.index.isin(DF_NADPH_consummers.reaction.tolist())][i])}) 
    
DF_flux_nadph_consumers = pd.DataFrame(read_to_struct)
DF_flux_nadph_consumers[0:3]


Out[162]:
gene max_flux min_flux
0 WT_max 4.231534 4.231534
1 WT_min 4.098020 4.098020
2 b2215 4.231534 4.098020

3 rows × 3 columns


In [160]:
read_to_struct=[]
for i in DF_flux_nadph_consumers.index:
    rxn_list = []
    if DF_flux_nadph_consumers.gene[i] == 'WT_max':
        read_to_struct.append({'diff_gr':np.nan,'gene':DF_flux_nadph_consumers.gene[i], 'max_flux':4.231534,'rxn':np.nan})
    elif DF_flux_nadph_consumers.gene[i] == 'WT_min':
        pass
    elif (DF_flux_nadph_consumers.max_flux[i]-4.231534) < 0 and (DF_flux_nadph_consumers.max_flux[i]-1.456566) > -20:
        for r in model.genes.get_by_id(str(DF_flux_nadph_consumers.gene[i])).reactions:
            rxn_list.append(r)
        gr = float(DF_GR[DF_GR.gene == str(DF_flux_nadph_consumers.gene[i])].gr.values[0])-0.752658
        read_to_struct.append({'diff_gr':gr,'rxn':rxn_list,'gene':DF_flux_nadph_consumers.gene[i], 'max_flux':(DF_flux_nadph_consumers.max_flux_thru_nadph_consumers[i]-1.456566)})
DF_max_nadph_con = pd.DataFrame(read_to_struct)

In [161]:
DF_max_nadph_con


Out[161]:
diff_gr gene max_flux rxn
0 NaN WT_max 4.231534 NaN
1 -0.004718 b2416 2.726409 [DHAPT, GLCptspp, FRUpts2pp, MALTptspp, MNLpts...
2 -0.004718 b2415 2.726409 [SUCptspp, GLCptspp, FRUpts2pp, MALTptspp, MNL...
3 -0.000539 b1748 2.769426 [ACOTA, SOTA]
4 -0.002293 b1605 2.751366 [ARGORNt7pp]
5 -0.000553 b1453 2.769283 [ASNt2rpp]
6 -0.000011 b3528 2.774859 [ASPt2_2pp, MALDt2_2pp, OROTt2_2pp, SUCCt2_2pp...
7 -0.000539 b1747 2.769426 [AST]
8 -0.000001 b3816 2.774958 [COBALT2tpp, NI2tpp, MG2tpp]
9 -0.000032 b2436 2.774646 [CPPPGO]
10 -0.001634 b0910 2.758152 [CYTK2, UMPK, CYTK1]
11 -0.000009 b4209 2.774876 [FESR]
12 -0.000342 b0243 2.687324 [G5SD]
13 -0.001236 b1852 2.764043 [G6PDH2r]
14 -0.001364 b2500 2.760927 [GARFT]
15 -0.000122 b0810 2.773719 [GLNabcpp]
16 -0.000122 b0811 2.773719 [GLNabcpp]
17 -0.000122 b0809 2.773719 [GLNabcpp]
18 -0.000342 b0242 2.687324 [GLU5K]
19 -0.000076 b2903 2.774185 [GLYCL]
20 -0.000076 b2905 2.774185 [GLYCL]
21 -0.000076 b2904 2.774185 [GLYCL]
22 -0.000012 b4467 2.774853 [GLYCTO2, GLYCTO3, GLYCTO4]
23 -0.000012 b2979 2.774853 [GLYCTO2, GLYCTO3, GLYCTO4]
24 -0.000012 b4468 2.774853 [GLYCTO2, GLYCTO3, GLYCTO4]
25 -0.001236 b2029 2.764043 [GND]
26 -0.000139 b0112 2.773536 [TRPt2rpp, HISt2rpp, PHEt2rpp, TYRt2rpp]
27 -0.000002 b2529 2.774951 [I2FE2ST, I2FE2SS, I2FE2SR, I2FE2SS2, I4FE4SR,...
28 -0.000002 b2528 2.774951 [I2FE2ST, I4FE4ST]
29 -0.015516 b1136 2.615274 [ICDHyr]
30 -0.000715 b0401 2.767610 [LEUt2rpp, ILEt2rpp, VALt2rpp]
31 -0.000745 b2508 2.767307 [IMPD]
32 -0.015374 b3236 2.616729 [MDH]
33 -0.000000 b3835 2.774967 [OPHHX]
34 -0.012890 b0115 2.644065 [PDH]
35 -0.012890 b0114 2.644065 [PDH]
36 -0.004022 b4025 2.733575 [PGI]
37 -0.001236 b0767 2.764043 [PGL]
38 -0.000267 b3956 2.772225 [PPC]
39 -0.007245 b2501 2.700402 [PPKr, PPK2r]
40 -0.002293 b0692 2.751366 [PTRCORNt7pp, PTRCt2pp]
41 -0.000082 b4238 2.774132 [RNTR4c2, RNTR2c2, RNTR1c2, RNTR3c2]
42 -0.000539 b1745 2.769426 [SADH]
43 -0.000539 b1744 2.769426 [SGDS]
44 -0.000539 b1746 2.769426 [SGSAD]
45 -0.000417 b1206 2.770683 [SO4t2pp]
46 -0.000000 b0888 2.641454 [TRDR]

47 rows × 4 columns


In [158]:
read_to_struct=[]
for i in DF_flux_nadph_consumers.index:
    rxn_list = []
    if DF_flux_nadph_consumers.gene[i] == 'WT_max':
        read_to_struct.append({'diff_gr':np.nan,'gene':DF_flux_nadph_consumers.gene[i], 'min_flux':1.456566,'rxn':np.nan})
    elif DF_flux_nadph_consumers.gene[i] == 'WT_min':
        pass
    elif (DF_flux_nadph_consumers.min_flux_thru_nadph_consumers[i]-1.456566) < -0.1 and (DF_flux_nadph_consumers.min_flux_thru_nadph_consumers[i]-1.456566) > -20:
        for r in model.genes.get_by_id(str(DF_flux_nadph_consumers.gene[i])).reactions:
            rxn_list.append(r)
        gr = float(DF_GR[DF_GR.gene == str(DF_flux_nadph_consumers.gene[i])].gr.values[0])-0.457437   
        read_to_struct.append({'diff_gr':gr,'rxn':rxn_list,'gene':DF_flux_nadph_consumers.gene[i], 'min_flux':(DF_flux_nadph_consumers.min_flux_thru_nadph_consumers[i]-1.456566)})
DF_min_nadph_con = pd.DataFrame(read_to_struct)

Min NADPH consuming reactions


In [159]:
DF_min_nadph_con


Out[159]:
diff_gr gene min_flux rxn
0 NaN WT_max 1.456566 NaN
1 -0.167426 b1263 -0.612613 [ANPRT, ANS]
2 -0.167426 b1264 -0.612613 [ANS]
3 -0.215671 b0928 -0.753058 [TYRTA, PHETA1, ASPTA]
4 -0.165115 b3737 -0.605886 [ATPS4rpp]
5 -0.165115 b3731 -0.605886 [ATPS4rpp]
6 -0.165115 b3732 -0.605886 [ATPS4rpp]
7 -0.165115 b3733 -0.605886 [ATPS4rpp]
8 -0.165115 b3738 -0.605886 [ATPS4rpp]
9 -0.165115 b3735 -0.605886 [ATPS4rpp]
10 -0.165115 b3736 -0.605886 [ATPS4rpp]
11 -0.165115 b3734 -0.605886 [ATPS4rpp]
12 -0.228056 b2600 -0.789110 [CHORM, PPND]
13 -0.304419 b3771 -1.011410 [DHAD2, DHAD1]
14 -0.304419 b3774 -1.011410 [KARA2, DPR, KARA1]
15 -0.114639 b4013 -0.458946 [HSST]
16 -0.167426 b1262 -0.612613 [PRAIi, IGPS]
17 -0.263195 b0073 -0.891403 [IPMD]
18 -0.263195 b0072 -0.891403 [IPPMIa, IPPMIb]
19 -0.263195 b0071 -0.891403 [IPPMIa, IPPMIb]
20 -0.263195 b0074 -0.891403 [IPPS]
21 -0.085693 b0386 -0.370229 [P5CR]
22 -0.114639 b3939 -0.458946 [SHSL1]
23 -0.167426 b1260 -0.612613 [TRPS2, TRPS1, TRPS3]
24 -0.167426 b1261 -0.612613 [TRPS2, TRPS1, TRPS3]

25 rows × 4 columns

compute total flux through PPP


In [60]:
PPP_rxns = []
for r in m.reactions:
    if m.reactions.get_by_id(r.id).subsystem == 'Pentose Phosphate Pathway':
        PPP_rxns.append(r.id)

In [157]:
read_to_struct=[]
for i in min_flux_vars.columns:
    read_to_struct.append({'gene':i, 'min_flux':np.sum(min_flux_vars[min_flux_vars.index.isin(PPP_rxns)][i]),'max_flux':np.sum(max_flux_vars[max_flux_vars.index.isin(PPP_rxns)][i])}) 
    
DF_flux_PPP = pd.DataFrame(read_to_struct)
DF_flux_PPP[0:3]


Out[157]:
gene max_flux min_flux
0 WT_max 9.284744 9.284744
1 WT_min -2.202983 -2.202983
2 b2215 9.284744 -2.202983

3 rows × 3 columns


In [156]:
read_to_struct=[]
for i in DF_flux_PPP.index:
    rxn_list = []
    if DF_flux_PPP.min_flux_ppp[i]+2.202983 > 0.1 and DF_flux_PPP.min_flux_ppp[i]+0.255485 < 20:
        if DF_flux_nadph_producers.gene[i] == 'WT_max':
            read_to_struct.append({'diff_gr':np.nan,'gene':DF_flux_PPP.gene[i], 'min_flux':-2.202983,'rxn':np.nan})
        elif DF_flux_PPP.gene[i] == 'WT_min':
            pass
        else:    
            for r in model.genes.get_by_id(str(DF_flux_PPP.gene[i])).reactions:
                rxn_list.append(r)
            gr = float(DF_GR[DF_GR.gene == str(DF_flux_PPP.gene[i])].gr.values[0])-0.752658
            read_to_struct.append({'diff_gr':gr,'rxn':rxn_list,'gene':DF_flux_PPP.gene[i], 'min_flux':DF_flux_PPP.min_flux_ppp[i]})
DF_min_ppp = pd.DataFrame(read_to_struct)

SKOs that minimize flux through PPP


In [171]:
DF_min_ppp


Out[171]:
diff_gr gene min_flux rxn
0 NaN WT_max -2.202983 NaN
1 -0.015762 b2551 2.535495 [GHMT2r, THRA2i, ALATA_D2, ALATA_L2, THRAi, TH...
2 -0.053434 b1779 15.007440 [E4PD, GAPD]
3 -0.053434 b2779 15.007440 [ENO]
4 -0.000000 b2925 1.626259 [FBA3, FBA]
5 -0.007662 b0197 -1.520856 [METabcpp, METDabcpp]
6 -0.007662 b0198 -1.520856 [METabcpp, METDabcpp]
7 -0.007662 b0199 -1.520856 [METabcpp, METDabcpp]
8 -0.013992 b0529 2.002228 [MTHFC, MTHFD]
9 -0.000000 b3916 1.626259 [PFK, PFK_2, PFK_3]
10 -0.053434 b2926 15.007440 [PGK]
11 -0.000082 b1378 -1.984933 [POR5]
12 -0.000267 b3956 -1.487961 [PPC]
13 -0.000082 b4238 -1.984933 [RNTR4c2, RNTR2c2, RNTR1c2, RNTR3c2]
14 -0.016575 b3919 14.267409 [TPI]

15 rows × 4 columns


In [155]:
read_to_struct=[]
for i in DF_flux_PPP.index:
    rxn_list = []
    if DF_flux_PPP.max_flux_ppp[i]-9.284744 > 0.1 and DF_flux_PPP.max_flux_ppp[i]-9.284744 < 20:
        if DF_flux_nadph_producers.gene[i] == 'WT_max':
            read_to_struct.append({'diff_gr':np.nan,'gene':DF_flux_PPP.gene[i], 'max_flux':9.284744,'rxn':np.nan})
        elif DF_flux_PPP.gene[i] == 'WT_min':
            pass
        else:    
            for r in model.genes.get_by_id(str(DF_flux_PPP.gene[i])).reactions:
                rxn_list.append(r)
            gr = float(DF_GR[DF_GR.gene == str(DF_flux_PPP.gene[i])].gr.values[0])-0.752658
            read_to_struct.append({'diff_gr':gr,'rxn':rxn_list,'gene':DF_flux_PPP.gene[i], 'max_flux':DF_flux_PPP.max_flux_ppp[i]})
DF_max_ppp = pd.DataFrame(read_to_struct)

SKOs that maximize flux through PPP


In [172]:
DF_max_ppp


Out[172]:
diff_gr gene max_flux rxn
0 -0.015762 b2551 13.054452 [GHMT2r, THRA2i, ALATA_D2, ALATA_L2, THRAi, TH...
1 -0.007662 b0197 9.843361 [METabcpp, METDabcpp]
2 -0.007662 b0198 9.843361 [METabcpp, METDabcpp]
3 -0.007662 b0199 9.843361 [METabcpp, METDabcpp]
4 -0.013992 b0529 12.630220 [MTHFC, MTHFD]
5 -0.000082 b1378 9.456310 [POR5]
6 -0.000267 b3956 9.847336 [PPC]
7 -0.000082 b4238 9.456310 [RNTR4c2, RNTR2c2, RNTR1c2, RNTR3c2]
8 -0.016575 b3919 14.579448 [TPI]

9 rows × 4 columns

Consolidate all SKOs & rank order


In [141]:
def maximize_FVA_min_flux(df):
    read_to_struct=[] 
    for i in df.index:
        rxn_list = []
    
        if df.gene[i] == 'WT_min':
                read_to_struct.append({'diff_gr':np.nan,'gene':df.gene[i], 'min_flux':df[df.gene == 'WT_min'].min_flux.values[0],'rxn':np.nan}) 
        if df.min_flux[i] > 0 and df[df.gene == 'WT_min'].min_flux.values[0] < 0:
        
            if df.min_flux[i]+df[df.gene == 'WT_min'].min_flux.values[0] > 0.5 and df.min_flux[i]+df[df.gene == 'WT_min'].min_flux.values[0] < 20:
                if df.gene[i] == 'WT_max':
                    read_to_struct.append({'diff_gr':np.nan,'gene':df.gene[i], 'min_flux':df[df.gene == 'WT_max'].min_flux.values[0],'rxn':np.nan})
                elif df.gene[i] == 'WT_min':
                    pass
                else:    
                    for r in model.genes.get_by_id(str(df.gene[i])).reactions:
                        rxn_list.append(r)
                    gr = float(DF_GR[DF_GR.gene == str(df.gene[i])].gr.values[0])-DF_GR[DF_GR.gene == 'WT'].gr.values[0]
                    read_to_struct.append({'diff_gr':gr,'rxn':rxn_list,'gene':df.gene[i], 'min_flux':df.min_flux[i]})   
                 
        else:
            if df.min_flux[i]-df[df.gene == 'WT_min'].min_flux.values[0] > 0.5 and df.min_flux[i]-df[df.gene == 'WT_min'].min_flux.values[0] < 20:
                if df.gene[i] == 'WT_max':
                    read_to_struct.append({'diff_gr':np.nan,'gene':df.gene[i], 'min_flux':df[df.gene == 'WT_max'].min_flux.values[0],'rxn':np.nan})
                elif df.gene[i] == 'WT_min':
                    pass
                else:    
                    for r in model.genes.get_by_id(str(df.gene[i])).reactions:
                        rxn_list.append(r)
                    gr = float(DF_GR[DF_GR.gene == str(df.gene[i])].gr.values[0])-DF_GR[DF_GR.gene == 'WT'].gr.values[0]
                    read_to_struct.append({'diff_gr':gr,'rxn':rxn_list,'gene':df.gene[i], 'min_flux':df.min_flux[i]}) 
                
    return pd.DataFrame(read_to_struct)

def maximize_FVA_max_flux(df):
    read_to_struct = []
    for i in df.index:
        rxn_list = []
        if df.gene[i] == 'WT_max':
                read_to_struct.append({'diff_gr':np.nan,'gene':df.gene[i], 'max_flux':df[df.gene == 'WT_max'].min_flux.values[0],'rxn':np.nan})             
        if df.max_flux[i] > 0 and df[df.gene == 'WT_min'].max_flux.values[0] < 0:
        
            if df.max_flux[i]+df[df.gene == 'WT_max'].max_flux.values[0] > 0.5 and df.max_flux[i]+df[df.gene == 'WT_min'].max_flux.values[0] < 20:
                if df.gene[i] == 'WT_min':
                    pass
                elif df.gene[i] == 'WT_max':
                    pass
                else:    
                    for r in model.genes.get_by_id(str(df.gene[i])).reactions:
                        rxn_list.append(r)
                    gr = float(DF_GR[DF_GR.gene == str(df.gene[i])].gr.values[0])-DF_GR[DF_GR.gene == 'WT'].gr.values[0]
                    read_to_struct.append({'diff_gr':gr,'rxn':rxn_list,'gene':df.gene[i], 'max_flux':df.max_flux[i]})   
                
          
        else:
            if df.max_flux[i]-df[df.gene == 'WT_max'].min_flux.values[0] > 0.5 and df.min_flux[i]-df[df.gene == 'WT_max'].max_flux.values[0] < 20:
                if df.gene[i] == 'WT_min':
                    pass
                elif df.gene[i] == 'WT_max':
                    pass
                else:    
                    for r in model.genes.get_by_id(str(df.gene[i])).reactions:
                        rxn_list.append(r)
                    gr = float(DF_GR[DF_GR.gene == str(df.gene[i])].gr.values[0])-DF_GR[DF_GR.gene == 'WT'].gr.values[0]
                    read_to_struct.append({'diff_gr':gr,'rxn':rxn_list,'gene':df.gene[i], 'max_flux':df.max_flux[i]}) 
                
    return pd.DataFrame(read_to_struct)

def minimize_FVA_min_flux(df):
    read_to_struct=[] 
    for i in df.index:
        rxn_list = []
    
        if df.gene[i] == 'WT_min':
                read_to_struct.append({'diff_gr':np.nan,'gene':df.gene[i], 'min_flux':df[df.gene == 'WT_min'].min_flux.values[0],'rxn':np.nan}) 
        if df.min_flux[i] > 0 and df[df.gene == 'WT_min'].min_flux.values[0] < 0:
        
            if df.min_flux[i]+df[df.gene == 'WT_min'].min_flux.values[0] < -0.5 and df.min_flux[i]+df[df.gene == 'WT_min'].min_flux.values[0] > -20:
                if df.gene[i] == 'WT_max':
                    read_to_struct.append({'diff_gr':np.nan,'gene':df.gene[i], 'min_flux':df[df.gene == 'WT_max'].min_flux.values[0],'rxn':np.nan})
                elif df.gene[i] == 'WT_min':
                    pass
                else:    
                    for r in model.genes.get_by_id(str(df.gene[i])).reactions:
                        rxn_list.append(r)
                    gr = float(DF_GR[DF_GR.gene == str(df.gene[i])].gr.values[0])-DF_GR[DF_GR.gene == 'WT'].gr.values[0]
                    read_to_struct.append({'diff_gr':gr,'rxn':rxn_list,'gene':df.gene[i], 'min_flux':df.min_flux[i]})   
                 
        else:
            if df.min_flux[i]-df[df.gene == 'WT_min'].min_flux.values[0] < -0.5 and df.min_flux[i]-df[df.gene == 'WT_min'].min_flux.values[0] > -20:
                if df.gene[i] == 'WT_max':
                    read_to_struct.append({'diff_gr':np.nan,'gene':df.gene[i], 'min_flux':df[df.gene == 'WT_max'].min_flux.values[0],'rxn':np.nan})
                elif df.gene[i] == 'WT_min':
                    pass
                else:    
                    for r in model.genes.get_by_id(str(df.gene[i])).reactions:
                        rxn_list.append(r)
                    gr = float(DF_GR[DF_GR.gene == str(df.gene[i])].gr.values[0])-DF_GR[DF_GR.gene == 'WT'].gr.values[0]
                    read_to_struct.append({'diff_gr':gr,'rxn':rxn_list,'gene':df.gene[i], 'min_flux':df.min_flux[i]}) 
                
    return pd.DataFrame(read_to_struct)

def minimize_FVA_max_flux(df):
    read_to_struct = []
    for i in df.index:
        rxn_list = []
        if df.gene[i] == 'WT_max':
                read_to_struct.append({'diff_gr':np.nan,'gene':df.gene[i], 'max_flux':df[df.gene == 'WT_max'].min_flux.values[0],'rxn':np.nan})             
        if df.max_flux[i] > 0 and df[df.gene == 'WT_min'].max_flux.values[0] < 0:
        
            if df.max_flux[i]+df[df.gene == 'WT_max'].max_flux.values[0] < -0.5 and df.max_flux[i]+df[df.gene == 'WT_min'].max_flux.values[0] > -20:
                if df.gene[i] == 'WT_min':
                    pass
                elif df.gene[i] == 'WT_max':
                    pass
                else:    
                    for r in model.genes.get_by_id(str(df.gene[i])).reactions:
                        rxn_list.append(r)
                    gr = float(DF_GR[DF_GR.gene == str(df.gene[i])].gr.values[0])-DF_GR[DF_GR.gene == 'WT'].gr.values[0]
                    read_to_struct.append({'diff_gr':gr,'rxn':rxn_list,'gene':df.gene[i], 'max_flux':df.max_flux[i]})   
                
          
        else:
            if df.max_flux[i]-df[df.gene == 'WT_max'].min_flux.values[0] < -0.5 and df.min_flux[i]-df[df.gene == 'WT_max'].max_flux.values[0] > -20:
                if df.gene[i] == 'WT_min':
                    pass
                elif df.gene[i] == 'WT_max':
                    pass
                else:    
                    for r in model.genes.get_by_id(str(df.gene[i])).reactions:
                        rxn_list.append(r)
                    gr = float(DF_GR[DF_GR.gene == str(df.gene[i])].gr.values[0])-DF_GR[DF_GR.gene == 'WT'].gr.values[0]
                    read_to_struct.append({'diff_gr':gr,'rxn':rxn_list,'gene':df.gene[i], 'max_flux':df.max_flux[i]}) 
                
    return pd.DataFrame(read_to_struct)

In [200]:
list_DF = [DF_max_ppp, DF_min_ppp, DF_max_nadph_prod, DF_min_nadph_prod, DF_min_nadph_con, DF_max_nadph_con]
legend = ['max_PPP','min_PPP','max_NADPH_prod','min_NADPH_prod','min_NADPH_con','max_NADPH_con']

In [ ]:
# consolidate all SKOs that increase NADPH-related fitness into one dataframe
read_to_struct=[]

for i in range(0,len(list_DF)):
    tmp = list_DF[i]
    print i, legend[i]
    
    for j in tmp.index:
        if pd.notnull(tmp.diff_gr[j]) and tmp.diff_gr[j] > -0.1:
            if 'max' in legend[i]:
                read_to_struct.append({'fitness':legend[i],'diff_gr':tmp.diff_gr[j],'gene':tmp.gene[j],'max_flux':tmp.max_flux[j],'rxn':tmp.rxn[j]})
            
            else:
                read_to_struct.append({'fitness':legend[i],'diff_gr':tmp.diff_gr[j],'gene':tmp.gene[j],'min_flux':tmp.min_flux[j],'rxn':tmp.rxn[j]})
            
DF_KOs = pd.DataFrame(read_to_struct)

prints number of reaction "hits" for SKO candidate


In [213]:
df = DF_KOs.gene.value_counts()
for gene in df.index:
    if df[gene] > mean(DF_KOs.gene.value_counts()):
        print gene, df[gene], [str(i) for i in m.genes.get_by_id(str(gene)).reactions]


b0723 8 ['SUCDi']
b0724 8 ['SUCDi']
b0721 8 ['SUCDi']
b0722 8 ['SUCDi']
b0720 7 ['CS']
b0116 7 ['AKGDH', 'PDH', 'GLYCL']
b3919 6 ['TPI']
b2926 5 ['PGK']
b1779 5 ['E4PD', 'GAPD']
b2779 5 ['ENO']
b2551 4 ['GHMT2r', 'THRA2i', 'ALATA_D2', 'ALATA_L2', 'THRAi', 'THFAT']
b4238 4 ['RNTR4c2', 'RNTR2c2', 'RNTR1c2', 'RNTR3c2']
b0529 4 ['MTHFC', 'MTHFD']
b0198 3 ['METabcpp', 'METDabcpp']
b0197 3 ['METabcpp', 'METDabcpp']
b2925 3 ['FBA3', 'FBA']
b3916 3 ['PFK', 'PFK_2', 'PFK_3']
b0199 3 ['METabcpp', 'METDabcpp']
b3956 3 ['PPC']

lists all SKOs that maximize fitness (related to NADPH depletion) and compares SKO predicted growth rate to WT growth rate (diff_gr)


In [6]:
DF_KOs[DF_KOs.fitness.isin(['max_PPP','min_PPP','max_NADPH_prod','min_NADPH_prod','min_NADPH_con','max_NADPH_con'])]


Out[6]:
diff_gr fitness gene max_flux min_flux rxn
64 -0.015762 max_PPP b2551 13.054452 NaN [GHMT2r, THRA2i, ALATA_D2, ALATA_L2, THRAi, TH...
65 -0.007662 max_PPP b0197 9.843361 NaN [METabcpp, METDabcpp]
66 -0.007662 max_PPP b0198 9.843361 NaN [METabcpp, METDabcpp]
67 -0.007662 max_PPP b0199 9.843361 NaN [METabcpp, METDabcpp]
68 -0.013992 max_PPP b0529 12.630220 NaN [MTHFC, MTHFD]
69 -0.000082 max_PPP b1378 9.456310 NaN [POR5]
70 -0.000267 max_PPP b3956 9.847336 NaN [PPC]
71 -0.000082 max_PPP b4238 9.456310 NaN [RNTR4c2, RNTR2c2, RNTR1c2, RNTR3c2]
72 -0.016575 max_PPP b3919 14.579448 NaN [TPI]
73 -0.015762 min_PPP b2551 NaN 2.535495 [GHMT2r, THRA2i, ALATA_D2, ALATA_L2, THRAi, TH...
74 -0.053434 min_PPP b1779 NaN 15.007440 [E4PD, GAPD]
75 -0.053434 min_PPP b2779 NaN 15.007440 [ENO]
76 -0.000000 min_PPP b2925 NaN 1.626259 [FBA3, FBA]
77 -0.007662 min_PPP b0197 NaN -1.520856 [METabcpp, METDabcpp]
78 -0.007662 min_PPP b0198 NaN -1.520856 [METabcpp, METDabcpp]
79 -0.007662 min_PPP b0199 NaN -1.520856 [METabcpp, METDabcpp]
80 -0.013992 min_PPP b0529 NaN 2.002228 [MTHFC, MTHFD]
81 -0.000000 min_PPP b3916 NaN 1.626259 [PFK, PFK_2, PFK_3]
82 -0.053434 min_PPP b2926 NaN 15.007440 [PGK]
83 -0.000082 min_PPP b1378 NaN -1.984933 [POR5]
84 -0.000267 min_PPP b3956 NaN -1.487961 [PPC]
85 -0.000082 min_PPP b4238 NaN -1.984933 [RNTR4c2, RNTR2c2, RNTR1c2, RNTR3c2]
86 -0.016575 min_PPP b3919 NaN 14.267409 [TPI]
87 -0.015858 max_NADPH_prod b0727 11.051913 NaN [AKGDH]
88 -0.015858 max_NADPH_prod b0726 11.051913 NaN [AKGDH]
89 -0.015762 max_NADPH_prod b2551 6.717538 NaN [GHMT2r, THRA2i, ALATA_D2, ALATA_L2, THRAi, TH...
90 -0.053434 max_NADPH_prod b1779 12.956576 NaN [E4PD, GAPD]
91 -0.053434 max_NADPH_prod b2779 16.716725 NaN [ENO]
92 -0.013992 max_NADPH_prod b0529 6.576255 NaN [MTHFC, MTHFD]
93 -0.053434 max_NADPH_prod b2926 12.956576 NaN [PGK]
94 -0.013142 max_NADPH_prod b0729 9.093797 NaN [SUCOAS]
95 -0.013142 max_NADPH_prod b0728 9.093797 NaN [SUCOAS]
96 -0.016575 max_NADPH_prod b3919 8.340062 NaN [TPI]
97 -0.000000 min_NADPH_prod b1539 NaN 4.971562 [LSERDHr, ATHRDHr, DSERDHr, MSAR]
98 -0.015516 min_NADPH_prod b1136 NaN 4.782464 [ICDHyr]
99 -0.007662 min_NADPH_prod b0197 NaN 4.849704 [METabcpp, METDabcpp]
100 -0.007662 min_NADPH_prod b0198 NaN 4.849704 [METabcpp, METDabcpp]
101 -0.007662 min_NADPH_prod b0199 NaN 4.849704 [METabcpp, METDabcpp]
102 -0.000082 min_NADPH_prod b4238 NaN 4.772280 [RNTR4c2, RNTR2c2, RNTR1c2, RNTR3c2]
103 -0.000000 min_NADPH_prod b2366 NaN 4.971562 [SERD_D]
104 -0.000000 min_NADPH_prod b0888 NaN 4.746827 [TRDR]
105 -0.085693 min_NADPH_con b0386 NaN -0.370229 [P5CR]
106 -0.004718 max_NADPH_con b2416 2.726409 NaN [DHAPT, GLCptspp, FRUpts2pp, MALTptspp, MNLpts...
107 -0.004718 max_NADPH_con b2415 2.726409 NaN [SUCptspp, GLCptspp, FRUpts2pp, MALTptspp, MNL...
108 -0.000539 max_NADPH_con b1748 2.769426 NaN [ACOTA, SOTA]
109 -0.002293 max_NADPH_con b1605 2.751366 NaN [ARGORNt7pp]
110 -0.000553 max_NADPH_con b1453 2.769283 NaN [ASNt2rpp]
111 -0.000011 max_NADPH_con b3528 2.774859 NaN [ASPt2_2pp, MALDt2_2pp, OROTt2_2pp, SUCCt2_2pp...
112 -0.000539 max_NADPH_con b1747 2.769426 NaN [AST]
113 -0.000001 max_NADPH_con b3816 2.774958 NaN [COBALT2tpp, NI2tpp, MG2tpp]
114 -0.000032 max_NADPH_con b2436 2.774646 NaN [CPPPGO]
115 -0.001634 max_NADPH_con b0910 2.758152 NaN [CYTK2, UMPK, CYTK1]
116 -0.000009 max_NADPH_con b4209 2.774876 NaN [FESR]
117 -0.000342 max_NADPH_con b0243 2.687324 NaN [G5SD]
118 -0.001236 max_NADPH_con b1852 2.764043 NaN [G6PDH2r]
119 -0.001364 max_NADPH_con b2500 2.760927 NaN [GARFT]
120 -0.000122 max_NADPH_con b0810 2.773719 NaN [GLNabcpp]
121 -0.000122 max_NADPH_con b0811 2.773719 NaN [GLNabcpp]
122 -0.000122 max_NADPH_con b0809 2.773719 NaN [GLNabcpp]
123 -0.000342 max_NADPH_con b0242 2.687324 NaN [GLU5K]
124 -0.000076 max_NADPH_con b2903 2.774185 NaN [GLYCL]
125 -0.000076 max_NADPH_con b2905 2.774185 NaN [GLYCL]
126 -0.000076 max_NADPH_con b2904 2.774185 NaN [GLYCL]
127 -0.000012 max_NADPH_con b4467 2.774853 NaN [GLYCTO2, GLYCTO3, GLYCTO4]
128 -0.000012 max_NADPH_con b2979 2.774853 NaN [GLYCTO2, GLYCTO3, GLYCTO4]
129 -0.000012 max_NADPH_con b4468 2.774853 NaN [GLYCTO2, GLYCTO3, GLYCTO4]
130 -0.001236 max_NADPH_con b2029 2.764043 NaN [GND]
131 -0.000139 max_NADPH_con b0112 2.773536 NaN [TRPt2rpp, HISt2rpp, PHEt2rpp, TYRt2rpp]
132 -0.000002 max_NADPH_con b2529 2.774951 NaN [I2FE2ST, I2FE2SS, I2FE2SR, I2FE2SS2, I4FE4SR,...
133 -0.000002 max_NADPH_con b2528 2.774951 NaN [I2FE2ST, I4FE4ST]
134 -0.015516 max_NADPH_con b1136 2.615274 NaN [ICDHyr]
135 -0.000715 max_NADPH_con b0401 2.767610 NaN [LEUt2rpp, ILEt2rpp, VALt2rpp]
136 -0.000745 max_NADPH_con b2508 2.767307 NaN [IMPD]
137 -0.015374 max_NADPH_con b3236 2.616729 NaN [MDH]
138 -0.000000 max_NADPH_con b3835 2.774967 NaN [OPHHX]
139 -0.012890 max_NADPH_con b0115 2.644065 NaN [PDH]
140 -0.012890 max_NADPH_con b0114 2.644065 NaN [PDH]
141 -0.004022 max_NADPH_con b4025 2.733575 NaN [PGI]
142 -0.001236 max_NADPH_con b0767 2.764043 NaN [PGL]
143 -0.000267 max_NADPH_con b3956 2.772225 NaN [PPC]
144 -0.007245 max_NADPH_con b2501 2.700402 NaN [PPKr, PPK2r]
145 -0.002293 max_NADPH_con b0692 2.751366 NaN [PTRCORNt7pp, PTRCt2pp]
146 -0.000082 max_NADPH_con b4238 2.774132 NaN [RNTR4c2, RNTR2c2, RNTR1c2, RNTR3c2]
147 -0.000539 max_NADPH_con b1745 2.769426 NaN [SADH]
148 -0.000539 max_NADPH_con b1744 2.769426 NaN [SGDS]
149 -0.000539 max_NADPH_con b1746 2.769426 NaN [SGSAD]
150 -0.000417 max_NADPH_con b1206 2.770683 NaN [SO4t2pp]
151 -0.000000 max_NADPH_con b0888 2.641454 NaN [TRDR]

88 rows × 6 columns

Prioritize model-predicted SKOs using keio KO growth data (in acetate and glucose conditions)


In [9]:
# load in keio colletion
DF_KO_invivo


Out[9]:
Acetate Fructose Glucose TOTAL
b1378 1.033333 1.086667 1.136667 3.256667
b2904 1.216667 1.040000 0.996667 3.253333
b4209 1.046667 1.076667 1.070000 3.193333
b1747 1.033333 1.050000 1.076667 3.160000
b1748 1.080000 1.026667 1.050000 3.156667
b2903 1.140000 1.020000 0.986667 3.146667
b0199 1.030000 1.063333 1.053333 3.146667
b0112 1.033333 1.060000 1.010000 3.103333
b4238 1.016667 1.050000 1.030000 3.096667
b2501 1.020000 1.013333 1.050000 3.083333
b1605 1.020000 1.043333 1.016667 3.080000
b4467 0.963333 1.080000 1.033333 3.076667
b0198 1.030000 1.013333 1.026667 3.070000
b0197 0.983333 1.083333 1.003333 3.070000
b1746 1.030000 1.010000 1.010000 3.050000
b0401 0.990000 1.000000 1.056667 3.046667
b0809 0.970000 1.073333 0.996667 3.040000
b0888 1.006667 1.036667 0.990000 3.033333
b4468 1.023333 0.986667 1.023333 3.033333
b0811 0.991667 1.011667 1.026667 3.030000
b1744 1.016667 0.998333 1.015000 3.030000
b1206 1.016667 0.946667 1.063333 3.026667
b2436 1.026667 0.990000 0.996667 3.013333
b1453 1.026667 0.993333 0.993333 3.013333
b0692 1.023333 1.006667 0.976667 3.006667
b3528 1.000000 1.000000 0.993333 2.993333
b2905 1.110000 0.930000 0.946667 2.986667
b2979 1.016667 0.960000 0.980000 2.956667
b0722 0.966667 1.030000 0.943333 2.940000
b1852 1.003333 0.953333 0.953333 2.910000
b2366 0.986667 0.933333 0.966667 2.886667
b1539 0.930000 0.943333 1.013333 2.886667
b1745 0.930000 0.930000 1.013333 2.873333
b2500 0.733333 1.033333 1.063333 2.830000
b2528 0.926667 0.890000 0.960000 2.776667
b0810 1.076667 0.783333 0.913333 2.773333
b0910 0.983333 0.823333 0.966667 2.773333
b3816 0.966667 0.773333 0.976667 2.716667
b0767 0.873333 0.883333 0.956667 2.713333
b2029 0.923333 0.933333 0.846667 2.703333
b4025 0.866667 1.003333 0.770000 2.640000
b0114 1.046667 0.736667 0.830000 2.613333
b2415 1.000000 0.990000 0.606667 2.596667
b3916 1.023333 1.073333 0.493333 2.590000
b2529 0.893333 0.806667 0.863333 2.563333
b0115 1.033333 0.580000 0.816667 2.430000
b3236 0.256667 0.943333 0.980000 2.180000
b0729 0.000000 1.020000 0.990000 2.010000
b0721 0.000000 0.930000 1.003333 1.933333
b0723 0.076667 0.866667 0.980000 1.923333
b0728 0.000000 0.916667 0.923333 1.840000
b2416 1.016667 0.373333 0.383333 1.773333
b0724 0.000000 0.826667 0.800000 1.626667
b3956 1.000000 0.073333 0.243333 1.316667
b0726 0.000000 0.403333 0.776667 1.180000
b0727 0.000000 0.163333 0.793333 0.956667
b2551 0.000000 0.000000 0.393333 0.393333
b0116 0.000000 0.000000 0.293333 0.293333
b0720 0.000000 0.033333 0.120000 0.153333
b0243 0.018333 0.000000 0.000000 0.018333
b0242 0.000000 0.000000 0.000000 0.000000
b0386 0.000000 0.000000 0.000000 0.000000
b1136 0.000000 0.000000 0.000000 0.000000
b2508 0.000000 0.000000 0.000000 0.000000
b3919 0.000000 0.000000 0.000000 0.000000

65 rows × 4 columns

make a dataframe of WT max and min flux info for comparison


In [7]:
read_to_struct= ({'min_NADPH_prod':4.613313,'max_NADPH_prod':5.463324,'min_NADPH_con':1.456566,'max_NADPH_con':4.231534, 'max_PPP':9.284744, 'min_PPP':-2.202983 })
WT_fluxes = pd.DataFrame(read_to_struct,index=[0])
WT_fluxes


Out[7]:
max_NADPH_con max_NADPH_prod max_PPP min_NADPH_con min_NADPH_prod min_PPP
0 4.231534 5.463324 9.284744 1.456566 4.613313 -2.202983

1 rows × 6 columns

reduce candidate SKOs based on growth conditions in glc and acetate


In [81]:
read_to_struct = []
for i in DF_KO_invivo.index:        
    read_to_struct.append({'gene':i, 'GR_ac':DF_KO_invivo.Acetate[i],'GR_glc':DF_KO_invivo.Glucose[i], 'fitness':DF_KOs[DF_KOs.gene == i].fitness.values.tolist(), 'rxn':[str(i) for i in DF_KOs[DF_KOs.gene == i].rxn.values[0]]})
df = pd.DataFrame(read_to_struct)
df = df[(df.GR_glc > 1)][df.GR_ac > .98]
df


Out[81]:
GR_ac GR_glc fitness gene rxn
0 1.033333 1.136667 [max_PPP, min_PPP] b1378 [POR5]
2 1.046667 1.070000 [max_NADPH_con] b4209 [FESR]
3 1.033333 1.076667 [max_NADPH_con] b1747 [AST]
4 1.080000 1.050000 [max_NADPH_con] b1748 [ACOTA, SOTA]
6 1.030000 1.053333 [max_PPP, min_PPP, min_NADPH_prod] b0199 [METabcpp, METDabcpp]
7 1.033333 1.010000 [max_NADPH_con] b0112 [TRPt2rpp, HISt2rpp, PHEt2rpp, TYRt2rpp]
8 1.016667 1.030000 [max_PPP, min_PPP, min_NADPH_prod, max_NADPH_con] b4238 [RNTR4c2, RNTR2c2, RNTR1c2, RNTR3c2]
9 1.020000 1.050000 [max_NADPH_con] b2501 [PPKr, PPK2r]
10 1.020000 1.016667 [max_NADPH_con] b1605 [ARGORNt7pp]
12 1.030000 1.026667 [max_PPP, min_PPP, min_NADPH_prod] b0198 [METabcpp, METDabcpp]
13 0.983333 1.003333 [max_PPP, min_PPP, min_NADPH_prod] b0197 [METabcpp, METDabcpp]
14 1.030000 1.010000 [max_NADPH_con] b1746 [SGSAD]
15 0.990000 1.056667 [max_NADPH_con] b0401 [LEUt2rpp, ILEt2rpp, VALt2rpp]
18 1.023333 1.023333 [max_NADPH_con] b4468 [GLYCTO2, GLYCTO3, GLYCTO4]
19 0.991667 1.026667 [max_NADPH_con] b0811 [GLNabcpp]
20 1.016667 1.015000 [max_NADPH_con] b1744 [SGDS]
21 1.016667 1.063333 [max_NADPH_con] b1206 [SO4t2pp]

17 rows × 5 columns


In [68]:
struct = []
for i in df.gene:
    tmp_df = DF_KOs[DF_KOs.gene == i]
    for j in tmp_df.index:
        if pd.notnull(tmp_df.max_flux[j]):
            max_f = tmp_df.max_flux[j] - WT_fluxes[str(tmp_df.fitness[j])]
            struct.append({'gene': i, 'fitness':tmp_df.fitness[j], 'diff_wt':max_f.values[0]})
        if pd.notnull(tmp_df.min_flux[j]):
            min_f = tmp_df.min_flux[j] - WT_fluxes[str(tmp_df.fitness[j])]
            struct.append({'gene':i, 'fitness':tmp_df.fitness[j], 'diff_wt':min_f.values[0]})
df_priority_KOs = pd.DataFrame(struct)

sort by fitness into three groups: (i) NADPH consumers; (ii) NADPH producers; (iii) flux through PPP


In [73]:
df_priority_KOs[df_priority_KOs.fitness == 'max_NADPH_con'].sort(columns='diff_wt')


Out[73]:
diff_wt fitness gene
13 -1.531132 max_NADPH_con b2501
14 -1.480168 max_NADPH_con b1605
22 -1.463924 max_NADPH_con b0401
3 -1.462108 max_NADPH_con b1747
4 -1.462108 max_NADPH_con b1748
21 -1.462108 max_NADPH_con b1746
25 -1.462108 max_NADPH_con b1744
26 -1.460851 max_NADPH_con b1206
8 -1.457998 max_NADPH_con b0112
24 -1.457815 max_NADPH_con b0811
12 -1.457402 max_NADPH_con b4238
23 -1.456681 max_NADPH_con b4468
2 -1.456658 max_NADPH_con b4209

13 rows × 3 columns


In [77]:
df_priority_KOs[df_priority_KOs.fitness == 'min_NADPH_prod'].sort(columns='diff_wt',ascending=False)


Out[77]:
diff_wt fitness gene
20 0.236391 min_NADPH_prod b0197
17 0.236391 min_NADPH_prod b0198
7 0.236391 min_NADPH_prod b0199
11 0.158967 min_NADPH_prod b4238

4 rows × 3 columns


In [79]:
df_priority_KOs[df_priority_KOs.fitness == 'min_PPP'].sort(columns='diff_wt',ascending=False)


Out[79]:
diff_wt fitness gene
19 0.682127 min_PPP b0197
16 0.682127 min_PPP b0198
6 0.682127 min_PPP b0199
10 0.218050 min_PPP b4238
1 0.218050 min_PPP b1378

5 rows × 3 columns


In [80]:
df_priority_KOs[df_priority_KOs.fitness == 'max_PPP'].sort(columns='diff_wt',ascending=False)


Out[80]:
diff_wt fitness gene
18 0.558617 max_PPP b0197
15 0.558617 max_PPP b0198
5 0.558617 max_PPP b0199
9 0.171566 max_PPP b4238
0 0.171566 max_PPP b1378

5 rows × 3 columns


In [93]:
gene_list = pd.Series([df_priority_KOs[df_priority_KOs.fitness == 'min_NADPH_prod'].sort(columns='diff_wt',ascending=False).gene.tolist(), df_priority_KOs[df_priority_KOs.fitness == 'min_PPP'].sort(columns='diff_wt',ascending=False).gene.tolist(),['b4209','b1747','b1748','b2501', 'b4468']])

Final Table of potential SKO candidates to test in vivo


In [98]:
leg = ['min_NADPH_prod','min_PPP','max_NADPH_con']
struct = []
for i in gene_list.transpose().index:
    struct.append({'genes':gene_list[i], 'fitness':leg[i]})
pd.DataFrame(struct)


Out[98]:
fitness genes
0 min_NADPH_prod [b0197, b0198, b0199, b4238]
1 min_PPP [b0197, b0198, b0199, b4238, b1378]
2 max_NADPH_con [b4209, b1747, b1748, b2501, b4468]

3 rows × 2 columns

Gene and uniprot information for SKO candidates


In [108]:
struct=[]
for i in gene_list.transpose().index:
    for j in gene_list[i]:
        struct.append({'gene':j, 'seq':DF_GEM_PRO[DF_GEM_PRO.m_gene == str(j)].u_seq.values[0], 'gene_name':DF_GEM_PRO[DF_GEM_PRO.m_gene == str(j)].u_gene_name.values[0], 'uniprot':DF_GEM_PRO[DF_GEM_PRO.m_gene == str(j)].u_uniprot_acc.values[0]})
pd.DataFrame(struct).drop_duplicates()


Out[108]:
gene gene_name seq uniprot
0 b0197 metQ MAFKFKTFAAVGALIGSLALVGCGQDEKDPNHIKVGVIVGAEQQVA... P28635
1 b0198 metI MSEPMMWLLVRGVWETLAMTFVSGFFGFVIGLPVGVLLYVTRPGQI... P31547
2 b0199 metN MIKLSNITKVFHQGTRTIQALNNVSLHVPAGQIYGVIGASGAGKST... P30750
3 b4238 nrdD MTPHVMKRDGCKVPFKSERIKEAILRAAKAAEVDDADYCATVAAVV... P28903
8 b1378 ydbK MITIDGNGAVASVAFRTSEVIAIYPITPSSTMAEQADAWAGNGLKN... P52647
9 b4209 ytfE MAYRDQPLGELALSIPRASALFRKYDMDYCCGGKQTLARAAARKEL... P69506
10 b1747 astA MMVIRPVERSDVSALMQLASKTGGGLTSLPANEATLSARIERAIKT... P0AE37
11 b1748 astC MSQPITRENFDEWMIPVYAPAPFIPVRGEGSRLWDQQGKEYIDFAG... P77581
12 b2501 ppk MGQEKLYIEKELSWLSFNERVLQEAADKSNPLIERMRFLGIYSNNL... P0A7B1
13 b4468 glcE MLRECDYSQALLEQVNQAISDKTPLVIQGSNSKAFLGRPVTGQTLD... P52073

10 rows × 4 columns