In [1]:
import os

import numpy as np
import pandas as pd

import cobra
from cobra.flux_analysis.variability import flux_variability_analysis
from cobra.io.mat import load_matlab_model
from cobra.io.sbml import create_cobra_model_from_sbml_file
from cobra.manipulation.delete import delete_model_genes, undelete_model_genes
# Raise pandas' display limits to 500 rows / 500 columns for the tables shown below.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
In [2]:
# ROOT_DIR: the parent of the directory the notebook is started from.
ROOT_DIR = os.path.dirname(os.getcwd())

# Genome-scale model (GEM) files; either a .xml or a .mat file can be used.
EC_model = ''.join([ROOT_DIR, '/model_files/iJO1366.xml'])
Engineered_model = ''.join([ROOT_DIR, '/model_files/iJO1366_ENGINEERED.xml'])

# Directory holding the input data tables (note the trailing slash).
EC_data = ''.join([ROOT_DIR, '/data/'])

# Organism name as spelled in BRENDA.
EC_MODEL_ORGANISM = 'Escherichia coli'
In [3]:
# Read the engineered model from its SBML file.
model = create_cobra_model_from_sbml_file(Engineered_model)

# Table of per-reaction flux bounds used by the single-knockout (SKO) analysis.
flux_bounds_csv = EC_data + 'SKO_analysis_flux_bounds.csv'
flux_constraints = pd.read_csv(flux_bounds_csv)
In [4]:
# Apply the bounds from the CSV to every reaction in the model.
# The table is indexed once by reaction id instead of re-filtering the whole
# frame twice per reaction; the first row per id is kept, matching the
# previous `.values[0]` behaviour.
bounds_by_rxn = flux_constraints.drop_duplicates('reaction').set_index('reaction')
for r in model.reactions:
    # Look up by the explicit id rather than str(r), which is only the bare id
    # if the Reaction's string form happens to be its id.
    rxn_bounds = bounds_by_rxn.loc[r.id]  # KeyError if the CSV has no row for this reaction
    r.upper_bound = rxn_bounds['upper']
    r.lower_bound = rxn_bounds['lower']

# Make biomass the objective and let it carry any flux between 0 and 1000.
biomass = model.reactions.Ec_biomass_iJO1366_core_53p95M
biomass.objective_coefficient = 1
biomass.upper_bound = 1000
biomass.lower_bound = 0

# Drop forced fluxes: every strictly positive lower bound is relaxed to 0.
for r in model.reactions:
    if r.lower_bound > 0:
        r.lower_bound = 0
In [7]:
# List every exchange reaction (id starting with 'EX_') with its lower bound.
for r in model.reactions:
    # Test and print the id explicitly instead of relying on the Reaction
    # object behaving like a string.
    if r.id.startswith('EX_'):
        print('%s %s' % (r.id, r.lower_bound))
In [8]:
# Solve the constrained model, explicitly requesting the 'gurobi' solver.
model.optimize(solver='gurobi')
Out[8]:
In [9]:
# Wild-type (WT) reference: FVA flux ranges on the unperturbed model, one row
# per reaction, with the columns renamed to WT_min / WT_max.
wt_fva = pd.DataFrame(flux_variability_analysis(model)).transpose()
flux_var_wt = wt_fva.rename(columns={'minimum': 'WT_min', 'maximum': 'WT_max'})

# WT growth rate becomes the first record of the growth-rate list.
model.optimize()
gr = model.solution.f
wt_grs = [{'gene': 'WT', 'gr': gr}]
In [10]:
# Show the growth-rate records collected so far.
wt_grs
Out[10]:
In [ ]:
# Single-gene knockout (SKO) scan: knock out each gene in turn, record its
# growth rate and, for knockouts that still grow, the FVA flux ranges.
name = 'DH1_no_secretion'
GROWTH_CUTOFF = 0.01  # FVA is only run for knockouts growing faster than this

# Keep only the wild-type record so that re-running this cell does not append
# a second copy of every knockout to wt_grs.
del wt_grs[1:]

# Column blocks are collected and joined once after the loop; merging inside
# the loop re-copies the ever-growing frame for every gene.
min_columns = [flux_var_wt]
max_columns = [flux_var_wt]
for g in model.genes:
    # delete_model_genes expects a list of genes, so wrap the single gene.
    delete_model_genes(model, [g])
    try:
        model.optimize()
        gr = model.solution.f
        wt_grs.append({'gene': g.id, 'gr': gr})
        # Guard against a missing objective value (e.g. no solution found).
        if gr is not None and gr > GROWTH_CUTOFF:
            flux_var_ko = pd.DataFrame(flux_variability_analysis(model)).transpose()
            min_columns.append(flux_var_ko[['minimum']].rename(columns={'minimum': g.id}))
            max_columns.append(flux_var_ko[['maximum']].rename(columns={'maximum': g.id}))
    finally:
        # Always restore the model, whether or not the knockout grew or the
        # solver raised, so one gene's deletion never leaks into the next.
        undelete_model_genes(model)

# Inner join on the reaction index, as the per-gene merges did.
min_flux_vars = pd.concat(min_columns, axis=1, join='inner')
max_flux_vars = pd.concat(max_columns, axis=1, join='inner')

min_flux_vars.to_csv(name + '_min_fva.csv')
max_flux_vars.to_csv(name + '_max_fva.csv')
pd.DataFrame(wt_grs).to_csv(name + '_grs.csv')
In [12]:
# Preview the first ten rows of the minimum-flux table.
min_flux_vars[:10]
Out[12]:
In [17]:
# Growth-rate records (wild type plus knockouts) as a frame with 'gene' and 'gr' columns.
DF_GR = pd.DataFrame(wt_grs)
In [14]:
m = model


def _nadph_record(rxn):
    """Describe a reaction by id, name and stoichiometry string."""
    return {'reaction': rxn.id, 'name': rxn.name, 'stoich': rxn.reaction}


# Consumers: irreversible reactions that list NADPH (`nadph_c`) among their reactants.
tmp = [
    _nadph_record(rxn)
    for rxn in m.reactions
    if not rxn.reversibility
    for met in rxn.reactants
    if 'nadph_c' in str(met)
]
DF_NADPH_consummers = pd.DataFrame(tmp)

# Producers: irreversible reactions with NADPH among their products, plus any
# reversible reaction in which NADPH takes part at all.
nadph_producers = []
for rxn in m.reactions:
    candidates = rxn.metabolites if rxn.reversibility else rxn.products
    nadph_producers.extend(
        _nadph_record(rxn) for met in candidates if 'nadph_c' in str(met)
    )
DF_NADPH_producers = pd.DataFrame(nadph_producers)
In [166]:
# Summed FVA flux through the NADPH-producing reactions, one record per column
# of the FVA tables (the two wild-type columns plus one per viable knockout).
producer_ids = DF_NADPH_producers.reaction.tolist()
# The reaction filter is the same for every column, so build each sub-table once.
producer_min = min_flux_vars[min_flux_vars.index.isin(producer_ids)]
producer_max = max_flux_vars[max_flux_vars.index.isin(producer_ids)]
# Series.sum() is used so the cell does not depend on `np` being in scope.
read_to_struct = [
    {'gene': col, 'min_flux': producer_min[col].sum(), 'max_flux': producer_max[col].sum()}
    for col in min_flux_vars.columns
]
DF_flux_nadph_producers = pd.DataFrame(read_to_struct)
DF_flux_nadph_producers[0:3]
Out[166]:
In [167]:
# Knockouts whose summed maximum NADPH-producer flux exceeds the wild-type
# value (5.463324) by more than 1.
read_to_struct = []
for i in DF_flux_nadph_producers.index:
    gene_id = DF_flux_nadph_producers.gene[i]
    ko_max = DF_flux_nadph_producers.max_flux[i]
    if gene_id == 'WT_max':
        # Wild-type reference row: no growth difference or reaction list applies.
        read_to_struct.append({'diff_gr': np.nan, 'gene': gene_id, 'max_flux': 5.463324, 'rxn': np.nan})
        continue
    # NOTE(review): the two comparisons use different reference values
    # (5.463324 vs 3.669478) - confirm this is intended.
    if not (ko_max - 5.463324 > 1 and ko_max - 3.669478 < 20):
        continue
    rxn_list = list(model.genes.get_by_id(str(gene_id)).reactions)
    # Growth rate relative to the wild-type value 0.752658.
    gr = float(DF_GR[DF_GR.gene == str(gene_id)].gr.values[0]) - 0.752658
    read_to_struct.append({'diff_gr': gr, 'rxn': rxn_list, 'gene': gene_id, 'max_flux': ko_max})
DF_max_nadph_prod = pd.DataFrame(read_to_struct)
In [168]:
# Knockouts whose summed minimum NADPH-producer flux exceeds the wild-type
# value (4.613313) by more than 0.1.
read_to_struct = []
for i in DF_flux_nadph_producers.index:
    gene_id = DF_flux_nadph_producers.gene[i]
    ko_min = DF_flux_nadph_producers.min_flux[i]
    ko_max = DF_flux_nadph_producers.max_flux[i]
    # NOTE(review): the upper sanity bound is taken on abs(max_flux) with a
    # different constant (0.255485) - confirm this is intended.
    passes = ko_min - 4.613313 > 0.1 and abs(ko_max) - 0.255485 < 20
    if not passes or gene_id == 'WT_min':
        continue
    if gene_id == 'WT_max':
        # Wild-type reference row, only emitted when it passes the filter above.
        read_to_struct.append({'diff_gr': np.nan, 'gene': gene_id, 'min_flux': 4.613313, 'rxn': np.nan})
        continue
    rxn_list = list(model.genes.get_by_id(str(gene_id)).reactions)
    # gr is the growth rate, taken as the difference from WT (0.752658).
    gr = float(DF_GR[DF_GR.gene == str(gene_id)].gr.values[0]) - 0.752658
    read_to_struct.append({'diff_gr': gr, 'rxn': rxn_list, 'gene': gene_id, 'min_flux': ko_min})
DF_min_nadph_prod = pd.DataFrame(read_to_struct)
In [169]:
# Show the knockouts selected on maximum NADPH-producer flux.
DF_max_nadph_prod
Out[169]:
In [170]:
# Show the knockouts selected on minimum NADPH-producer flux.
DF_min_nadph_prod
Out[170]:
In [162]:
# Summed FVA flux through the NADPH-consuming reactions, one record per column
# of the FVA tables (the two wild-type columns plus one per viable knockout).
consumer_ids = DF_NADPH_consummers.reaction.tolist()
# The reaction filter is the same for every column, so build each sub-table once.
consumer_min = min_flux_vars[min_flux_vars.index.isin(consumer_ids)]
consumer_max = max_flux_vars[max_flux_vars.index.isin(consumer_ids)]
# Series.sum() is used so the cell does not depend on `np` being in scope.
read_to_struct = [
    {'gene': col, 'min_flux': consumer_min[col].sum(), 'max_flux': consumer_max[col].sum()}
    for col in min_flux_vars.columns
]
DF_flux_nadph_consumers = pd.DataFrame(read_to_struct)
DF_flux_nadph_consumers[0:3]
Out[162]:
In [160]:
# Knockouts whose summed maximum NADPH-consumer flux falls below the wild-type
# value (4.231534).
read_to_struct = []
for i in DF_flux_nadph_consumers.index:
    gene_id = DF_flux_nadph_consumers.gene[i]
    ko_max = DF_flux_nadph_consumers.max_flux[i]
    if gene_id == 'WT_max':
        # Wild-type reference row: no growth difference or reaction list applies.
        read_to_struct.append({'diff_gr': np.nan, 'gene': gene_id, 'max_flux': 4.231534, 'rxn': np.nan})
        continue
    if gene_id == 'WT_min':
        continue
    # NOTE(review): the lower sanity bound is measured from 1.456566 rather
    # than from the 4.231534 reference - confirm this is intended.
    if (ko_max - 4.231534) < 0 and (ko_max - 1.456566) > -20:
        rxn_list = list(model.genes.get_by_id(str(gene_id)).reactions)
        # Growth rate relative to the wild-type value 0.752658.
        gr = float(DF_GR[DF_GR.gene == str(gene_id)].gr.values[0]) - 0.752658
        # Store the raw flux from the existing 'max_flux' column. The previous
        # code read a column that DF_flux_nadph_consumers does not have and
        # subtracted a constant, although the prioritisation step subtracts the
        # wild-type reference from this value itself.
        read_to_struct.append({'diff_gr': gr, 'rxn': rxn_list, 'gene': gene_id, 'max_flux': ko_max})
DF_max_nadph_con = pd.DataFrame(read_to_struct)
In [161]:
# Show the knockouts selected on maximum NADPH-consumer flux.
DF_max_nadph_con
Out[161]:
In [158]:
# Knockouts whose summed minimum NADPH-consumer flux falls more than 0.1 (but
# less than 20) below the wild-type value (1.456566).
read_to_struct = []
for i in DF_flux_nadph_consumers.index:
    gene_id = DF_flux_nadph_consumers.gene[i]
    ko_min = DF_flux_nadph_consumers.min_flux[i]
    if gene_id == 'WT_max':
        # Wild-type reference row: no growth difference or reaction list applies.
        read_to_struct.append({'diff_gr': np.nan, 'gene': gene_id, 'min_flux': 1.456566, 'rxn': np.nan})
        continue
    if gene_id == 'WT_min':
        continue
    shift = ko_min - 1.456566
    if shift < -0.1 and shift > -20:
        rxn_list = list(model.genes.get_by_id(str(gene_id)).reactions)
        # Growth rate relative to the wild-type value 0.752658, the reference
        # used by every other filter cell (this cell previously used 0.457437).
        gr = float(DF_GR[DF_GR.gene == str(gene_id)].gr.values[0]) - 0.752658
        # Store the raw flux from the existing 'min_flux' column. The previous
        # code read a column that DF_flux_nadph_consumers does not have and
        # stored a pre-subtracted value, although the prioritisation step
        # subtracts the wild-type reference itself.
        read_to_struct.append({'diff_gr': gr, 'rxn': rxn_list, 'gene': gene_id, 'min_flux': ko_min})
DF_min_nadph_con = pd.DataFrame(read_to_struct)
In [159]:
# Show the knockouts selected on minimum NADPH-consumer flux.
DF_min_nadph_con
Out[159]:
In [60]:
# Ids of all reactions whose subsystem is the Pentose Phosphate Pathway (PPP).
PPP_rxns = [
    rxn.id
    for rxn in m.reactions
    if rxn.subsystem == 'Pentose Phosphate Pathway'
]
In [157]:
# Summed FVA flux through the pentose phosphate pathway reactions, one record
# per column of the FVA tables (wild-type columns plus one per viable knockout).
# The reaction filter is the same for every column, so build each sub-table once.
ppp_min = min_flux_vars[min_flux_vars.index.isin(PPP_rxns)]
ppp_max = max_flux_vars[max_flux_vars.index.isin(PPP_rxns)]
# Series.sum() is used so the cell does not depend on `np` being in scope.
read_to_struct = [
    {'gene': col, 'min_flux': ppp_min[col].sum(), 'max_flux': ppp_max[col].sum()}
    for col in min_flux_vars.columns
]
DF_flux_PPP = pd.DataFrame(read_to_struct)
DF_flux_PPP[0:3]
Out[157]:
In [156]:
# Knockouts whose summed minimum PPP flux rises more than 0.1 above the
# wild-type value (-2.202983).
read_to_struct = []
for i in DF_flux_PPP.index:
    # Gene labels are read from DF_flux_PPP itself (previously the WT check
    # looked at DF_flux_nadph_producers), and fluxes from the 'min_flux'
    # column that DF_flux_PPP actually has (previously 'min_flux_ppp').
    gene_id = DF_flux_PPP.gene[i]
    ko_min = DF_flux_PPP.min_flux[i]
    if gene_id == 'WT_max':
        # Wild-type reference row, emitted regardless of the threshold filter.
        read_to_struct.append({'diff_gr': np.nan, 'gene': gene_id, 'min_flux': -2.202983, 'rxn': np.nan})
        continue
    if gene_id == 'WT_min':
        continue
    # NOTE(review): the upper sanity bound uses a different constant
    # (0.255485) than the -2.202983 reference - confirm this is intended.
    if ko_min + 2.202983 > 0.1 and ko_min + 0.255485 < 20:
        rxn_list = list(model.genes.get_by_id(str(gene_id)).reactions)
        # Growth rate relative to the wild-type value 0.752658.
        gr = float(DF_GR[DF_GR.gene == str(gene_id)].gr.values[0]) - 0.752658
        read_to_struct.append({'diff_gr': gr, 'rxn': rxn_list, 'gene': gene_id, 'min_flux': ko_min})
DF_min_ppp = pd.DataFrame(read_to_struct)
In [171]:
# Show the knockouts selected on minimum PPP flux.
DF_min_ppp
Out[171]:
In [155]:
# Knockouts whose summed maximum PPP flux rises more than 0.1 (but less than
# 20) above the wild-type value (9.284744).
read_to_struct = []
for i in DF_flux_PPP.index:
    # Gene labels are read from DF_flux_PPP itself (previously the WT check
    # looked at DF_flux_nadph_producers), and fluxes from the 'max_flux'
    # column that DF_flux_PPP actually has (previously 'max_flux_ppp').
    gene_id = DF_flux_PPP.gene[i]
    ko_max = DF_flux_PPP.max_flux[i]
    if gene_id == 'WT_max':
        # Wild-type reference row. It is handled before the threshold filter:
        # nested inside it, the row could never be emitted because the wild
        # type cannot exceed its own value by 0.1.
        read_to_struct.append({'diff_gr': np.nan, 'gene': gene_id, 'max_flux': 9.284744, 'rxn': np.nan})
        continue
    if gene_id == 'WT_min':
        continue
    shift = ko_max - 9.284744
    if shift > 0.1 and shift < 20:
        rxn_list = list(model.genes.get_by_id(str(gene_id)).reactions)
        # Growth rate relative to the wild-type value 0.752658.
        gr = float(DF_GR[DF_GR.gene == str(gene_id)].gr.values[0]) - 0.752658
        read_to_struct.append({'diff_gr': gr, 'rxn': rxn_list, 'gene': gene_id, 'max_flux': ko_max})
DF_max_ppp = pd.DataFrame(read_to_struct)
In [172]:
# Show the knockouts selected on maximum PPP flux.
DF_max_ppp
Out[172]:
In [141]:
def _select_flux_shifted_knockouts(df, flux_col, wt_gene, lower, upper):
    """Pick the knockouts whose flux differs from wild type by lower < shift < upper.

    Shared implementation of the four public filters below.

    Parameters
    ----------
    df : DataFrame with a 'gene' column and the column named by `flux_col`.
        Rows labelled 'WT_min' / 'WT_max' are the wild-type references; every
        other row is a knockout identified by its gene id.
    flux_col : 'min_flux' or 'max_flux', the column being compared.
    wt_gene : 'WT_min' or 'WT_max', the row supplying the wild-type reference.
    lower, upper : exclusive bounds on (knockout flux - wild-type flux).

    Returns
    -------
    DataFrame with one row for the wild-type reference (diff_gr and rxn are
    NaN) and one row per selected knockout, holding 'gene', `flux_col`,
    'diff_gr' (growth rate minus the 'WT' growth rate in the global DF_GR) and
    'rxn' (the reactions of that gene in the global `model`).

    Raises
    ------
    IndexError if `df` has rows but none is labelled `wt_gene`.
    """
    records = []
    if len(df.index) == 0:
        return pd.DataFrame(records)

    wt_rows = df[df.gene == wt_gene]
    if len(wt_rows.index) == 0:
        raise IndexError('no %r row in df; cannot determine the wild-type reference' % wt_gene)
    wt_flux = wt_rows[flux_col].values[0]

    for i in df.index:
        gene_id = df.loc[i, 'gene']
        if gene_id == wt_gene:
            records.append({'diff_gr': np.nan, 'gene': gene_id, flux_col: wt_flux, 'rxn': np.nan})
            continue
        if gene_id in ('WT_min', 'WT_max'):
            # The other wild-type pseudo-row is not a knockout.
            continue
        ko_flux = df.loc[i, flux_col]
        # A plain difference is correct whatever the signs of the two values.
        # The previous sign-crossing branch added the (negative) wild-type
        # value instead of subtracting it, understating the shift.
        shift = ko_flux - wt_flux
        if not (lower < shift < upper):
            continue
        rxn_list = list(model.genes.get_by_id(str(gene_id)).reactions)
        gr = (float(DF_GR[DF_GR.gene == str(gene_id)].gr.values[0])
              - DF_GR[DF_GR.gene == 'WT'].gr.values[0])
        records.append({'diff_gr': gr, 'rxn': rxn_list, 'gene': gene_id, flux_col: ko_flux})
    return pd.DataFrame(records)


def maximize_FVA_min_flux(df, min_shift=0.5, max_shift=20):
    """Knockouts whose minimum flux is above wild type by more than `min_shift` and less than `max_shift`."""
    return _select_flux_shifted_knockouts(df, 'min_flux', 'WT_min', min_shift, max_shift)


def maximize_FVA_max_flux(df, min_shift=0.5, max_shift=20):
    """Knockouts whose maximum flux is above wild type by more than `min_shift` and less than `max_shift`."""
    return _select_flux_shifted_knockouts(df, 'max_flux', 'WT_max', min_shift, max_shift)


def minimize_FVA_min_flux(df, min_shift=0.5, max_shift=20):
    """Knockouts whose minimum flux is below wild type by more than `min_shift` and less than `max_shift`."""
    return _select_flux_shifted_knockouts(df, 'min_flux', 'WT_min', -max_shift, -min_shift)


def minimize_FVA_max_flux(df, min_shift=0.5, max_shift=20):
    """Knockouts whose maximum flux is below wild type by more than `min_shift` and less than `max_shift`."""
    return _select_flux_shifted_knockouts(df, 'max_flux', 'WT_max', -max_shift, -min_shift)
In [200]:
# The six filtered knockout tables and, position by position, their fitness labels.
list_DF = [
    DF_max_ppp,
    DF_min_ppp,
    DF_max_nadph_prod,
    DF_min_nadph_prod,
    DF_min_nadph_con,
    DF_max_nadph_con,
]
legend = [
    'max_PPP',
    'min_PPP',
    'max_NADPH_prod',
    'min_NADPH_prod',
    'min_NADPH_con',
    'max_NADPH_con',
]
In [ ]:
# Consolidate all SKOs that increase NADPH-related fitness into one dataframe,
# keeping only knockouts with a known growth difference greater than -0.1.
read_to_struct = []
for i, (label, tmp) in enumerate(zip(legend, list_DF)):
    print('%s %s' % (i, label))
    # 'max_*' tables carry their flux in 'max_flux', the others in 'min_flux'.
    flux_key = 'max_flux' if 'max' in label else 'min_flux'
    for j in tmp.index:
        diff = tmp.diff_gr[j]
        if pd.isnull(diff) or not diff > -0.1:
            continue
        read_to_struct.append({
            'fitness': label,
            'diff_gr': diff,
            'gene': tmp.gene[j],
            flux_key: tmp[flux_key][j],
            'rxn': tmp.rxn[j],
        })
DF_KOs = pd.DataFrame(read_to_struct)
In [213]:
# Report genes that appear in DF_KOs more often than the average gene does.
df = DF_KOs.gene.value_counts()
# Series.mean() replaces the bare `mean`, which no cell here defines or imports.
# It is computed once instead of recounting inside the loop.
mean_count = df.mean()
for gene in df.index:
    if df[gene] > mean_count:
        rxn_names = [str(rxn) for rxn in m.genes.get_by_id(str(gene)).reactions]
        print('%s %s %s' % (gene, df[gene], rxn_names))
In [6]:
# Show the consolidated knockouts belonging to any of the six fitness categories.
DF_KOs[DF_KOs.fitness.isin(['max_PPP','min_PPP','max_NADPH_prod','min_NADPH_prod','min_NADPH_con','max_NADPH_con'])]
Out[6]:
Prioritize model-predicted SKOs using Keio collection knockout growth data (acetate and glucose conditions)
In [9]:
# Load in the Keio collection.
# NOTE(review): the code that builds DF_KO_invivo is not visible in this
# notebook export; this cell only displays it, so it fails on a fresh kernel.
DF_KO_invivo
Out[9]:
In [7]:
# Wild-type reference flux for each fitness category, held as a one-row frame
# (row label 0) so a category can be looked up by column name.
read_to_struct = {
    'min_NADPH_prod': 4.613313,
    'max_NADPH_prod': 5.463324,
    'min_NADPH_con': 1.456566,
    'max_NADPH_con': 4.231534,
    'max_PPP': 9.284744,
    'min_PPP': -2.202983,
}
WT_fluxes = pd.DataFrame(read_to_struct, index=[0])
WT_fluxes
Out[7]:
Reduce the candidate SKOs based on measured knockout growth in glucose (glc) and acetate
In [81]:
# Attach the in-vivo growth data (acetate and glucose) to each gene that also
# appears among the model-predicted knockouts in DF_KOs.
read_to_struct = []
for i in DF_KO_invivo.index:
    ko_rows = DF_KOs[DF_KOs.gene == i]
    if len(ko_rows.index) == 0:
        # Gene measured in vivo but absent from DF_KOs: nothing to join, and
        # `.values[0]` on the empty selection would raise IndexError.
        continue
    read_to_struct.append({
        'gene': i,
        'GR_ac': DF_KO_invivo.Acetate[i],
        'GR_glc': DF_KO_invivo.Glucose[i],
        'fitness': ko_rows.fitness.values.tolist(),
        # A separate name for the comprehension variable keeps the loop's `i` intact.
        'rxn': [str(rxn) for rxn in ko_rows.rxn.values[0]],
    })
df = pd.DataFrame(read_to_struct)
# Keep knockouts growing above both cut-offs. One combined mask replaces the
# chained df[...][...] indexing, which relied on reindexing the second mask.
df = df[(df.GR_glc > 1) & (df.GR_ac > .98)]
df
Out[81]:
In [68]:
# For every retained gene and each of its DF_KOs rows, record the flux change
# relative to the wild-type reference of that row's fitness category.
struct = []
for i in df.gene:
    tmp_df = DF_KOs[DF_KOs.gene == i]
    for j in tmp_df.index:
        # A row carries either a max_flux or a min_flux value; check max first.
        for flux_col in ('max_flux', 'min_flux'):
            ko_flux = tmp_df[flux_col][j]
            if pd.notnull(ko_flux):
                delta = ko_flux - WT_fluxes[str(tmp_df.fitness[j])]
                struct.append({'gene': i, 'fitness': tmp_df.fitness[j], 'diff_wt': delta.values[0]})
df_priority_KOs = pd.DataFrame(struct)
In [73]:
# Rank the max_NADPH_con knockouts by flux change relative to WT, smallest first.
# NOTE(review): DataFrame.sort(columns=...) was removed in pandas 0.20; newer
# pandas needs sort_values(by='diff_wt').
df_priority_KOs[df_priority_KOs.fitness == 'max_NADPH_con'].sort(columns='diff_wt')
Out[73]:
In [77]:
# Rank the min_NADPH_prod knockouts by flux change relative to WT, largest first.
# NOTE(review): DataFrame.sort(columns=...) was removed in pandas 0.20; newer
# pandas needs sort_values(by='diff_wt', ascending=False).
df_priority_KOs[df_priority_KOs.fitness == 'min_NADPH_prod'].sort(columns='diff_wt',ascending=False)
Out[77]:
In [79]:
# Rank the min_PPP knockouts by flux change relative to WT, largest first.
# NOTE(review): DataFrame.sort(columns=...) was removed in pandas 0.20; newer
# pandas needs sort_values(by='diff_wt', ascending=False).
df_priority_KOs[df_priority_KOs.fitness == 'min_PPP'].sort(columns='diff_wt',ascending=False)
Out[79]:
In [80]:
# Rank the max_PPP knockouts by flux change relative to WT, largest first.
# NOTE(review): DataFrame.sort(columns=...) was removed in pandas 0.20; newer
# pandas needs sort_values(by='diff_wt', ascending=False).
df_priority_KOs[df_priority_KOs.fitness == 'max_PPP'].sort(columns='diff_wt',ascending=False)
Out[80]:
In [93]:
def _ranked_genes(fitness):
    """Genes of one fitness category, ordered by descending flux change vs. WT."""
    rows = df_priority_KOs[df_priority_KOs.fitness == fitness]
    return rows.sort(columns='diff_wt', ascending=False).gene.tolist()


# One gene list per category; the third list is a fixed, hand-entered selection.
gene_list = pd.Series([
    _ranked_genes('min_NADPH_prod'),
    _ranked_genes('min_PPP'),
    ['b4209', 'b1747', 'b1748', 'b2501', 'b4468'],
])
In [98]:
# Label each gene list with its fitness category and show the result as a table.
leg = ['min_NADPH_prod', 'min_PPP', 'max_NADPH_con']
struct = [
    {'genes': gene_list[i], 'fitness': leg[i]}
    for i in gene_list.index
]
pd.DataFrame(struct)
Out[98]:
In [108]:
# Look up sequence, gene name and UniProt accession for every selected gene,
# then show the table with repeated genes collapsed.
struct = []
for i in gene_list.index:
    for j in gene_list[i]:
        # Select the matching DF_GEM_PRO rows once and read all three fields from them.
        hit = DF_GEM_PRO[DF_GEM_PRO.m_gene == str(j)]
        struct.append({
            'gene': j,
            'seq': hit.u_seq.values[0],
            'gene_name': hit.u_gene_name.values[0],
            'uniprot': hit.u_uniprot_acc.values[0],
        })
pd.DataFrame(struct).drop_duplicates()
Out[108]: