In [2]:
from bioservices.kegg import KEGG
import numpy as np
import matplotlib.pyplot as plt
import readline
import random
# FDR
from rpy2.robjects.packages import importr
from rpy2.robjects.vectors import FloatVector
# Hypergeom
from scipy.stats import hypergeom
# LogNorm color scheme
import matplotlib.colors as colors
# Combinations
from itertools import combinations
In [3]:
def read_annotations(annotation_file):
    annotation_fh = open(annotation_file, 'r')
    annotations = annotation_fh.readlines()
    annotations = list(map(str.rstrip, annotations))
    annotation_fh.close()
    return annotations

def filter_zscores(ko_index, modzscore_file, annotations, zscore_threshold):
    '''
    Due to RAM restrictions, the input list is generated on the fly.
    '''
    metabolite_hits = []
    modz_fh = open(modzscore_file, 'r')
    modz_lines = modz_fh.readlines()
    modz_fh.close()
    for index in range(0, len(modz_lines)):
        line = modz_lines[index]
        zscores = line.rstrip().split()
        exp_zscore = float(zscores[ko_index])  # The z-score of that knockout
        if abs(exp_zscore) >= zscore_threshold:
            kegg_id = annotations[index]
            if kegg_id.startswith('C'):  # Verifying KEGG ids
                metabolite_hits.append(kegg_id)
    metabolite_hits = set(metabolite_hits)
    return metabolite_hits

def build_metabo_input(ko_index,
                       pos_annotation_file, pos_modzscore_file,
                       neg_annotation_file, neg_modzscore_file,
                       zscore_threshold):
    pos_annotations = read_annotations(pos_annotation_file)
    neg_annotations = read_annotations(neg_annotation_file)
    pos_hits = filter_zscores(ko_index, pos_modzscore_file, pos_annotations, zscore_threshold)
    neg_hits = filter_zscores(ko_index, neg_modzscore_file, neg_annotations, zscore_threshold)
    metabolite_hits = pos_hits | neg_hits
    return metabolite_hits
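As a quick sanity check, here is a minimal sketch of the z-score filter on hand-made input; the temporary files, compound ids and z-scores below are invented purely for illustration.
In [ ]:
import os
import tempfile

# Hypothetical toy data: two annotated ions, one knockout column of modified z-scores
tmpdir = tempfile.mkdtemp()
annot_path = os.path.join(tmpdir, 'annotation_toy.txt')
modz_path = os.path.join(tmpdir, 'modzscore_toy.tsv')
with open(annot_path, 'w') as fh:
    fh.write('C00022\nC00031\n')   # KEGG compound ids, one per ion
with open(modz_path, 'w') as fh:
    fh.write('0.5\n6.0\n')         # one z-score column (knockout index 0)

# Only the ion whose |z-score| >= threshold survives the filter
print(filter_zscores(0, modz_path, read_annotations(annot_path), 2))  # {'C00031'}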
In [4]:
# Get ALL the compounds within a species's pathway db
def get_all_compounds(species):
    # Initiate KEGG instance
    kegg_inst = KEGG()
    kegg_inst.organism = species
    # Get all compounds
    all_compounds = set()
    species_pathways = kegg_inst.pathwayIds
    for pathway in species_pathways:
        parsed_output = kegg_inst.parse(kegg_inst.get(pathway))  # parsed_output holds the full pathway record
        try:
            compounds = set(parsed_output['COMPOUND'].keys())
            all_compounds = all_compounds | compounds
        except KeyError:  # Some pathways do not have defined compounds
            pass
    return all_compounds
In [5]:
def loadTsv(filename):
    fh = open(filename, 'r')
    outset = set()
    for line in fh:
        if line.startswith('C'):
            outset = outset | set([line.rstrip()])
    fh.close()
    return outset
In [6]:
# ORA (only for E.coli)
def ora(in_metabolites, pathway_id, bg_metabolites, pathway_2_compounds, least_num_metabolites=1):
    '''
    Looking compounds up in a pre-built, organism-specific {pathway: compounds} dictionary
    is far faster than creating a KEGG instance and querying it on the fly.
    '''
    # Get compounds
    try:
        compounds = pathway_2_compounds[pathway_id]
    except KeyError:
        return 'No compounds (DB)'
    # Background filtering
    test_pathway_compounds = compounds & bg_metabolites
    if len(test_pathway_compounds) == 0:
        return 'No compounds (BG)'
    # Hypergeometric test
    test_in_metabolites = in_metabolites & test_pathway_compounds
    if len(test_in_metabolites) < least_num_metabolites:
        return 'Low metabolites'
    hyperg_test = hypergeom(len(bg_metabolites), len(test_pathway_compounds), len(in_metabolites & bg_metabolites))
    #print(len(in_metabolites), len(in_metabolites & bg_metabolites))
    #print(in_metabolites - bg_metabolites)
    ora_raw_pval = 1 - hyperg_test.cdf(len(test_in_metabolites)) + hyperg_test.pmf(len(test_in_metabolites))
    #print(hyperg_test.cdf(len(test_in_metabolites)))
    #print(hyperg_test.pmf(len(test_in_metabolites)))
    return ora_raw_pval, pathway_id, len(test_pathway_compounds)
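The right-tail p-value above is computed as 1 - CDF(k) + PMF(k) = P(X >= k). A quick, self-contained check (with arbitrary toy counts) that this matches scipy's survival function evaluated at k - 1:
In [ ]:
from scipy.stats import hypergeom

# Toy counts, chosen arbitrarily for illustration:
# 200 background metabolites, 15 of them in the pathway, 30 metabolites in the input list
rv = hypergeom(200, 15, 30)
k_obs = 4                                      # observed overlap between input and pathway
p_manual = 1 - rv.cdf(k_obs) + rv.pmf(k_obs)   # P(X >= k), as in ora()
p_sf = rv.sf(k_obs - 1)                        # the same right-tail probability via the survival function
print(p_manual, p_sf)                          # the two agree up to floating-point error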
In [7]:
def misidentify(in_metabolites, times, pool_metabolites, return_changes=False):
    '''
    in_metabolites: set of input metabolites
    times: number of misidentifications to introduce
    pool_metabolites: pool of metabolites from which the new identifications are drawn
    '''
    in_metabolites = list(in_metabolites)
    pool_metabolites = list(pool_metabolites)
    # Adjusting times so that times <= length of in_metabolites (and <= the spare pool)
    if times > len(in_metabolites):
        times = len(in_metabolites)
    elif times > (len(pool_metabolites) - len(in_metabolites)):
        times = (len(pool_metabolites) - len(in_metabolites))
    if times == 0:
        # Nothing to swap; keep the return type consistent with the normal path
        if return_changes:
            return set(in_metabolites), [], []
        return set(in_metabolites)
    # Generating new identifications
    new_ident = []
    for i in range(0, times):
        random.shuffle(pool_metabolites)
        new_metabolite = pool_metabolites[0]
        # If the new metabolite is already in the input list, discard it and draw again
        while new_metabolite in in_metabolites:
            random.shuffle(pool_metabolites)
            new_metabolite = pool_metabolites[0]
        new_ident.append(new_metabolite)
    # Swapping existing metabolites for the new metabolites
    old_ident = []
    random.shuffle(in_metabolites)
    for i in range(0, times):
        old_ident.append(in_metabolites.pop())
    in_metabolites += new_ident
    out_metabolites = set(in_metabolites)
    # Optionally return the identity changes as well
    if return_changes:
        return out_metabolites, old_ident, new_ident
    else:
        return out_metabolites
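A small illustration with made-up compound ids of what a single call does; with return_changes=True the swapped-out and swapped-in identities come back alongside the mutated set.
In [ ]:
toy_in = {'C00022', 'C00031', 'C00041'}
toy_pool = ['C00022', 'C00031', 'C00041', 'C00064', 'C00073', 'C00078']
mutated, old, new = misidentify(toy_in, 2, toy_pool, return_changes=True)
print(mutated)     # the input set with two identities swapped out
print(old, new)    # which ids were removed and which were drawn in from the pool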
In [8]:
# Run ORAs for all pathways in a single knockout
def oras_ko(ko_number, testing_pathways, background_metabolites, pathway_2_compounds,
            pos_annotation_file, pos_modzscore_file,
            neg_annotation_file, neg_modzscore_file,
            zscore_threshold,
            multiple_testing_correction, minus_log_trans, max_mutation, mutation_pool):
    ko_metabolites = build_metabo_input(ko_number,
                                        pos_annotation_file, pos_modzscore_file,
                                        neg_annotation_file, neg_modzscore_file,
                                        zscore_threshold)
    # Random mutation (misidentification) of the background
    if max_mutation:
        tmp_metabolites = ko_metabolites  # Keep a reference to the original hits
        (background_metabolites, old_met, new_met) = misidentify(background_metabolites,
                                                                 max_mutation, mutation_pool, True)
        met_translate = dict(zip(old_met, new_met))  # Make a dictionary for translation
        ko_metabolites = set()  # Rebuild ko_metabolites from scratch
        for met in tmp_metabolites:  # Translate each original hit and add it back to ko_metabolites
            ko_metabolites.add(met_translate.get(met, met))
    # Generating raw p values
    pval = []
    pathwayid = []
    pathwaysize = []
    for pathway in testing_pathways:
        ora_res = ora(ko_metabolites, pathway, background_metabolites, pathway_2_compounds)
        if len(ora_res) == 3:  # ora() returned (raw p-value, pathway id, pathway size) rather than an error string
            pval.append(ora_res[0])
            pathwayid.append(ora_res[1])
            pathwaysize.append(ora_res[2])
    # Multiple testing correction
    if multiple_testing_correction:
        pval = list(importr('stats').p_adjust(FloatVector(pval), method='BH'))
    # -log10 transformation
    if minus_log_trans:
        pval = list(map(np.log10, pval))
        pval = list(map(np.negative, pval))
    return pval, pathwayid, pathwaysize
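The correction step delegates to R's p.adjust via rpy2. For reference only, a pure-NumPy Benjamini-Hochberg sketch that should give the same adjusted values (it is not used by oras_ko):
In [ ]:
def bh_adjust(pvals):
    '''Benjamini-Hochberg adjustment, equivalent to R's p.adjust(method='BH').'''
    pvals = np.asarray(pvals, dtype=float)
    m = len(pvals)
    order = np.argsort(pvals)                        # ascending p-values
    ranked = pvals[order] * m / np.arange(1, m + 1)  # p * m / rank
    # Enforce monotonicity from the largest p-value downwards
    adjusted = np.minimum.accumulate(ranked[::-1])[::-1]
    adjusted = np.minimum(adjusted, 1.0)
    out = np.empty(m)
    out[order] = adjusted
    return out

print(bh_adjust([0.01, 0.04, 0.03, 0.20]))  # compare with p.adjust(c(0.01, 0.04, 0.03, 0.20), 'BH') in R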
In [22]:
# Generating null models
def make_null_model(original_model, metabolite_pool, overlap=False):
    '''
    original_model -- dictionary with pathways as keys and sets of metabolites as values
    metabolite_pool -- collection of metabolites the null model can draw from
    overlap -- True: the overlap between pathways is preserved (only metabolite labels are shuffled);
               False: each pathway is an independent random draw, so the overlap is randomised
    '''
    if overlap:
        # If keeping the metabolite overlap, we only shuffle the labels of the metabolites
        # Getting the original metabolites
        original_metabolites = set()
        for value in original_model.values():
            original_metabolites = original_metabolites | value
        original_metabolites = list(original_metabolites)
        random.shuffle(metabolite_pool)
        metabolite_translate = dict(zip(original_metabolites, metabolite_pool[:len(original_metabolites)]))
    new_model = dict()
    for pathway in original_model:
        pathwaysize = len(original_model[pathway])
        if overlap:
            new_model[pathway] = set()
            for metabolite in original_model[pathway]:
                new_model[pathway].add(metabolite_translate[metabolite])
        else:
            # A single shuffle is enough to draw a random set of the same size
            random.shuffle(metabolite_pool)
            new_model[pathway] = set(metabolite_pool[:pathwaysize])
    return new_model
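A toy illustration (invented pathway names and compound labels) of the two modes: with overlap=True the intersection sizes between pathways are preserved, with overlap=False they are not.
In [ ]:
toy_model = {'pA': {'C1', 'C2', 'C3'}, 'pB': {'C2', 'C3', 'C4'}}
toy_pool = ['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8']

relabelled = make_null_model(toy_model, list(toy_pool), overlap=True)
randomised = make_null_model(toy_model, list(toy_pool), overlap=False)
print(len(relabelled['pA'] & relabelled['pB']))  # always 2, matching the original overlap
print(len(randomised['pA'] & randomised['pB']))  # anywhere between 0 and 3, since each pathway is drawn independently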
In [10]:
# Save null models and load null models
def save_null_model(model, filename):
    '''
    model is a dictionary:
    keys = pathway name
    values = metabolites (set)
    '''
    with open(filename, 'w') as fh:
        for pathway in model:
            metabolites = list(model[pathway])
            fh.write(pathway + '\t')
            fh.write(','.join(metabolites))
            fh.write('\n')

def load_null_model(filename):
    model = dict()
    with open(filename, 'r') as fh:
        for line in fh.readlines():
            fields = line.rstrip().split('\t')
            pathway = fields[0]
            metabolites = set(fields[1].split(','))
            model[pathway] = metabolites
    return model
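A quick round-trip check on a temporary file (toy model, hypothetical path) confirming that save and load are inverses of each other.
In [ ]:
import os
import tempfile

toy_model = {'path:eco00010': {'C00022', 'C00031'}, 'path:eco00030': {'C00117'}}
tmp_path = os.path.join(tempfile.mkdtemp(), 'toy_model.tsv')
save_null_model(toy_model, tmp_path)
assert load_null_model(tmp_path) == toy_model  # sets compare equal regardless of write order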
In [24]:
for i in range(0, 100):
    filename = ('/home/zxu/Documents/mscbioinfo/Data Project/scripts/Jupyter/null_models/random/' +
                'model' + str(i) + '.tsv')
    null_model = make_null_model(pathway_2_compounds, list(test_compounds), True)
    #save_null_model(null_model, filename)
    print(null_model['path:eco00010'])
In [60]:
print(len(null_model['path:eco00010'] & null_model['path:eco00030']))
print(len(pathway_2_compounds['path:eco00010'] & pathway_2_compounds['path:eco00030']))
In [80]:
null_jaccard = []
kegg_jaccard = []
for i in combinations(pathway_2_compounds, 2):
    null_jaccard.append(len(null_model[i[0]] & null_model[i[1]]) / len(null_model[i[0]] | null_model[i[1]]))
    kegg_jaccard.append(len(pathway_2_compounds[i[0]] & pathway_2_compounds[i[1]]) / len(pathway_2_compounds[i[0]] | pathway_2_compounds[i[1]]))
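The quantity computed for each pathway pair above is the Jaccard index |A ∩ B| / |A ∪ B|. A tiny helper (not used above, only to make the formula explicit) and a toy check:
In [ ]:
def jaccard(a, b):
    '''Jaccard index of two sets: |A intersection B| / |A union B|.'''
    return len(a & b) / len(a | b)

print(jaccard({'C1', 'C2', 'C3'}, {'C2', 'C3', 'C4'}))  # 2 shared out of 4 total -> 0.5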
In [79]:
np.var(null_jaccard)
Out[79]:
In [81]:
bins = np.arange(0, 0.5, 0.01)
hist, bin_edges = np.histogram(null_jaccard, bins=bins)
plt.clf()
plt.bar(bins[:-1],hist,width=np.diff(bins))
plt.show()
In [82]:
bins = np.arange(0, 0.5, 0.01)
hist, bin_edges = np.histogram(kegg_jaccard, bins=bins)
plt.clf()
plt.bar(bins[:-1],hist,width=np.diff(bins))
plt.show()
In [24]:
sig_count = 0
for ko_number in range(0, len(all_knockouts)):
    nullmod_pval, nullmod_pathway_id, nullmod_sizes = oras_ko(ko_number, ecoli_pathways, zamboni_bg, null_model,
                                                              pos_annot, pos_mod, neg_annot, neg_mod, 2, False, False, 0, [])
    for i in nullmod_pval:
        if i < 0.05:
            sig_count += 1
print(sig_count)
In [58]:
'C00186' in zamboni_bg
Out[58]:
In [57]:
build_metabo_input(2281, pos_annot, pos_mod, neg_annot, neg_mod, 5)
Out[57]:
In [54]:
fh = open('/home/zxu/Documents/mscbioinfo/Data Project/Zamboni/modzscore_neg_annotated.tsv', 'r')
lines = fh.readlines()
ko_scores = []
for line in lines:
    #print(len(line.rstrip().split('\t')))
    score = float(line.rstrip().split('\t')[2281])  # convert so the scatter gets numeric y values
    ko_scores.append(score)
plt.scatter(np.arange(len(ko_scores)), ko_scores)
plt.show()
In [44]:
all_knockouts.index('ybjO')
Out[44]:
In [17]:
build_metabo_input(1541, pos_annot, pos_mod, neg_annot, neg_mod, 10)
Out[17]:
In [8]:
for ko_number in range(0, len(all_knockouts)):
    fh = open('./Backgrounds/KO' + str(ko_number) + '.tsv', 'r')
    nobg_pval = []
    nobg_pathways = []
    nobg_size = []
    zamboni_pval = []
    lines = fh.readlines()
    for line in lines:
        fields = line.rstrip().split('\t')
        nobg_pval.append(float(fields[1]))
        nobg_pathways.append(fields[0])
        nobg_size.append(fields[2])
        zamboni_pval.append(float(fields[3]))
    fh.close()
    if len(nobg_pval) == 0:
        continue
    elif max(nobg_pval) < 1.30 and max(zamboni_pval) < 1.30:  # 1.30 ~ -log10(0.05)
        continue
    print('<OPTION VALUE="{}">{}</option>'.format(ko_number, all_knockouts[ko_number]), end='')
In [11]:
# Stating the annotation files & modzscore files
pos_annot = '/home/zxu/Documents/mscbioinfo/Data Project/Zamboni/annotation_pos.txt'
pos_mod = '/home/zxu/Documents/mscbioinfo/Data Project/Zamboni/modzscore_pos_annotated.tsv'
neg_annot = '/home/zxu/Documents/mscbioinfo/Data Project/Zamboni/annotation_neg.txt'
neg_mod = '/home/zxu/Documents/mscbioinfo/Data Project/Zamboni/modzscore_neg_annotated.tsv'
# Initialise KEGG instance
k = KEGG()
k.organism = "eco"
# Initialise both backgrounds
test_compounds = get_all_compounds('eco')
zamboni_bg = loadTsv('/home/zxu/Documents/mscbioinfo/Data Project/Zamboni/annotation_all.txt')
zamboni_bg = zamboni_bg & test_compounds
# build {pathway: compounds} dictionary for E.coli
ecoli_pathways = k.pathwayIds
pathway_2_compounds = dict()
for pathway in ecoli_pathways:
    parsed_output = k.parse(k.get(pathway))  # parsed_output holds the full pathway record
    try:
        compounds = set(parsed_output['COMPOUND'].keys())
        pathway_2_compounds[pathway] = compounds
    except KeyError:  # Some pathways do not have defined compounds
        #name = parsed_output['NAME']
        #print(pathway, name)
        pass
In [12]:
# Translate KO number to gene name
sample_id_all = '/home/zxu/Documents/mscbioinfo/Data Project/Zamboni/sample_id_modzscore.tsv'
all_knockouts = []  # End product
fh_sample_id_all = open(sample_id_all, 'r')
for knockout in fh_sample_id_all:
    all_knockouts.append(knockout.rstrip())
fh_sample_id_all.close()
#print(all_knockouts)
In [30]:
fh = open('rabinowitz.txt', 'r')
rabinowitz_lines = fh.readlines()
for line in rabinowitz_lines[80:85]:
    compound = line.rstrip()
    if compound.endswith(')'):
        compound = compound.split(' (')[0]
    print(compound)
    print(k.find('compound', compound))
    print('=' * 20)
fh.close()
In [10]:
met_conc = {}
with open('/home/zxu/Documents/mscbioinfo/Bioinfo Project/rabinowitz_conc.csv', 'r') as fh:
    lines = fh.readlines()
    for line in lines:
        fields = line.rstrip().split('\t')
        # Converting the concentration to an actual number (the exponent follows a Unicode minus sign)
        conc = fields[0]
        value = conc.split(' ')[0]
        power = conc.split('− ')[1]
        number = float(value) * 10 ** (-int(power))
        metabolite = fields[1]
        met_conc[metabolite] = number
# Might be useful if we want to cut off based on concentrations
rab_met = sorted(met_conc, key=met_conc.get, reverse=True)
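For clarity, a sketch of the parsing logic above on a single made-up line; the exact formatting of rabinowitz_conc.csv is assumed here from the split strings used in the code.
In [ ]:
# Hypothetical line: concentration first, metabolite name second, tab-separated;
# the exponent is assumed to be preceded by a Unicode minus sign and a space.
toy_line = '9.6 × 10− 2\tglutamate\n'
fields = toy_line.rstrip().split('\t')
conc = fields[0]
value = conc.split(' ')[0]     # '9.6'
power = conc.split('− ')[1]    # '2'
print(float(value) * 10 ** (-int(power)), fields[1])  # 0.096 glutamate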
In [11]:
for ko_number in range(0, len(all_knockouts)):
    rabbg_pval, rabbg_pathway_id, rabbg_sizes = oras_ko(ko_number, ecoli_pathways, zamboni_bg & set(rab_met), pathway_2_compounds,
                                                        pos_annot, pos_mod, neg_annot, neg_mod, 2, True, False, 0, [])
    with open('./rabinowitz/full/KO' + str(ko_number) + '.tsv', 'w') as fh:
        for i in range(0, len(rabbg_pval)):
            fh.write(rabbg_pathway_id[i][5:] + '\t' + str(rabbg_pval[i]) + '\n')
In [13]:
zamboni_pval = []
rabinowitz_pval = []
for ko_number in range(0, len(all_knockouts)):
    ora_results = {}
    rabinowitz_results = './rabinowitz/full/KO' + str(ko_number) + '.tsv'
    zamboni_results = './allresult/KO' + str(ko_number) + '.tsv'
    with open(rabinowitz_results, 'r') as rabinowitz_fh:
        for line in rabinowitz_fh.readlines():
            fields = line.rstrip().split('\t')
            pathname = fields[0]
            pathpval = float(fields[1])
            ora_results[pathname] = pathpval
    with open(zamboni_results, 'r') as zamboni_fh:
        for line in zamboni_fh.readlines():
            fields = line.rstrip().split('\t')
            pathname = fields[0]
            pathpval = float(fields[1])
            try:
                rabinowitz_pval.append(ora_results[pathname])
                zamboni_pval.append(pathpval)
            except KeyError:
                pass
print(len(zamboni_pval), len(rabinowitz_pval))
In [16]:
zamboni_pval = list(map(np.log10, zamboni_pval))
zamboni_pval = list(map(np.negative, zamboni_pval))
rabinowitz_pval = list(map(np.log10, rabinowitz_pval))
rabinowitz_pval = list(map(np.negative, rabinowitz_pval))
In [27]:
xedges = np.arange(0, 2, 0.05)
yedges = np.arange(0, 2, 0.05)
heatmap, xedges, yedges = np.histogram2d(zamboni_pval, rabinowitz_pval, bins=(xedges, yedges))
extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
plt.clf()
plt.plot([0,10], [0,10], color="Black", label='y=x')
plt.imshow(heatmap.T, extent=extent, origin='lower', norm=colors.LogNorm(), aspect='auto')
plt.set_cmap('rainbow')
plt.colorbar(orientation="horizontal", pad=0.20)
plt.ylabel('Rabinowitz Background')
plt.xlabel('Zamboni Background')
plt.title('-log(P) in two backgrounds')
plt.legend(bbox_to_anchor=(1, 1.22))
plt.savefig('minuslog_zamboni_rabinowitz', transparent=False)
#plt.scatter(nobg, zamboni)
#plt.plot([0,10], [0,10])
#plt.xlim([0, 20])
#plt.ylim([0, 10])
#plt.show()
Verdict:
1) A more specific background tends to make p-values less significant
2) Some KOs survived the multiple testing correction
In [32]:
path_2_pathname = {}
for path in ecoli_pathways:
    pathname = path[5:]
    path_2_pathname[pathname] = k.parse(k.get(pathname))['NAME'][0][:-31]  # drop the ' - Escherichia coli K-12 MG1655' suffix
In [33]:
path_2_pathname
Out[33]:
In [40]:
out_fh = open('datajs2.js', 'w')
for ko_number in range(0, 3717):
    fh = open('./Backgrounds/KO' + str(ko_number) + '.tsv', 'r')
    nobg_pval = []
    nobg_pathways = []
    nobg_size = []
    zamboni_pval = []
    lines = fh.readlines()
    for line in lines:
        fields = line.rstrip().split('\t')
        nobg_pval.append(float(fields[1]))
        nobg_pathways.append(fields[0])
        nobg_size.append(fields[2])
        zamboni_pval.append(float(fields[3]))
    fh.close()
    if len(nobg_pval) == 0:
        continue
    elif max(nobg_pval) < 1.30 and max(zamboni_pval) < 1.30:  # 1.30 ~ -log10(0.05)
        continue
    out_fh.write('else if(selVal == \"' + str(ko_number) + '\")')
    out_fh.write('{options.series = [{data: [')
    # Write one data point per pathway that is significant in at least one background
    for i in range(0, len(nobg_pval)):
        if nobg_pval[i] > 1.30 or zamboni_pval[i] > 1.30:
            name = path_2_pathname[nobg_pathways[i]]
            out_fh.write('{ ' +
                         'x: {}, y: {}, z: {}, name: "{}", country: "{}"'.format(nobg_pval[i], zamboni_pval[i], nobg_size[i],
                                                                                 nobg_pathways[i], name) +
                         ' }')
            if i == len(nobg_pval)-1:
                pass
            else:
                out_fh.write(',')
    #name = path_2_pathname[nobg_pathways[-1]]
    #out_fh.write('{ ' +
    #             'x: {}, y: {}, z: {}, name: "{}", country: "{}"'.format(nobg_pval[-1], zamboni_pval[-1], nobg_size[-1],
    #                                                                     nobg_pathways[-1], name) +
    #             ' }')
    out_fh.write(']}], options.title = {text: \'OverRepresentation Analysis of ' + all_knockouts[ko_number] + '\'}}' + '\n')
out_fh.close()
In [10]:
# Background Analysis
for ko_number in range(2406, 3717):
    nobg_pval, nobg_pathway_id, nobg_sizes = oras_ko(ko_number, ecoli_pathways, test_compounds, pathway_2_compounds,
                                                     pos_annot, pos_mod, neg_annot, neg_mod, 2, True, True, 0, [])
    zamboni_pval, zamboni_pathway_id, zamboni_sizes = oras_ko(ko_number, ecoli_pathways, zamboni_bg, pathway_2_compounds,
                                                              pos_annot, pos_mod, neg_annot, neg_mod, 2, True, True, 0, [])
    result_file = './Backgrounds/KO' + str(ko_number) + '.tsv'
    fh = open(result_file, 'w')
    # NOTE: writing by index assumes both calls return results for the same pathways in the same order
    for i in range(0, len(nobg_pathway_id)):
        fh.write('{}\t{}\t{}\t{}\t{}\n'.format(nobg_pathway_id[i][5:], nobg_pval[i], nobg_sizes[i], zamboni_pval[i], zamboni_sizes[i]))
    fh.close()
In [54]:
print(len(fp), len(fn))
In [45]:
nobg = []
zamboni = []
fp = []
fn = []
for ko_number in range(0, 3717):
    result_file = './Backgrounds/KO' + str(ko_number) + '.tsv'
    fh = open(result_file, 'r')
    lines = fh.readlines()
    for line in lines:
        fields = line.rstrip().split('\t')
        nobg_pval = float(fields[1])
        zamboni_pval = float(fields[3])
        nobg.append(nobg_pval)
        zamboni.append(zamboni_pval)
        if (nobg_pval > zamboni_pval) and (nobg_pval > 1.301) and (zamboni_pval < 1.301):  # false positive (1.301 ~ -log10(0.05))
            fp.append(1)
        elif (nobg_pval < zamboni_pval) and (zamboni_pval > 1.301) and (nobg_pval < 1.301):  # false negative
            fn.append(1)
        else:
            pass
    fh.close()
In [77]:
nobg
Out[77]:
In [146]:
xedges = np.arange(0, 6, 0.05)
yedges = np.arange(0, 3, 0.05)
heatmap, xedges, yedges = np.histogram2d(nobg, zamboni, bins=(xedges, yedges))
extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
plt.clf()
plt.plot([0,10], [0,10], color="Black", label='y=x')
plt.imshow(heatmap.T, extent=extent, origin='lower', norm=colors.LogNorm(), aspect='auto')
plt.set_cmap('rainbow')
plt.colorbar(orientation="horizontal", pad=0.20)
plt.ylabel('Specified Background')
plt.xlabel('Unspecified Background')
plt.title('-log(P) in two backgrounds')
plt.legend(bbox_to_anchor=(1, 1.22))
plt.savefig('contour', transparent=True)
#plt.scatter(nobg, zamboni)
#plt.plot([0,10], [0,10])
#plt.xlim([0, 20])
#plt.ylim([0, 10])
#plt.show()
In [39]:
for met in build_metabo_input(0, pos_annot, pos_mod, neg_annot, neg_mod, 2):
    print(met)
Verdict:
1) The p-value distribution is essentially another view of Analysis 1 (background metabolites), so the same verdicts apply
2) The pathway size distribution shows a shift once the background is applied
In [ ]:
# pval distribution
for ko_number in range(0, 101):
    ko_gene = all_knockouts[ko_number]
    ko_metabolites = loadTsv('/home/zxu/Documents/mscbioinfo/Data Project/scripts/metaboAnalystqueries/maKO' +
                             str(ko_number) + 'Cutoff2.tsv')
    ecoli_pathways = k.pathwayIds
    nobg_pval = []
    zamboni_pval = []
    for pathway_index in range(0, len(ecoli_pathways)):
        pathway = ecoli_pathways[pathway_index]
        #print(pathway_index)
        nobg_ora_res = ora(ko_metabolites, pathway, test_compounds, pathway_2_compounds)
        if len(nobg_ora_res) == 3:  # ora() returned (p-value, pathway id, pathway size) rather than an error string
            nobg_pval.append(nobg_ora_res[0])
            zamboni_pval.append(ora(ko_metabolites, pathway, zamboni_bg, pathway_2_compounds)[0])
    '''
    nobg_pval = list(map(np.log10, nobg_pval))
    zamboni_pval = list(map(np.log10, zamboni_pval))
    nobg_pval = list(map(np.negative, nobg_pval))
    zamboni_pval = list(map(np.negative, zamboni_pval))
    '''
    # Plotting
    fig, ax = plt.subplots(nrows=2, ncols=1)
    fig.subplots_adjust(bottom=-0.5)
    binsize = 0.05
    ax[0].hist(nobg_pval, bins=np.arange(0, 1+binsize, binsize))
    ax[1].hist(zamboni_pval, bins=np.arange(0, 1+binsize, binsize))
    ax[0].set_title('P-value distribution (all compounds) for knockout ' + ko_gene)
    ax[1].set_title('P-value distribution (Zamboni background) for knockout ' + ko_gene)
    ax[0].get_xaxis().set_visible(False)
    for axe in ax:
        axe.patches[0].set_color('r')
    #ax.set_xlabel('No specified background')
    #ax.set_ylabel('Zamboni background')
    plt.tight_layout()
    fig.savefig('./pvaldist/KO' + str(ko_number) + '.png')
    #plt.show()
    fig.clf()
In [10]:
size_dist = []
for pathway in pathway_2_compounds:
    #if len(pathway_2_compounds[pathway]) == 1:
    #    print(pathway)
    size_dist.append(len(pathway_2_compounds[pathway]))
In [11]:
zamboni_size_dist = []
for pathway in pathway_2_compounds:
    compounds = pathway_2_compounds[pathway]
    cmpd_count = 0
    for compound in compounds:
        if compound in zamboni_bg:
            cmpd_count += 1
    zamboni_size_dist.append(cmpd_count)
In [12]:
print(min(zamboni_size_dist), max(zamboni_size_dist))
plt.hist(zamboni_size_dist, bins=range(0, 145, 5))
plt.ylim(0, 40)
plt.xlabel('Pathway size')
plt.ylabel('Number of pathways')
plt.title('Pathway size distribution (Zamboni background)')
plt.show()
In [13]:
print(min(size_dist))
print(max(size_dist))
plt.hist(size_dist, bins=range(0, 145, 5))
plt.ylim(0, 40)
plt.xlabel('Pathway size')
plt.ylabel('Number of pathways')
plt.title('Pathway size distribution (all compounds)')
plt.show()
In [10]:
random_knockouts = np.random.randint(3717, size=50)
In [12]:
random_knockouts = np.array([2673, 470, 3457, 859, 2461, 2776, 514, 1537, 3114, 2120, 2880,
1312, 484, 3494, 110, 29, 1514, 791, 1925, 1131, 2776, 1274,
1342, 875, 2235, 2938, 1460, 2957, 718, 1214, 3058, 509, 3215,
2066, 2598, 3622, 3627, 436, 2223, 2691, 2442, 3439, 2490, 1223,
90, 1902, 1893, 929, 3349, 746])
In [25]:
print(zero, one, two, more)
print(20)
In [27]:
print(zero, one, two, more)
print(10)
In [26]:
import os
zero = {}
one = {}
two = {}
more = {}
mis_dir = './mis_ident50/3more'  # directory of misidentification results to aggregate
for file in os.listdir(mis_dir):
    path = mis_dir + '/' + file
    fh = open(path, 'r')
    lines = fh.readlines()
    for line in lines:
        fields = line.rstrip().split('\t')
        total_hits = int(fields[0])
        false_pos = int(fields[1])
        false_neg = int(fields[2])
        if total_hits == 0:
            zero['fp'] = zero.get('fp', 0) + false_pos
            zero['fn'] = zero.get('fn', 0) + false_neg
        elif total_hits == 1:
            one['fp'] = one.get('fp', 0) + false_pos
            one['fn'] = one.get('fn', 0) + false_neg
        elif total_hits == 2:
            two['fp'] = two.get('fp', 0) + false_pos
            two['fn'] = two.get('fn', 0) + false_neg
        elif total_hits >= 3:
            more['fp'] = more.get('fp', 0) + false_pos
            more['fn'] = more.get('fn', 0) + false_neg
    fh.close()
In [13]:
# Random metabolite mutation
mutation_rate = 0.5
filename = './mis_ident/new/mrate50.tsv'
fh = open(filename, 'w')
for ko_number in random_knockouts:
    fp = 0
    fn = 0
    ora_results = []
    for i in range(0, 51):
        ora_results.append([])
    (pvals, pathwayids, junk) = oras_ko(ko_number, ecoli_pathways, zamboni_bg, pathway_2_compounds,
                                        pos_annot, pos_mod, neg_annot, neg_mod, 2,
                                        True, False, 0, [])
    for ind in range(0, len(pvals)):
        if pvals[ind] < 0.05:
            ora_results[0].append(pathwayids[ind])
    for rep in range(0, 50):  # Number of mutated replicates per KO ('rep' avoids shadowing the KEGG instance k)
        #print(rep)
        (pvals_mut, pathwayids_mut, junk) = oras_ko(ko_number, ecoli_pathways, zamboni_bg, pathway_2_compounds,
                                                    pos_annot, pos_mod, neg_annot, neg_mod, 2,
                                                    True, False, int(mutation_rate * 413), test_compounds)
        for ind in range(0, len(pvals_mut)):
            if pvals_mut[ind] < 0.05:
                ora_results[rep+1].append(pathwayids_mut[ind])
    # Write ora_results to the file: true hits, accumulated false positives/negatives, KO number
    for i in range(1, len(ora_results)):
        result = ora_results[i]
        fp += len(set(result) - set(ora_results[0]))
        fn += len(set(ora_results[0]) - set(result))
    fh.write('\t'.join([str(len(ora_results[0])), str(fp), str(fn), str(ko_number)]))
    fh.write('\n')
fh.close()
In [28]:
fh = open('./mis_ident/new/mrate50.tsv', 'r')
lines = fh.readlines()
fp_all = []
fn_all = []
for line in lines:
    fields = line.rstrip().split('\t')
    tp = int(fields[0])
    fp = int(fields[1])
    fn = int(fields[2])
    fp_all.append(fp)
    if tp != 0:
        fn_all.append(fn)
fh.close()
print(np.mean(fp_all)/50, np.mean(fn_all)/50, sum(fn_all)/2500)
In [80]:
count = -1  # start at -1 so ora_results[0] matching itself is not counted
for result in ora_results:  # 'result' avoids shadowing the KEGG instance k
    if result == ora_results[0]:
        count += 1
print(count)
In [14]:
eco_enzymes
Out[14]:
In [12]:
eco_enzymes = []
for i in range(0, len(all_knockouts)):
    print(i)
    try:
        if 'Enzymes' in k.parse(k.get(k.find('eco', all_knockouts[i])[:9])).get('BRITE', []):
            eco_enzymes.append(i)
    except AttributeError:
        pass
In [21]:
outfile = './locality/allKO.tsv'
fh = open(outfile, 'w')
for ko_number in eco_enzymes:
    print(ko_number)
    ko_gene = all_knockouts[ko_number]
    gene_id = k.find("eco", ko_gene)[:9]
    try:
        gene_pathways = k.parse(k.get(gene_id))['PATHWAY'].keys()
    except KeyError:
        gene_pathways = []
    except TypeError:
        gene_pathways = []
    if len(gene_pathways) > 0:
        fh.write(str(ko_number) + '\t' + ko_gene + '\t')
        fh.write(' '.join(list(gene_pathways)))
        fh.write('\n')
fh.close()
In [37]:
from operator import itemgetter
# Get KO number
# Load rankings for that KO
# Get significant/not related/average rank
kegg_results = './locality/allKO.tsv'
fh = open(kegg_results, 'r')
result_lines = fh.readlines()
fh.close()
out_fh = open('./locality/outresult_sig.tsv', 'w')
out_fh.write('KO_number\tGene\tSigpathways\tNot_KEGG\tSigKEGG\tRank\tPath_count\n')
for line in result_lines:
    fields = line.rstrip().split('\t')
    ko_number = fields[0]
    ko_name = fields[1]
    ko_kegg_pathways = fields[2].split()
    ko_ora_results = './allresult/KO' + ko_number + '.tsv'
    fh = open(ko_ora_results, 'r')
    ora_lines = fh.readlines()
    fh.close()
    ora_pvals = []
    ora_pathways = []
    ora_sigpathways = []
    for oraline in ora_lines:
        ora_pathway_result = oraline.rstrip().split('\t')
        ora_pvals.append(float(ora_pathway_result[1]))  # convert to float so the ranking is numeric, not lexicographic
        ora_pathways.append(ora_pathway_result[0])
        if float(ora_pathway_result[1]) < 0.05:
            ora_sigpathways.append(ora_pathway_result[0])
    pathway_rank = dict(zip(ora_pathways, list(dup_argsort(ora_pvals))))
    # Significant pathways vs the KEGG-annotated pathways of the gene
    not_related_pathways = len(set(ora_sigpathways) - set(ko_kegg_pathways))
    missing_pathways = len(set(ko_kegg_pathways) - set(ora_sigpathways))
    # Average rank of the KEGG-annotated pathways
    ranksum = 0
    path_count = 0
    for path in ko_kegg_pathways:
        try:
            rank = pathway_rank[path]
            ranksum += rank
            path_count += 1
        except KeyError:
            pass
    if path_count != 0:
        rankavg = ranksum/path_count
    else:
        rankavg = 'N'
    output_str = '\t'.join([ko_number, ko_name,
                            str(len(ora_sigpathways)), str(not_related_pathways), str(len(ora_sigpathways) - not_related_pathways), str(rankavg), str(path_count)])
    if len(ora_sigpathways) > 0:
        out_fh.write(output_str+'\n')
out_fh.close()
In [25]:
a2 = np.array([4, 2, 1, 1, 2])
def dup_argsort(in_val):
    # Rank each element by the number of strictly smaller elements, so tied values share the same rank
    u, v = np.unique(in_val, return_inverse=True)
    out_ind = (np.cumsum(np.concatenate(([0], np.bincount(v)))))[v]
    return out_ind
dup_argsort(a2)
Out[25]:
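A quick check of the ranking behaviour on the array above and on an already-sorted input:
In [ ]:
print(dup_argsort(np.array([4, 2, 1, 1, 2])))           # [4 2 0 0 2]: the two 1s share rank 0, the two 2s share rank 2
print(dup_argsort(np.array([0.01, 0.05, 0.05, 0.2])))   # [0 1 1 3]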
In [51]:
fh = open('./2281.tsv', 'w')
fh.write('Gene\tKegg\tKegg_path\tSig_path(0.1)\tMinPval\n')
for ko_number in range(2281, 2282):
    print(ko_number)
    ko_gene = all_knockouts[ko_number]
    gene_id = k.find("eco", ko_gene)[:9]
    try:
        gene_pathways = k.parse(k.get(gene_id))['PATHWAY'].keys()
    except KeyError:
        gene_pathways = []
    except TypeError:
        gene_pathways = []
    if len(gene_pathways) >= 0:
        #print(ko_gene, gene_id)
        #print(list(gene_pathways))
        significant_pathways = []
        (pvals, pathwayids, junk) = oras_ko(ko_number, ecoli_pathways, zamboni_bg, pathway_2_compounds,
                                            pos_annot, pos_mod, neg_annot, neg_mod, 2,
                                            False, False, 0, [])
        for ind in range(0, len(pvals)):
            if pvals[ind] < 0.1:
                significant_pathways.append(pathwayids[ind])
        fh.write('{}\t{}\t{}\t{}\t{}\n'.format(ko_gene, gene_id, list(gene_pathways), significant_pathways, min(pvals)))
fh.close()
In [43]:
for metabolite in build_metabo_input(0, pos_annot, pos_mod, neg_annot, neg_mod, 2):
    print(metabolite)
In [46]:
(pval, pathwayid, pathwaysize) = oras_ko(0, ecoli_pathways, zamboni_bg, pathway_2_compounds,
                                         pos_annot, pos_mod, neg_annot, neg_mod, 2, False, False, 0, [])
In [77]:
def kegg_2_name(kegg_id, kegg_instance):
    return kegg_instance.parse(kegg_instance.get(kegg_id))['NAME'][0].split(' - ')[0]
In [110]:
kegg_2_name('path:eco00330', k)
Out[110]:
In [112]:
len(k.parse(k.get('path:eco00401'))['COMPOUND'])
Out[112]:
In [109]:
pval = np.array(pval)
pvalind = np.argsort(pval)
for i in pvalind:
    size = pathwaysize[i]
    ptw = pathwayid[i]
    ptwname = kegg_2_name(ptw, k)
    print(ptw, size, ptwname, pval[i])
In [74]:
for ko_number in range(0, 3717):
    result_file = './Backgrounds/KO' + str(ko_number) + '.tsv'
    in_fh = open(result_file, 'r')
    lines = in_fh.readlines()
    out_fh = open('./allresult/KO' + str(ko_number) + '.tsv', 'w')
    for line in lines:
        fields = line.rstrip().split('\t')
        pathway_id = fields[0]
        zamboni_pval = float(fields[3])
        pvalue = 10**(np.negative(zamboni_pval))  # undo the -log10 transform to recover the adjusted p-value
        out_fh.write('{}\t{}\n'.format(pathway_id, pvalue))
    out_fh.close()
    in_fh.close()
In [10]:
number_of_hits = {}
for ko_number in range(0, 3717):
    result_file = './allresult/KO' + str(ko_number) + '.tsv'
    fh = open(result_file, 'r')
    lines = fh.readlines()
    sig_pathway = 0
    for line in lines:
        fields = line.rstrip().split('\t')
        pvalue = float(fields[1])
        if pvalue < 0.05:
            sig_pathway += 1
    fh.close()
    number_of_hits[ko_number] = sig_pathway
In [82]:
oras_ko(333, ecoli_pathways, zamboni_bg, pathway_2_compounds, pos_annot, pos_mod, neg_annot, neg_mod, 2, True, False, 0, [])
Out[82]:
In [11]:
no_hits = []
for ko in number_of_hits:
    if number_of_hits[ko] == 0:
        no_hits.append(ko)
In [21]:
no_hits
Out[21]:
In [89]:
total = 0
for ko in number_of_hits:
    total += number_of_hits[ko]
total
Out[89]: