In [124]:
from sys import argv
import re
import pandas as pd
from Bio.KEGG import Enzyme

Get Unique EC Numbers from Tables of RS-only Clusters


In [125]:
# KEGG ENZYME flat file handed to the Enzyme parser (argv[2] placeholder)
path_enzyme = "/Users/luke/singlecell/clusters/enzyme"
# Table of RS-only clusters that is scanned for EC numbers (argv[1] placeholder)
path_table = "/Users/luke/singlecell/clusters/orthomcl-pro3/table.RSonly_pro.txt"

In [126]:
# Accumulator: one entry per table line, each entry being the list of
# EC-number strings matched on that line.
ec_list = []

# Regular expression matching a complete four-field EC number, e.g. "EC 2.7.7.43".
# Every field requires at least one digit, so junk such as "EC ..." and the
# "EC 1.1.1." stub of a partial number ("EC 1.1.1.-") are not captured.
regex_ec = re.compile(r'EC [0-9]+\.[0-9]+\.[0-9]+\.[0-9]+')

In [127]:
# Scan the table line by line and record the EC numbers matched on each line.
# The context manager closes the file as soon as it has been read.
with open(path_table) as table_file:
    for line in table_file:
        ec_list.append(regex_ec.findall(line))

# One row per table line, one column per match; shorter rows are padded with None.
df = pd.DataFrame(ec_list)

# Unique EC numbers in order of first appearance. The per-line match lists are
# flattened directly, so the None padding never enters the result and nothing
# has to be sliced off. (Dropping the first unique value is only correct when
# the first table line has no match; otherwise it discards a real EC number
# and leaves None in the array.)
ec_unique = pd.Series(
    [ec for matches in ec_list for ec in matches], dtype=object
).unique()

Parse KEGG Enzyme Records


In [128]:
# Parse the KEGG ENZYME flat file. The records are materialised into a list
# while the file is still open, so the handle can be closed deterministically
# instead of being leaked; `records` remains iterable for later cells.
with open(path_enzyme) as enzyme_handle:
    records = list(Enzyme.parse(enzyme_handle))

In [129]:
# Materialise the parsed enzyme records so they can be traversed as a list.
rlist = [record for record in records]

In [130]:
# Report the pathways of every enzyme record whose EC number occurs in the table.
# Output is tab-separated: "EC <entry>", then pathway[1] and pathway[2] of each
# pathway tuple, or NA/NA when the record lists no pathway.
#
# The lookup is an exact string comparison. Using record.entry as a regular
# expression let every "." match any character and left the start unanchored,
# so entry 1.1.1.1 would also have matched "EC 1.1.1.111".
ec_wanted = set(ec_unique)
for record in rlist:
    if "EC " + record.entry not in ec_wanted:
        continue
    if record.pathway:
        for pathway in record.pathway:
            # single parenthesised argument: same output under Python 2 and 3
            print("EC %s\t%s\t%s" % (record.entry, pathway[1], pathway[2]))
    else:
        print("EC %s\tNA\tNA" % (record.entry))


EC 1.1.1.14	ec00051	Fructose and mannose metabolism
EC 1.1.1.14	ec01100	Metabolic pathways
EC 2.1.1.37	ec00270	Cysteine and methionine metabolism
EC 2.1.1.37	ec01100	Metabolic pathways
EC 2.1.1.64	ec00130	Ubiquinone and other terpenoid-quinone biosynthesis
EC 2.1.1.64	ec01100	Metabolic pathways
EC 2.1.1.64	ec01110	Biosynthesis of secondary metabolites
EC 2.1.1.72	NA	NA
EC 2.1.3.2	ec00240	Pyrimidine metabolism
EC 2.1.3.2	ec00250	Alanine, aspartate and glutamate metabolism
EC 2.1.3.2	ec01100	Metabolic pathways
EC 2.4.1.15	ec00500	Starch and sucrose metabolism
EC 2.4.1.18	ec00500	Starch and sucrose metabolism
EC 2.4.1.18	ec01100	Metabolic pathways
EC 2.4.1.129	ec00550	Peptidoglycan biosynthesis
EC 2.7.7.43	ec00520	Amino sugar and nucleotide sugar metabolism
EC 2.7.7.43	ec01100	Metabolic pathways
EC 3.1.21.3	NA	NA
EC 3.1.21.4	NA	NA
EC 3.5.1.87	NA	NA
EC 3.5.3.11	ec00330	Arginine and proline metabolism
EC 3.5.3.11	ec01100	Metabolic pathways
EC 4.1.1.9	ec00410	beta-Alanine metabolism
EC 4.1.1.9	ec00640	Propanoate metabolism
EC 4.1.1.9	ec01100	Metabolic pathways
EC 4.4.1.8	ec00270	Cysteine and methionine metabolism
EC 4.4.1.8	ec00450	Selenocompound metabolism
EC 4.4.1.8	ec00910	Nitrogen metabolism
EC 4.4.1.8	ec00920	Sulfur metabolism
EC 4.4.1.8	ec01100	Metabolic pathways
EC 4.4.1.8	ec01110	Biosynthesis of secondary metabolites
EC 5.1.3.2	ec00052	Galactose metabolism
EC 5.1.3.2	ec00520	Amino sugar and nucleotide sugar metabolism
EC 5.1.3.2	ec01100	Metabolic pathways
EC 5.1.3.13	ec00521	Streptomycin biosynthesis
EC 5.1.3.13	ec00523	Polyketide sugar unit biosynthesis
EC 5.1.3.13	ec01100	Metabolic pathways
EC 5.1.3.13	ec01110	Biosynthesis of secondary metabolites

In [ ]: