In [1]:
from __future__ import division
import pandas as pd
from StringIO import StringIO
import pandas as pd
# Tab-separated gene -> EC table from the QIIME/IMG reference database.
# Column 0 is stored as 'Gene ID', column 1 as 'EC'.
fp = "/Users/caporaso/data/img-qiime-25oct2012/gene_ec_numeric.tsv"

data = []
# The context manager closes the file as soon as parsing finishes (the handle
# was previously left open). 'U' is Python 2 universal-newline mode, matching
# the notebook's Python 2 imports (`from StringIO import StringIO`).
with open(fp, 'U') as f:
    for line in f:
        # Skip blank lines (e.g. a trailing newline at the end of the file),
        # which would otherwise raise IndexError on fields[1] below.
        if not line.strip():
            continue
        fields = line.strip().split('\t')
        if fields[1] == 'unknown':
            # No EC assignment: store None so the gene is excluded from the
            # per-EC grouping below.
            ec_fields = None
        else:
            # The file separates EC levels with ';'; convert to dotted form.
            ec_fields = fields[1].replace(';', '.')
        data.append([fields[0], ec_fields])

df = pd.DataFrame(data, columns=['Gene ID', 'EC'])
# Number of genes per EC; the result is indexed by EC with a single
# 'Gene ID' column holding the counts.
img_ec_counts = df.groupby(['EC']).count()
These are the central C metabolic network EC code abundances in the Fierer et al. (2012) soil metagenomes, as computed with QIIME 1.9.0 by Greg Caporaso. Here I load them and then add the corresponding EC counts from the QIIME/IMG reference database (25 Oct 2012).
In [14]:
# Read the tab-separated abundance table. header=1 takes the column names from
# the second line of the file, and the first column becomes the row index.
ec_biom = pd.read_csv(
    'observation_table_L1.txt',
    sep='\t',
    header=1,
    index_col=0,
)

# Look up the IMG reference gene count for every row label, in row order.
# A label missing from img_ec_counts raises KeyError.
img_gene_counts = img_ec_counts['Gene ID']
ec_biom['IMG Count'] = [img_gene_counts[ec_id] for ec_id in ec_biom.index]
In [15]:
# Display the abundance table, including the 'IMG Count' column added above.
ec_biom
Out[15]:
Next, I compute the Spearman correlation between all of these abundances.
In [4]:
# Pairwise Spearman (rank) correlation between the columns of ec_biom,
# which include the 'IMG Count' column.
correlation_m = ec_biom.corr(method='spearman')
It looks like the per-metagenome counts are not simply a reflection of how abundant those EC codes are in the reference database (see the correlations with the IMG counts below), which is good. This check was performed as a control.
In [16]:
# Spearman correlation of every column with the 'IMG Count' column.
correlation_m['IMG Count']
Out[16]:
It also looks like, in general, the abundances are correlated across soils, which is also a good control (i.e., they are fairly repeatable across metagenomes, so we're probably not just looking at random noise).
In [17]:
# Display the full pairwise Spearman correlation matrix.
correlation_m
Out[17]:
And just for the record, these are Paul Dijkstra's central C metabolic network "EC codes of interest".
In [7]:
# Annotated list of EC codes of interest, one entry per line in the form
# "<EC number> <free-text description>".
_ec_annotations = """1.1.1.286 (isocitrate dehydrogenase)
1.1.1.37 (malate dehydrogenase to oxaloacetate)
1.1.1.38 (malate dehydrogenase /malic enzyme to pyruvate)
1.1.1.39 (malate dehydrogenase /malic enzyme to pyruvate)
1.1.1.40 (malate dehydrogenase/ malic enzyme to pyruvate)
1.1.1.41 (isocitrate dehydrogenase)
1.1.1.42 (isocitrate dehydrogenase)
1.1.1.44 (6 phosphogluconate dehydrogenase)
1.1.1.49 (glucose-6P dehydrogenase to gluconolactone-P)
1.1.1.82 (malate dehydrogenase to oxaloacetate)
1.1.1.343 (phosphogluconate dehydrogenase)
1.1.1.351 (phosphogluconate dehydrogenase)
1.1.5.4 (malate oxidoreductase to oxaloacetate)
1.2.1.9 (glyceraldehyde dehydrogenase - glycerald to glycerate)
1.2.1.12 (glyceraldehyde3P dehydrogenase)
1.2.1.59 (glyceraldehyde3P dehydrogenase)
1.2.1.79 (succinate semialdehyde dehydrogenase to succinate)
1.2.1.9 (glyceraldehyde3P dehydrogenase)
1.2.4.1 (pyruvate dehydrogenase – split CO2)
1.2.4.2 (oxoglutarate dehydrogenase /decarboxylase/PDH complex E3 – split CO2 from oxoglutarate)
1.2.7.1 (pyruvate synthase /oxidoreductase to acetyl-CoA)
1.2.7.3 (oxoglutarate synthase to succinyl-coa)
1.2.7.6 (glyceraldehyde3P dehydrogenase /oxidoreductase to 3Pglycerate)
1.3.5.1 (succinate dehydrogenase to fumarate)
1.3.5.4 (fumarate reductase to succinate)
1.8.1.4 (dihydrolipoyl dehydrogenase – PDH and OGL complex – NADH generating subunit)
2.2.1.1 (transketolase ppp reaction)
2.2.1.2 (transaldolase ppp reaction)
2.3.1.12 (PDH complex number 3 produces acetyl CoA)
2.3.1.61 (dihydropoyllysine-residue succinyl transferase – complex 3 to succinyl-CoA)
2.3.3.1 (citrate synthase OAA to citrate)
2.3.3.16 (citrate synthase OAA to citrate)
2.3.3.8 (citrate synthase OAA to citrate)
2.7.1.1 (hexokinase – to G6P)
2.7.1.11 (6 phosphofructokinase F6 to F1,6)
2.7.1.146 (ADP-specific phosphofructokinase from F6P to F16P)
2.7.1.147 (ADP specific glucokinase to G6P)
2.7.1.2 (glucokinase)
2.7.1.40 (pyruvate kinase to PEP)
2.7.1.90 (phosphofructokinase 6 to 16 fructose)
2.7.1.146 (ADP specific phosphofructokinase – pyrococcus)
2.7.2.3 (phosphoglycerate kinase – phosphorylation of 13P2glycerate)
2.7.9.1 (pyruvate phosphate dikinase to PEP)
2.7.9.2 (pyruvate water kinase to PEP)
3.1.1.31 (6-phosphogluconolactonase to gluconate 6P)
3.1.3.11 (fructobisphosphatase 1,6P to 6P)
3.1.3.13 (bisphophoglycerate phosphatase – 2,3 to 3P glycerate)
3.1.3.80 (bisphophoglycerate 3-phosphatase 2,3 to 2P glycerate)
4.1.1.3 (oxaloacetate decarboxylase to pyruvate)
4.1.1.31 (PEP carboxylase to OAA)
4.1.1.32 (PEP carboxykinase to OAA)
4.1.1.38 (PEP carboxykinase to OAA)
4.1.1.49 (PEP carboxykinase to OAA)
4.1.1.71 oxoglutarate decarboxylase
4.1.2.13 (fructose bisphosphate aldolase to DHAP and Glyceraldehyde)
4.1.2.14 (2dehydro-3deoxy phosphogluconate aldolase to pyr and glyceraldehyde ED)
4.1.2.51 (2dehydroxy3deoxy gluconate aldolase to glyceraldehyde and pyruvate ED)
4.1.2.55 (2dehydroxy3deoxy gluconate /galactonate aldolase to glyceraldehyde and pyr ED)
4.2.1.11 (phosphopyruvate hydratase – 2P glyc to PEP)
4.2.1.2 (fumarate hydratase – malate to fum)
4.2.1.3 (aconitate hydratase –citrate to isocitrate)
5.1.3.1 (ribuloseP 3 epimerase – to xylulose)
5.3.1.1 (trioseP isomerase)
5.3.1.6 (ribose isomerase ribose to ribulose)
5.3.1.9 (glucose 6 isomerase to fructose 6P)
5.4.2.11 (phosphoglycerate mutase 2 to 3 glycerate)
5.4.2.12 (phosphoglycerate mutase 2 to 3 glycerate)
5.4.2.4 (bisphosphoglycerate mutase (1,3 to 2,3 glycerate)
6.2.1.4 (succinate coa ligase from succinyl)
6.2.1.5 (succinate coa ligase from succinyl)
6.4.1.1 (pyruvate carboxylase)"""

# Keep only the leading EC number from each line, in the order listed.
# Entries that appear more than once in the text stay duplicated in the list.
ec_of_interest = []
for annotation in _ec_annotations.split('\n'):
    ec_of_interest.append(annotation.split()[0])