(that means EGRIN 2.0)
iPython
In [3]:
from query.egrin2_query import *
# Connection settings for the EGRIN 2.0 database.
host = "primordial"
port = 27017
db = "eco_db"
# Assemble the MongoDB URI from host and port, then open the client.
client = MongoClient("mongodb://{}:{}/".format(host, port))
Nucleotide metabolism
In [3]:
# get nucleotide gene info: a row matches when ANY of the three regex
# conditions holds (GO id, TIGR role text, or EC description text)
gene_info = pd.DataFrame(
    list(
        client[db].row_info.find(
            {
                "$or": [
                    {"GO": {"$regex": "GO:0009117"}},
                    {"TIGRRoles": {"$regex": "nucleotide"}},
                    {"ECDesc": {"$regex": "nucleotide"}},
                ]
            }
        )
    )
)
# NOTE(review): bare expression; a notebook only renders it when it is the
# last statement of the cell, so here it has no visible effect.
gene_info.loc[:, ["name", "ECDesc", "TIGRRoles"]]
# get egrin2 gene names only
genes = gene_info["egrin2_row_name"].tolist()
In [4]:
# name chromosome-:1823979-1824947
# NOTE(review): the comment above looks like a sample of output rather than a
# description of this call; confirm against the contents of the outfile.
ggbwebModule(
    genes,
    outfile="nucleotide_module.txt",
    host=host,
    port=port,
    db=db,
)
In [5]:
# find GREs discovered upstream of these genes
gre_candidates = agglom(
    genes,
    x_type="gene",
    y_type="gre",
    host=host,
    db=db,
    logic="or",
    x_input_type="egrin2_row_name",
)
# DataFrame.sort() no longer exists in pandas; sort_values() is its
# replacement. Both keys are sorted in descending order, exactly as before.
# (assumes agglom returns a pandas DataFrame with "counts" and "pval" columns)
gre_candidates = gre_candidates.sort_values(["counts", "pval"], ascending=False)
gre_candidates.head()
Out[5]:
In [1]:
# keep the first five index labels of gre_candidates as a plain list
gres = gre_candidates.index[:5].tolist()
(we won't run it here because it takes a couple of minutes for the entire genome)
In [7]:
# fimoFinder( locusId = "NC_000913", filterby = gres, filter_type = "gre", host=host, port=port, db=db, outfile = "fimo.txt")