This is the same GOEA as in goea_nbt3102.ipynb, but the GOEA results can be obtained by calling a single function.
We use data from a 2014 Nature paper:
Computational analysis of cell-to-cell heterogeneity
in single-cell RNA-sequencing data reveals hidden
subpopulations of cells
In [1]:
def get_goeaobj_nbt3102(method='fdr_bh'):
"""Return GOEA Object ready to run Nature data."""
from goatools.obo_parser import GODag
from genes_ncbi_10090_proteincoding import GENEID2NT as GeneID2nt_mus
from goatools.base import download_go_basic_obo, download_ncbi_associations
from goatools.anno.genetogo_reader import Gene2GoReader
from goatools.goea.go_enrichment_ns import GOEnrichmentStudyNS
# Load Ontologies
obo_fname = download_go_basic_obo()
obodag = GODag("go-basic.obo")
# Load Associations
download_ncbi_associations() # Get ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene2go.gz
# Read NCBI's gene2go. Store annotations in a list of namedtuples
objanno = Gene2GoReader("gene2go", taxids=[10090])
# Get associations for each branch of the GO DAG (BP, MF, CC)
ns2assoc = objanno.get_ns2assc()
# GOE Object holds Ontologies, Associations, and Background gene set
return GOEnrichmentStudyNS(
GeneID2nt_mus.keys(), # Background gene set: mouse protein-coding genes
ns2assoc, # geneid/GO Associations for BP, MF, anc CC GODAG branches
obodag, # Ontologies
propagate_counts = False,
alpha = 0.05, # default significance cut-off
methods = [method]) # defult multipletest correction method
In [2]:
def read_data_nbt3102():
"""Read data from Nature paper."""
import os
# Data will be stored in this variable
geneid2symbol = {}
# Get xlsx filename where data is stored
ROOT = os.path.dirname(os.getcwd()) # go up 1 level from current working directory
din_xlsx = os.path.join(ROOT, "goatools/test_data/nbt_3102/nbt.3102-S4_GeneIDs.xlsx")
# Read data
if os.path.isfile(din_xlsx):
import xlrd
book = xlrd.open_workbook(din_xlsx)
pg = book.sheet_by_index(0)
for r in range(pg.nrows):
symbol, geneid, pval = [pg.cell_value(r, c) for c in range(pg.ncols)]
if geneid:
geneid2symbol[int(geneid)] = symbol
return geneid2symbol
Copyright (C) 2016-present, DV Klopfenstein, H Tang. All rights reserved.