Generate a subgraph covering CbGpPWpGaD paths between bupropion and nicotine dependence


In [1]:
import random

import hetnetpy.readwrite
import hetnetpy.pathtools
import hetnetpy.stats

In [2]:
# Read Hetionet v1.0 from the hetio/hetionet GitHub repository, pinned to a
# specific revision hash so the download does not change over time.
hetionet_revision = '00bf0b6f8886821d91cfdf00eadad145a7a1b6da'
hetionet_path = 'hetnet/json/hetionet-v1.0.json.bz2'
url_template = 'https://github.com/hetio/hetionet/raw/{}/{}'
url = url_template.format(hetionet_revision, hetionet_path)
graph = hetnetpy.readwrite.read_graph(url)
# The metagraph is used below to resolve metapath abbreviations
metagraph = graph.metagraph

Generate a subgraph covering CbGpPWpGaD paths between bupropion and nicotine dependence


In [3]:
# Endpoints of the paths to extract, written as (metanode, identifier) keys
# into graph.node_dict
compound_id = ('Compound', 'DB01156')  # bupropion
disease_id = ('Disease', 'DOID:0050742')  # nicotine dependence
# Metapath to follow from the compound to the disease
metapath = metagraph.metapath_from_abbrev('CbGpPWpGaD')

In [4]:
# Resolve the endpoint node objects, then extract the walks from the compound
# to the disease that follow `metapath`.
# NOTE(review): duplicates=True presumably keeps walks that revisit a node;
# confirm against hetnetpy.pathtools.paths_between.
source_node = graph.node_dict[compound_id]
target_node = graph.node_dict[disease_id]
paths = hetnetpy.pathtools.paths_between(
    graph,
    source=source_node,
    target=target_node,
    metapath=metapath,
    duplicates=True,
)

In [5]:
# Metaedges to keep: start with the ones that make up the metapath
metaedges = set(metapath)

# Nodes to keep: every node on an extracted path, plus incidental neighbors
nodes = set()
for path in paths:
    nodes.update(path.get_nodes())
    for edge in path:
        # Add incidental nodes along paths to enable correct DWWC/DWPC
        # computations for the CbGpPWpGaD metapath: everything reachable from
        # the edge's source via its metaedge, and everything reachable from
        # the edge's target via the inverse metaedge.
        metaedge = edge.metaedge
        forward_neighbors = {e.target for e in edge.source.get_edges(metaedge)}
        backward_neighbors = {e.target for e in edge.target.get_edges(metaedge.inverse)}
        nodes |= forward_neighbors | backward_neighbors

# Add Gene-interacts-Gene metaedge (not essential but may be useful)
gig_metaedge = metagraph.metapath_from_abbrev('GiG')[0]
metaedges.add(gig_metaedge)

# Get subgraph restricted to the selected metaedges and nodes
subgraph = graph.get_subgraph(metaedges=metaedges, nodes=nodes)

In [6]:
# Metagraph size of the bupropion subgraph, displayed as the tuple
# (metagraph.n_nodes, metagraph.n_edges)
subgraph.metagraph.n_nodes, subgraph.metagraph.n_edges


Out[6]:
(4, 4)

In [7]:
# Graph size of the bupropion subgraph, displayed as the tuple
# (n_nodes, n_edges)
subgraph.n_nodes, subgraph.n_edges


Out[7]:
(3060, 18474)

In [8]:
# Metanode info: one summary row per metanode of the bupropion subgraph.
# NOTE(review): the recorded output lists every Compound and Disease node as
# unconnected, although the metaedge table reports CbG and DaG edges with
# those nodes as sources -- confirm how unconnected_nodes is computed.
hetnetpy.stats.get_metanode_df(subgraph)


Out[8]:
metanode abbreviation metaedges nodes unconnected_nodes
1 Compound C 1 730 730
3 Disease D 1 16 16
0 Gene G 4 2226 612
2 Pathway PW 1 88 0

In [9]:
# Metaedge info: one summary row per metaedge of the bupropion subgraph
hetnetpy.stats.get_metaedge_df(subgraph)


Out[9]:
metaedge abbreviation edges source_nodes target_nodes
0 Compound - binds - Gene CbG 5634 730 472
1 Disease - associates - Gene DaG 937 16 557
2 Gene - interacts - Gene GiG 5124 1061 1213
3 Gene - participates - Pathway GpPW 6779 2222 88

In [10]:
# Export the bupropion subgraph as JSON, written relative to the working
# directory.
# NOTE(review): the .xz suffix presumably selects xz compression inside
# write_graph -- confirm against hetnetpy.readwrite.
hetnetpy.readwrite.write_graph(subgraph, 'bupropion-CbGpPWpGaD-subgraph.json.xz')

Random subgraph of ~100 nodes per metanode

Choose a different number of nodes for each metanode to increase the likelihood that faulty matrix operations cause a misalignment.


In [11]:
# Sample a different number of nodes from each metanode (100 for the first,
# then one more for each subsequent metanode), visiting metanodes in sorted
# order so that the draw is reproducible.
metanode_to_nodes = graph.get_metanode_to_nodes()
n_nodes = 100
node_subset = []
random.seed(0, version=2)  # fixed seed for a reproducible sample
for metanode, candidates in sorted(metanode_to_nodes.items()):
    # Sort before sampling so the result does not depend on container order
    node_subset += random.sample(sorted(candidates), n_nodes)
    n_nodes += 1

In [12]:
# Get subgraph from the sampled nodes only; no metaedges restriction is
# passed here. This rebinds `subgraph`, replacing the bupropion subgraph
# built earlier in the notebook.
subgraph = graph.get_subgraph(nodes=node_subset)

In [13]:
# Metagraph size of the random subgraph, displayed as the tuple
# (metagraph.n_nodes, metagraph.n_edges)
subgraph.metagraph.n_nodes, subgraph.metagraph.n_edges


Out[13]:
(11, 24)

In [14]:
# Graph size of the random subgraph, displayed as the tuple
# (n_nodes, n_edges)
subgraph.n_nodes, subgraph.n_edges


Out[14]:
(1155, 3123)

In [15]:
# Metanode info: one summary row per metanode of the random subgraph
hetnetpy.stats.get_metanode_df(subgraph)


Out[15]:
metanode abbreviation metaedges nodes unconnected_nodes
0 Anatomy A 4 100 7
1 Biological Process BP 1 101 86
2 Cellular Component CC 1 102 90
3 Compound C 8 103 73
4 Disease D 8 104 22
5 Gene G 16 105 19
6 Molecular Function MF 1 106 95
7 Pathway PW 1 107 87
8 Pharmacologic Class PC 1 108 108
9 Side Effect SE 1 109 76
10 Symptom S 1 110 14

In [16]:
# Metaedge info: one summary row per metaedge of the random subgraph
hetnetpy.stats.get_metaedge_df(subgraph)


Out[16]:
metaedge abbreviation edges source_nodes target_nodes
0 Anatomy - downregulates - Gene AdG 173 13 64
1 Anatomy - expresses - Gene AeG 735 29 80
2 Anatomy - upregulates - Gene AuG 126 13 58
6 Compound - binds - Gene CbG 10 10 1
9 Compound - causes - Side Effect CcSE 170 53 33
7 Compound - downregulates - Gene CdG 6 6 4
4 Compound - palliates - Disease CpD 26 16 13
3 Compound - resembles - Compound CrC 16 14 14
5 Compound - treats - Disease CtD 37 18 26
8 Compound - upregulates - Gene CuG 4 3 4
12 Disease - associates - Gene DaG 62 37 24
13 Disease - downregulates - Gene DdG 26 18 23
10 Disease - localizes - Anatomy DlA 636 95 93
15 Disease - presents - Symptom DpS 631 96 96
11 Disease - resembles - Disease DrD 309 81 73
14 Disease - upregulates - Gene DuG 28 17 20
18 Gene - covaries - Gene GcG 1 1 1
19 Gene - interacts - Gene GiG 2 2 2
16 Gene - participates - Biological Process GpBP 33 21 15
17 Gene - participates - Cellular Component GpCC 32 23 12
21 Gene - participates - Molecular Function GpMF 13 10 11
22 Gene - participates - Pathway GpPW 25 15 20
20 Gene > regulates > Gene Gr>G 3 3 2
23 Pharmacologic Class - includes - Compound PCiC 19 13 19

In [17]:
# Export the random subgraph as JSON, written relative to the working
# directory.
# NOTE(review): the .xz suffix presumably selects xz compression inside
# write_graph -- confirm against hetnetpy.readwrite.
hetnetpy.readwrite.write_graph(subgraph, 'random-subgraph.json.xz')