Example notebook showing how to load and visualize interaction data in PSI-MI TAB (MI-TAB) format


Author: Brin Rosenthal (sbrosenthal@ucsd.edu)



In [60]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import networkx as nx

import mygene
mg = mygene.MyGeneInfo()


# latex rendering of text in graphs
import matplotlib as mpl
mpl.rc('text', usetex = False)
mpl.rc('font', family = 'serif')

% matplotlib inline

import visJS2jupyter.visJS_module 
import visJS2jupyter.visualizations

Load the Reactome MI-TAB data

NOTE: Make sure you change the file path in the cell below to point to the location where you downloaded the Reactome file.


In [2]:
# read the tab-separated Reactome MI-TAB interaction file into a dataframe
reactome_df = pd.read_csv(
    '../../interactomes/reactome/reactome.homo_sapiens.interactions.psi-mitab.txt',
    sep='\t'
)
# preview the first rows
reactome_df.head()


Out[2]:
#ID(s) interactor A ID(s) interactor B Alt. ID(s) interactor A Alt. ID(s) interactor B Alias(es) interactor A Alias(es) interactor B Interaction detection method(s) Publication 1st author(s) Publication Identifier(s) Taxid interactor A ... Checksum(s) interactor A Checksum(s) interactor B Interaction Checksum(s) Negative Feature(s) interactor A Feature(s) interactor B Stoichiometry(s) interactor A Stoichiometry(s) interactor B Identification method participant A Identification method participant B
0 uniprotkb:O00189 uniprotkb:Q9Y587 reactome:R-HSA-5229114|pdbe:3L81|pdbe:4MDR|ref... reactome:R-HSA-5229155|refseq:NP_001121598|ref... reactome:AP4M1(name)|reactome:AP-4 complex sub... reactome:AP4S1(name)|reactome:AP-4 complex sub... psi-mi:"MI:0364"(inferred by curator) Fabregat et al. 2015 pubmed:24243840 taxid:9606(Homo sapiens) ... - - - False - - 1 1 psi-mi:"MI:0364"(inferred by curator) psi-mi:"MI:0364"(inferred by curator)
1 uniprotkb:O00472 uniprotkb:Q8N5P1 reactome:R-HSA-6807506|pdbe:2E5N|refseq:NP_036... reactome:R-HSA-6807406|refseq:NP_115883|ENSEMB... reactome:ELL2(name)|reactome:RNA polymerase II... reactome:ZC3H8(name)|reactome:Zinc finger CCCH... psi-mi:"MI:0364"(inferred by curator) Fabregat et al. 2015 pubmed:24243840 taxid:9606(Homo sapiens) ... - - - False - - 1 1 psi-mi:"MI:0364"(inferred by curator) psi-mi:"MI:0364"(inferred by curator)
2 reactome:R-HSA-114514 reactome:R-HSA-139933 - - reactome:Integrin alpha2bbeta3(name) reactome:fibrin multimer(name) psi-mi:"MI:0364"(inferred by curator) Fabregat et al. 2015 pubmed:24243840 taxid:9606(Homo sapiens) ... - - - False - - 1 1 psi-mi:"MI:0364"(inferred by curator) psi-mi:"MI:0364"(inferred by curator)
3 ChEBI:52966 reactome:R-HSA-382579 reactome:R-ALL-390394 - reactome:cerotyl-CoA(name)|reactome:hexacosano... reactome:ABCD1 homodimer(name)|reactome:ALDP(n... psi-mi:"MI:0364"(inferred by curator) Fabregat et al. 2015 pubmed:24243840 - ... - - - False - - 1 1 psi-mi:"MI:0364"(inferred by curator) psi-mi:"MI:0364"(inferred by curator)
4 uniprotkb:P07195 uniprotkb:P07195 reactome:R-HSA-70503|pdbe:1I0Z|pdbe:1T2F|refse... reactome:R-HSA-70503|pdbe:1I0Z|pdbe:1T2F|refse... reactome:LDHB(name)|reactome:lactate dehydroge... reactome:LDHB(name)|reactome:lactate dehydroge... psi-mi:"MI:0364"(inferred by curator) Fabregat et al. 2015 pubmed:24243840 taxid:9606(Homo sapiens) ... - - - False - - 4 0 psi-mi:"MI:0364"(inferred by curator) psi-mi:"MI:0364"(inferred by curator)

5 rows × 42 columns


In [19]:
# create a networkx graph from the pandas dataframe: the two interactor-ID columns
# are the edge endpoints, and all the other columns become edge attributes
attribute_cols = reactome_df.columns.tolist()[2:]

# nx.from_pandas_dataframe is not available in newer networkx releases, where the
# same conversion is provided as nx.from_pandas_edgelist; use whichever exists so
# the cell runs on both old and new networkx
if hasattr(nx, 'from_pandas_edgelist'):
    _graph_from_edge_table = nx.from_pandas_edgelist
else:
    _graph_from_edge_table = nx.from_pandas_dataframe

G_reactome = _graph_from_edge_table(reactome_df,
                                    source='#ID(s) interactor A',
                                    target='ID(s) interactor B',
                                    edge_attr=attribute_cols)
# number of nodes in the full interaction graph
len(G_reactome.nodes())


Out[19]:
11872

In [21]:
# check that edge attributes have been loaded by looking at a single edge.
# next(iter(...)) fetches the first (source, target, attribute-dict) entry without
# materializing a list of every edge, and works whether edges(data=True) returns
# a plain list (nx 1.x) or an edge view (nx 2.x, which does not support indexing)
next(iter(G_reactome.edges(data=True)))


Out[21]:
('reactome:R-HSA-5655959',
 'reactome:R-HSA-5655492',
 {'Alias(es) interactor A': '-',
  'Alias(es) interactor B': '-',
  'Alt. ID(s) interactor A': '-',
  'Alt. ID(s) interactor B': '-',
  'Annotation(s) interactor A': '-',
  'Annotation(s) interactor B': '-',
  'Biological role(s) interactor A': 'psi-mi:"MI:0499"(unspecified role)',
  'Biological role(s) interactor B': 'psi-mi:"MI:0499"(unspecified role)',
  'Checksum(s) interactor A': '-',
  'Checksum(s) interactor B': '-',
  'Confidence value(s)': 'reactome-score:0.5',
  'Creation date': '2014/12/10',
  'Expansion method(s)': 'psi-mi:"MI:1061"(matrix expansion)',
  'Experimental role(s) interactor A': 'psi-mi:"MI:0499"(unspecified role)',
  'Experimental role(s) interactor B': 'psi-mi:"MI:0499"(unspecified role)',
  'Feature(s) interactor A': '-',
  'Feature(s) interactor B': '-',
  'Host organism(s)': 'taxid:9606(Homo sapiens)',
  'Identification method participant A': 'psi-mi:"MI:0364"(inferred by curator)',
  'Identification method participant B': 'psi-mi:"MI:0364"(inferred by curator)',
  'Interaction Checksum(s)': '-',
  'Interaction Xref(s)': 'go:"GO:0005654"(nucleoplasm)',
  'Interaction annotation(s)': '-',
  'Interaction detection method(s)': 'psi-mi:"MI:0364"(inferred by curator)',
  'Interaction identifier(s)': 'reactome:R-HSA-5655961',
  'Interaction parameter(s)': '-',
  'Interaction type(s)': 'psi-mi:"MI:0915"(physical association)',
  'Negative': False,
  'Publication 1st author(s)': 'Fabregat et al. 2015',
  'Publication Identifier(s)': 'pubmed:24243840',
  'Source database(s)': 'psi-mi:"MI:0467"(reactome)',
  'Stoichiometry(s) interactor A': 1,
  'Stoichiometry(s) interactor B': 1,
  'Taxid interactor A': 'taxid:9606(Homo sapiens)',
  'Taxid interactor B': 'taxid:9606(Homo sapiens)',
  'Type(s) interactor A': 'psi-mi:"MI:0314"(complex)',
  'Type(s) interactor B': 'psi-mi:"MI:0314"(complex)',
  'Update date': '-',
  'Xref(s) interactor A': 'go:"GO:0005654"(nucleoplasm)',
  'Xref(s) interactor B': 'go:"GO:0005654"(nucleoplasm)'})

In [ ]:


In [22]:
# restrict the graph to nodes whose identifier starts with 'uniprot'
uniprot_nodes = [node for node in G_reactome.nodes() if node.startswith('uniprot')]

# induced subgraph on the uniprot nodes only
G_reactome = G_reactome.subgraph(uniprot_nodes)

In [25]:
# take the largest connected component (to speed up visualization)
# nx.connected_component_subgraphs was removed in networkx 2.4, so pick the
# largest node set from nx.connected_components (present in 1.x and 2.x) and
# copy the induced subgraph so that G_LCC is an independent graph
largest_cc_nodes = max(nx.connected_components(G_reactome), key=len)
G_LCC = G_reactome.subgraph(largest_cc_nodes).copy()
len(G_LCC.nodes())


Out[25]:
1325

In [80]:
# translate the uniprot ids of the graph nodes into gene symbols via mygene

# strip everything up to and including the first ':' from each node label
uniprot_temp = [node_id[node_id.find(':') + 1:] for node_id in G_LCC.nodes()]
mg_temp = mg.querymany(uniprot_temp, scopes='uniprot', species=9606)

# one entry per returned hit; a hit without a 'symbol' field keeps its
# 'uniprotkb:<query>' label instead
uniprot_list = []
symbol_list = []
for hit in mg_temp:
    uniprot_label = 'uniprotkb:' + hit['query']
    uniprot_list.append(uniprot_label)
    symbol_list.append(hit.get('symbol', uniprot_label))

# if a query produced several hits, the last one wins in the dict
uniprot_to_symbol = pd.Series(dict(zip(uniprot_list, symbol_list)))
uniprot_to_symbol.head()


querying 1-1000...done.
querying 1001-1325...done.
Finished.
12 input query terms found dup hits:
	[(u'P59665', 2), (u'Q71DI3', 3), (u'P49674', 2), (u'P62805', 10), (u'P35226', 2), (u'P68431', 10), (
93 input query terms found no hit:
	[u'P16767', u'P97333', u'P0DJM0', u'Q08048', u'Q9QXE7', u'Q29011', u'P10493', u'P62993-1', u'Q9Y5A7-
Pass "returnall=True" to return complete lists of duplicate or missing query terms.
Out[80]:
uniprotkb:A0MZ66       SHTN1
uniprotkb:A1A4S6    ARHGAP10
uniprotkb:A2RUS2      DENND3
uniprotkb:A6NGB9       WIPF3
uniprotkb:A7MBM2       DISP2
dtype: object

In [81]:
# replace each node label with the value it maps to in uniprot_to_symbol
# (relabel_nodes returns a new graph; G_LCC is rebound to it)
G_LCC = nx.relabel_nodes(G_LCC, mapping=dict(uniprot_to_symbol), copy=True)
# show the first ten node labels after relabeling
list(G_LCC.nodes())[:10]


Out[81]:
[u'HIF3A',
 u'ELANE',
 u'KHSRP',
 u'NCBP1',
 u'NCBP2',
 u'HSPA8',
 u'CELA2A',
 u'uniprotkb:P97333',
 u'B2M',
 u'LIFR']

In [ ]:


In [83]:
# encode each interaction type as an integer and store it on the edges

# lookup tables in both directions: type string <-> integer code
int_types = reactome_df['Interaction type(s)'].unique().tolist()
int_types_2_num = {int_type: code for code, int_type in enumerate(int_types)}
num_2_int_types = dict(enumerate(int_types))

# integer code of every edge, in edge-iteration order
int_num_list = [int_types_2_num[attrs['Interaction type(s)']]
                for _, _, attrs in G_LCC.edges(data=True)]

# attach the codes as the 'int_type_numeric' edge attribute; name/values are
# passed by keyword because the positional order differs between nx 1.11 and 2.0
int_num_dict = dict(zip(G_LCC.edges(), int_num_list))
nx.set_edge_attributes(G_LCC, name='int_type_numeric', values=int_num_dict)

# edge title (shown as info about the interaction type): the type string that
# belongs to each edge's integer code
edge_title = {edge: num_2_int_types[int_num_dict[edge]] for edge in G_LCC.edges()}

# store every node's degree as the 'degree' node attribute
deg = dict(nx.degree(G_LCC))
nx.set_node_attributes(G_LCC, name='degree', values=deg)

In [ ]:


In [84]:
# set the layout with networkx spring_layout
# spring_layout starts from random node positions; seeding numpy's global random
# state first makes the layout (and therefore the figure) reproducible when the
# notebook is re-run. This relies on spring_layout drawing from numpy's global
# RNG when it is not given a seed of its own.
np.random.seed(0)
pos = nx.spring_layout(G_LCC)

In [85]:
# interactive plot of the Reactome largest connected component; edges are
# colored by their numeric interaction type
nodes = list(G_LCC.nodes())
numnodes = len(nodes)
edges = list(G_LCC.edges())
numedges = len(edges)
edges_with_data = list(G_LCC.edges(data=True))

# edge -> color lookup derived from the 'int_type_numeric' edge attribute
edge_to_color = visJS2jupyter.visJS_module.return_edge_to_color(
    G_LCC,
    field_to_map='int_type_numeric',
    cmap=mpl.cm.Set1_r,
    alpha=.9)

# one record per node: black dot sized by degree, labeled and titled with the
# node name, placed at the spring-layout position scaled by 1000
nodes_dict = []
for node in nodes:
    layout_x, layout_y = pos[node][0], pos[node][1]
    nodes_dict.append({"id": node,
                       "degree": G_LCC.degree(node),
                       "color": 'black',
                       "node_size": deg[node],
                       'border_width': 0,
                       "node_label": node,
                       "edge_label": '',
                       "title": node,
                       "node_shape": 'dot',
                       "x": layout_x*1000,
                       "y": layout_y*1000})

# edges refer to their endpoints by position in the node list
node_map = {node: index for index, node in enumerate(nodes)}

# one record per edge: endpoint indices, color and interaction-type title
edges_dict = [{"source": node_map[src],
               "target": node_map[tgt],
               "color": edge_to_color[(src, tgt)],
               "title": edge_title[(src, tgt)]} for src, tgt in edges]

# draw the graph here
visJS2jupyter.visJS_module.visjs_network(
    nodes_dict, edges_dict,
    node_color_border='black',
    node_size_field='node_size',
    node_size_transform='Math.sqrt',
    node_size_multiplier=1,
    node_border_width=1,
    node_font_size=40,
    node_label_field='node_label',
    edge_width=2,
    edge_smooth_enabled=False,
    edge_smooth_type='continuous',
    physics_enabled=False,
    node_scaling_label_draw_threshold=100,
    edge_title_field='title',
    graph_title='Reactome largest connected component')


Out[85]:
Network | Basic usage

In [ ]: