In [1]:
# import some useful packages
import numpy as np
import matplotlib.pyplot as plt
import seaborn
import networkx as nx
import pandas as pd
import random
# Matplotlib text settings: LaTeX rendering of text is switched OFF here
# (usetex = False); figures use a serif font family.
import matplotlib as mpl
mpl.rc('text', usetex = False)
mpl.rc('font', family = 'serif')
import sys
# Make the helper modules importable. The path is relative, so the notebook
# has to be run from the directory that contains the source/ folder.
sys.path.append('source/')
import separation
import plotting_results
import network_prop
import imp
imp.reload(separation)
imp.reload(plotting_results)
imp.reload(network_prop)
% matplotlib inline
The interactome was downloaded from the supplementary materials of http://science.sciencemag.org/content/347/6224/1257601 (Menche, Jörg, et al. "Uncovering disease-disease relationships through the incomplete interactome." Science 347.6224 (2015): 1257601).
In [2]:
# Load the interactome network (the default network distributed with the paper).
Gint = separation.read_network('data/DataS1_interactome.tsv')
# Remove self links.
separation.remove_self_links(Gint)
# Drop nodes that have no edges.
# dict(...) is used because Graph.degree() returns a plain dict on networkx 1.x
# but a DegreeView of (node, degree) pairs on networkx 2.x; wrapping it works
# on both, whereas calling .keys() on the view does not.
nodes_degree = dict(Gint.degree())
nodes_0 = [node for node, deg in nodes_degree.items() if deg == 0]
Gint.remove_nodes_from(nodes_0)
In [3]:
# Read the disease gene lists that can serve as propagation seeds.
genes_EPI = separation.read_gene_list('epilepsy_genes.txt')
genes_KID = separation.read_gene_list('kidney_diseases.txt')
# Choose the disease to analyse: its display name and its focal gene set.
dname, genes_focal = 'kidney', genes_KID
The network propagation simulation follows the method described in http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1000641 (Vanunu, Oron, et al. "Associating genes and protein complexes with disease via network propagation." PLoS Comput Biol 6.1 (2010): e1000641).
In [4]:
# Build the normalized adjacency matrix of the interactome; it is passed to
# network_prop.network_propagation in the next step.
Wprime = network_prop.normalized_adj_matrix(
    Gint
)
In [5]:
# Seed nodes are the focal genes that are actually present in the interactome.
# Gint.nodes() is materialised with list() first: on networkx 2.x it is a
# NodeView rather than a list, which numpy should not be left to coerce.
seed_nodes = list(np.intersect1d(list(genes_focal), list(Gint.nodes())))
alpha = .5  # this parameter controls how fast the heat dissipates
# Propagate heat from the seed nodes over the network for 20 iterations.
Fnew = network_prop.network_propagation(Gint, Wprime, seed_nodes, alpha=alpha, num_its=20)
In [6]:
# Rank all genes by propagated heat, hottest first.
# sort_values returns a new sorted Series and leaves Fnew untouched. The old
# Series.sort sorted in place and returned None (and has since been removed
# from pandas), so Fsort was None and the ranking relied on hidden mutation.
Fsort = Fnew.sort_values(ascending=False)
top_N = 200
F_top_N = Fsort.head(top_N)
gneigh_top_N = list(F_top_N.index)
# Subgraph induced by the top_N hottest genes.
G_neigh_N = Gint.subgraph(gneigh_top_N)
# pull out some useful subgraphs for use in plotting functions
# Focal (seed) genes that are present in the interactome ...
genes_in_graph = list(np.intersect1d(list(Gint.nodes()), list(genes_focal)))
# ... restricted to the top_N subgraph.
G_focal = G_neigh_N.subgraph(genes_in_graph)
Set the node positions using nx.spring_layout. Parameter k controls default spacing between nodes (lower k brings the nodes closer together, higher k pushes them apart)
In [7]:
# spring_layout starts from random positions, so the global numpy RNG is
# seeded first to make the figure reproducible across Restart & Run All.
# (The global seed is used rather than a seed= keyword so this also runs on
# networkx versions that predate that keyword.)
np.random.seed(0)
pos = nx.spring_layout(G_neigh_N, k=.03)  # set the node positions
In [8]:
# Draw the base figure for the top-N subgraph and the focal-gene subgraph.
plotting_results.plot_network_2_diseases(G_neigh_N, pos, G_focal, d1name=dname, saveflag=False)
# Fix the node order once and use it for both the drawn nodes and their
# colours, so each node is coloured by its own heat value. A plain list is
# used because G.nodes() is a NodeView on networkx 2.x, which is not a valid
# pandas indexer.
node_order = list(G_neigh_N.nodes())
node_heat = Fnew[node_order].values
# Overlay the nodes coloured by heat; the colour scale saturates at one third
# of the maximum heat.
nx.draw_networkx_nodes(G_neigh_N, pos=pos, nodelist=node_order, node_color=node_heat,
                       cmap='YlOrRd', node_size=30,
                       vmin=0, vmax=Fnew.max()/3)
nx.draw_networkx_edges(G_neigh_N, pos=pos, edge_color='white', alpha=.2)
plt.title('Top '+str(top_N)+' genes propagated from '+dname+': alpha = ' + str(alpha),
          color='white', fontsize=16, y=.95)
plt.savefig('heat_prop_network.png', dpi=200)  # save the figure here
In [9]:
# Client for the MyGene.info annotation service; used below to look up gene
# symbols for the top-ranked gene IDs (the lookup needs network access).
import mygene
mg = mygene.MyGeneInfo()
In [10]:
# Report the top-N propagated genes that are NOT part of the seed set.
focal_group = np.setdiff1d(list(F_top_N.index), list(genes_focal))
top_heat_focal = F_top_N[focal_group]
# Look up annotations for the remaining gene IDs.
focal_temp = mg.getgenes(focal_group)
# Keep only the records that came back with a gene symbol.
annotated = [rec for rec in focal_temp if 'symbol' in rec]
# Key each record by the ID that was queried (the 'query' field of the
# response), not by the returned 'entrezgene'. The queried IDs are drawn from
# F_top_N.index, so the heat lookup below cannot miss. The previous code read
# rec['entrezgene'] behind a check for 'symbol' only, which raises KeyError
# when a record has a symbol but no entrezgene.
focal_entrez_names = [str(rec['query']) for rec in annotated]
focal_gene_names = [str(rec['symbol']) for rec in annotated]
# DataFrame.sort was removed from pandas; sort_values is the replacement.
top_heat_df = pd.DataFrame(
    {'gene_symbol': focal_gene_names,
     'heat': top_heat_focal[focal_entrez_names]}
).sort_values('heat', ascending=False)
# show the top 25 related genes, along with their heat values
top_heat_df.head(25)
Out[10]:
In [ ]: