In [2]:
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from IPython.display import Image
In [73]:
# Regular graph initialization
nodes = 1000
k = 10
adj = np.zeros([nodes, nodes])
for i in range(0, nodes):
    # connect each node to its k/2 nearest neighbours on either side of the ring
    for j in range(1, k // 2 + 1):
        adj[i, (i + j) % nodes] = 1
        adj[(i + j) % nodes, i] = 1
edgelist = np.zeros([nodes * k // 2, 2], dtype=int)
iterator = 0
for i in range(0, nodes - 1):
    for j in range(i + 1, nodes):
        if adj[i, j] == 1:
            edgelist[iterator, 0] = i
            edgelist[iterator, 1] = j
            iterator += 1
G = nx.Graph()
G.add_edges_from(edgelist)
# print(adj)
# print(sum(adj))
# G = nx.random_regular_graph(k, nodes)
G_copy = G.copy()  # keep a pristine copy; plain assignment would only alias G
# plt.clf()
# pos=nx.spring_layout(G) # positions for all nodes
# nx.draw(G, pos)
# #nx.draw_networkx_labels(G, pos)
# plt.title('Initial Regular Graph')
# plt.show()
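As a sanity check (an addition, not in the original notebook), the same ring lattice can be produced with NetworkX's built-in Watts-Strogatz generator at rewiring probability 0; both graphs should contain nodes*k/2 = 5000 edges.
# cross-check against the built-in generator (p = 0 gives the plain ring lattice)
G_check = nx.watts_strogatz_graph(nodes, k, 0)
print(G_check.number_of_edges(), G.number_of_edges())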
In [74]:
# logarithmically spaced rewiring probabilities, from 1e-4 up to just below 1
prob = [0.0001]
while prob[-1] < 1:
    prob.append(prob[-1] * 2.303)
prob = prob[:-1]  # drop the final value that crossed 1
L = np.zeros(len(prob) + 1)  # index 0 holds the unrewired lattice
C = np.zeros(len(prob) + 1)
G = G_copy.copy()
L[0] = nx.average_shortest_path_length(G)
C[0] = nx.average_clustering(G)
count = 5  # rewiring trials averaged per probability
L_temp = np.zeros(count)
C_temp = np.zeros(count)
iterator = 1
for p in prob:
    print(p)
    for c in range(count):
        G = G_copy.copy()  # restart each trial from the pristine lattice
        for n in G.nodes():
            for e in list(G.edges(n)):
                if np.random.rand() < p:
                    ## perform rewiring: pick a uniform random target that is
                    ## neither n itself nor already a neighbour of n
                    neighbours = np.array(list(G.edges(n)))[:, 1]
                    new_node = np.random.randint(nodes)
                    while new_node in neighbours or new_node == e[0]:
                        new_node = np.random.randint(nodes)
                    G.remove_edge(e[0], e[1])
                    G.add_edge(e[0], new_node)
                    if not nx.is_connected(G):
                        # revert rewirings that disconnect the graph
                        G.remove_edge(e[0], new_node)
                        G.add_edge(e[0], e[1])
        L_temp[c] = nx.average_shortest_path_length(G)
        C_temp[c] = nx.average_clustering(G)
    L[iterator] = np.average(L_temp)
    C[iterator] = np.average(C_temp)
    iterator += 1
## Note: rewirings that would disconnect the graph are reverted above.
# L /= L[0]
# C /= C[0]
In [78]:
# L[0], C[0] belong to the unrewired lattice; L[1:], C[1:] line up with prob
ln = L[1:] / L[0]
cn = C[1:] / C[0]
plt.semilogx(prob, ln, 'bo', prob, cn, 'rx')
plt.xlabel('rewiring probability p')
plt.show()
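The manual rewiring can also be cross-checked against the library generator. A minimal sketch (an addition, using nx.connected_watts_strogatz_graph, which retries until the rewired graph stays connected, mirroring the revert logic above):
# normalised L(p)/L(0) and C(p)/C(0) from freshly generated graphs
L0 = nx.average_shortest_path_length(G_copy)
C0 = nx.average_clustering(G_copy)
for p in prob:
    H = nx.connected_watts_strogatz_graph(nodes, k, p)
    print(p, nx.average_shortest_path_length(H) / L0,
          nx.average_clustering(H) / C0)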
In [14]:
# Grow a scale-free graph by preferential attachment: each new node
# attaches to two existing nodes drawn with probability proportional to
# degree (roulette-wheel sampling on the cumulative distribution).
max_nodes = 500
G = nx.Graph([[0, 1]])
for x in range(2, max_nodes):
    degrees = np.asarray(list(dict(G.degree()).values()), dtype=np.float32)
    p = degrees / degrees.sum()
    cdf = np.cumsum(p)
    nodes_list = list(G.nodes())  # snapshot before adding the new node
    G.add_node(x)
    for _ in range(2):
        target = nodes_list[np.searchsorted(cdf, np.random.rand())]
        G.add_edge(target, x)
print("Average clustering coefficient")
print(nx.average_clustering(G))
print("Characteristic Path length")
print(nx.average_shortest_path_length(G))
plt.semilogx(sorted(dict(G.degree()).values(), reverse=True))
plt.title('Degree Distribution')
plt.show()
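For comparison (an added sketch, not part of the original run), NetworkX's Barabási-Albert generator grows a graph by the same linear preferential-attachment rule with m = 2 edges per new node:
# reference implementation of linear preferential attachment
G_ba = nx.barabasi_albert_graph(max_nodes, 2)
print(nx.average_clustering(G_ba))
print(nx.average_shortest_path_length(G_ba))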
In [19]:
# Same growth process, but with attachment probability proportional to
# degree squared (super-linear preferential attachment).
max_nodes = 500
G = nx.Graph([[0, 1]])
for x in range(2, max_nodes):
    degrees = np.asarray(list(dict(G.degree()).values()), dtype=np.float32)
    p = np.square(degrees)
    p = p / p.sum()  # normalise over the squared degrees
    cdf = np.cumsum(p)
    nodes_list = list(G.nodes())
    G.add_node(x)
    for _ in range(2):
        target = nodes_list[np.searchsorted(cdf, np.random.rand())]
        G.add_edge(target, x)
print("Average clustering coefficient")
print(nx.average_clustering(G))
print("Characteristic Path length")
print(nx.average_shortest_path_length(G))
plt.semilogx(sorted(dict(G.degree()).values(), reverse=True))
plt.title('Degree Distribution')
plt.show()
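With attachment probability proportional to k², the earliest nodes are expected to absorb almost all edges, pushing the graph toward a star-like, winner-take-all topology. A quick check of the hub's share of edges (added here, not in the original run):
# the largest hub's degree relative to the total number of edges
degs = sorted(dict(G.degree()).values(), reverse=True)
print("largest hub degree:", degs[0], "out of", G.number_of_edges(), "edges")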
In [67]:
nodes = 100
degree = 5
G = nx.random_regular_graph(degree, nodes)
X = np.linspace(0, 0.8, 81)  # fraction of nodes removed; 1% of the original graph per step
L = np.zeros_like(X)
S = np.zeros_like(X)
nodes_to_del = int(0.01 * G.number_of_nodes())
for i, x in enumerate(X):
    if x != 0:
        # sample victims from the surviving node labels, not from 0..N
        victims = np.random.choice(list(G.nodes()), size=nodes_to_del, replace=False)
        G.remove_nodes_from(victims)
    try:
        L[i] = nx.average_shortest_path_length(G)
    except nx.NetworkXError:
        # the graph has fragmented; carry the previous value forward
        L[i] = L[i - 1]
    S[i] = len(max(nx.connected_components(G), key=len))
plt.hist(list(dict(G.degree()).values()))
plt.title('Degree Sequence')
plt.show()
plt.plot(X, S / nodes)
plt.title('Size of the giant cluster')
plt.show()
plt.plot(X[1:8], L[1:8])
plt.title('Characteristic Path Length')
plt.show()
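The largest-component bookkeeping recurs in every robustness cell; as a small refactor sketch (not in the original notebook), it could be pulled into a helper:
def giant_size(G):
    # size of the largest connected component of G
    return len(max(nx.connected_components(G), key=len))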
In [86]:
# Build a scale-free graph by the same preferential-attachment growth,
# then subject it to random node failures.
max_nodes = 100
G = nx.Graph([[0, 1]])
for x in range(2, max_nodes):
    degrees = np.asarray(list(dict(G.degree()).values()), dtype=np.float32)
    p = degrees / degrees.sum()
    cdf = np.cumsum(p)
    nodes_list = list(G.nodes())
    G.add_node(x)
    for _ in range(2):
        target = nodes_list[np.searchsorted(cdf, np.random.rand())]
        G.add_edge(target, x)
plt.hist(list(dict(G.degree()).values()))
plt.title('Degree Sequence')
plt.show()
X = np.linspace(0, 0.8, 81)  # fraction of nodes removed; 1% per step
## scale free
L_sf = np.zeros_like(X)
S_sf = np.zeros_like(X)
nodes_to_del = int(0.01 * G.number_of_nodes())
for i, x in enumerate(X):
    if x != 0:
        victims = np.random.choice(list(G.nodes()), size=nodes_to_del, replace=False)
        G.remove_nodes_from(victims)
    try:
        L_sf[i] = nx.average_shortest_path_length(G)
    except nx.NetworkXError:
        L_sf[i] = L_sf[i - 1]
    S_sf[i] = len(max(nx.connected_components(G), key=len))
plt.plot(X, S_sf / max_nodes)
plt.title('Size of the giant cluster')
plt.show()
plt.plot(X[1:8], L_sf[1:8])
plt.title('Characteristic Path Length')
plt.show()
In [64]:
G = nx.random_regular_graph(degree, nodes)
len(min(nx.connected_components(G), key=len))  # size of the smallest connected component
Out[64]:
In [76]:
# Load YEAST PPI dataset
G_yeast = nx.read_edgelist("Y2H_union.txt")
plt.hist(list(dict(G_yeast.degree()).values()), bins=100)
plt.title('YEAST PPI Degree Distribution')
plt.show()
plt.hist(list(nx.clustering(G_yeast).values()), bins=100)
plt.title('YEAST PPI Clustering Coefficient Distribution')
plt.show()
# Cytoscape generated map
Image(filename='Y2H_union.jpeg')
Out[76]:
In the yeast protein-protein interactome visualization generated with Cytoscape, many proteins were seen to interact with a large number of other proteins. Such hub proteins are vital to the organism's survival and are therefore considered important: the higher a protein's degree, the more important it is to the network.
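To make the visual impression quantitative, the hubs can be read directly off the degree sequence. A minimal sketch (an addition, assuming the G_yeast graph loaded above):
# list the ten highest-degree proteins (hubs) in the interactome
hubs = sorted(G_yeast.degree, key=lambda kv: kv[1], reverse=True)[:10]
for protein, deg in hubs:
    print(protein, deg)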
In [167]:
# Random deletions (failures) in the yeast PPI network
G_yeast = nx.read_edgelist("Y2H_union.txt")
X = np.linspace(0, 0.8, 81)  # fraction of nodes removed; 1% per step
S_avg = np.zeros_like(X)
S = np.zeros_like(X)
nodes_to_del = int(0.01 * G_yeast.number_of_nodes())
for i, x in enumerate(X):
    if x != 0:
        victims = np.random.choice(list(G_yeast.nodes()), size=nodes_to_del, replace=False)
        G_yeast.remove_nodes_from(victims)
    components = sorted(nx.connected_components(G_yeast), key=len, reverse=True)
    S[i] = len(components[0])
    # average size of the remaining, fragmented components
    sizes = np.array([len(c) for c in components[1:]])
    S_avg[i] = sizes.mean() if len(sizes) else 0
plt.plot(X, S / S[0])
plt.title('Size of the largest cluster (Failures)')
plt.show()
plt.plot(X, S_avg)
plt.title('Average size of fragmented components (Failures)')
plt.show()
In [183]:
# Targeted deletions (attacks) in the yeast PPI network: at each step,
# repeatedly remove the node with the current highest degree.
G_yeast = nx.read_edgelist("Y2H_union.txt")
X = np.linspace(0, 0.8, 81)  # fraction of nodes removed; 1% per step
S_avg = np.zeros_like(X)
S = np.zeros_like(X)
nodes_to_del = int(0.01 * G_yeast.number_of_nodes())
for i, x in enumerate(X):
    if x != 0:
        for f in range(nodes_to_del):
            hub = max(G_yeast.degree, key=lambda kv: kv[1])[0]
            G_yeast.remove_node(hub)
    components = sorted(nx.connected_components(G_yeast), key=len, reverse=True)
    S[i] = len(components[0])
    sizes = np.array([len(c) for c in components[1:]])
    S_avg[i] = sizes.mean() if len(sizes) else 0
plt.plot(X, S / S[0])
plt.title('Size of the largest cluster (Attacks)')
plt.show()
plt.plot(X, S_avg)
plt.title('Average size of fragmented components (Attacks)')
plt.show()
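Viewed together, the two experiments reproduce the classic robustness result: the interactome's giant component shrinks only gradually under random failures but collapses rapidly under degree-targeted attacks. A sketch of the overlay, assuming the failure cell's S array was first saved to a hypothetical S_fail before this cell overwrote S:
# S_fail is assumed to hold the largest-cluster sizes from the
# random-failure cell above (saved before S was overwritten here)
plt.plot(X, S_fail / S_fail[0], label='random failures')
plt.plot(X, S / S[0], label='targeted attacks')
plt.legend()
plt.title('Largest cluster: failures vs attacks')
plt.show()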
In [178]:
G_yeast = nx.read_edgelist("Y2H_union.txt")
max(G_yeast.degree, key=lambda kv: kv[1])  # highest-degree protein and its degree
Out[178]:
In [175]:
?np.where