# NETWORK BIOLOGY

## Assignment 1

#### Submitted By:

##### Divyanshu Srivastava (MT16125)
``````

In [1]:

# Import dependencies
import numpy as np
import pickle
import networkx as nx
import matplotlib.pyplot as plt
import itertools
from IPython.display import Image

``````

#### Qeustion 1

``````

In [3]:

## Undirected Unweighted Graph
G1 = nx.Graph()
G1.add_edges_from([(1, 2), (1, 3), (3, 4), (5, 6), (7, 8), (2, 5), (8, 3), (5, 8)])
plt.clf()

pos=nx.spring_layout(G1) # positions for all nodes
nx.draw(G1, pos)
nx.draw_networkx_labels(G1, pos)
plt.title('(a) Undirected Unwieghted Graph')
plt.show()

``````
``````

``````
``````

In [4]:

## Undirected Weighted Graph
G2 = nx.MultiGraph()
G2.add_weighted_edges_from([(1, 2, 0.1), (1, 3, 0.7), (3, 4, 1.0), (5, 6, 0.4),
(7, 8, 0.3), (2, 5, 0.3), (8, 3, 1.0), (5, 8, 0.5)])
pos=nx.spring_layout(G2) # positions for all nodes
nx.draw(G2, pos)
edge_labels=dict([((u,v,),d['weight'])
for u,v,d in G2.edges(data=True)])
nx.draw_networkx_edge_labels(G2,pos,edge_labels=edge_labels)
nx.draw_networkx_labels(G2, pos)
plt.title('(b) Undirected Wieghted Graph')
plt.show()

``````
``````

``````
``````

In [5]:

## Directed Unweighted Graph
G3 = nx.DiGraph()
G3.add_edges_from([(1, 2), (1, 3), (3, 4), (5, 6), (7, 8), (2, 5), (8, 3), (5, 8)])
pos=nx.spring_layout(G3) # positions for all nodes
nx.draw(G3, pos)
nx.draw_networkx_labels(G3, pos)
plt.title('(c) Directed Unweighted Graph')
plt.show()

``````
``````

``````
``````

In [6]:

## Directed Weighted Graph
G4 = nx.MultiDiGraph()
G4.add_weighted_edges_from([(1, 2, 0.1), (1, 3, 0.7), (3, 4, 1.0), (5, 6, 0.4),
(7, 8, 0.3), (2, 5, 0.3), (8, 3, 1.0), (5, 8, 0.5)])
pos=nx.spring_layout(G4) # positions for all nodes
nx.draw(G4, pos)
edge_labels=dict([((u,v,),d['weight'])
for u,v,d in G4.edges(data=True)])
nx.draw_networkx_edge_labels(G4,pos,edge_labels=edge_labels)
nx.draw_networkx_labels(G4, pos)
plt.title('(d) Directed Wieghted Graph')
plt.show()

``````
``````

``````
``````

In [7]:

G = {'G1 (UD-UW)': G1, 'G2 (UD-W)': G2,'G3 (D-UW)': G3,'G4 (D-W)': G4}

edgelist = dict()
for key in G:
edgelist[key] = nx.to_edgelist(G[key])

# saving
with open('ques1_stired_networks.pickle', 'w') as f:

``````

#### Qeustion 2

``````

In [8]:

## Fuction to find degree from adjacency matrix

# function definition
"""
This function computes the degree of each node for a given adjacency matrix
For Uniderected Graphs:
It prints node wise degree and returns it.
For Directed Graphs:
It prnts both in degree and out degree and reurns both of them.
"""
# print "Undirected Graph"
deg = np.sum(adj != 0, axis=0)
else:
# print "Directed Graph"
deg = list()
return deg

f = open('ques1_stired_networks.pickle', 'r')

print key
if len(deg) == 2:
print "Indegree: " + str(deg[0])
print "Outdegree: " + str(deg[1])
else:
print "Degree: " + str(deg)

``````
``````

G4 (D-W)
Indegree: [[0 1 2 1 1 1 0 2]]
Outdegree: [[2 1 1 0 2 0 1 1]]
G1 (UD-UW)
Degree: [[2 2 3 1 3 1 1 3]]
G2 (UD-W)
Degree: [[2 2 3 1 3 1 1 3]]
G3 (D-UW)
Indegree: [[0 1 2 1 1 1 0 2]]
Outdegree: [[2 1 1 0 2 0 1 1]]

``````

### Question 3

``````

In [9]:

# Computing A square for Undirected nnweighted graph G1
A = A.astype(np.int)
A_square = A*A

pos=nx.spring_layout(G1) # positions for all nodes
nx.draw(G1, pos)
nx.draw_networkx_labels(G1, pos)
plt.title('Undirected Unwieghted Graph')
plt.show()
print "A_square : \n " + np.str(A_square)

``````
``````

A_square :
[[2 0 0 1 1 0 0 1]
[0 2 1 0 0 1 0 1]
[0 1 3 0 1 0 1 0]
[1 0 0 1 0 0 0 1]
[1 0 1 0 3 0 1 0]
[0 1 0 0 0 1 0 1]
[0 0 1 0 1 0 1 0]
[1 1 0 1 0 1 0 3]]

``````

Clearly, \$A^2\$ as computed above for the undirected unweighted graph G1 (question 1) encodes the information of the number of paths of length 2 from each node to every other node. For Example, Cosider the pair of nodes 1 and 4. There exists a path of length 2 from node 1 to node 4 via node 3, thereby giving \$A^2_{(1, 4)} = 1\$ . Similarly, there exists three paths of length 2 from node 8 back to node 8, via node 3, node 5 and node 7. Thus we have \$A^2_{(8, 8)} = 3\$. In general, \$A^n_{(i, j)}\$ gives the number of paths of length \$n\$ from node \$i\$ to node \$j\$ in the undirected unweighted graph.

### Question 4

``````

In [10]:

## Reading C. Elegans neural network dataset
G_celegans_deg = get_degree(G_celegans_adj) # function written in question 2

# Plotting
plt.hist(np.asarray(G_celegans_deg[0])[0], bins='auto')
plt.title("In Degree Distribution for C. Elegans Neural Network")
plt.xlabel("Degree")
plt.ylabel("Frequency")
plt.show()

plt.hist(np.asarray(G_celegans_deg[1])[0], bins='auto')
plt.title("Out Degree Distribution for C. Elegans Neural Network")
plt.xlabel("Degree")
plt.ylabel("Frequency")
plt.show()

``````
``````

``````
``````

In [11]:

## Reading C. Elegans neural network dataset

# Plotting
plt.title("In Degree Distribution for C. Elegans Neural Network")
plt.xlabel("Degree")
plt.ylabel("Frequency")
plt.show()

``````
``````

``````

### Question 5

``````

In [12]:

## Random Undirected Unweighted Graph

def create_random_graph(n, e):
"""
This function creates a random adjacncy matrix for a simple
undirected unweighted graph with n nodes and e edges.
It returns the randomly computed adjacency matrix.
"""
edges = 0;
while edges < e:
i = np.random.randint(0, n-2)
j = np.random.randint(i+1, n-1)
continue
edges = edges+1

n = 1000
e = 10000

# degree distribution
plt.clf()
plt.hist(deg, bins='auto')
plt.title("Degree Distribution for random graph with " + str(n) + " nodes and " + str(e) + " edges")
plt.xlabel("Degree")
plt.ylabel("Frequency")
plt.show()

``````
``````

``````

### Question 6

``````

In [13]:

## Comparing C elegans neural network with random controls
# creating underlyting graph for directed graph

n = 100

random_degs = np.empty([n, nodes])

for i in range(0, n):
random_degs[i] = get_degree(create_random_graph(nodes, edges))

random_avg_degs = np.mean(random_degs, axis=0)

plt.clf()
plt.subplot(121)
plt.hist(np.asarray(G_celegans_underlying_deg[0])[0], bins='auto')
plt.title("Degree Distribution for \nC. Elegans Neural Network \n underlying graph")
plt.xlabel("Degree")
plt.ylabel("Frequency")
plt.subplot(122)
plt.hist(random_avg_degs, bins='auto')
plt.title("Average Degree Distribution \n for Random Controls")
plt.xlabel("Degree")
plt.ylabel("Frequency")
plt.show()

``````
``````

``````

As seen from above figures, real world networks show power law nature for degree distribution, while random graphs show more like a poisson distribution of degree over nodes. Similar results can be seen for adjacency network of common adjectives and nouns.

``````

In [14]:

n = 100

random_degs = np.empty([n, nodes])

for i in range(0, n):
random_degs[i] = get_degree(create_random_graph(nodes, edges))

random_avg_degs = np.mean(random_degs, axis=0)

plt.clf()
plt.subplot(121)
plt.title("Degree Distribution for \n adjacency network of common \n adjectives and nouns")
plt.xlabel("Degree")
plt.ylabel("Frequency")
plt.subplot(122)
plt.hist(random_avg_degs, bins='auto')
plt.title("Average Degree Distribution \n for Random Controls")
plt.xlabel("Degree")
plt.ylabel("Frequency")
plt.show()

``````
``````

``````

### Question 7

``````

In [ ]:

## No codes in this question. Cytoscape was used to read and display thee two real world netowrks.

``````

#### C. Elegans Neural Network

``````

In [15]:

Image(filename='Newman_Datasets/celegansneural/celegansneural.gml.png')

``````
``````

Out[15]:

``````

#### Adjacent nouns and words network

``````

In [16]:

``````
``````

Out[16]:

``````

### Question 8

``````

In [17]:

## Clustering Coefficient and Average custering coefficient

"""
This Function computes clustering coefficient of the node n
"""
if (n_deg < 2):
return 0
neighbour_pairs = itertools.combinations(neighbours, 2)
triangles = 0
for i in neighbour_pairs:
triangles += 1
clust_coeff = (float) (triangles / (n_deg*(n_deg-1)/2))
return clust_coeff

"""
This function computes average clustering coefficient of a graph respresented
"""
clust_coeffs = np.zeros([1, nodes])[0]
for i in range(0, nodes):
avg_clust_coeff = np.mean(clust_coeffs)
return avg_clust_coeff

# Using adjacent nouns and words undirected graph for validation
print "Nodewise Clustering Coefficients (for first 10 nodes: "
for i in range(0, 10):
print "\tNode "+ str(i) + ": \t" + str(get_clust_coeff(adj, i))
print "Average Clustering Coefficient (Using NetworkX): " + str(nx.average_clustering(G_adjnoun))

``````
``````

Nodewise Clustering Coefficients (for first 10 nodes:
Node 0: 	0.666666666667
Node 1: 	0.153846153846
Node 2: 	0.132575757576
Node 3: 	0.194444444444
Node 4: 	0.0
Node 5: 	0.285714285714
Node 6: 	0.266666666667
Node 7: 	0.290909090909
Node 8: 	0
Node 9: 	0.102941176471
Average Clustering Coefficient (Using function): 0.17284007981
Average Clustering Coefficient (Using NetworkX): 0.17284007981

``````

### Question 9

``````

In [18]:

## Finding All Pair Shortest Path using Floyd Warshall Algorithm for the aadjacent nouns and adjectives network

"""
This function computes all pair shortest path for the adjacency matrix adj
"""
shortest_paths[shortest_paths == 0] = np.inf
shortest_paths[np.diag_indices_from(shortest_paths)] = 0

for k in range(nodes):
for i in range(nodes):
for j in range(nodes):
shortest_paths[i, j] = min(shortest_paths[i, j] , shortest_paths[i, k]+ shortest_paths[k, j])

return shortest_paths

print "All Pair Shortest path Matrix: \n" + str(all_pair_shortest_paths)
print "\nCharacteristic path length (Self-Computed): \t" + str(np.sum(all_pair_shortest_paths) / (nodes*nodes - nodes))
print "Characteristic path length (Using NetworkX): \t" + str(nx.average_shortest_path_length(G_adjnoun))

``````
``````

All Pair Shortest path Matrix:
[[ 0.  1.  1. ...,  4.  4.  2.]
[ 1.  0.  1. ...,  4.  4.  2.]
[ 1.  1.  0. ...,  3.  3.  1.]
...,
[ 4.  4.  3. ...,  0.  2.  4.]
[ 4.  4.  3. ...,  2.  0.  4.]
[ 2.  2.  1. ...,  4.  4.  0.]]

Characteristic path length (Self-Computed): 	2.53555341055
Characteristic path length (Using NetworkX): 	2.53555341055

``````