In [6]:
from collections import Counter, OrderedDict
import matplotlib.pyplot as plt
def load_graph(file_name):
"""
Function that loads a graph given a text
representation of the graph
Returns a dictionary that models a graph
"""
graph_file = open(file_name)
graph_text = graph_file.read()
graph_lines = graph_text.split('\n')
graph_lines = graph_lines[:-1]
print("Loaded graph with", len(graph_lines), "nodes")
answer_graph = OrderedDict()
for line in graph_lines:
neighbors = line.split(' ')
node = int(neighbors[0])
answer_graph[node] = set([])
for neighbor in neighbors[1:-1]:
answer_graph[node].add(int(neighbor))
return answer_graph
citation_graph = load_graph('alg_phys-cite.txt')
def compute_in_degrees(digraph):
"""Returns a dictionary of in-degrees of the nodes
of the input digraph"""
degrees = OrderedDict.fromkeys(digraph, 0)
edges = []
for value in digraph.values():
edges += list(value)
for edge in edges:
degrees[edge] += 1
return degrees
def in_degree_distribution(digraph):
"""Returns the degree distribution of a digraph"""
num_edges = float(len(digraph))
degrees = compute_in_degrees(digraph)
normalized_degrees = OrderedDict(Counter(degrees.values()))
for degree in iter(normalized_degrees):
normalized_degrees[degree] /= num_edges
return normalized_degrees
normalized_degree_dist = in_degree_distribution(citation_graph)
# plt.loglog(normalized_degree_dist.keys(), normalized_degree_dist.values(), 'o', color='#634017')
# plt.title("Log-log plot of the normalized\ndegree distribution of paper's citations", fontsize=18, color='#ff8800')
# plt.xlabel("Number of Citations", fontsize=14, color='#ff8800')
# plt.ylabel("Papers", fontsize=14, color='#ff8800')
plt.savefig('Q1.jpg', dpi=300, format='png', transparent=False, orientation='landscape', bbox_inches='tight', pad_inches=0.3)
In [5]:
In [1]:
# normalized_degree_dist = normalized_in_degree_distribution(er)
# plt.loglog(normalized_degree_dist.keys(), normalized_degree_dist.values(), 'o', color='#634017')
# plt.plot(normalized_degree_dist.keys(), normalized_degree_dist.values(), 'o', color='#634017')
er = nx.erdos_renyi_graph(100, .5, directed=True)
nodes = er.nodes()
edges = er.edges()
from math import log, e
from collections import Counter, OrderedDict
import matplotlib.pyplot as plt
import networkx as nx
def make_digraph(vertices, edges):
"""Returns a digraph from a list of nodes
and a list of edges represented as tuples"""
graph = dict()
for node in vertices:
graph[node] = set()
for edge in edges:
graph[edge[0]].add(edge[1])
return graph
er = make_digraph(nodes, edges)
def compute_in_degrees(digraph):
"""Returns a dictionary of in-degrees of the nodes
of the input digraph"""
degrees = OrderedDict.fromkeys(digraph, 0)
edges = []
for value in digraph.values():
edges += list(value)
for edge in edges:
degrees[edge] += 1
return degrees
def in_degree_distribution(digraph):
"""Returns the degree distribution of a digraph"""
degrees = compute_in_degrees(digraph)
return dict(Counter(degrees.values()))
# def normalized_in_degree_distribution(digraph):
# """Returns the degree distribution of a digraph"""
# num_edges = float(len(digraph))
# degrees = compute_in_degrees(digraph)
# normalized_degrees = OrderedDict(Counter(degrees.values()))
# for degree in iter(normalized_degrees):
# normalized_degrees[degree] /= num_edges
# return normalized_degrees
degree_dist = in_degree_distribution(er)
# plt.loglog(degree_dist.keys(), degree_dist.values(), 'o', color='#000000')
# plt.title("Log-log plot of the normalized\ndegree distribution of ER graph", fontsize=18, color='#ff8800')
# plt.plot(degree_dist.keys(), degree_dist.values(), 'o', color='#ff0000')
In [ ]:
In [2]:
n = 27770
m = 352768/n
print(m)
def compute_out_degrees(digraph):
"""Returns a dictionary of out-degrees of the nodes
of the input digraph"""
degrees = dict()
for node in digraph:
degrees[node] = len(digraph[node])
return degrees
from collections import Counter, OrderedDict
def load_graph(file_name):
"""
Function that loads a graph given a text
representation of the graph
Returns a dictionary that models a graph
"""
graph_file = open(file_name)
graph_text = graph_file.read()
graph_lines = graph_text.split('\n')
graph_lines = graph_lines[:-1]
print("Loaded graph with", len(graph_lines), "nodes")
answer_graph = OrderedDict()
for line in graph_lines:
neighbors = line.split(' ')
node = int(neighbors[0])
answer_graph[node] = set([])
for neighbor in neighbors[1:-1]:
answer_graph[node].add(int(neighbor))
return answer_graph
citation_graph = load_graph('alg_phys-cite.txt')
out_degrees = compute_out_degrees(citation_graph)
print(sum(out_degrees.values()))
print(len(out_degrees))
sum(out_degrees.values())/ len(out_degrees)
Out[2]:
In [ ]:
In [ ]:
"""
Provided code for application portion of module 1
Helper class for implementing efficient version
of DPA algorithm
"""
# general imports
import random
from collections import OrderedDict, Counter
from matplotlib import pyplot as plt
class DPATrial:
"""
Simple class to encapsulate optimized trials for DPA algorithm
Maintains a list of node numbers with multiple instances of each number.
The number of instances of each node number are
in the same proportion as the desired probabilities
Uses random.choice() to select a node number from this list for each trial.
"""
def __init__(self, num_nodes):
"""
Initialize a DPATrial object corresponding to a
complete graph with num_nodes nodes
Note the initial list of node numbers has num_nodes copies of
each node number
"""
self._num_nodes = num_nodes
self._node_numbers = [node for node in range(num_nodes) for dummy_idx in range(num_nodes)]
def run_trial(self, num_nodes):
"""
Conduct num_node trials using by applying random.choice()
to the list of node numbers
Updates the list of node numbers so that the number of instances of
each node number is in the same ratio as the desired probabilities
Returns:
Set of nodes
"""
# compute the neighbors for the newly-created node
new_node_neighbors = set()
for dummy_idx in range(num_nodes):
new_node_neighbors.add(random.choice(self._node_numbers))
# update the list of node numbers so that each node number
# appears in the correct ratio
self._node_numbers.append(self._num_nodes)
self._node_numbers.extend(list(new_node_neighbors))
# update the number of nodes
self._num_nodes += 1
return new_node_neighbors
def make_digraph(vertices, edges):
"""Returns a digraph from a list of nodes
and a list of edges represented as tuples"""
graph = dict()
for node in vertices:
graph[node] = set()
for edge in edges:
graph[edge[0]].add(edge[1])
return graph
def make_complete_graph(num_nodes):
"""Returns a complete graph"""
nodes = list(range(num_nodes))
edges = [(node_1, node_2) for node_1 in nodes for node_2 in nodes if node_1 != node_2]
return make_digraph(nodes, edges)
def compute_in_degrees(digraph):
"""Returns a dictionary of in-degrees of the nodes
of the input digraph"""
degrees = dict.fromkeys(digraph, 0)
edges = []
for value in digraph.values():
edges += list(value)
for edge in edges:
degrees[edge] += 1
return degrees
def normalized_in_degree_distribution(digraph):
"""Returns the degree distribution of a digraph"""
num_edges = float(len(digraph))
degrees = compute_in_degrees(digraph)
normalized_degrees = OrderedDict(Counter(degrees.values()))
for degree in iter(normalized_degrees):
normalized_degrees[degree] /= num_edges
return normalized_degrees
graph = make_complete_graph(13)
trials = DPATrial(13)
for node in range(13, 27769):
new_nodes = trials.run_trial(13)
graph[node] = new_nodes
normalized_degree_dist = normalized_in_degree_distribution(graph)
plt.loglog(normalized_degree_dist.keys(), normalized_degree_dist.values(), 'o', color='#634017')
plt.title("Log-log plot of the normalized\nin-degree distribution of the DPA graph\nwith 27770 node and num of fixed edges", fontsize=18, color='#ff8800')
plt.xlabel("Number of Citations", fontsize=14, color='#ff8800')
plt.ylabel("Papers", fontsize=14, color='#ff8800')
plt.savefig('Q4.png', dpi=300, format='png', transparent=False, orientation='landscape', bbox_inches='tight', pad_inches=0.3)