In [6]:
from collections import Counter, OrderedDict
import matplotlib.pyplot as plt

def load_graph(file_name):
    """
    Function that loads a graph given a text
    representation of the graph

    Returns a dictionary that models a graph
    """
    graph_file = open(file_name)
    graph_text = graph_file.read()
    graph_lines = graph_text.split('\n')
    graph_lines = graph_lines[:-1]

    print("Loaded graph with", len(graph_lines), "nodes")

    answer_graph = OrderedDict()
    for line in graph_lines:
        neighbors = line.split(' ')
        node = int(neighbors[0])
        answer_graph[node] = set([])
        for neighbor in neighbors[1:-1]:
            answer_graph[node].add(int(neighbor))

    return answer_graph


citation_graph = load_graph('alg_phys-cite.txt')


def compute_in_degrees(digraph):
    """Returns a dictionary of in-degrees of the nodes
    of the input digraph"""
    degrees = OrderedDict.fromkeys(digraph, 0)
    edges = []
    for value in digraph.values():
        edges += list(value)
    for edge in edges:
        degrees[edge] += 1
    return degrees


def in_degree_distribution(digraph):
    """Returns the degree distribution of a digraph"""
    num_edges = float(len(digraph))
    degrees = compute_in_degrees(digraph)
    normalized_degrees = OrderedDict(Counter(degrees.values()))
    for degree in iter(normalized_degrees):
        normalized_degrees[degree] /= num_edges
    return normalized_degrees


normalized_degree_dist = in_degree_distribution(citation_graph)

# plt.loglog(normalized_degree_dist.keys(), normalized_degree_dist.values(), 'o', color='#634017')
# plt.title("Log-log plot of the normalized\ndegree distribution of paper's citations", fontsize=18, color='#ff8800')
# plt.xlabel("Number of Citations", fontsize=14, color='#ff8800')
# plt.ylabel("Papers", fontsize=14, color='#ff8800')

plt.savefig('Q1.jpg', dpi=300, format='png', transparent=False, orientation='landscape', bbox_inches='tight', pad_inches=0.3)


Loaded graph with 27770 nodes
<matplotlib.figure.Figure at 0x7f2c397c34e0>

In [5]:


In [1]:
# normalized_degree_dist = normalized_in_degree_distribution(er)
# plt.loglog(normalized_degree_dist.keys(), normalized_degree_dist.values(), 'o', color='#634017')

# plt.plot(normalized_degree_dist.keys(), normalized_degree_dist.values(), 'o', color='#634017')

er = nx.erdos_renyi_graph(100, .5, directed=True)
nodes = er.nodes()
edges = er.edges()

from math import log, e
from collections import Counter, OrderedDict
import matplotlib.pyplot as plt
import networkx as nx

def make_digraph(vertices, edges):
    """Returns a digraph from a list of nodes
    and a list of edges represented as tuples"""
    graph = dict()
    for node in vertices:
        graph[node] = set()
    for edge in edges:
        graph[edge[0]].add(edge[1])
    return graph


er = make_digraph(nodes, edges)

def compute_in_degrees(digraph):
    """Returns a dictionary of in-degrees of the nodes
    of the input digraph"""
    degrees = OrderedDict.fromkeys(digraph, 0)
    edges = []
    for value in digraph.values():
        edges += list(value)
    for edge in edges:
        degrees[edge] += 1
    return degrees


def in_degree_distribution(digraph):
    """Returns the degree distribution of a digraph"""
    degrees = compute_in_degrees(digraph)
    return dict(Counter(degrees.values()))

# def normalized_in_degree_distribution(digraph):
#     """Returns the degree distribution of a digraph"""
#     num_edges = float(len(digraph))
#     degrees = compute_in_degrees(digraph)
#     normalized_degrees = OrderedDict(Counter(degrees.values()))
#     for degree in iter(normalized_degrees):
#         normalized_degrees[degree] /= num_edges
#     return normalized_degrees


degree_dist = in_degree_distribution(er)
# plt.loglog(degree_dist.keys(), degree_dist.values(), 'o', color='#000000')
# plt.title("Log-log plot of the normalized\ndegree distribution of ER graph", fontsize=18, color='#ff8800')

# plt.plot(degree_dist.keys(), degree_dist.values(), 'o', color='#ff0000')


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-0d92ffbb9583> in <module>()
      4 # plt.plot(normalized_degree_dist.keys(), normalized_degree_dist.values(), 'o', color='#634017')
      5 
----> 6 er = nx.erdos_renyi_graph(100, .5, directed=True)
      7 nodes = er.nodes()
      8 edges = er.edges()

NameError: name 'nx' is not defined

In [ ]:


In [2]:
n = 27770
m = 352768/n
print(m)

def compute_out_degrees(digraph):
    """Returns a dictionary of out-degrees of the nodes
    of the input digraph"""
    degrees = dict()
    for node in digraph:
        degrees[node] = len(digraph[node])
    return degrees

from collections import Counter, OrderedDict

def load_graph(file_name):
    """
    Function that loads a graph given a text
    representation of the graph

    Returns a dictionary that models a graph
    """
    graph_file = open(file_name)
    graph_text = graph_file.read()
    graph_lines = graph_text.split('\n')
    graph_lines = graph_lines[:-1]

    print("Loaded graph with", len(graph_lines), "nodes")

    answer_graph = OrderedDict()
    for line in graph_lines:
        neighbors = line.split(' ')
        node = int(neighbors[0])
        answer_graph[node] = set([])
        for neighbor in neighbors[1:-1]:
            answer_graph[node].add(int(neighbor))

    return answer_graph


citation_graph = load_graph('alg_phys-cite.txt')

out_degrees = compute_out_degrees(citation_graph)

print(sum(out_degrees.values()))
print(len(out_degrees))
sum(out_degrees.values())/ len(out_degrees)


12.703204897371265
Loaded graph with 27770 nodes
352768
27770
Out[2]:
12.703204897371265

In [ ]:


In [ ]:
"""
Provided code for application portion of module 1

Helper class for implementing efficient version
of DPA algorithm
"""

# general imports
import random
from collections import OrderedDict, Counter
from matplotlib import pyplot as plt


class DPATrial:
    """
    Simple class to encapsulate optimized trials for DPA algorithm

    Maintains a list of node numbers with multiple instances of each number.
    The number of instances of each node number are
    in the same proportion as the desired probabilities

    Uses random.choice() to select a node number from this list for each trial.
    """
    def __init__(self, num_nodes):
        """
        Initialize a DPATrial object corresponding to a
        complete graph with num_nodes nodes

        Note the initial list of node numbers has num_nodes copies of
        each node number
        """
        self._num_nodes = num_nodes
        self._node_numbers = [node for node in range(num_nodes) for dummy_idx in range(num_nodes)]

    def run_trial(self, num_nodes):
        """
        Conduct num_node trials using by applying random.choice()
        to the list of node numbers

        Updates the list of node numbers so that the number of instances of
        each node number is in the same ratio as the desired probabilities

        Returns:
        Set of nodes
        """

        # compute the neighbors for the newly-created node
        new_node_neighbors = set()
        for dummy_idx in range(num_nodes):
            new_node_neighbors.add(random.choice(self._node_numbers))

        # update the list of node numbers so that each node number
        # appears in the correct ratio
        self._node_numbers.append(self._num_nodes)
        self._node_numbers.extend(list(new_node_neighbors))

        # update the number of nodes
        self._num_nodes += 1
        return new_node_neighbors


def make_digraph(vertices, edges):
    """Returns a digraph from a list of nodes
    and a list of edges represented as tuples"""
    graph = dict()
    for node in vertices:
        graph[node] = set()
    for edge in edges:
        graph[edge[0]].add(edge[1])
    return graph


def make_complete_graph(num_nodes):
    """Returns a complete graph"""
    nodes = list(range(num_nodes))
    edges = [(node_1, node_2) for node_1 in nodes for node_2 in nodes if node_1 != node_2]
    return make_digraph(nodes, edges)


def compute_in_degrees(digraph):
    """Returns a dictionary of in-degrees of the nodes
    of the input digraph"""
    degrees = dict.fromkeys(digraph, 0)
    edges = []
    for value in digraph.values():
        edges += list(value)
    for edge in edges:
        degrees[edge] += 1
    return degrees


def normalized_in_degree_distribution(digraph):
    """Returns the degree distribution of a digraph"""
    num_edges = float(len(digraph))
    degrees = compute_in_degrees(digraph)
    normalized_degrees = OrderedDict(Counter(degrees.values()))
    for degree in iter(normalized_degrees):
        normalized_degrees[degree] /= num_edges
    return normalized_degrees


graph = make_complete_graph(13)
trials = DPATrial(13)
for node in range(13, 27769):
    new_nodes = trials.run_trial(13)
    graph[node] = new_nodes


normalized_degree_dist = normalized_in_degree_distribution(graph)

plt.loglog(normalized_degree_dist.keys(), normalized_degree_dist.values(), 'o', color='#634017')
plt.title("Log-log plot of the normalized\nin-degree distribution of the DPA graph\nwith 27770 node and num of fixed edges", fontsize=18, color='#ff8800')
plt.xlabel("Number of Citations", fontsize=14, color='#ff8800')
plt.ylabel("Papers", fontsize=14, color='#ff8800')

plt.savefig('Q4.png', dpi=300, format='png', transparent=False, orientation='landscape', bbox_inches='tight', pad_inches=0.3)