In [1]:
import os
import csv
import platform
import pandas as pd
import networkx as nx
from graph_partitioning import GraphPartitioning, utils
run_metrics = True
cols = ["WASTE", "CUT RATIO", "EDGES CUT", "TOTAL COMM VOLUME", "Qds", "CONDUCTANCE", "MAXPERM", "RBSE", "NMI", "FSCORE", "FSCORE RELABEL IMPROVEMENT", "LONELINESS"]
#cols = ["WASTE", "CUT RATIO", "EDGES CUT", "TOTAL COMM VOLUME", "Q", "Qds", "CONDUCTANCE", "LONELINESS", "NETWORK PERMANENCE", "NORM. MUTUAL INFO", "EDGE CUT WEIGHT", "FSCORE", "FSCORE RELABEL IMPROVEMENT"]
#cols = ["WASTE", "CUT RATIO", "EDGES CUT", "TOTAL COMM VOLUME", "MODULARITY", "LONELINESS", "NETWORK PERMANENCE", "NORM. MUTUAL INFO", "EDGE CUT WEIGHT", "FSCORE", "FSCORE RELABEL IMPROVEMENT"]
pwd = %pwd
config = {
"DATA_FILENAME": os.path.join(pwd, "data", "predition_model_tests", "network", "network_2.txt"),
"OUTPUT_DIRECTORY": os.path.join(pwd, "output"),
# Set which algorithm is run for the PREDICTION MODEL.
# Either: 'FENNEL' or 'SCOTCH'
"PREDICTION_MODEL_ALGORITHM": "FENNEL",
# Alternativly, read input file for prediction model.
# Set to empty to generate prediction model using algorithm value above.
"PREDICTION_MODEL": "",
"PARTITIONER_ALGORITHM": "FENNEL",
# File containing simulated arrivals. This is used in simulating nodes
# arriving at the shelter. Nodes represented by line number; value of
# 1 represents a node as arrived; value of 0 represents the node as not
# arrived or needing a shelter.
"SIMULATED_ARRIVAL_FILE": os.path.join(pwd,
"data",
"predition_model_tests",
"dataset_1_shift_rotate",
"simulated_arrival_list",
"percentage_of_prediction_correct_100",
"arrival_100_2.txt"
),
# File containing the prediction of a node arriving. This is different to the
# simulated arrivals, the values in this file are known before the disaster.
"PREDICTION_LIST_FILE": os.path.join(pwd,
"data",
"predition_model_tests",
"dataset_1_shift_rotate",
"prediction_list",
"prediction_2.txt"
),
# File containing the geographic location of each node, in "x,y" format.
"POPULATION_LOCATION_FILE": os.path.join(pwd,
"data",
"predition_model_tests",
"coordinates",
"coordinates_2.txt"
),
# Number of shelters
"num_partitions": 4,
# The number of iterations when making prediction model
"num_iterations": 10,
# Percentage of prediction model to use before discarding
# When set to 0, prediction model is discarded, useful for one-shot
"prediction_model_cut_off": .0,
# Alpha value used in one-shot (when restream_batches set to 1)
"one_shot_alpha": 0.5,
"use_one_shot_alpha": False,
# Number of arrivals to batch before recalculating alpha and restreaming.
"restream_batches": 1000,#50,
# When the batch size is reached: if set to True, each node is assigned
# individually as first in first out. If set to False, the entire batch
# is processed and empty before working on the next batch.
"sliding_window": False,
# Create virtual nodes based on prediction model
"use_virtual_nodes": False,
# Virtual nodes: edge weight
"virtual_edge_weight": 1.0,
# Loneliness score parameter. Used when scoring a partition by how many
# lonely nodes exist.
"loneliness_score_param": 1.2,
"compute_metrics_enabled" : True,
####
# GRAPH MODIFICATION FUNCTIONS
# Also enables the edge calculation function.
"graph_modification_functions": True,
# If set, the node weight is set to 100 if the node arrives at the shelter,
# otherwise the node is removed from the graph.
"alter_arrived_node_weight_to_100": False,
# Uses generalized additive models from R to generate prediction of nodes not
# arrived. This sets the node weight on unarrived nodes the the prediction
# given by a GAM.
# Needs POPULATION_LOCATION_FILE to be set.
"alter_node_weight_to_gam_prediction": True,
# Enables edge expansion when graph_modification_functions is set to true
"edge_expansion_enabled": True,
# The value of 'k' used in the GAM will be the number of nodes arrived until
# it reaches this max value.
"gam_k_value": 100,
# Alter the edge weight for nodes that haven't arrived. This is a way to
# de-emphasise the prediction model for the unknown nodes.
"prediction_model_emphasis": 1.0,
# This applies the prediction_list_file node weights onto the nodes in the graph
# when the prediction model is being computed and then removes the weights
# for the cutoff and batch arrival modes
"apply_prediction_model_weights": True,
"SCOTCH_LIB_PATH": os.path.join(pwd, "libs/scotch/macOS/libscotch.dylib")
if 'Darwin' in platform.system()
else "/usr/local/lib/libscotch.so",
# Path to the PaToH shared library
"PATOH_LIB_PATH": os.path.join(pwd, "libs/patoh/lib/macOS/libpatoh.dylib")
if 'Darwin' in platform.system()
else os.path.join(pwd, "libs/patoh/lib/linux/libpatoh.so"),
"PATOH_ITERATIONS": 5,
# Expansion modes: 'avg_node_weight', 'total_node_weight', 'smallest_node_weight'
# 'largest_node_weight'
# add '_squared' or '_sqrt' at the end of any of the above for ^2 or sqrt(weight)
# i.e. 'avg_node_weight_squared
"PATOH_HYPEREDGE_EXPANSION_MODE": 'no_expansion',
# Edge Expansion: average, total, minimum, maximum, product, product_squared, sqrt_product
"EDGE_EXPANSION_MODE" : 'total',
# Whether nodes should be reordered using a centrality metric for optimal node assignments in batch mode
# This is specific to FENNEL and at the moment Leverage Centrality is used to compute new noder orders
"FENNEL_NODE_REORDERING_ENABLED": True,
# The node ordering scheme: PII_LH (political index), LEVERAGE_HL
"FENNEL_NODE_REODERING_SCHEME": 'LEVERAGE_HL',
# Whether the Friend of a Friend scoring system is active during FENNEL partitioning.
# FOAF employs information about a node's friends to determine the best partition when
# this node arrives at a shelter and no shelter has friends already arrived
"FENNEL_FRIEND_OF_A_FRIEND_ENABLED": False,
# Alters how much information to print. Keep it at 1 for this notebook.
# 0 - will print nothing, useful for batch operations.
# 1 - prints basic information on assignments and operations.
# 2 - prints more information as it batches arrivals.
"verbose": 1
}
gp = GraphPartitioning(config)
# Optional: shuffle the order of nodes arriving
# Arrival order should not be shuffled if using GAM to alter node weights
#random.shuffle(gp.arrival_order)
%pylab inline
In [2]:
gp.load_network()
In [3]:
pos = {}
with open(gp.POPULATION_LOCATION_FILE, 'r') as csvfile:
reader = csv.reader(csvfile, delimiter=',')
for node, row in enumerate(reader):
pos[node] = np.array(row).astype(np.float)
plt.figure(figsize=(10, 10))
plt.axis('off')
nx.draw_networkx_nodes(gp.G, pos, node_size=20, cmap=plt.cm.jet, with_labels=False)
nx.draw_networkx_edges(gp.G, pos, alpha=0.5)
plt.show(gp.G)
In [4]:
gp.init_partitioner()
In [5]:
m = gp.prediction_model()
In [6]:
rows = list(range(1, len(m)+1))
df = pd.DataFrame(m, index=rows, columns=cols)
print(df)
pos = nx.spring_layout(gp.G)
plt.figure(figsize=(10, 10))
plt.axis('off')
colours = {0: 'red', 1: 'blue', 2: 'yellow', 3: 'green'}
colour = []
#for n in gp.G.nodes_iter():
# colour += [colours[gp.assignments[n]]]
nx.draw_networkx_nodes(gp.G, pos, node_size=20, node_color=gp.assignments, cmap=plt.cm.jet, with_labels=False)
nx.draw_networkx_edges(gp.G, pos, alpha=0.2)
plt.show(gp.G)
In [7]:
m = gp.assign_cut_off()
In [8]:
rows = list(range(1, len(m)+1))
df = pd.DataFrame(m, index=rows, columns=cols)
print(df)
In [9]:
m = gp.batch_arrival()
In [10]:
rows = list(range(1, len(m)+1))
df = pd.DataFrame(m, index=rows, columns=cols).astype(float)
print(df)
if len(df) > 1:
#t = int(len(rows) / 2)
t = 1
xticks = rows[::t]
df.plot(y=['EDGES CUT', 'TOTAL COMM VOLUME'], xticks=xticks, figsize=(5,4))
fig, axs = plt.subplots(1,6)
df.plot(y=['CUT RATIO'], title='Cut ratio', xticks=xticks, figsize=(12,2), legend=False, ax=axs[0])
df.plot(y=['Qds'], title='Qds', xticks=xticks, figsize=(12,2), legend=False, ax=axs[1])
#df.plot(y=['LONELINESS'], title='Loneliness', xticks=xticks, figsize=(12,2), legend=False, ax=axs[2])
df.plot(y=['MAXPERM'], title='Network permanence', xticks=xticks, figsize=(12,2), legend=False, ax=axs[2])
df.plot(y=['NMI'], title='Norm. Mutual Info', xticks=rows, figsize=(12,2), legend=False, ax=axs[3])
df.plot(y=['FSCORE'], title='Fscore', xticks=rows, figsize=(12,2), legend=False, ax=axs[4])
df.plot(y=['RBSE'], title='RBSE', xticks=rows, figsize=(12,2), legend=False, ax=axs[5])
else:
print("\n\nNot enough data points to plot charts. There is only one row.")
In [11]:
part = dict(zip(gp.G.nodes(), gp.assignments))
values = [part.get(node) for node in gp.G.nodes()]
pos_spring = nx.spring_layout(gp.G, k=0.5, iterations=20)
pos = {}
with open(gp.POPULATION_LOCATION_FILE, 'r') as csvfile:
reader = csv.reader(csvfile, delimiter=',')
for node, row in enumerate(reader):
pos[node] = np.array(row).astype(np.float)
# create colours
cmap = utils.get_cmap(gp.num_partitions)
colours = np.zeros((gp.G.number_of_nodes(), 4), dtype=np.float)
for i,n in enumerate(gp.G.nodes_iter()):
if gp.assignments[n] == -1:
col = [(1.0, 1.0, 1.0, 1.0)]
else:
col = [cmap(gp.assignments[n])]
colours[i] = np.array(col, dtype=np.float)
plt.figure(figsize=(10, 10))
plt.axis('off')
nx.draw_networkx_nodes(gp.G, pos, node_size=20, node_color=colours, cmap=plt.cm.jet, with_labels=False)
nx.draw_networkx_edges(gp.G, pos, alpha=0.5)
plt.show(gp.G)
In [12]:
if run_metrics:
gp.get_metrics()