In [1]:
import os
import pandas as pd
import networkx as nx
from graph_partitioning import GraphPartitioning, utils

cols = ["WASTE", "CUT RATIO", "EDGES CUT", "TOTAL COMM VOLUME", "MODULARITY", "LONELINESS"]
pwd = %pwd

config = {

    "DATA_FILENAME": os.path.join(pwd, "data", "oneshot_fennel_weights.txt"),
    "OUTPUT_DIRECTORY": os.path.join(pwd, "output"),

    # Set which algorithm is run for the PREDICTION MODEL.
    # Either: 'FENNEL' or 'SCOTCH'
    "PREDICTION_MODEL_ALGORITHM": "FENNEL",

    # Alternatively, read the prediction model from an input file.
    # Set to empty to generate the prediction model using the algorithm value above.
    "PREDICTION_MODEL": "",

    
    "PARTITIONER_ALGORITHM": "FENNEL",

    # File containing simulated arrivals. This is used to simulate nodes
    # arriving at the shelter. Nodes are identified by line number; a value
    # of 1 means the node has arrived; a value of 0 means the node has not
    # arrived or does not need a shelter.
    "SIMULATED_ARRIVAL_FILE": os.path.join(pwd, "data", "simulated_arrival.txt"),

    # File containing the prediction of a node arriving. This is different to
    # the simulated arrivals: the values in this file are known before the disaster.
    "PREDICTION_LIST_FILE": os.path.join(pwd, "data", "prediction.txt"),

    # File containing the geographic location of each node, in "x,y" format.
    "POPULATION_LOCATION_FILE": os.path.join(pwd, "data", "population_location.csv"),

    # Number of shelters
    "num_partitions": 4,

    # Number of iterations used when building the prediction model
    "num_iterations": 10,

    # Fraction of the prediction model to use before discarding it.
    # When set to 0, the prediction model is discarded immediately, which is
    # useful for one-shot partitioning.
    "prediction_model_cut_off": 0.10,

    # Number of arrivals to batch before recalculating alpha and restreaming.
    "restream_batches": 100,

    # When the batch size is reached: if set to True, each node is assigned
    # individually, first in, first out. If set to False, the entire batch
    # is processed and emptied before the next batch is started.
    "sliding_window": False,

    # Create virtual nodes based on prediction model
    "use_virtual_nodes": False,

    # Virtual nodes: edge weight
    "virtual_edge_weight": 1.0,
    
    # Loneliness score parameter. Used when scoring a partition by how many
    # lonely nodes exist.
    "loneliness_score_param": 1.2,


    ####
    # GRAPH MODIFICATION FUNCTIONS

    # Also enables the edge calculation function.
    "graph_modification_functions": True,

    # If set, the node weight is set to 100 if the node arrives at the shelter,
    # otherwise the node is removed from the graph.
    "alter_arrived_node_weight_to_100": False,

    # Uses generalized additive models (GAMs) from R to predict whether
    # unarrived nodes will arrive. This sets the node weight on unarrived
    # nodes to the prediction given by a GAM.
    # Needs POPULATION_LOCATION_FILE to be set.
    "alter_node_weight_to_gam_prediction": False,

    # The value of 'k' used in the GAM is the number of arrived nodes,
    # capped at this maximum value.
    "gam_k_value": 100,

    # Alter the edge weight for nodes that haven't arrived. This is a way to
    # de-emphasise the prediction model for the unknown nodes.
    "prediction_model_emphasis": 1.0,
    
    
    # Path to the scotch shared library
    "SCOTCH_LIB_PATH": "/usr/local/lib/libscotch.so",

    "SCOTCH_PYLIB_REL_PATH": os.path.join(pwd, "..", "csap-graphpartitioning", "src", "python"),

    # Alters how much information to print. Keep it at 0 for this notebook.
    # 0 - will print nothing, useful for batch operations.
    # 1 - prints basic information on assignments and operations.
    # 2 - prints more information as it batches arrivals.
    "verbose": 0
}
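As an aside, the arrival-file format described above is simple enough to inspect by hand. A minimal sketch, assuming one 0/1 value per line with the line number identifying the node (illustrative only, not part of GraphPartitioning):

# Illustrative sketch: count arrivals in the simulated-arrival file,
# assuming one 0/1 value per line (line number = node id).
with open(config["SIMULATED_ARRIVAL_FILE"]) as f:
    arrived = [int(line) for line in f if line.strip()]
print("{} of {} nodes arrived".format(sum(arrived), len(arrived)))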

%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [2]:
test_dir = os.path.join(pwd, "data", "predition_model_tests")

print("\nProcessing...")
for i in range(1, 5):  # full run: range(1, 1001)
    network_file = "network_{}.txt".format(i)
    arrival_file = "arrival_0_{}.txt".format(i)
    coordinates_file = "coordinates_{}.txt".format(i)
    prediction_file = "prediction_{}.txt".format(i)

    print(".", end='', flush=True)
    #print("\nProcessing...",
    #      "\n\tnetwork file:", network_file,
    #      "\n\tarrival file:", arrival_file,
    #      "\n\tcoordinates file:", coordinates_file)

    config["DATA_FILENAME"] = os.path.join(test_dir,
                                           "network",
                                           network_file)

    config["SIMULATED_ARRIVAL_FILE"] = os.path.join(test_dir,
                                                    "dataset_1_shift_rotate",
                                                    "simulated_arrival_list",
                                                    "percentage_of_prediction_correct_0",
                                                    arrival_file)

    config["POPULATION_LOCATION_FILE"] = os.path.join(test_dir,
                                                      "coordinates",
                                                      coordinates_file)

    config["PREDICTION_LIST_FILE"] = os.path.join(test_dir,
                                                  "dataset_1_shift_rotate",
                                                  "prediction_list",
                                                  prediction_file)

    gp = GraphPartitioning(config)
    gp.load_network()
    gp.init_partitioner()
    gp.prediction_model()
    gp.assign_cut_off()

    # run simulation
    gp.batch_arrival()

    gp.get_metrics()

print("\nComplete!")


Processing...
....
Complete!

In [3]:
def wavg(group, avg_name, weight_name):
    """
    Weighted average of column `avg_name`, weighted by `weight_name`.
    """
    d = group[avg_name]
    w = group[weight_name]
    # pandas returns inf/NaN on division by zero rather than raising
    # ZeroDivisionError, so guard against a zero total weight explicitly.
    if w.sum() == 0:
        return None
    return (d * w).sum() / w.sum()
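A quick check on a toy frame illustrates the behaviour, including the zero-weight case (illustrative only, not part of the analysis below):

# Weight 'score' by 'population' within each file group.
toy = pd.DataFrame({
    "file": ["a", "a", "b"],
    "score": [1.0, 3.0, 5.0],
    "population": [10, 30, 0],
})
print(toy.groupby("file").apply(wavg, "score", "population"))
# a -> (1*10 + 3*30) / 40 = 2.5; b -> None (zero total weight)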

In [4]:
metrics = pd.read_csv(os.path.join(config['OUTPUT_DIRECTORY'], 'metrics.csv'))
nonover = pd.read_csv(os.path.join(config['OUTPUT_DIRECTORY'], 'metrics-partitions-nonoverlapping.csv'))
over = pd.read_csv(os.path.join(config['OUTPUT_DIRECTORY'], 'metrics-partitions-overlapping.csv'))

metrics.set_index(['file'], inplace=True)

# get weighted averages.
# use population of the partition as the weight

data = {
    'loneliness_score (wavg)':  nonover.groupby("file").apply(wavg, "loneliness_score", "population"),
    'network_permanence (wavg)': nonover.groupby("file").apply(wavg, "network_permanence", "population"),
}
nonover_wavg = pd.DataFrame.from_dict(data)

# The same weighted average for every overlapping-partition metric,
# driven off a column list instead of twelve near-identical lines.
over_cols = ["Q", "NQ", "Qds", "intraEdges", "interEdges", "intraDensity",
             "modularity degree", "conductance", "expansion", "contraction",
             "fitness", "QovL"]
data = {"{} (wavg)".format(c): over.groupby("file").apply(wavg, c, "population")
        for c in over_cols}
over_wavg = pd.DataFrame.from_dict(data)

# compile a single dataframe with all metrics and write to csv file
cols = ['num_partitions', 'num_iterations', 'prediction_model_cut_off',
        'one_shot_alpha', 'restream_batches', 'use_virtual_nodes',
        'virtual_edge_weight', 'edges_cut', 'waste',
        'cut_ratio', 'total_communication_volume',
        'network_permanence', 'network_permanence (wavg)',
        'loneliness_score (wavg)',
        'Q', 'Q (wavg)',
        'NQ', 'NQ (wavg)', 
        'Qds', 'Qds (wavg)',
        'intraEdges', 'intraEdges (wavg)',
        'interEdges', 'interEdges (wavg)',
        'intraDensity', 'intraDensity (wavg)',
        'modularity degree', 'modularity degree (wavg)',
        'conductance', 'conductance (wavg)',
        'expansion', 'expansion (wavg)',
        'contraction', 'contraction (wavg)',
        'fitness', 'fitness (wavg)',
        'QovL', 'QovL (wavg)'
       ]

combined_metrics = metrics.join(nonover_wavg).join(over_wavg)[cols]
combined_csv = os.path.join(config['OUTPUT_DIRECTORY'], 'metrics-combined.csv')
combined_metrics.to_csv(combined_csv)
print("Combined metrics saved to", combined_csv)


Combined metrics saved to /home/sami/repos/smbwebs/graph-partitioning/output/metrics-combined.csv
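As a quick sanity check (not part of the pipeline), the combined CSV can be read straight back to confirm it round-trips:

# Reload the combined CSV; the index column is 'file'.
check = pd.read_csv(combined_csv, index_col="file")
print(check.shape)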

In [ ]: