In [ ]:

    
import os
from shutil import copyfile
import subprocess
from save_embedded_graph27 import main_binary as embed_main
from spearmint_ghsom import main as ghsom_main
import numpy as np
import pickle
from time import time

def save_obj(obj, name):
    with open(name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    with open(name + '.pkl', 'rb') as f:
        return pickle.load(f)

#root dir
os.chdir("C:\Miniconda3\Jupyter\GHSOM_simplex_dsd")

#save directory
dir = os.path.abspath("parameter_tests")

#number of times to repeat
num_repeats = 10

#number of nodes in communitiy
s1 = 32

#number of links to same community
z1 = 16

#number of nodes in micro community
minc = s1
maxc = s1

#make save directory
if not os.path.isdir(dir):
    os.mkdir(dir)

#change to dir
os.chdir(dir)    

#network file names -- output of network generator
network = "network.dat"
first_level = "community.dat"

#community labels
labels = 'firstlevelcommunity'

#mixing parameter
z2 = 16

#node degree
k = z1 + z2
maxk = k

#mixing factors
mu = float(z2) / k 

num_communities = [3, 4, 5, 6]
parameter_settings = [0.5, 0.6, 0.7, 0.8, 0.9, 1][::-1]

overall_nmi_scores = np.zeros((len(num_communities), len(parameter_settings)))

for i in range(len(num_communities)):
# for k1 in num_communities:
    k1 = num_communities[i]
    
    #number of nodes in the network
    N = k1 * s1
    
    #create directory
    dir_string = os.path.join(dir, str(k1))
    if not os.path.isdir(dir_string):
        os.mkdir(dir_string)
    
    #change working directory    
    os.chdir(dir_string)
    
    for j in range(len(parameter_settings)):
#     for p in parameter_settings:
        
        p = parameter_settings[j]
        
        #ghsom parameters
        params = {'w': 0.0001,
                 'eta': 0.0001,
                 'sigma': 1,
                  'e_sg': p,
                 'e_en': 0.8}
    
        #create directory
        dir_string_p = os.path.join(dir_string, str(p))
        if not os.path.isdir(dir_string_p):
            os.mkdir(dir_string_p)
    
        #change working directory    
        os.chdir(dir_string_p)
        
        if os.path.isfile('nmi_scores.csv'):
            print 'already completed {}/{}, loading scores and continuing'.format(k1, p)
            nmi_scores = np.genfromtxt('nmi_scores.csv', delimiter=',')
            overall_nmi_scores[i,j] = np.mean(nmi_scores, axis=0)
            continue
        
        #copy executable
        ex = "benchmark.exe"   
        if not os.path.isfile(ex):

            source = "C:\\Users\\davem\\Documents\\PhD\\Benchmark Graph Generators\\binary_networks\\benchmark.exe"
            copyfile(source, ex)

        #make benchmark parameter file
        filename = "benchmark_flags_{}_{}.dat".format(k1,p)
        if not os.path.isfile(filename):
            with open(filename,"w") as f:
                f.write("-N {} -k {} -maxk {} -minc {} -maxc {} -mu {}".format(N, k, maxk, minc, maxc, mu))
            print 'written flag file: {}'.format(filename)
            
        #cmd strings
        change_dir_cmd = "cd {}".format(dir_string_p)
        generate_network_cmd = "benchmark -f {}".format(filename)

        #output of cmd
        output_file = open("cmd_output.out", 'w')

        #record NMI scores
        if not os.path.isfile('nmi_scores.pkl'):
            print 'creating new nmi scores array'
            nmi_scores = np.zeros(num_repeats)
        else:
            print 'loading nmi score progress'
            nmi_scores = load_obj('nmi_scores')

        #record running times
        if not os.path.isfile('running_times.pkl'):
            print 'creating new running time array'
            running_times = np.zeros(num_repeats)
        else:
            print 'loading running time progress'
            running_times = load_obj('running_times')
            
        print

        #generate networks
        for r in range(1, num_repeats+1):

            network_rename = "{}_{}".format(r,network)
            first_level_rename = "{}_{}".format(r,first_level)
            gml_filename = 'embedded_network_{}.gml'.format(r)

            if not os.path.isfile(network_rename):

                process = subprocess.Popen(change_dir_cmd + " && " + generate_network_cmd, 
                                        stdout=output_file, 
                                        stderr=output_file, 
                                        shell=True)
                process.wait()
                
                print 'generated graph {}'.format(r)

                os.rename(network, network_rename)
                os.rename(first_level, first_level_rename)
                
                print 'renamed graph {}'.format(r)

            if not os.path.isfile(gml_filename):

                ##embed graph
                embed_main(network_rename, first_level_rename)

                print 'embedded graph {} as {} in {}'.format(r, gml_filename, os.getcwd())

            ##score for this network
            if not np.all(nmi_scores[r-1]):

                start_time = time()

                print 'starting ghsom for: {}/{}/{}'.format(k1, p, gml_filename)
                nmi_score = ghsom_main(params, gml_filename, labels)
                nmi_scores[r-1] = nmi_score

                running_time = time() - start_time
                print 'running time of algorithm: {}'.format(running_time)
                running_times[r-1] = running_time

                #save
                save_obj(nmi_scores, 'nmi_scores')
                save_obj(running_times, 'running_times')

                print 'saved nmi score for network {}: {}'.format(gml_filename, nmi_score)
                print

        ##output nmi scores to csv file
        print 'writing nmi scores and running times to file'
        np.savetxt('nmi_scores.csv',nmi_scores,delimiter=',')
        np.savetxt('running_times.csv',running_times,delimiter=',')
        print
    
print 'DONE'

print 'OVERALL NMI SCORES'
print overall_nmi_scores









    



already completed 3/1, loading scores and continuing
already completed 3/0.9, loading scores and continuing
already completed 3/0.8, loading scores and continuing
loading nmi score progress
loading running time progress

starting ghsom for: 3/0.7/embedded_network_3.gml



In [3]:

    
for scores in overall_nmi_scores:
    
    print scores
    idx = np.argsort(scores)[::-1]
    
    print parameter_settings[idx[0]]









    



[ 0.35632148  0.33486922  0.35229303  0.22688871  0.11498475]
0.2
[ 0.66275255  0.60090522  0.50301436  0.49588778  0.09876066]
0.2
[ 0.75081969  0.73525978  0.71798402  0.51541049  0.36483149]
0.2
[ 0.74710549  0.73859407  0.62326257  0.46137132  0.27593319]
0.2
[ 0.78002338  0.70857     0.65989855  0.53043045  0.33119491]
0.2
[ 0.77031049  0.73689024  0.67103124  0.50923749  0.04067397]
0.2
[ 0.78066878  0.71794796  0.68473928  0.51168871  0.17288067]
0.2