In [1]:
import os
from shutil import copyfile
import subprocess
from save_embedded_graph27 import main_hierarchical as embed_main
from spearmint_ghsom import main as ghsom_main
import numpy as np
import pickle
from time import time

def save_obj(obj, name):
    """Serialize `obj` to the file '<name>.pkl' using the highest pickle protocol."""
    path = name + '.pkl'
    with open(path, 'wb') as out_file:
        pickle.dump(obj, out_file, pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    """Load and return the pickled object stored in '<name>.pkl'."""
    path = name + '.pkl'
    with open(path, 'rb') as in_file:
        return pickle.load(in_file)

#root dir
# NOTE(review): unescaped backslashes in a Windows path literal; none of them
# happen to form a recognised escape in a Python 2 byte string, but a raw
# string r"..." would be safer and is required for Python 3.
os.chdir("C:\Miniconda3\Jupyter\GHSOM_simplex_dsd")

#save directory (NOTE: `dir` shadows the builtin dir())
dir = os.path.abspath("synthetic_benchmarks")

#number of benchmark networks generated per mixing-factor setting
num_repeats = 100

#number of micro communities (unused below; consistent with s2 * k2)
k1 = 16
#number of macro communities
k2 = 4
#number of nodes in a micro community
s1 = 32
#number of micro communities per macro community (macro size = s1 * s2)
s2 = 4
#number of nodes in the network
N = s1 * s2 * k2
#number of links to same micro community
z1 = 16
#number of links to same macro community
z2 = 16
#min/max number of nodes in a micro community (fixed-size communities)
minc = s1
maxc = s1
#min/max number of nodes in a macro community (fixed-size communities)
minC = s1 * s2
maxC = s1 * s2

#make save directory
if not os.path.isdir(dir):
    os.mkdir(dir)

#change to dir
os.chdir(dir)    

#network file names -- fixed output names of the benchmark generator;
#later cells reuse `first_level`/`second_level`, so beware of shadowing
network = "network.dat"
first_level = "community_first_level.dat"
second_level = "community_second_level.dat"

#ground-truth community label attributes embedded in the gml files,
#comma-separated, one per hierarchy level
labels = 'firstlevelcommunity,secondlevelcommunity'

#ghsom parameters -- presumably learning/neighbourhood rates and the
#e_sg/e_en growth thresholds; TODO confirm against spearmint_ghsom.main
params = {'w': 0.0001,
         'eta': 0.0001,
         'sigma': 1,
         'e_sg': 0.8,
         'e_en': 0.8}

#values of the external-link count z3 to sweep over
mixing_factors = [16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36]

#NMI score per (mixing factor, repeat, community level)
overall_nmi_scores = np.zeros((len(mixing_factors), num_repeats, 2))

for i in range(len(mixing_factors)):
    
    z3 = mixing_factors[i]
    
    #node degree
    k = z1 + z2 + z3
    maxk = k
    
    #mixing factors
    mu1 = float(z3) / k
    mu2 = float(z2) / k 
    
    #create directory
    dir_string = os.path.join(dir, str(z3))
    if not os.path.isdir(dir_string):
        os.mkdir(dir_string)
    
    #change working directory    
    os.chdir(dir_string)
    
    if os.path.isfile('nmi_scores.csv'):
        print 'already completed {}, loading nmi scores and continuing'.format(z3)
        nmi_scores = np.genfromtxt('nmi_scores.csv', delimiter=',')
        overall_nmi_scores[i] = nmi_scores
        continue
    
    #copy executable
    ex = "hbenchmark.exe"   
    if not os.path.isfile(ex):
        
        source = "C:\Users\davem\Documents\PhD\Benchmark Graph Generators\hierarchical_bench2_2\hbenchmark.exe"
        copyfile(source, ex)
        
    #make benchmark parameter file
    filename = "benchmark_flags_{}.dat".format(z3)
    if not os.path.isfile(filename):
        with open(filename,"w") as f:
            f.write("-N {} -k {} -maxk {} -minc {} -maxc {} -minC {} -maxC {} -mu1 {} -mu2 {}".format(N, k, maxk, minc, maxc, minC, maxC, mu1, mu2))
    
    #cmd strings
    change_dir_cmd = "cd {}".format(dir_string)
    generate_network_cmd = "hbenchmark -f {}".format(filename)
    
    #output of cmd
    output_file = open("cmd_output.out", 'w')
    
    #record NMI scores
    if not os.path.isfile('nmi_scores.pkl'):
        print 'creating new nmi scores array'
        nmi_scores = np.zeros((num_repeats, len(labels.split(','))))
    else:
        print 'loading nmi score progress'
        nmi_scores = load_obj('nmi_scores')
        
    #record running times
    if not os.path.isfile('running_times.pkl'):
        print 'creating new running time array'
        running_times = np.zeros(num_repeats)
    else:
        print 'loading running time progress'
        running_times = load_obj('running_times')
    
    #generate networks
    for r in range(1, num_repeats+1):
        
        network_rename = "{}_{}".format(r,network)
        first_level_rename = "{}_{}".format(r,first_level)
        second_level_rename = "{}_{}".format(r,second_level)
        gml_filename = 'embedded_network_{}.gml'.format(r)
        
        if not os.path.isfile(network_rename):
        
            process = subprocess.Popen(change_dir_cmd + " && " + generate_network_cmd, 
                                    stdout=output_file, 
                                    stderr=output_file, 
                                    shell=True)
            process.wait()

            os.rename(network, network_rename)
            os.rename(first_level, first_level_rename)
            os.rename(second_level, second_level_rename)
            
        if not os.path.isfile(gml_filename):
            
            ##embed graph
            embed_main(network_rename, first_level_rename, second_level_rename)
            
            print 'created {} in {}'.format(gml_filename, os.getcwd())
            
        ##score for this network
        if not np.all(nmi_scores[r-1]):
            
            start_time = time()
            
            print 'starting ghsom for: {}/{}'.format(z3, gml_filename)
            nmi_score, communities_detected = ghsom_main(params, gml_filename, labels, 10000)
            nmi_scores[r-1] = nmi_score
            
            running_time = time() - start_time
            print 'running time of algorithm: {}'.format(running_time)
            running_times[r-1] = running_time
            
            #save
            save_obj(nmi_scores, 'nmi_scores')
            save_obj(running_times, 'running_times')
            
            print 'saved nmi score for network {}: {}'.format(gml_filename, nmi_score)
            print
            
    ##output nmi scores to csv file
    print 'writing nmi scores and running times to file'
    np.savetxt('nmi_scores.csv',nmi_scores,delimiter=',')
    np.savetxt('running_times.csv',running_times,delimiter=',')
    
print 'DONE'

print 'OVERALL NMI SCORES'
print overall_nmi_scores


already completed 16, loading nmi scores and continuing
already completed 18, loading nmi scores and continuing
already completed 20, loading nmi scores and continuing
already completed 22, loading nmi scores and continuing
already completed 24, loading nmi scores and continuing
already completed 26, loading nmi scores and continuing
already completed 28, loading nmi scores and continuing
already completed 30, loading nmi scores and continuing
already completed 32, loading nmi scores and continuing
already completed 34, loading nmi scores and continuing
already completed 36, loading nmi scores and continuing
DONE
OVERALL NMI SCORES
[[[ 0.99175515  0.97777752]
  [ 0.99175515  0.9808856 ]
  [ 0.99175515  0.98210264]
  ..., 
  [ 1.          0.99056249]
  [ 0.99175515  0.98427047]
  [ 0.99175515  0.98741652]]

 [[ 1.          0.98047967]
  [ 0.99175515  0.98428166]
  [ 0.99175515  0.98741652]
  ..., 
  [ 0.99175515  0.98746914]
  [ 1.          0.98588353]
  [ 0.99175515  0.9905736 ]]

 [[ 0.99175515  0.98443377]
  [ 0.99175515  0.99058472]
  [ 0.99175515  0.98428165]
  ..., 
  [ 0.99175515  0.98741652]
  [ 0.99175515  0.98275361]
  [ 0.99175515  0.98901346]]

 ..., 
 [[ 0.99175515  0.98588354]
  [ 0.99175515  0.97653324]
  [ 0.99175515  0.98428165]
  ..., 
  [ 0.99175515  0.98427047]
  [ 0.99175515  0.98742766]
  [ 0.99175515  0.98902466]]

 [[ 0.99175515  0.99215448]
  [ 0.99175515  0.98587237]
  [ 0.99175515  0.98588353]
  ..., 
  [ 1.          0.98741652]
  [ 0.99175515  0.98902459]
  [ 0.99175515  0.98902466]]

 [[ 0.99175515  0.98427047]
  [ 1.          0.99056249]
  [ 0.94331312  0.98132673]
  ..., 
  [ 0.87382722  0.90503445]
  [ 0.93205266  0.85479979]
  [ 0.93310021  0.88711499]]]

In [13]:
import matplotlib.pyplot as plt
import numpy as np


os.chdir("C:\Miniconda3\Jupyter\GHSOM_simplex_dsd")

first_level = np.genfromtxt('first_level.csv',delimiter=',')
first_level = first_level[1:]
first_level[:,0] = np.rint(first_level[:,0])

first_level_m = np.zeros((len(first_level) / 3, 4))
for i in range(len(first_level) / 3):
    first_level_m[i] = np.mean(first_level[3*i:3*i+2], axis=0)
first_level_m[first_level_m > 1] = 1

second_level = np.genfromtxt('second_level.csv',delimiter=',')
second_level = second_level[1:]
second_level[:,0] = np.rint(second_level[:,0])

second_level_m = np.zeros((len(second_level) / 3, 4))
for i in range(len(second_level) / 3):
    second_level_m[i] = np.mean(second_level[3*i:3*i+2], axis=0)
second_level_m[second_level_m > 1] = 1

means = np.zeros((len(mixing_factors), 2))
ses = np.zeros((len(mixing_factors), 2))

for i in range(len(mixing_factors)):
# for score in overall_nmi_scores:
    score = overall_nmi_scores[i]
    m = np.mean(score, axis=0)
    means[i] = m
    print m
    sd = np.std(score, axis=0)
    se = sd / np.sqrt(num_repeats)
    ses[i] = se
    print se
    print

plt.errorbar(mixing_factors, means[:, 0], yerr=ses[:, 0], fmt='-o')
plt.errorbar(mixing_factors, means[:, 1], yerr=ses[:, 1], fmt='-o')
plt.axis([16, 36, 0.95, 1])
plt.legend(['First level','Second level'], loc=3)
plt.xlabel('Mixing parameter $z_3$')
plt.ylabel('Normalized mutual information')

plt.show()

##first level
plt.errorbar(mixing_factors, means[:, 0], yerr=ses[:, 0], fmt='-o')
plt.plot(mixing_factors, first_level_m[:, 1], '-rx')
plt.plot(mixing_factors, first_level_m[:, 2], '-gx')
plt.plot(mixing_factors, first_level_m[:, 3], '-bx')
plt.axis([16, 36, 0.4, 1])
plt.legend(['FM', 'FUC', 'PMC', 'GHSOM'], loc=3)
plt.xlabel('Mixing parameter $z_3$')
plt.ylabel('Normalized mutual information')
plt.title('First level')
# plt.title('mixing factor vs. NMI score for both levels of community')

plt.show()

#second level
plt.errorbar(mixing_factors, means[:, 1], yerr=ses[:, 0], fmt='-o')
plt.plot(mixing_factors, second_level_m[:, 1], '-rx')
plt.plot(mixing_factors, second_level_m[:, 2], '-gx')
plt.plot(mixing_factors, second_level_m[:, 3], '-bx')
plt.axis([16, 36, 0.4, 1])
plt.legend(['FM', 'FUC', 'PMC', 'GHSOM'], loc=3)
plt.xlabel('Mixing parameter $z_3$')
plt.ylabel('Normalized mutual information')
plt.title('Second level')



plt.show()


[ 0.99356902  0.97989865]
[ 0.00034154  0.0007498 ]

[ 0.99348657  0.98440784]
[ 0.00033582  0.00054881]

[ 0.99290943  0.98582934]
[ 0.00028609  0.00041675]

[ 0.99247662  0.97924011]
[ 0.00131861  0.00226556]

[ 0.9902869   0.97872596]
[ 0.00193363  0.00244965]

[ 0.99113687  0.96862301]
[ 0.00182047  0.00365033]

[ 0.99214682  0.98020752]
[ 0.00130966  0.00211052]

[ 0.9906281  0.9825737]
[ 0.00193497  0.00209826]

[ 0.993162    0.98497286]
[ 0.00061329  0.00102487]

[ 0.98849682  0.98630473]
[ 0.002418    0.00071384]

[ 0.9772096   0.97276342]
[ 0.00355746  0.00338161]


In [17]:
import numpy as np

first_level = np.genfromtxt('first_level.csv',delimiter=',')
first_level = first_level[1:]
first_level[:,0] = np.rint(first_level[:,0])

print first_level

first_level_m = np.zeros((len(first_level) / 3, 4))
for i in range(len(first_level) / 3):
    first_level_m[i] = np.mean(first_level[3*i:3*i+2], axis=0)
first_level_m[first_level_m > 1] = 1
print first_level_m


[[ 16.        0.89251   1.00327   0.98537]
 [ 16.        0.89254   1.00326   0.98537]
 [ 16.        0.8927    1.0032    0.98534]
 [ 18.        0.89902   1.00162   0.98224]
 [ 18.        0.89903   1.00163   0.98222]
 [ 18.        0.89906   1.00165   0.98208]
 [ 20.        0.89577   1.00331   0.97395]
 [ 20.        0.89575   1.0033    0.97394]
 [ 20.        0.89549   1.00326   0.97379]
 [ 22.        0.88925   1.00001   0.97072]
 [ 22.        0.88924   1.00001   0.97068]
 [ 22.        0.88914   1.        0.97047]
 [ 24.        0.89251   1.00315   0.95344]
 [ 24.        0.89317   1.00325   0.95277]
 [ 24.        0.8932    1.00326   0.95274]
 [ 26.        0.90724   1.00163   0.93914]
 [ 26.        0.90717   1.00156   0.93886]
 [ 26.        0.90689   1.00139   0.93811]
 [ 28.        0.89251   0.9999    0.91748]
 [ 28.        0.89267   1.        0.91715]
 [ 28.        0.89281   1.00007   0.91694]
 [ 30.        0.89202   1.00041   0.91205]
 [ 30.        0.89088   1.00008   0.9119 ]
 [ 30.        0.89061   1.        0.91186]
 [ 32.        0.7564    0.95278   0.87134]
 [ 32.        0.75407   0.9517    0.86999]
 [ 32.        0.75288   0.95114   0.86929]
 [ 34.        0.63518   0.87781   0.76704]
 [ 34.        0.63323   0.87645   0.76547]
 [ 34.        0.6329    0.87622   0.76521]
 [ 36.        0.58143   0.85029   0.76561]
 [ 36.        0.58103   0.85016   0.76594]
 [ 36.        0.57965   0.84974   0.7671 ]]
[[ 1.        0.892525  1.        0.98537 ]
 [ 1.        0.899025  1.        0.98223 ]
 [ 1.        0.89576   1.        0.973945]
 [ 1.        0.889245  1.        0.9707  ]
 [ 1.        0.89284   1.        0.953105]
 [ 1.        0.907205  1.        0.939   ]
 [ 1.        0.89259   0.99995   0.917315]
 [ 1.        0.89145   1.        0.911975]
 [ 1.        0.755235  0.95224   0.870665]
 [ 1.        0.634205  0.87713   0.766255]
 [ 1.        0.58123   0.850225  0.765775]]

In [22]:
for score in overall_nmi_scores:
    m = np.mean(score, axis=0)
    print m
    sd = np.std(score, axis=0)
    se = sd / np.sqrt(100)
    print se
    print


[ 0.99356902  0.97989865]
[ 0.00034154  0.0007498 ]

[ 0.99348657  0.98440784]
[ 0.00033582  0.00054881]

[ 0.99290943  0.98582934]
[ 0.00028609  0.00041675]

[ 0.99247662  0.97924011]
[ 0.00131861  0.00226556]

[ 0.9902869   0.97872596]
[ 0.00193363  0.00244965]

[ 0.99113687  0.96862301]
[ 0.00182047  0.00365033]

[ 0.99214682  0.98020752]
[ 0.00130966  0.00211052]

[ 0.9906281  0.9825737]
[ 0.00193497  0.00209826]

[ 0.993162    0.98497286]
[ 0.00061329  0.00102487]

[ 0.98849682  0.98630473]
[ 0.002418    0.00071384]

[ 0.9772096   0.97276342]
[ 0.00355746  0.00338161]


In [2]:
os.chdir("C:\Miniconda3\Jupyter\GHSOM_simplex_dsd")
gml_filename = 'embedded_network_69.gml'
nmi_scores, num_communities = ghsom_main(params, gml_filename, labels, 10000)
print nmi_scores


[ 0.99175515  0.9812816 ]

In [ ]: