In [3]:
import os
from shutil import copyfile
import subprocess
from spearmint_ghsom import main as ghsom_main
import numpy as np
import pickle
from time import time
import networkx as nx

def save_obj(obj, name):
    """Pickle ``obj`` into the file ``<name>.pkl``.

    Parameters
    ----------
    obj : any picklable object
        The value to serialise.
    name : str
        Path of the target file *without* the ``.pkl`` extension; the
        extension is appended here.

    The file is opened in binary write mode (an existing file is replaced)
    and the highest pickle protocol available to the interpreter is used.
    """
    target_path = name + '.pkl'
    with open(target_path, 'wb') as handle:
        pickler = pickle.Pickler(handle, pickle.HIGHEST_PROTOCOL)
        pickler.dump(obj)

def load_obj(name):
    """Return the object stored in the pickle file ``<name>.pkl``.

    Parameters
    ----------
    name : str
        Path of the file *without* the ``.pkl`` extension; the extension is
        appended here.

    Returns
    -------
    The unpickled object.
    """
    source_path = name + '.pkl'
    with open(source_path, 'rb') as handle:
        # Unpickling can execute arbitrary code, so only use this on files
        # from a trusted source.
        restored = pickle.Unpickler(handle).load()
    return restored

# ---------------------------------------------------------------------------
# Benchmark driver.
#
# For every real-world network below, ghsom_main is called num_repeats times
# on the network's embedded GML file.  The NMI score, the number of
# communities detected and the running time of each repeat are recorded.
#
# Progress is pickled after every repeat, so an interrupted run resumes where
# it stopped.  Once a network is finished its results are written to CSV;
# the presence of nmi_scores.csv marks the network as complete.
#
# NOTE: this cell changes the process working directory.  After it finishes
# the working directory is the folder of the last network processed.
# ---------------------------------------------------------------------------

#root dir -- raw string, so the Windows backslashes can never be read as
#escape sequences; defined once and reused when locating the source graphs
root_dir = r"C:\Miniconda3\Jupyter\GHSOM_simplex_dsd"
os.chdir(root_dir)

#save directory (called save_dir so that the builtin dir() is not shadowed)
save_dir = os.path.abspath("real_world_benchmarks_derived")

#number of times to repeat
num_repeats = 100

#make save directory
if not os.path.isdir(save_dir):
    os.mkdir(save_dir)

#change to save directory
os.chdir(save_dir)

#network names
network_names = ['karate','dolphin','polbooks','football']

#community labels (the label passed to ghsom_main for each network)
labels = ['club','group','value','value']

#coefficients of the linear rule e_sg = slope * density + intercept
#(noted in the original as coming "from scipy" -- presumably a fitted line;
#TODO confirm provenance)
E_SG_SLOPE = 0.377746404462
E_SG_INTERCEPT = 0.590217653032

#one row per network, one column per repeat
overall_nmi_scores = np.zeros((len(network_names), num_repeats))
overall_communities_detected = np.zeros((len(network_names), num_repeats))


def _load_or_init(name, length, create_message, resume_message):
    """Return the progress array pickled as ``<name>.pkl`` in the current
    working directory, or a fresh zero array of ``length`` entries when no
    such file exists.  The matching message is printed either way."""
    if os.path.isfile(name + '.pkl'):
        print(resume_message)
        return load_obj(name)
    print(create_message)
    return np.zeros(length)


for i, (network_name, label) in enumerate(zip(network_names, labels)):

    #create directory for this network
    dir_string = os.path.join(save_dir, network_name)
    if not os.path.isdir(dir_string):
        os.mkdir(dir_string)

    #change working directory: every file name below is relative to it
    os.chdir(dir_string)

    #copy embedded gml from the root directory if it is not here yet
    gml_filename = 'embedded_{}.gml'.format(network_name)
    if not os.path.isfile(gml_filename):
        copyfile(os.path.join(root_dir, gml_filename), gml_filename)

    ##calculate density and derive parameter setting
    #(done once per network; the original repeated this section verbatim)

    #load graph and calculate density
    G = nx.read_gml(gml_filename)
    density = nx.density(G)

    #derive parameter setting
    e_sg = E_SG_SLOPE * density + E_SG_INTERCEPT

    print('density of network={}'.format(density))
    print('e_sg={}'.format(e_sg))

    #a finished network has its CSV results on disk: load them and move on
    if os.path.isfile('nmi_scores.csv'):
        print('already completed {} network, loading nmi scores and continuing'.format(network_name))
        nmi_scores = np.genfromtxt('nmi_scores.csv', delimiter=',')
        overall_nmi_scores[i] = nmi_scores
        #the spelling 'communties' is kept on purpose: it is the name this
        #cell writes below, and existing results on disk use it
        communities_detected = np.genfromtxt('communties_detected.csv', delimiter=',')
        overall_communities_detected[i] = communities_detected
        continue

    #record NMI scores
    nmi_scores = _load_or_init('nmi_scores', num_repeats,
                               'creating new nmi scores array',
                               'loading nmi score progress')

    #record running times
    running_times = _load_or_init('running_times', num_repeats,
                                  'creating new running time array',
                                  'loading running time progress')

    #record communities detected
    communities_detected = _load_or_init('communities_detected', num_repeats,
                                         'creating new communites detected array',
                                         'loading communites detected progress')

    #ghsom parameters
    params = {'w': 0.0001,
         'eta': 0.0001,
         'sigma': 1,
         'e_sg': e_sg,
         'e_en': 0.8}

    #run the repeats that have not been completed yet
    for r in range(1, num_repeats + 1):

        #A repeat counts as done when it has a non-zero NMI score (the
        #original marker) or a positive recorded running time.  The second
        #test stops a genuine NMI of exactly 0 from being re-run on resume.
        if nmi_scores[r-1] != 0 or running_times[r-1] > 0:
            continue

        start_time = time()

        print('starting ghsom for: {}, repeat: {}'.format(gml_filename, r))
        #the meaning of the final argument (10000) is defined by
        #spearmint_ghsom.main, which is not visible in this notebook
        nmi_score, comm_det = ghsom_main(params, gml_filename, label, 10000)
        nmi_scores[r-1] = nmi_score
        communities_detected[r-1] = comm_det

        running_time = time() - start_time
        print('running time of algorithm: {}'.format(running_time))
        running_times[r-1] = running_time

        #save progress after every repeat so an interrupted run can resume
        save_obj(nmi_scores, 'nmi_scores')
        save_obj(running_times, 'running_times')
        save_obj(communities_detected, 'communities_detected')

        print('saved nmi score for network {}: {}'.format(gml_filename, nmi_score))
        print('saved communities detected for network {}: {}'.format(gml_filename, comm_det))
        print('')

    ##output nmi scores to csv file
    print('writing nmi scores and running times to file')
    np.savetxt('nmi_scores.csv',nmi_scores,delimiter=',')
    np.savetxt('running_times.csv',running_times,delimiter=',')
    np.savetxt('communties_detected.csv',communities_detected,delimiter=',')

    overall_nmi_scores[i] = nmi_scores
    overall_communities_detected[i] = communities_detected

print('DONE')

print('OVERALL NMI SCORES')
print(overall_nmi_scores)
print(overall_communities_detected)


density of network=0.139037433155
e_sg=0.642738543492
already completed karate network, loading nmi scores and continuing
density of network=0.0840824960338
e_sg=0.621979513587
already completed dolphin network, loading nmi scores and continuing
density of network=0.0807692307692
e_sg=0.620727939546
already completed polbooks network, loading nmi scores and continuing
density of network=0.0939740655988
e_sg=0.625716018425
already completed football network, loading nmi scores and continuing
DONE
OVERALL NMI SCORES
[[ 0.5294656   0.64121264  0.43852015  0.43852015  0.5294656   0.44880013
   0.46959898  0.43852015  0.4767809   0.5294656   0.44880013  0.5294656
   0.43852015  0.5294656   0.5294656   0.52460052  0.4767809   0.58033157
   0.44880013  0.5294656   0.43852015  0.52460052  0.44880013  0.59204037
   0.43852015  0.44880013  0.52003736  0.52460052  0.43852015  0.5294656
   0.52003736  0.46959898  0.5294656   0.58842383  0.62312666  0.47834623
   0.43852015  0.5294656   0.44880013  0.43852015  0.52141818  0.43852015
   0.43324696  0.5294656   0.52141818  0.43852015  0.52141818  0.44880013
   0.5294656   0.43324696  0.44880013  0.58033157  0.52003736  0.64121264
   0.5294656   0.59204037  0.52460052  0.52460052  0.58033157  0.43852015
   0.52141818  0.44880013  0.52460052  0.46959898  0.52460052  0.43852015
   0.43852015  0.62312666  0.43310466  0.52460052  0.58842383  0.43324696
   0.44880013  0.52141818  0.43852015  0.62312666  0.44880013  0.43852015
   0.5294656   0.43324696  0.44880013  0.5294656   0.43324696  0.43324696
   0.43324696  0.5294656   0.5294656   0.43852015  0.5294656   0.43852015
   0.5294656   0.43852015  0.44880013  0.64121264  0.52218299  0.58033157
   0.5294656   0.43852015  0.58842383  0.5294656 ]
 [ 0.49585313  0.49609041  0.49450469  0.4917876   0.63614901  0.48984984
   0.49609041  0.53135785  0.51999994  0.48984984  0.48984984  0.49788256
   0.49609041  0.48707936  0.48390454  0.48984984  0.50423703  0.49609041
   0.4865605   0.47649063  0.4865605   0.49609041  0.47675483  0.50563041
   0.48984984  0.4865605   0.47675483  0.4865605   0.5057058   0.49788256
   0.49609041  0.4865605   0.48390454  0.49585313  0.48390454  0.48984984
   0.48707936  0.47649063  0.4865605   0.48984984  0.51999994  0.4865605
   0.50423703  0.4917876   0.48390454  0.48984984  0.48390454  0.47649063
   0.48952268  0.49609041  0.48707936  0.49788256  0.4865605   0.47675483
   0.41803608  0.50423703  0.4865605   0.49609041  0.47675483  0.4917876
   0.50423703  0.50563041  0.48984984  0.48756107  0.4865605   0.49450469
   0.49609041  0.49609041  0.49609041  0.53533936  0.48984984  0.5057058
   0.50563041  0.53492331  0.55696111  0.47675483  0.4865605   0.49788256
   0.49609041  0.49585313  0.41784177  0.48984984  0.48548981  0.50423703
   0.47675483  0.5057058   0.49609041  0.49585313  0.47675483  0.41803608
   0.4865605   0.4865605   0.49992211  0.4917876   0.47675483  0.53533936
   0.48952268  0.49609041  0.49609041  0.4865605 ]
 [ 0.52008129  0.51076215  0.51076215  0.51076215  0.55233631  0.51312341
   0.52854272  0.50739484  0.52008129  0.51076215  0.51076215  0.51076215
   0.51076215  0.5198172   0.51009765  0.51076215  0.51076215  0.51076215
   0.51009765  0.52008129  0.51076215  0.51076215  0.51009765  0.51009765
   0.51076215  0.51009765  0.51009765  0.51076215  0.5155065   0.51076215
   0.51009765  0.55442909  0.51009765  0.5198172   0.5198172   0.51076215
   0.53003463  0.51009765  0.51076215  0.51312341  0.51009765  0.51312341
   0.51076215  0.51009765  0.56007797  0.51076215  0.51009765  0.51009765
   0.53746942  0.51009765  0.50739484  0.53746942  0.51813975  0.51076215
   0.51312341  0.54654507  0.51076215  0.51813975  0.51813975  0.51813975
   0.54834672  0.51076215  0.55583245  0.5198172   0.51076215  0.51076215
   0.56779484  0.55428333  0.51312341  0.50739484  0.51009765  0.51076215
   0.51009765  0.51076215  0.55703709  0.5385141   0.51076215  0.51009765
   0.51076215  0.51076215  0.51009765  0.51076215  0.51009765  0.51312341
   0.51009765  0.51009765  0.53746942  0.51076215  0.51009765  0.51009765
   0.51076215  0.56007797  0.5443955   0.51009765  0.51076215  0.52106516
   0.51076215  0.51009765  0.53683612  0.51076215]
 [ 0.74697188  0.74570757  0.74570757  0.73077253  0.74508974  0.72049412
   0.74749562  0.74540174  0.75120458  0.75120458  0.73099384  0.73183017
   0.73995116  0.7535908   0.73066913  0.74556709  0.73940396  0.74697188
   0.75211174  0.73258787  0.75211174  0.75864818  0.7167881   0.74629601
   0.73077253  0.72900981  0.7535814   0.75013003  0.73099384  0.72049412
   0.74749562  0.744242    0.70722108  0.70569521  0.74697188  0.7209483
   0.69404868  0.74749562  0.73077253  0.73066913  0.72378162  0.74697188
   0.7535908   0.73258787  0.70773246  0.73099384  0.73404276  0.70722108
   0.74472482  0.71868264  0.71806573  0.71751767  0.74325465  0.7185385
   0.74472482  0.70773246  0.74393581  0.73738924  0.70722108  0.74413533
   0.73258787  0.71751767  0.70722108  0.73404276  0.70821157  0.74749562
   0.73738924  0.74393581  0.73916499  0.74325465  0.73236701  0.74809465
   0.74592634  0.73258787  0.73275273  0.74413533  0.73258787  0.74556709
   0.70722108  0.73110737  0.74472482  0.72933692  0.71976677  0.76688797
   0.69797732  0.74697188  0.7251193   0.74103259  0.71806573  0.74483646
   0.71883809  0.74013123  0.7251193   0.74809465  0.74540174  0.72950091
   0.72042274  0.72049412  0.73077253  0.72636191]]
[[ 4.  3.  3.  3.  4.  3.  4.  3.  3.  4.  3.  4.  3.  4.  4.  3.  3.  4.
   3.  4.  3.  3.  3.  3.  3.  3.  3.  3.  3.  4.  3.  4.  4.  3.  3.  4.
   3.  4.  3.  3.  3.  3.  4.  4.  3.  3.  3.  3.  4.  4.  3.  4.  3.  3.
   4.  3.  3.  3.  4.  3.  3.  3.  3.  4.  3.  3.  3.  3.  3.  3.  3.  4.
   3.  3.  3.  3.  3.  3.  4.  4.  3.  4.  4.  4.  4.  4.  4.  3.  4.  3.
   4.  3.  3.  3.  4.  4.  4.  3.  3.  4.]
 [ 4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.
   4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.
   4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.
   4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.
   4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.
   4.  4.  4.  4.  4.  4.  4.  4.  4.  4.]
 [ 4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.
   4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.
   4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.
   4.  4.  4.  4.  4.  4.  3.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.
   4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.
   4.  4.  3.  4.  4.  4.  4.  4.  4.  4.]
 [ 6.  6.  6.  6.  7.  6.  6.  6.  6.  6.  6.  8.  7.  7.  6.  6.  6.  6.
   6.  6.  6.  6.  6.  6.  6.  6.  6.  7.  6.  6.  6.  6.  6.  6.  6.  6.
   7.  6.  6.  6.  7.  6.  7.  6.  6.  6.  6.  6.  6.  6.  6.  6.  6.  6.
   6.  6.  6.  7.  6.  6.  6.  6.  6.  6.  6.  6.  7.  6.  7.  6.  6.  6.
   6.  6.  6.  6.  6.  6.  6.  6.  6.  6.  6.  7.  7.  6.  6.  6.  6.  6.
   6.  6.  6.  6.  6.  6.  6.  6.  6.  6.]]

In [2]:
# Per-network summary of the repeats: first the mean NMI score, then the
# standard error of that mean, then a blank separator line.
# NOTE(review): ndarray.std() uses the population form (ddof=0) by default;
# confirm that this is the intended definition of the standard error.
root_repeats = np.sqrt(num_repeats)
for network_scores in overall_nmi_scores:
    print(network_scores.mean())
    print(network_scores.std() / root_repeats)
    print('')


0.499800072199
0.00590115523935

0.493281211109
0.00236444823087

0.518261769794
0.00145977599303

0.733839840555
0.00147853394021


In [3]:
import os

import networkx as nx

# Sanity check: compare networkx's density of the karate graph with the
# hand-written formula 2 * edges / (nodes * (nodes - 1)).
#
# The graph is read through an explicit path rather than the bare file name.
# The benchmark cell leaves the working directory inside the folder of the
# last network it processed, so a relative 'embedded_karate.gml' would not
# be found when the notebook is run top to bottom.
karate_gml = os.path.join(r"C:\Miniconda3\Jupyter\GHSOM_simplex_dsd",
                          'embedded_karate.gml')
G = nx.read_gml(karate_gml)

num_nodes = nx.number_of_nodes(G)
num_edges = nx.number_of_edges(G)

print(nx.density(G))
print(2.0 * num_edges / (num_nodes * (num_nodes - 1)))


0.139037433155
0.139037433155

In [ ]: