In [9]:
# Benchmark input: a d x d array of uniform random floats in [0, 1).
# With d = 10000 this is 10**8 float64 values (roughly 800 MB of memory).
# NOTE: no random seed is set, so the values differ between kernel runs.
import numpy as np
d = 10000
a = np.random.rand(d, d)

In [10]:
%%time
# Element-wise square of `a`, written as explicit nested Python loops and
# timed by the %%time cell magic on the line above.
# NOTE(review): `a ** 2` (or `np.square(a)`) produces the same array in
# vectorised form; this loop version looks like a deliberate timing
# baseline -- confirm before replacing it.
b = np.zeros((d, d))

for i in range(d):
    for j in range(d):
        b[i,j] = a[i,j] **2


CPU times: user 1min 18s, sys: 516 ms, total: 1min 18s
Wall time: 1min 18s

In [ ]:
# Preview: every row of b, first 10 columns (NumPy abbreviates the repr).
b[:,:10]


Out[ ]:
array([[  6.82961196e-01,   4.31189329e-01,   5.08442146e-04, ...,
          5.32621899e-01,   6.79329769e-02,   9.15521458e-02],
       [  9.39742860e-01,   2.60077525e-01,   8.66301350e-02, ...,
          4.86237149e-01,   2.33623552e-02,   1.23503827e-03],
       [  2.21616708e-01,   3.61749788e-01,   1.50717371e-01, ...,
          2.12207188e-02,   4.65212627e-04,   2.43299309e-01],
       ..., 
       [  1.63550148e-01,   4.93263561e-02,   3.60081757e-01, ...,
          2.36083817e-02,   3.68516362e-01,   6.43844836e-01],
       [  9.60720724e-01,   3.59559677e-02,   1.26460849e-06, ...,
          1.26616110e-01,   3.48504545e-01,   9.87195717e-01],
       [  4.38418989e-01,   1.45910525e-01,   7.47248093e-01, ...,
          1.30751385e-01,   6.21159762e-01,   8.75461290e-01]])

In [ ]:
%%time
# Same element-wise square as a nested list comprehension: `i` iterates over
# the rows of `a`, `j` over the elements of each row. The full nested Python
# list is built first and only then converted to an array.
c = np.array([[j ** 2 for j in i] for i in a])


CPU times: user 54.3 s, sys: 2.05 s, total: 56.3 s
Wall time: 1min 30s

In [ ]:
# Preview: every row of c, first 10 columns (NumPy abbreviates the repr).
c[:,:10]


Out[ ]:
array([[  6.82961196e-01,   4.31189329e-01,   5.08442146e-04, ...,
          5.32621899e-01,   6.79329769e-02,   9.15521458e-02],
       [  9.39742860e-01,   2.60077525e-01,   8.66301350e-02, ...,
          4.86237149e-01,   2.33623552e-02,   1.23503827e-03],
       [  2.21616708e-01,   3.61749788e-01,   1.50717371e-01, ...,
          2.12207188e-02,   4.65212627e-04,   2.43299309e-01],
       ..., 
       [  1.63550148e-01,   4.93263561e-02,   3.60081757e-01, ...,
          2.36083817e-02,   3.68516362e-01,   6.43844836e-01],
       [  9.60720724e-01,   3.59559677e-02,   1.26460849e-06, ...,
          1.26616110e-01,   3.48504545e-01,   9.87195717e-01],
       [  4.38418989e-01,   1.45910525e-01,   7.47248093e-01, ...,
          1.30751385e-01,   6.21159762e-01,   8.75461290e-01]])

In [1]:
import networkx as nx

# Build the Florentine families graph that ships with networkx.
G = nx.florentine_families_graph()

In [3]:
# List the nodes together with their attribute dicts.
G.nodes(data=True)


Out[3]:
[('Strozzi', {}),
 ('Tornabuoni', {}),
 ('Medici', {}),
 ('Albizzi', {}),
 ('Guadagni', {}),
 ('Pazzi', {}),
 ('Acciaiuoli', {}),
 ('Bischeri', {}),
 ('Peruzzi', {}),
 ('Ginori', {}),
 ('Salviati', {}),
 ('Castellani', {}),
 ('Lamberteschi', {}),
 ('Ridolfi', {}),
 ('Barbadori', {})]

In [4]:
# List the edges as (node, node) pairs.
G.edges()


Out[4]:
[('Strozzi', 'Ridolfi'),
 ('Strozzi', 'Castellani'),
 ('Strozzi', 'Bischeri'),
 ('Strozzi', 'Peruzzi'),
 ('Tornabuoni', 'Guadagni'),
 ('Tornabuoni', 'Ridolfi'),
 ('Tornabuoni', 'Medici'),
 ('Medici', 'Barbadori'),
 ('Medici', 'Albizzi'),
 ('Medici', 'Acciaiuoli'),
 ('Medici', 'Salviati'),
 ('Medici', 'Ridolfi'),
 ('Albizzi', 'Guadagni'),
 ('Albizzi', 'Ginori'),
 ('Guadagni', 'Bischeri'),
 ('Guadagni', 'Lamberteschi'),
 ('Pazzi', 'Salviati'),
 ('Bischeri', 'Peruzzi'),
 ('Peruzzi', 'Castellani'),
 ('Castellani', 'Barbadori')]

In [8]:
import os
import networkx as nx
import numpy as np
from spearmint_ghsom import main_no_labels as ghsom_main
import pickle
import shutil

def save_obj(obj, name):
    """Pickle ``obj`` into the file ``name + '.pkl'``.

    The file is opened in binary write mode (an existing file with that name
    is overwritten) and the highest available pickle protocol is used.
    """
    target_path = name + '.pkl'
    with open(target_path, 'wb') as handle:
        pickle.dump(obj, handle, pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    """Return the object unpickled from the file ``name + '.pkl'``.

    Unpickling can execute arbitrary code, so only call this on files that
    were produced locally (for example by ``save_obj``), never on untrusted
    input.
    """
    source_path = name + '.pkl'
    with open(source_path, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded
    
# Driver script: for each e_sg value, run GHSOM (or load its cached result)
# and export the detected communities into a per-run directory under root_dir.
# NOTE(review): root_dir is an absolute, machine-specific path -- adjust it (or
# make it configurable) before running this cell elsewhere.
root_dir = "/home/david/Documents/ghsom"

data = "florentine_families"
init = 1

# The loop below changes the working directory several times. Remember where
# the kernel started and go back there afterwards, so that later cells which
# use relative paths are not silently affected by this one.
start_dir = os.getcwd()

try:
    # e_sg values from 0.9 down to 0.1.
    # NOTE: np.arange with a float step can yield values such as
    # 0.30000000000000004; the file names below depend on how that value is
    # formatted by the running Python version.
    for p in np.arange(0.1, 1, 0.1)[::-1]:

        print("p={}".format(p))

        # Cache files, the input .gml and the output directories are all
        # addressed relative to root_dir.
        os.chdir(root_dir)

        #ghsom parameters
        params = {'w': 0.0001,
                  'eta': 0.001,
                  'sigma': 1,
                  'e_sg': p,
                  'e_en': 10}

        # NOTE(review): unlike dir_name below, the cache name has no
        # placeholder for init, so runs with different init values share one
        # cache file. Left unchanged so existing .pkl caches are still found.
        map_file = '{}_communities_{}'.format(data, p)

        if not os.path.isfile("{}.pkl".format(map_file)):

            #run ghsom and save output
            print("running GHSOM and saving to {}.pkl".format(map_file))
            G, ghsom_map = ghsom_main(params, 'embedded_{}.gml'.format(data), init=init, lam=1000)
            print('\nnumber of communities detected: {}, saved map to {}'.format(len(ghsom_map), map_file))
            save_obj((G, ghsom_map), map_file)

        else:

            print("{}.pkl already exists, loading map".format(map_file))
            #load output
            G, ghsom_map = load_obj(map_file)

        #save results to file; any previous output directory is replaced
        dir_name = "{}_communities_{}_{}".format(data, p, init)
        if os.path.isdir(dir_name):
            shutil.rmtree(dir_name)
            print("deleted directory {}".format(dir_name))

        os.mkdir(dir_name)
        print('made directory {}'.format(dir_name))

        os.chdir(dir_name)
        print("moved to {}".format(dir_name))

        #all nodes of the input graph, one per line
        all_genes_file = "all_families.txt"
        with open(all_genes_file, 'w') as f:
            for n in G.nodes():
                f.write("{}\n".format(n))
        print("written {}".format(all_genes_file))

        #save shortest path matrix between the nodes of the map
        # The builtin int replaces the deprecated alias np.int (same dtype).
        # NOTE(review): assumes the map is connected -- unreachable pairs
        # would be infinite and not representable as integers; confirm.
        shortest_path = nx.floyd_warshall_numpy(ghsom_map).astype(int)
        np.savetxt("shortest_path.csv", shortest_path, fmt='%i', delimiter=",")
        print('written shortest path matrix')

        #save communities to file: one file per map node, listing the
        #entries of that node's 'ls' attribute. The loop names are chosen so
        #they do not overwrite the `c` and `d` used by earlier cells.
        for index, (node_id, attrs) in enumerate(ghsom_map.nodes(data=True)):
            community_file = 'community_{}.txt'.format(index)
            with open(community_file, 'w') as f:
                for member in attrs['ls']:
                    f.write('{}\n'.format(member))
            print('written {}'.format(community_file))
        print("")
finally:
    os.chdir(start_dir)


p=0.9
florentine_families_communities_0.9.pkl already exists, loading map
deleted directory florentine_families_communities_0.9_1
made directory florentine_families_communities_0.9_1
moved to florentine_families_communities_0.9_1
written all_families.txt
written shortest path matrix
written community_0.txt
written community_1.txt

p=0.8
florentine_families_communities_0.8.pkl already exists, loading map
deleted directory florentine_families_communities_0.8_1
made directory florentine_families_communities_0.8_1
moved to florentine_families_communities_0.8_1
written all_families.txt
written shortest path matrix
written community_0.txt
written community_1.txt
written community_2.txt

p=0.7
florentine_families_communities_0.7.pkl already exists, loading map
deleted directory florentine_families_communities_0.7_1
made directory florentine_families_communities_0.7_1
moved to florentine_families_communities_0.7_1
written all_families.txt
written shortest path matrix
written community_0.txt
written community_1.txt
written community_2.txt

p=0.6
florentine_families_communities_0.6.pkl already exists, loading map
deleted directory florentine_families_communities_0.6_1
made directory florentine_families_communities_0.6_1
moved to florentine_families_communities_0.6_1
written all_families.txt
written shortest path matrix
written community_0.txt
written community_1.txt
written community_2.txt
written community_3.txt

p=0.5
running GHSOM and saving to florentine_families_communities_0.5.pkl
Layer: 1, training epoch: 999/1000, size of map: 5, MQE: 1.17045209273, target: 1.15048111807          
number of communities detected: 5, saved map to florentine_families_communities_0.5
made directory florentine_families_communities_0.5_1
moved to florentine_families_communities_0.5_1
written all_families.txt
written shortest path matrix
written community_0.txt
written community_1.txt
written community_2.txt
written community_3.txt
written community_4.txt

p=0.4
running GHSOM and saving to florentine_families_communities_0.4.pkl
Layer: 1, training epoch: 999/1000, size of map: 6, MQE: 0.914657435617, target: 0.911181033509          
number of communities detected: 6, saved map to florentine_families_communities_0.4
made directory florentine_families_communities_0.4_1
moved to florentine_families_communities_0.4_1
written all_families.txt
written shortest path matrix
written community_0.txt
written community_1.txt
written community_2.txt
written community_3.txt
written community_4.txt
written community_5.txt

p=0.3
running GHSOM and saving to florentine_families_communities_0.3.pkl
Layer: 1, training epoch: 999/1000, size of map: 9, MQE: 0.773073940195, target: 0.690287206847          deleted node 1
Layer: 1, training epoch: 999/1000, size of map: 9, MQE: 0.702602456879, target: 0.690287206847          
number of communities detected: 9, saved map to florentine_families_communities_0.3
made directory florentine_families_communities_0.3_1
moved to florentine_families_communities_0.3_1
written all_families.txt
written shortest path matrix
written community_0.txt
written community_1.txt
written community_2.txt
written community_3.txt
written community_4.txt
written community_5.txt
written community_6.txt
written community_7.txt
written community_8.txt

p=0.2
running GHSOM and saving to florentine_families_communities_0.2.pkl
Layer: 1, training epoch: 999/1000, size of map: 9, MQE: 0.74423901819, target: 0.477347052181           deleted node 3
Layer: 1, training epoch: 999/1000, size of map: 12, MQE: 0.505562870218, target: 0.477347052181          deleted node 2
Layer: 1, training epoch: 999/1000, size of map: 12, MQE: 0.497234674377, target: 0.477347052181          
number of communities detected: 12, saved map to florentine_families_communities_0.2
made directory florentine_families_communities_0.2_1
moved to florentine_families_communities_0.2_1
written all_families.txt
written shortest path matrix
written community_0.txt
written community_1.txt
written community_2.txt
written community_3.txt
written community_4.txt
written community_5.txt
written community_6.txt
written community_7.txt
written community_8.txt
written community_9.txt
written community_10.txt
written community_11.txt

p=0.1
running GHSOM and saving to florentine_families_communities_0.1.pkl
Layer: 1, training epoch: 999/1000, size of map: 8, MQE: 0.73776147777, target: 0.230094153104           deleted node 4
Layer: 1, training epoch: 999/1000, size of map: 10, MQE: 0.543389809879, target: 0.230094153104          deleted node 10
Layer: 1, training epoch: 999/1000, size of map: 10, MQE: 0.538464997153, target: 0.230094153104          deleted node 10
Layer: 1, training epoch: 999/1000, size of map: 12, MQE: 0.438319706325, target: 0.230094153104          deleted node 10
Layer: 1, training epoch: 999/1000, size of map: 12, MQE: 0.388944554273, target: 0.230094153104          deleted node 1
Layer: 1, training epoch: 999/1000, size of map: 13, MQE: 0.365465900623, target: 0.230094153104          deleted node 2
Layer: 1, training epoch: 999/1000, size of map: 14, MQE: 0.3273728652, target: 0.230094153104            deleted node 8
deleted node 14
Layer: 1, training epoch: 999/1000, size of map: 14, MQE: 0.278699283453, target: 0.230094153104          deleted node 15
Layer: 1, training epoch: 999/1000, size of map: 14, MQE: 0.255615818705, target: 0.230094153104          deleted node 17
Layer: 1, training epoch: 999/1000, size of map: 14, MQE: 0.244618929165, target: 0.230094153104          
number of communities detected: 14, saved map to florentine_families_communities_0.1
made directory florentine_families_communities_0.1_1
moved to florentine_families_communities_0.1_1
written all_families.txt
written shortest path matrix
written community_0.txt
written community_1.txt
written community_2.txt
written community_3.txt
written community_4.txt
written community_5.txt
written community_6.txt
written community_7.txt
written community_8.txt
written community_9.txt
written community_10.txt
written community_11.txt
written community_12.txt
written community_13.txt


In [1]:
import networkx as nx

# Load the graph from GML. The path is relative, so it is resolved against
# the kernel's current working directory.
G = nx.read_gml("embedded_florentine_families.gml")

In [5]:
from spearmint_ghsom import get_embedding

# Obtain the embedding for G from the project helper.
# NOTE(review): presumably this reads per-node coordinates stored in the
# graph loaded from the GML file -- confirm against spearmint_ghsom.
X = get_embedding(G)

In [6]:
# Display the embedding array.
X


Out[6]:
array([[-1.43165824,  1.79572143,  0.9457063 ],
       [ 0.13828552, -0.58830276,  1.52235122],
       [ 1.32435508, -0.00661569, -0.18298048],
       [ 0.34054865, -1.62206859, -0.83471532],
       [ 4.00375279,  0.3216568 ,  1.0139943 ],
       [-0.83092064,  2.70107665, -0.84797045],
       [-1.40382518, -1.50220382,  0.40191042],
       [ 0.76113966,  1.65182777, -1.2534288 ],
       [-3.00560247, -0.14055027, -0.08664172],
       [ 0.44617875, -2.70938716, -2.02062477],
       [-2.27788471, -2.51964934,  1.09285843],
       [ 2.05753231,  0.07625903, -0.41500201],
       [-2.80139994,  1.69075954, -0.46063064],
       [ 2.66405393,  0.15752055,  0.41550691],
       [ 0.01544451,  0.69395587,  0.70966662]])

In [ ]: