In [3]:
from os import listdir
from os.path import isfile, join
from sklearn.metrics import normalized_mutual_info_score

In [5]:
test_file = "karate_metadata.txt"
test_set = []                          # ground-truth community label per node, in file order
with open(test_file) as f:
    for line in f:
        node_id, label = line.split()  # each row: node id, community label
        test_set.append(int(label))
test_set


Out[5]:
[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
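
The loop above keeps only the second column and relies on the metadata file listing the nodes in order. A minimal sketch (assuming the same two-column "node_id label" format) that keys the labels by node id instead, so row order no longer matters:

In [ ]:
# sketch: read the metadata keyed by node id rather than by row order
with open("karate_metadata.txt") as f:
    label_by_node = {int(node): int(label) for node, label in (line.split() for line in f)}
test_set_by_id = [label_by_node[i] for i in sorted(label_by_node)]   # same as test_set if the file is already sorted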

In [20]:
onlyfiles = [f for f in listdir('./') if 'comm' in f]   # community files produced by the detection runs
train_set = [0] * len(test_set)                         # 0 = node not assigned to any community
print(onlyfiles)
for fname in onlyfiles:
    with open(fname) as f:
        now_label = 0
        for line in f:                                  # one detected community per line
            now_label += 1
            for node in line.split():
                train_set[int(node) - 1] = now_label    # node ids in the files are 1-based
    # note: train_set is not reset between files, so nodes a later file does not list
    # keep the label they were given by the previous file
    print(normalized_mutual_info_score(test_set, train_set))
    print(train_set)


['comm3', 'comm4', 'comm5']
0.22776171119
[3, 1, 1, 1, 3, 3, 3, 1, 1, 0, 3, 0, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 0, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 1]
0.33707662446
[3, 3, 3, 3, 3, 3, 3, 3, 2, 0, 3, 0, 1, 3, 1, 1, 3, 1, 1, 1, 1, 1, 0, 1, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2]
0.186169626583
[1, 1, 1, 1, 3, 3, 3, 1, 2, 0, 3, 0, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 0, 1, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2]
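
Because train_set is reused across the loop, any node that a later 'comm' file does not list keeps the label it received from the previous file. A minimal sketch of the same scoring pass with the label vector re-initialized for every file (a sketch under that assumption; its NMI values may differ from the run above for the later files):

In [ ]:
# sketch: score each community file independently, resetting the label vector each time
for fname in onlyfiles:
    labels = [0] * len(test_set)                        # 0 = node not covered by this file
    with open(fname) as f:
        for now_label, line in enumerate(f, start=1):   # one community per line
            for node in line.split():
                labels[int(node) - 1] = now_label       # node ids are 1-based
    print(fname, normalized_mutual_info_score(test_set, labels))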

In [6]:
a = [3, 9, 10, 15, 16, 19, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34]   # one hand-picked community (unused below)
b = [1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 13, 14, 17, 18, 20, 22]                     # the other hand-picked community
c = [0] * 35                  # indicator vector indexed by node id; index 0 is unused
for i in range(1, 35):
    if i in b:
        c[i] = 1              # 1 = node i is in b, 0 otherwise
c


Out[6]:
[0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
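
The same indicator vector can also be built in one pass; a sketch equivalent to the loop above:

In [ ]:
# sketch: build the 0/1 indicator vector with a comprehension (index 0 stays unused)
b_set = set(b)
c = [1 if i in b_set else 0 for i in range(35)]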

In [8]:
print(normalized_mutual_info_score(test_set, c[1:]))    # drop the unused index 0 before scoring


0.837170117219
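
NMI compares how the two labelings group the nodes, not the label values themselves, which is why the 0/1 vector c[1:] can be scored directly against the 1/2 metadata labels. A quick sanity check (a sketch, not part of the run above):

In [ ]:
# sketch: NMI is invariant to renaming the cluster labels
print(normalized_mutual_info_score([1, 1, 2, 2], [0, 0, 1, 1]))           # 1.0 -- identical partitions
print(normalized_mutual_info_score(test_set, [3 - x for x in test_set]))  # 1.0 -- labels 1 and 2 swapped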