In [1]:
import numpy as np
import pandas as pd
import json
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

Understand TopicSimilarity.json


In [17]:
# read a test version (w/o JS codes) of TopicSimilarity.json
file = 'testSim.json'
with open(file) as train_file:
    dict_train = json.load(train_file)

In [20]:
dict_train


Out[20]:
{'links': [{'source': 0, 'target': 14, 'value': 111.10588683198937},
  {'source': 16, 'target': 11, 'value': 88.72795786804235},
  {'source': 17, 'target': 14, 'value': 81.4638009166188},
  {'source': 18, 'target': 10, 'value': 569.2500102128241},
  {'source': 19, 'target': 14, 'value': 89.26014158750893},
  {'source': 20, 'target': 11, 'value': 150.2149910819981},
  {'source': 20, 'target': 14, 'value': 112.667176302661},
  {'source': 21, 'target': 10, 'value': 167.7876087676736},
  {'source': 24, 'target': 10, 'value': 207.71486059868357},
  {'source': 27, 'target': 11, 'value': 218.00577671735817},
  {'source': 28, 'target': 10, 'value': 176.95673061111427},
  {'source': 28, 'target': 11, 'value': 162.37268157511465},
  {'source': 29, 'target': 4, 'value': 103.52125451925266},
  {'source': 29, 'target': 14, 'value': 115.56674761370896},
  {'source': 1, 'target': 40, 'value': 170.41693241733225},
  {'source': 3, 'target': 40, 'value': 118.7892640798752},
  {'source': 4, 'target': 42, 'value': 141.56620924061926},
  {'source': 7, 'target': 39, 'value': 103.0905737751473},
  {'source': 10, 'target': 34, 'value': 238.6946228178874},
  {'source': 11, 'target': 33, 'value': 124.246032917932},
  {'source': 12, 'target': 34, 'value': 110.44221815916602},
  {'source': 13, 'target': 33, 'value': 103.55134935882569},
  {'source': 13, 'target': 35, 'value': 137.99276905471447},
  {'source': 13, 'target': 43, 'value': 30.084548503925973},
  {'source': 14, 'target': 32, 'value': 229.57624457457734},
  {'source': 15, 'target': 44, 'value': 83.21830181179601},
  {'source': 31, 'target': 46, 'value': 116.37472963459285},
  {'source': 31, 'target': 55, 'value': 109.94928747752881},
  {'source': 32, 'target': 54, 'value': 176.99531870481647},
  {'source': 32, 'target': 57, 'value': 113.91169147037333},
  {'source': 33, 'target': 51, 'value': 35.040861966492066},
  {'source': 35, 'target': 46, 'value': 233.8913772071753},
  {'source': 35, 'target': 47, 'value': 223.7481463549817},
  {'source': 35, 'target': 48, 'value': 103.64731030622019},
  {'source': 35, 'target': 49, 'value': 169.2415226120869},
  {'source': 35, 'target': 52, 'value': 214.05183338494763},
  {'source': 35, 'target': 58, 'value': 88.57978642908431},
  {'source': 36, 'target': 50, 'value': 89.3508620532698},
  {'source': 38, 'target': 46, 'value': 174.4605052363067},
  {'source': 38, 'target': 47, 'value': 143.14487760260585},
  {'source': 38, 'target': 49, 'value': 114.4001764407993},
  {'source': 38, 'target': 52, 'value': 221.6030062865838},
  {'source': 38, 'target': 58, 'value': 526.4023297325416},
  {'source': 39, 'target': 46, 'value': 101.33633724361925},
  {'source': 39, 'target': 47, 'value': 214.76607482478735},
  {'source': 39, 'target': 53, 'value': 134.80050868036332},
  {'source': 40, 'target': 57, 'value': 190.20227834963154},
  {'source': 41, 'target': 57, 'value': 100.12603006760858},
  {'source': 42, 'target': 45, 'value': 246.4159906066294},
  {'source': 42, 'target': 46, 'value': 136.48867944965457},
  {'source': 42, 'target': 56, 'value': 248.84349605924015},
  {'source': 43, 'target': 47, 'value': 185.3138786628756},
  {'source': 43, 'target': 59, 'value': 200.83163903556436},
  {'source': 44, 'target': 46, 'value': 124.30713886740004},
  {'source': 44, 'target': 54, 'value': 516.9597236302735},
  {'source': 44, 'target': 55, 'value': 142.17280162946653},
  {'source': 46, 'target': 68, 'value': 105.4163590419313},
  {'source': 46, 'target': 73, 'value': 537.5768469469148},
  {'source': 47, 'target': 66, 'value': 131.6031463832092},
  {'source': 47, 'target': 67, 'value': 124.4425903744238},
  {'source': 47, 'target': 70, 'value': 180.67865974360316},
  {'source': 48, 'target': 60, 'value': 146.56760764475436},
  {'source': 48, 'target': 68, 'value': 103.9980560737277},
  {'source': 49, 'target': 68, 'value': 130.83035220806818},
  {'source': 52, 'target': 67, 'value': 211.93454425622923},
  {'source': 53, 'target': 61, 'value': 149.97368668619066},
  {'source': 53, 'target': 70, 'value': 114.67302707657328},
  {'source': 54, 'target': 61, 'value': 111.73929209768883},
  {'source': 54, 'target': 68, 'value': 197.29442050296723},
  {'source': 55, 'target': 73, 'value': 559.786631247631},
  {'source': 57, 'target': 60, 'value': 178.1208960383366},
  {'source': 57, 'target': 62, 'value': 228.06321247895625},
  {'source': 57, 'target': 67, 'value': 191.42798152307972},
  {'source': 57, 'target': 68, 'value': 167.96169523583245},
  {'source': 57, 'target': 69, 'value': 188.77373019873525},
  {'source': 57, 'target': 70, 'value': 246.63187841229944},
  {'source': 58, 'target': 64, 'value': 100.24396193370141},
  {'source': 59, 'target': 63, 'value': 100.51032342699445},
  {'source': 60, 'target': 81, 'value': 170.43452234038475},
  {'source': 60, 'target': 84, 'value': 170.41411636960206},
  {'source': 60, 'target': 88, 'value': 103.10694453147072},
  {'source': 61, 'target': 76, 'value': 107.54665502988767},
  {'source': 61, 'target': 84, 'value': 132.83382297708553},
  {'source': 61, 'target': 89, 'value': 150.6318110207267},
  {'source': 62, 'target': 76, 'value': 106.19166114926524},
  {'source': 62, 'target': 78, 'value': 33.640061798111084},
  {'source': 62, 'target': 89, 'value': 160.86698722272433},
  {'source': 63, 'target': 77, 'value': 103.67339487788212},
  {'source': 63, 'target': 80, 'value': 196.06603890142748},
  {'source': 63, 'target': 81, 'value': 161.19303385481967},
  {'source': 63, 'target': 85, 'value': 106.7801851719701},
  {'source': 64, 'target': 79, 'value': 107.14068118559734},
  {'source': 65, 'target': 80, 'value': 112.23351046763155},
  {'source': 65, 'target': 81, 'value': 135.55548630993036},
  {'source': 65, 'target': 86, 'value': 164.02148464991373},
  {'source': 65, 'target': 89, 'value': 133.78380030733194},
  {'source': 66, 'target': 80, 'value': 134.3866698058844},
  {'source': 66, 'target': 84, 'value': 241.16664828502542},
  {'source': 66, 'target': 85, 'value': 218.28624779944218},
  {'source': 66, 'target': 89, 'value': 228.34275083407616},
  {'source': 67, 'target': 80, 'value': 142.2515353428355},
  {'source': 67, 'target': 85, 'value': 247.80075553270868},
  {'source': 67, 'target': 88, 'value': 186.6607082346655},
  {'source': 68, 'target': 75, 'value': 196.06029169764162},
  {'source': 68, 'target': 77, 'value': 225.06709493026727},
  {'source': 68, 'target': 80, 'value': 35.66265869484768},
  {'source': 68, 'target': 81, 'value': 185.86982521743062},
  {'source': 68, 'target': 85, 'value': 183.74444596185415},
  {'source': 68, 'target': 87, 'value': 243.9126728278684},
  {'source': 69, 'target': 78, 'value': 534.9798894114062},
  {'source': 70, 'target': 75, 'value': 587.3437043807792},
  {'source': 70, 'target': 76, 'value': 504.0182396305203},
  {'source': 70, 'target': 87, 'value': 36.33132723511851},
  {'source': 71, 'target': 77, 'value': 102.82948605745688},
  {'source': 73, 'target': 83, 'value': 80.9033759598084},
  {'source': 75, 'target': 93, 'value': 124.22273855831607},
  {'source': 75, 'target': 95, 'value': 227.07876969754187},
  {'source': 76, 'target': 91, 'value': 109.66227198017285},
  {'source': 76, 'target': 92, 'value': 87.25793804209955},
  {'source': 76, 'target': 103, 'value': 149.41569680430362},
  {'source': 77, 'target': 95, 'value': 228.3407599548334},
  {'source': 77, 'target': 96, 'value': 502.5052237696251},
  {'source': 77, 'target': 103, 'value': 178.85128293375172},
  {'source': 78, 'target': 93, 'value': 106.76547274931222},
  {'source': 78, 'target': 97, 'value': 134.08974412813834},
  {'source': 79, 'target': 91, 'value': 118.62831181874081},
  {'source': 79, 'target': 99, 'value': 162.14029403438616},
  {'source': 79, 'target': 101, 'value': 184.82863682385673},
  {'source': 80, 'target': 94, 'value': 128.0704543693566},
  {'source': 80, 'target': 102, 'value': 183.2442424705959},
  {'source': 82, 'target': 91, 'value': 136.45470202101453},
  {'source': 83, 'target': 93, 'value': 502.27777192788386},
  {'source': 83, 'target': 95, 'value': 170.6696441826099},
  {'source': 83, 'target': 104, 'value': 30.304993448075667},
  {'source': 84, 'target': 93, 'value': 146.74207704833344},
  {'source': 84, 'target': 94, 'value': 102.38871152648865},
  {'source': 85, 'target': 95, 'value': 177.2207724047664},
  {'source': 88, 'target': 96, 'value': 162.49328798823188},
  {'source': 88, 'target': 98, 'value': 170.05688259431696}],
 'nodes': [{'name': '0_10', 'value': 26},
  {'name': '1_13', 'value': 56},
  {'name': '1_12', 'value': 77},
  {'name': '1_11', 'value': 43},
  {'name': '1_10', 'value': 64},
  {'name': '1_14', 'value': 40},
  {'name': '1_7', 'value': 36},
  {'name': '1_6', 'value': 30},
  {'name': '1_5', 'value': 22},
  {'name': '1_4', 'value': 37},
  {'name': '1_3', 'value': 32},
  {'name': '1_2', 'value': 46},
  {'name': '1_1', 'value': 20},
  {'name': '1_0', 'value': 35},
  {'name': '1_9', 'value': 53},
  {'name': '1_8', 'value': 28},
  {'name': '0_11', 'value': 35},
  {'name': '0_12', 'value': 16},
  {'name': '0_13', 'value': 13},
  {'name': '0_14', 'value': 25},
  {'name': '0_8', 'value': 5},
  {'name': '0_9', 'value': 10},
  {'name': '0_6', 'value': 13},
  {'name': '0_7', 'value': 8},
  {'name': '0_4', 'value': 10},
  {'name': '0_5', 'value': 7},
  {'name': '0_2', 'value': 4},
  {'name': '0_3', 'value': 7},
  {'name': '0_0', 'value': 2},
  {'name': '0_1', 'value': 5},
  {'name': '2_8', 'value': 97},
  {'name': '2_9', 'value': 83},
  {'name': '2_11', 'value': 72},
  {'name': '2_0', 'value': 61},
  {'name': '2_1', 'value': 71},
  {'name': '2_2', 'value': 70},
  {'name': '2_3', 'value': 66},
  {'name': '2_4', 'value': 45},
  {'name': '2_5', 'value': 69},
  {'name': '2_6', 'value': 44},
  {'name': '2_7', 'value': 63},
  {'name': '2_12', 'value': 34},
  {'name': '2_14', 'value': 43},
  {'name': '2_13', 'value': 42},
  {'name': '2_10', 'value': 43},
  {'name': '3_1', 'value': 198},
  {'name': '3_0', 'value': 119},
  {'name': '3_3', 'value': 110},
  {'name': '3_2', 'value': 112},
  {'name': '3_5', 'value': 99},
  {'name': '3_4', 'value': 73},
  {'name': '3_7', 'value': 89},
  {'name': '3_6', 'value': 75},
  {'name': '3_9', 'value': 64},
  {'name': '3_8', 'value': 78},
  {'name': '3_14', 'value': 75},
  {'name': '3_11', 'value': 92},
  {'name': '3_10', 'value': 93},
  {'name': '3_13', 'value': 82},
  {'name': '3_12', 'value': 67},
  {'name': '4_10', 'value': 528},
  {'name': '4_13', 'value': 324},
  {'name': '4_11', 'value': 337},
  {'name': '4_14', 'value': 241},
  {'name': '4_8', 'value': 252},
  {'name': '4_9', 'value': 270},
  {'name': '4_12', 'value': 260},
  {'name': '4_2', 'value': 262},
  {'name': '4_3', 'value': 190},
  {'name': '4_0', 'value': 198},
  {'name': '4_1', 'value': 194},
  {'name': '4_6', 'value': 148},
  {'name': '4_7', 'value': 180},
  {'name': '4_4', 'value': 161},
  {'name': '4_5', 'value': 153},
  {'name': '5_14', 'value': 588},
  {'name': '5_13', 'value': 495},
  {'name': '5_12', 'value': 392},
  {'name': '5_11', 'value': 343},
  {'name': '5_10', 'value': 321},
  {'name': '5_3', 'value': 405},
  {'name': '5_2', 'value': 330},
  {'name': '5_1', 'value': 486},
  {'name': '5_0', 'value': 254},
  {'name': '5_7', 'value': 279},
  {'name': '5_6', 'value': 318},
  {'name': '5_5', 'value': 266},
  {'name': '5_4', 'value': 278},
  {'name': '5_9', 'value': 193},
  {'name': '5_8', 'value': 264},
  {'name': '6_4', 'value': 23},
  {'name': '6_5', 'value': 17},
  {'name': '6_6', 'value': 25},
  {'name': '6_7', 'value': 14},
  {'name': '6_0', 'value': 16},
  {'name': '6_1', 'value': 11},
  {'name': '6_2', 'value': 6},
  {'name': '6_3', 'value': 12},
  {'name': '6_11', 'value': 38},
  {'name': '6_10', 'value': 8},
  {'name': '6_8', 'value': 17},
  {'name': '6_9', 'value': 19},
  {'name': '6_14', 'value': 5},
  {'name': '6_12', 'value': 8},
  {'name': '6_13', 'value': 15}]}

In [26]:
len(dict_train['links']), len(dict_train['nodes'])


Out[26]:
(139, 105)

In [31]:
links = pd.DataFrame(dict_train['links'])
nodes = pd.DataFrame(dict_train['nodes'])

In [35]:
links.head(2)


Out[35]:
source target value
0 0 14 111.105887
1 16 11 88.727958

In [34]:
links[links.value > 540]


Out[34]:
source target value
3 18 10 569.250010
69 55 73 559.786631
110 70 75 587.343704

In [53]:
nodes[(nodes.index == 10) | (nodes.index == 18) | (nodes.index == 55) | (nodes.index == 70) | (nodes.index == 73) | (nodes.index == 75)]


Out[53]:
name value
10 1_3 32
18 0_13 13
55 3_14 75
70 4_1 194
73 4_4 161
75 5_14 588

Now we see that the values of "source" and "target" in links indicate the indexes of nodes. I haven't figured out what does "value" in nodes represent.

Generate a test version of TopicSimilarity.json


In [109]:
plt.scatter(links.source, links.target, alpha=0.5)
plt.title('node id: source to target')


Out[109]:
<matplotlib.text.Text at 0x24422a73320>

In [95]:
# generate "source" and "target" of 100 links
source = np.random.randint(105 - 10, size=100) 
source = np.array(sorted(source))
target = source + np.random.randint(5,10)

In [96]:
source


Out[96]:
array([ 2,  6,  7,  7,  9,  9, 12, 14, 17, 20, 21, 23, 24, 25, 25, 25, 25,
       25, 27, 28, 29, 30, 30, 31, 32, 32, 32, 33, 33, 33, 34, 35, 37, 38,
       39, 39, 40, 43, 43, 44, 45, 45, 46, 46, 47, 48, 49, 49, 50, 51, 51,
       52, 52, 53, 54, 54, 54, 55, 55, 55, 56, 56, 57, 58, 59, 60, 61, 61,
       64, 65, 66, 67, 69, 71, 71, 72, 73, 73, 74, 74, 76, 79, 79, 80, 80,
       80, 82, 83, 83, 86, 86, 87, 87, 89, 89, 91, 93, 94, 94, 94])

In [97]:
target


Out[97]:
array([ 11,  15,  16,  16,  18,  18,  21,  23,  26,  29,  30,  32,  33,
        34,  34,  34,  34,  34,  36,  37,  38,  39,  39,  40,  41,  41,
        41,  42,  42,  42,  43,  44,  46,  47,  48,  48,  49,  52,  52,
        53,  54,  54,  55,  55,  56,  57,  58,  58,  59,  60,  60,  61,
        61,  62,  63,  63,  63,  64,  64,  64,  65,  65,  66,  67,  68,
        69,  70,  70,  73,  74,  75,  76,  78,  80,  80,  81,  82,  82,
        83,  83,  85,  88,  88,  89,  89,  89,  91,  92,  92,  95,  95,
        96,  96,  98,  98, 100, 102, 103, 103, 103])

In [65]:
links.value.describe()


Out[65]:
count    139.000000
mean     175.880108
std      112.358724
min       30.084549
25%      110.195753
50%      149.415697
75%      196.063165
max      587.343704
Name: value, dtype: float64

In [98]:
# generate 100 random values
value = np.random.normal(175.88, 112.36, size=100)
value = np.array([np.abs(i) for i in value])

In [99]:
value


Out[99]:
array([  37.52558764,   73.47095167,   32.74254392,  120.31288379,
        182.27911563,   33.42614662,  308.96586875,  225.1504056 ,
        131.95522969,  226.66907637,  384.88254363,  254.4247618 ,
        188.50315407,  286.64989475,   33.12730444,   44.52703707,
          2.50956898,  286.14679979,   54.31273271,   46.51976845,
         62.5626287 ,   99.42049997,  277.84346708,   25.49595031,
        210.94869112,  404.18935816,  234.18316809,  137.86415538,
        165.90375921,  188.37292299,   63.60533858,  126.58724853,
        203.72095049,   75.9362364 ,  215.25626131,   59.3294442 ,
        267.96866456,  244.57171195,   68.62796756,   98.76957969,
        211.88194678,  214.20107059,  299.99376083,   66.61688431,
        219.29848635,  228.17159179,    2.37252616,  227.37659083,
        303.31293842,  229.58750266,  158.0959986 ,  412.59999529,
        318.47819416,  112.70339704,  122.6023584 ,  279.4101562 ,
         92.96336786,  176.55098726,  201.38278096,  251.21882734,
        176.65508654,   67.43765028,   34.72061905,  375.71637616,
         48.03813113,   22.15105731,  243.8612855 ,  130.38887759,
        104.41154614,   68.6282522 ,  160.3221239 ,  226.51669635,
        144.50702237,  116.81873578,  240.02051294,  300.01143156,
         56.91975002,    2.16315962,  122.11704358,  103.31960254,
        253.59132334,  256.63587433,  104.87651909,  207.70936084,
        251.24698969,  228.63981231,  295.4334772 ,   97.91320469,
         76.06113736,  240.90068023,  134.34390845,   73.72539121,
        339.94120225,   51.13648318,   30.65916127,  168.44858838,
        183.48044736,   74.36760504,   51.12208732,   18.39793547])

In [104]:
newlink = pd.DataFrame({'source':source, 'target':target, 'value':value}).to_json(orient='records')

In [103]:
newlink


Out[103]:
'{"source":2,"target":11,"value":37.5255876362},{"source":6,"target":15,"value":73.4709516679},{"source":7,"target":16,"value":32.7425439228},{"source":7,"target":16,"value":120.3128837894},{"source":9,"target":18,"value":182.2791156297},{"source":9,"target":18,"value":33.4261466158},{"source":12,"target":21,"value":308.9658687458},{"source":14,"target":23,"value":225.1504055995},{"source":17,"target":26,"value":131.9552296855},{"source":20,"target":29,"value":226.6690763689},{"source":21,"target":30,"value":384.8825436339},{"source":23,"target":32,"value":254.4247617997},{"source":24,"target":33,"value":188.5031540721},{"source":25,"target":34,"value":286.6498947501},{"source":25,"target":34,"value":33.1273044436},{"source":25,"target":34,"value":44.5270370673},{"source":25,"target":34,"value":2.5095689839},{"source":25,"target":34,"value":286.14679979},{"source":27,"target":36,"value":54.3127327083},{"source":28,"target":37,"value":46.5197684539},{"source":29,"target":38,"value":62.5626287015},{"source":30,"target":39,"value":99.4204999749},{"source":30,"target":39,"value":277.8434670752},{"source":31,"target":40,"value":25.495950307},{"source":32,"target":41,"value":210.9486911177},{"source":32,"target":41,"value":404.1893581638},{"source":32,"target":41,"value":234.1831680903},{"source":33,"target":42,"value":137.8641553834},{"source":33,"target":42,"value":165.9037592146},{"source":33,"target":42,"value":188.3729229904},{"source":34,"target":43,"value":63.6053385756},{"source":35,"target":44,"value":126.5872485324},{"source":37,"target":46,"value":203.7209504863},{"source":38,"target":47,"value":75.9362364025},{"source":39,"target":48,"value":215.2562613126},{"source":39,"target":48,"value":59.3294441994},{"source":40,"target":49,"value":267.9686645594},{"source":43,"target":52,"value":244.5717119505},{"source":43,"target":52,"value":68.6279675632},{"source":44,"target":53,"value":98.7695796926},{"source":45,"target":54,"value":211.8819467775},{"source":45,"target":54,"value":214.2010705904},{"source":46,"target":55,"value":299.9937608276},{"source":46,"target":55,"value":66.6168843076},{"source":47,"target":56,"value":219.2984863516},{"source":48,"target":57,"value":228.1715917947},{"source":49,"target":58,"value":2.3725261643},{"source":49,"target":58,"value":227.3765908324},{"source":50,"target":59,"value":303.3129384182},{"source":51,"target":60,"value":229.5875026631},{"source":51,"target":60,"value":158.0959986024},{"source":52,"target":61,"value":412.5999952857},{"source":52,"target":61,"value":318.4781941616},{"source":53,"target":62,"value":112.7033970413},{"source":54,"target":63,"value":122.6023584036},{"source":54,"target":63,"value":279.4101561963},{"source":54,"target":63,"value":92.9633678608},{"source":55,"target":64,"value":176.5509872608},{"source":55,"target":64,"value":201.3827809641},{"source":55,"target":64,"value":251.2188273392},{"source":56,"target":65,"value":176.6550865374},{"source":56,"target":65,"value":67.4376502811},{"source":57,"target":66,"value":34.7206190499},{"source":58,"target":67,"value":375.7163761645},{"source":59,"target":68,"value":48.0381311263},{"source":60,"target":69,"value":22.1510573061},{"source":61,"target":70,"value":243.8612854987},{"source":61,"target":70,"value":130.3888775878},{"source":64,"target":73,"value":104.4115461381},{"source":65,"target":74,"value":68.6282522017},{"source":66,"target":75,"value":160.3221239048},{"source":67,"target":76,"value":226.5166963489},{"source":69,"target":78,"value":144.5070223747},{"source":71,"target":80,"value":116.8187357776},{"source":71,"target":80,"value":240.0205129375},{"source":72,"target":81,"value":300.0114315628},{"source":73,"target":82,"value":56.9197500171},{"source":73,"target":82,"value":2.1631596202},{"source":74,"target":83,"value":122.1170435813},{"source":74,"target":83,"value":103.3196025389},{"source":76,"target":85,"value":253.59132334},{"source":79,"target":88,"value":256.6358743283},{"source":79,"target":88,"value":104.8765190934},{"source":80,"target":89,"value":207.70936084},{"source":80,"target":89,"value":251.2469896908},{"source":80,"target":89,"value":228.639812306},{"source":82,"target":91,"value":295.4334771976},{"source":83,"target":92,"value":97.9132046917},{"source":83,"target":92,"value":76.0611373575},{"source":86,"target":95,"value":240.9006802335},{"source":86,"target":95,"value":134.3439084468},{"source":87,"target":96,"value":73.7253912141},{"source":87,"target":96,"value":339.9412022497},{"source":89,"target":98,"value":51.136483177},{"source":89,"target":98,"value":30.659161271},{"source":91,"target":100,"value":168.4485883829},{"source":93,"target":102,"value":183.4804473604},{"source":94,"target":103,"value":74.3676050374},{"source":94,"target":103,"value":51.1220873166},{"source":94,"target":103,"value":18.3979354698}'

In [107]:
with open("newlink.json", "w") as outfile:
    json.dump({'nodes':dict_train['nodes'], 'links':newlink}, outfile)

Test

Then:

  • rename it to "TopicSimilarity.json" and add JS codes;
  • create a data folder called "TEST", copy files from CHI to TEST;
  • replace the old "TopicSimilarity.json";
  • change all 3 function names to "*_TEST";
  • rename the new files

However, after several tests, the tool wouldn't open the new test set. The trigger might be somewhere in the scripts. But at least I know how the tool works and what each data point represents.