In [1]:
import numpy as np
import pandas as pd
import json
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
In [17]:
# read a test version (w/o JS codes) of TopicSimilarity.json
file = 'testSim.json'
with open(file) as train_file:
dict_train = json.load(train_file)
In [20]:
dict_train
Out[20]:
{'links': [{'source': 0, 'target': 14, 'value': 111.10588683198937},
{'source': 16, 'target': 11, 'value': 88.72795786804235},
{'source': 17, 'target': 14, 'value': 81.4638009166188},
{'source': 18, 'target': 10, 'value': 569.2500102128241},
{'source': 19, 'target': 14, 'value': 89.26014158750893},
{'source': 20, 'target': 11, 'value': 150.2149910819981},
{'source': 20, 'target': 14, 'value': 112.667176302661},
{'source': 21, 'target': 10, 'value': 167.7876087676736},
{'source': 24, 'target': 10, 'value': 207.71486059868357},
{'source': 27, 'target': 11, 'value': 218.00577671735817},
{'source': 28, 'target': 10, 'value': 176.95673061111427},
{'source': 28, 'target': 11, 'value': 162.37268157511465},
{'source': 29, 'target': 4, 'value': 103.52125451925266},
{'source': 29, 'target': 14, 'value': 115.56674761370896},
{'source': 1, 'target': 40, 'value': 170.41693241733225},
{'source': 3, 'target': 40, 'value': 118.7892640798752},
{'source': 4, 'target': 42, 'value': 141.56620924061926},
{'source': 7, 'target': 39, 'value': 103.0905737751473},
{'source': 10, 'target': 34, 'value': 238.6946228178874},
{'source': 11, 'target': 33, 'value': 124.246032917932},
{'source': 12, 'target': 34, 'value': 110.44221815916602},
{'source': 13, 'target': 33, 'value': 103.55134935882569},
{'source': 13, 'target': 35, 'value': 137.99276905471447},
{'source': 13, 'target': 43, 'value': 30.084548503925973},
{'source': 14, 'target': 32, 'value': 229.57624457457734},
{'source': 15, 'target': 44, 'value': 83.21830181179601},
{'source': 31, 'target': 46, 'value': 116.37472963459285},
{'source': 31, 'target': 55, 'value': 109.94928747752881},
{'source': 32, 'target': 54, 'value': 176.99531870481647},
{'source': 32, 'target': 57, 'value': 113.91169147037333},
{'source': 33, 'target': 51, 'value': 35.040861966492066},
{'source': 35, 'target': 46, 'value': 233.8913772071753},
{'source': 35, 'target': 47, 'value': 223.7481463549817},
{'source': 35, 'target': 48, 'value': 103.64731030622019},
{'source': 35, 'target': 49, 'value': 169.2415226120869},
{'source': 35, 'target': 52, 'value': 214.05183338494763},
{'source': 35, 'target': 58, 'value': 88.57978642908431},
{'source': 36, 'target': 50, 'value': 89.3508620532698},
{'source': 38, 'target': 46, 'value': 174.4605052363067},
{'source': 38, 'target': 47, 'value': 143.14487760260585},
{'source': 38, 'target': 49, 'value': 114.4001764407993},
{'source': 38, 'target': 52, 'value': 221.6030062865838},
{'source': 38, 'target': 58, 'value': 526.4023297325416},
{'source': 39, 'target': 46, 'value': 101.33633724361925},
{'source': 39, 'target': 47, 'value': 214.76607482478735},
{'source': 39, 'target': 53, 'value': 134.80050868036332},
{'source': 40, 'target': 57, 'value': 190.20227834963154},
{'source': 41, 'target': 57, 'value': 100.12603006760858},
{'source': 42, 'target': 45, 'value': 246.4159906066294},
{'source': 42, 'target': 46, 'value': 136.48867944965457},
{'source': 42, 'target': 56, 'value': 248.84349605924015},
{'source': 43, 'target': 47, 'value': 185.3138786628756},
{'source': 43, 'target': 59, 'value': 200.83163903556436},
{'source': 44, 'target': 46, 'value': 124.30713886740004},
{'source': 44, 'target': 54, 'value': 516.9597236302735},
{'source': 44, 'target': 55, 'value': 142.17280162946653},
{'source': 46, 'target': 68, 'value': 105.4163590419313},
{'source': 46, 'target': 73, 'value': 537.5768469469148},
{'source': 47, 'target': 66, 'value': 131.6031463832092},
{'source': 47, 'target': 67, 'value': 124.4425903744238},
{'source': 47, 'target': 70, 'value': 180.67865974360316},
{'source': 48, 'target': 60, 'value': 146.56760764475436},
{'source': 48, 'target': 68, 'value': 103.9980560737277},
{'source': 49, 'target': 68, 'value': 130.83035220806818},
{'source': 52, 'target': 67, 'value': 211.93454425622923},
{'source': 53, 'target': 61, 'value': 149.97368668619066},
{'source': 53, 'target': 70, 'value': 114.67302707657328},
{'source': 54, 'target': 61, 'value': 111.73929209768883},
{'source': 54, 'target': 68, 'value': 197.29442050296723},
{'source': 55, 'target': 73, 'value': 559.786631247631},
{'source': 57, 'target': 60, 'value': 178.1208960383366},
{'source': 57, 'target': 62, 'value': 228.06321247895625},
{'source': 57, 'target': 67, 'value': 191.42798152307972},
{'source': 57, 'target': 68, 'value': 167.96169523583245},
{'source': 57, 'target': 69, 'value': 188.77373019873525},
{'source': 57, 'target': 70, 'value': 246.63187841229944},
{'source': 58, 'target': 64, 'value': 100.24396193370141},
{'source': 59, 'target': 63, 'value': 100.51032342699445},
{'source': 60, 'target': 81, 'value': 170.43452234038475},
{'source': 60, 'target': 84, 'value': 170.41411636960206},
{'source': 60, 'target': 88, 'value': 103.10694453147072},
{'source': 61, 'target': 76, 'value': 107.54665502988767},
{'source': 61, 'target': 84, 'value': 132.83382297708553},
{'source': 61, 'target': 89, 'value': 150.6318110207267},
{'source': 62, 'target': 76, 'value': 106.19166114926524},
{'source': 62, 'target': 78, 'value': 33.640061798111084},
{'source': 62, 'target': 89, 'value': 160.86698722272433},
{'source': 63, 'target': 77, 'value': 103.67339487788212},
{'source': 63, 'target': 80, 'value': 196.06603890142748},
{'source': 63, 'target': 81, 'value': 161.19303385481967},
{'source': 63, 'target': 85, 'value': 106.7801851719701},
{'source': 64, 'target': 79, 'value': 107.14068118559734},
{'source': 65, 'target': 80, 'value': 112.23351046763155},
{'source': 65, 'target': 81, 'value': 135.55548630993036},
{'source': 65, 'target': 86, 'value': 164.02148464991373},
{'source': 65, 'target': 89, 'value': 133.78380030733194},
{'source': 66, 'target': 80, 'value': 134.3866698058844},
{'source': 66, 'target': 84, 'value': 241.16664828502542},
{'source': 66, 'target': 85, 'value': 218.28624779944218},
{'source': 66, 'target': 89, 'value': 228.34275083407616},
{'source': 67, 'target': 80, 'value': 142.2515353428355},
{'source': 67, 'target': 85, 'value': 247.80075553270868},
{'source': 67, 'target': 88, 'value': 186.6607082346655},
{'source': 68, 'target': 75, 'value': 196.06029169764162},
{'source': 68, 'target': 77, 'value': 225.06709493026727},
{'source': 68, 'target': 80, 'value': 35.66265869484768},
{'source': 68, 'target': 81, 'value': 185.86982521743062},
{'source': 68, 'target': 85, 'value': 183.74444596185415},
{'source': 68, 'target': 87, 'value': 243.9126728278684},
{'source': 69, 'target': 78, 'value': 534.9798894114062},
{'source': 70, 'target': 75, 'value': 587.3437043807792},
{'source': 70, 'target': 76, 'value': 504.0182396305203},
{'source': 70, 'target': 87, 'value': 36.33132723511851},
{'source': 71, 'target': 77, 'value': 102.82948605745688},
{'source': 73, 'target': 83, 'value': 80.9033759598084},
{'source': 75, 'target': 93, 'value': 124.22273855831607},
{'source': 75, 'target': 95, 'value': 227.07876969754187},
{'source': 76, 'target': 91, 'value': 109.66227198017285},
{'source': 76, 'target': 92, 'value': 87.25793804209955},
{'source': 76, 'target': 103, 'value': 149.41569680430362},
{'source': 77, 'target': 95, 'value': 228.3407599548334},
{'source': 77, 'target': 96, 'value': 502.5052237696251},
{'source': 77, 'target': 103, 'value': 178.85128293375172},
{'source': 78, 'target': 93, 'value': 106.76547274931222},
{'source': 78, 'target': 97, 'value': 134.08974412813834},
{'source': 79, 'target': 91, 'value': 118.62831181874081},
{'source': 79, 'target': 99, 'value': 162.14029403438616},
{'source': 79, 'target': 101, 'value': 184.82863682385673},
{'source': 80, 'target': 94, 'value': 128.0704543693566},
{'source': 80, 'target': 102, 'value': 183.2442424705959},
{'source': 82, 'target': 91, 'value': 136.45470202101453},
{'source': 83, 'target': 93, 'value': 502.27777192788386},
{'source': 83, 'target': 95, 'value': 170.6696441826099},
{'source': 83, 'target': 104, 'value': 30.304993448075667},
{'source': 84, 'target': 93, 'value': 146.74207704833344},
{'source': 84, 'target': 94, 'value': 102.38871152648865},
{'source': 85, 'target': 95, 'value': 177.2207724047664},
{'source': 88, 'target': 96, 'value': 162.49328798823188},
{'source': 88, 'target': 98, 'value': 170.05688259431696}],
'nodes': [{'name': '0_10', 'value': 26},
{'name': '1_13', 'value': 56},
{'name': '1_12', 'value': 77},
{'name': '1_11', 'value': 43},
{'name': '1_10', 'value': 64},
{'name': '1_14', 'value': 40},
{'name': '1_7', 'value': 36},
{'name': '1_6', 'value': 30},
{'name': '1_5', 'value': 22},
{'name': '1_4', 'value': 37},
{'name': '1_3', 'value': 32},
{'name': '1_2', 'value': 46},
{'name': '1_1', 'value': 20},
{'name': '1_0', 'value': 35},
{'name': '1_9', 'value': 53},
{'name': '1_8', 'value': 28},
{'name': '0_11', 'value': 35},
{'name': '0_12', 'value': 16},
{'name': '0_13', 'value': 13},
{'name': '0_14', 'value': 25},
{'name': '0_8', 'value': 5},
{'name': '0_9', 'value': 10},
{'name': '0_6', 'value': 13},
{'name': '0_7', 'value': 8},
{'name': '0_4', 'value': 10},
{'name': '0_5', 'value': 7},
{'name': '0_2', 'value': 4},
{'name': '0_3', 'value': 7},
{'name': '0_0', 'value': 2},
{'name': '0_1', 'value': 5},
{'name': '2_8', 'value': 97},
{'name': '2_9', 'value': 83},
{'name': '2_11', 'value': 72},
{'name': '2_0', 'value': 61},
{'name': '2_1', 'value': 71},
{'name': '2_2', 'value': 70},
{'name': '2_3', 'value': 66},
{'name': '2_4', 'value': 45},
{'name': '2_5', 'value': 69},
{'name': '2_6', 'value': 44},
{'name': '2_7', 'value': 63},
{'name': '2_12', 'value': 34},
{'name': '2_14', 'value': 43},
{'name': '2_13', 'value': 42},
{'name': '2_10', 'value': 43},
{'name': '3_1', 'value': 198},
{'name': '3_0', 'value': 119},
{'name': '3_3', 'value': 110},
{'name': '3_2', 'value': 112},
{'name': '3_5', 'value': 99},
{'name': '3_4', 'value': 73},
{'name': '3_7', 'value': 89},
{'name': '3_6', 'value': 75},
{'name': '3_9', 'value': 64},
{'name': '3_8', 'value': 78},
{'name': '3_14', 'value': 75},
{'name': '3_11', 'value': 92},
{'name': '3_10', 'value': 93},
{'name': '3_13', 'value': 82},
{'name': '3_12', 'value': 67},
{'name': '4_10', 'value': 528},
{'name': '4_13', 'value': 324},
{'name': '4_11', 'value': 337},
{'name': '4_14', 'value': 241},
{'name': '4_8', 'value': 252},
{'name': '4_9', 'value': 270},
{'name': '4_12', 'value': 260},
{'name': '4_2', 'value': 262},
{'name': '4_3', 'value': 190},
{'name': '4_0', 'value': 198},
{'name': '4_1', 'value': 194},
{'name': '4_6', 'value': 148},
{'name': '4_7', 'value': 180},
{'name': '4_4', 'value': 161},
{'name': '4_5', 'value': 153},
{'name': '5_14', 'value': 588},
{'name': '5_13', 'value': 495},
{'name': '5_12', 'value': 392},
{'name': '5_11', 'value': 343},
{'name': '5_10', 'value': 321},
{'name': '5_3', 'value': 405},
{'name': '5_2', 'value': 330},
{'name': '5_1', 'value': 486},
{'name': '5_0', 'value': 254},
{'name': '5_7', 'value': 279},
{'name': '5_6', 'value': 318},
{'name': '5_5', 'value': 266},
{'name': '5_4', 'value': 278},
{'name': '5_9', 'value': 193},
{'name': '5_8', 'value': 264},
{'name': '6_4', 'value': 23},
{'name': '6_5', 'value': 17},
{'name': '6_6', 'value': 25},
{'name': '6_7', 'value': 14},
{'name': '6_0', 'value': 16},
{'name': '6_1', 'value': 11},
{'name': '6_2', 'value': 6},
{'name': '6_3', 'value': 12},
{'name': '6_11', 'value': 38},
{'name': '6_10', 'value': 8},
{'name': '6_8', 'value': 17},
{'name': '6_9', 'value': 19},
{'name': '6_14', 'value': 5},
{'name': '6_12', 'value': 8},
{'name': '6_13', 'value': 15}]}
In [26]:
len(dict_train['links']), len(dict_train['nodes'])
Out[26]:
(139, 105)
In [31]:
links = pd.DataFrame(dict_train['links'])
nodes = pd.DataFrame(dict_train['nodes'])
In [35]:
links.head(2)
Out[35]:
source
target
value
0
0
14
111.105887
1
16
11
88.727958
In [34]:
links[links.value > 540]
Out[34]:
source
target
value
3
18
10
569.250010
69
55
73
559.786631
110
70
75
587.343704
In [53]:
nodes[(nodes.index == 10) | (nodes.index == 18) | (nodes.index == 55) | (nodes.index == 70) | (nodes.index == 73) | (nodes.index == 75)]
Out[53]:
name
value
10
1_3
32
18
0_13
13
55
3_14
75
70
4_1
194
73
4_4
161
75
5_14
588
Now we see that the values of "source" and "target" in links indicate the indexes of nodes. I haven't figured out what does "value" in nodes represent.
In [109]:
plt.scatter(links.source, links.target, alpha=0.5)
plt.title('node id: source to target')
Out[109]:
<matplotlib.text.Text at 0x24422a73320>
In [95]:
# generate "source" and "target" of 100 links
source = np.random.randint(105 - 10, size=100)
source = np.array(sorted(source))
target = source + np.random.randint(5,10)
In [96]:
source
Out[96]:
array([ 2, 6, 7, 7, 9, 9, 12, 14, 17, 20, 21, 23, 24, 25, 25, 25, 25,
25, 27, 28, 29, 30, 30, 31, 32, 32, 32, 33, 33, 33, 34, 35, 37, 38,
39, 39, 40, 43, 43, 44, 45, 45, 46, 46, 47, 48, 49, 49, 50, 51, 51,
52, 52, 53, 54, 54, 54, 55, 55, 55, 56, 56, 57, 58, 59, 60, 61, 61,
64, 65, 66, 67, 69, 71, 71, 72, 73, 73, 74, 74, 76, 79, 79, 80, 80,
80, 82, 83, 83, 86, 86, 87, 87, 89, 89, 91, 93, 94, 94, 94])
In [97]:
target
Out[97]:
array([ 11, 15, 16, 16, 18, 18, 21, 23, 26, 29, 30, 32, 33,
34, 34, 34, 34, 34, 36, 37, 38, 39, 39, 40, 41, 41,
41, 42, 42, 42, 43, 44, 46, 47, 48, 48, 49, 52, 52,
53, 54, 54, 55, 55, 56, 57, 58, 58, 59, 60, 60, 61,
61, 62, 63, 63, 63, 64, 64, 64, 65, 65, 66, 67, 68,
69, 70, 70, 73, 74, 75, 76, 78, 80, 80, 81, 82, 82,
83, 83, 85, 88, 88, 89, 89, 89, 91, 92, 92, 95, 95,
96, 96, 98, 98, 100, 102, 103, 103, 103])
In [65]:
links.value.describe()
Out[65]:
count 139.000000
mean 175.880108
std 112.358724
min 30.084549
25% 110.195753
50% 149.415697
75% 196.063165
max 587.343704
Name: value, dtype: float64
In [98]:
# generate 100 random values
value = np.random.normal(175.88, 112.36, size=100)
value = np.array([np.abs(i) for i in value])
In [99]:
value
Out[99]:
array([ 37.52558764, 73.47095167, 32.74254392, 120.31288379,
182.27911563, 33.42614662, 308.96586875, 225.1504056 ,
131.95522969, 226.66907637, 384.88254363, 254.4247618 ,
188.50315407, 286.64989475, 33.12730444, 44.52703707,
2.50956898, 286.14679979, 54.31273271, 46.51976845,
62.5626287 , 99.42049997, 277.84346708, 25.49595031,
210.94869112, 404.18935816, 234.18316809, 137.86415538,
165.90375921, 188.37292299, 63.60533858, 126.58724853,
203.72095049, 75.9362364 , 215.25626131, 59.3294442 ,
267.96866456, 244.57171195, 68.62796756, 98.76957969,
211.88194678, 214.20107059, 299.99376083, 66.61688431,
219.29848635, 228.17159179, 2.37252616, 227.37659083,
303.31293842, 229.58750266, 158.0959986 , 412.59999529,
318.47819416, 112.70339704, 122.6023584 , 279.4101562 ,
92.96336786, 176.55098726, 201.38278096, 251.21882734,
176.65508654, 67.43765028, 34.72061905, 375.71637616,
48.03813113, 22.15105731, 243.8612855 , 130.38887759,
104.41154614, 68.6282522 , 160.3221239 , 226.51669635,
144.50702237, 116.81873578, 240.02051294, 300.01143156,
56.91975002, 2.16315962, 122.11704358, 103.31960254,
253.59132334, 256.63587433, 104.87651909, 207.70936084,
251.24698969, 228.63981231, 295.4334772 , 97.91320469,
76.06113736, 240.90068023, 134.34390845, 73.72539121,
339.94120225, 51.13648318, 30.65916127, 168.44858838,
183.48044736, 74.36760504, 51.12208732, 18.39793547])
In [104]:
newlink = pd.DataFrame({'source':source, 'target':target, 'value':value}).to_json(orient='records')
In [103]:
newlink
Out[103]:
'{"source":2,"target":11,"value":37.5255876362},{"source":6,"target":15,"value":73.4709516679},{"source":7,"target":16,"value":32.7425439228},{"source":7,"target":16,"value":120.3128837894},{"source":9,"target":18,"value":182.2791156297},{"source":9,"target":18,"value":33.4261466158},{"source":12,"target":21,"value":308.9658687458},{"source":14,"target":23,"value":225.1504055995},{"source":17,"target":26,"value":131.9552296855},{"source":20,"target":29,"value":226.6690763689},{"source":21,"target":30,"value":384.8825436339},{"source":23,"target":32,"value":254.4247617997},{"source":24,"target":33,"value":188.5031540721},{"source":25,"target":34,"value":286.6498947501},{"source":25,"target":34,"value":33.1273044436},{"source":25,"target":34,"value":44.5270370673},{"source":25,"target":34,"value":2.5095689839},{"source":25,"target":34,"value":286.14679979},{"source":27,"target":36,"value":54.3127327083},{"source":28,"target":37,"value":46.5197684539},{"source":29,"target":38,"value":62.5626287015},{"source":30,"target":39,"value":99.4204999749},{"source":30,"target":39,"value":277.8434670752},{"source":31,"target":40,"value":25.495950307},{"source":32,"target":41,"value":210.9486911177},{"source":32,"target":41,"value":404.1893581638},{"source":32,"target":41,"value":234.1831680903},{"source":33,"target":42,"value":137.8641553834},{"source":33,"target":42,"value":165.9037592146},{"source":33,"target":42,"value":188.3729229904},{"source":34,"target":43,"value":63.6053385756},{"source":35,"target":44,"value":126.5872485324},{"source":37,"target":46,"value":203.7209504863},{"source":38,"target":47,"value":75.9362364025},{"source":39,"target":48,"value":215.2562613126},{"source":39,"target":48,"value":59.3294441994},{"source":40,"target":49,"value":267.9686645594},{"source":43,"target":52,"value":244.5717119505},{"source":43,"target":52,"value":68.6279675632},{"source":44,"target":53,"value":98.7695796926},{"source":45,"target":54,"value":211.8819467775},{"source":45,"target":54,"value":214.2010705904},{"source":46,"target":55,"value":299.9937608276},{"source":46,"target":55,"value":66.6168843076},{"source":47,"target":56,"value":219.2984863516},{"source":48,"target":57,"value":228.1715917947},{"source":49,"target":58,"value":2.3725261643},{"source":49,"target":58,"value":227.3765908324},{"source":50,"target":59,"value":303.3129384182},{"source":51,"target":60,"value":229.5875026631},{"source":51,"target":60,"value":158.0959986024},{"source":52,"target":61,"value":412.5999952857},{"source":52,"target":61,"value":318.4781941616},{"source":53,"target":62,"value":112.7033970413},{"source":54,"target":63,"value":122.6023584036},{"source":54,"target":63,"value":279.4101561963},{"source":54,"target":63,"value":92.9633678608},{"source":55,"target":64,"value":176.5509872608},{"source":55,"target":64,"value":201.3827809641},{"source":55,"target":64,"value":251.2188273392},{"source":56,"target":65,"value":176.6550865374},{"source":56,"target":65,"value":67.4376502811},{"source":57,"target":66,"value":34.7206190499},{"source":58,"target":67,"value":375.7163761645},{"source":59,"target":68,"value":48.0381311263},{"source":60,"target":69,"value":22.1510573061},{"source":61,"target":70,"value":243.8612854987},{"source":61,"target":70,"value":130.3888775878},{"source":64,"target":73,"value":104.4115461381},{"source":65,"target":74,"value":68.6282522017},{"source":66,"target":75,"value":160.3221239048},{"source":67,"target":76,"value":226.5166963489},{"source":69,"target":78,"value":144.5070223747},{"source":71,"target":80,"value":116.8187357776},{"source":71,"target":80,"value":240.0205129375},{"source":72,"target":81,"value":300.0114315628},{"source":73,"target":82,"value":56.9197500171},{"source":73,"target":82,"value":2.1631596202},{"source":74,"target":83,"value":122.1170435813},{"source":74,"target":83,"value":103.3196025389},{"source":76,"target":85,"value":253.59132334},{"source":79,"target":88,"value":256.6358743283},{"source":79,"target":88,"value":104.8765190934},{"source":80,"target":89,"value":207.70936084},{"source":80,"target":89,"value":251.2469896908},{"source":80,"target":89,"value":228.639812306},{"source":82,"target":91,"value":295.4334771976},{"source":83,"target":92,"value":97.9132046917},{"source":83,"target":92,"value":76.0611373575},{"source":86,"target":95,"value":240.9006802335},{"source":86,"target":95,"value":134.3439084468},{"source":87,"target":96,"value":73.7253912141},{"source":87,"target":96,"value":339.9412022497},{"source":89,"target":98,"value":51.136483177},{"source":89,"target":98,"value":30.659161271},{"source":91,"target":100,"value":168.4485883829},{"source":93,"target":102,"value":183.4804473604},{"source":94,"target":103,"value":74.3676050374},{"source":94,"target":103,"value":51.1220873166},{"source":94,"target":103,"value":18.3979354698}'
In [107]:
with open("newlink.json", "w") as outfile:
json.dump({'nodes':dict_train['nodes'], 'links':newlink}, outfile)
Then:
However, after several tests, the tool wouldn't open the new test set. The trigger might be somewhere in the scripts. But at least I know how the tool works and what each data point represents.
Content source: estepona/PERCEIVE-freddie
Similar notebooks: