In [9]:
import sys
import json
import os
sys.path.append("../")

from tag2hierarchy.hierarchy import HTMLPLOT
from tag2hierarchy.hierarchy import tree2Dict
from tag2hierarchy.hierarchy import treeHandlers

from tag2hierarchy.inference import training

In [4]:
# Show which input files are available in the data directory.
dataDirectory = "../data"
os.listdir(dataDirectory)


Out[4]:
['moreTuples.json',
 'myTree.json',
 'biology_tags.txt',
 'myTree2.json',
 'molreTuples.json']

In [7]:
# Load the tag sets: every line of the file becomes one list of
# whitespace-separated tags. The file is opened in a `with` block so the
# handle is closed as soon as the contents are read, rather than staying
# open for the lifetime of the kernel.
with open("../data/biology_tags.txt", "r") as tagFile:
    TagTuples = [line.split() for line in tagFile.read().split("\n")]

In [8]:
# Preview the first ten tag lists to sanity-check the parsing.
TagTuples[0:10]


Out[8]:
[['ribosome', 'binding-sites', 'translation', 'synthetic-biology'],
 ['rna', 'biochemistry'],
 ['immunology', 'cell-biology', 'hematology'],
 ['cell-culture'],
 ['splicing', 'mrna', 'spliceosome', 'introns', 'exons'],
 ['dna', 'biochemistry', 'molecular-biology'],
 ['neuroscience', 'synapses'],
 ['plasmids'],
 ['molecular-genetics', 'gene-expression', 'experimental-design'],
 ['evolution', 'mitochondria', 'chloroplasts']]

In [10]:
# Run the hierarchy inference on the tag lists, then unpack its four results.
# NOTE(review): the meaning of the 0.2 argument is not visible here —
# presumably a noise level/threshold; confirm against training.obtainTreeNoise2.
inferenceResult = training.obtainTreeNoise2(TagTuples, 0.2)
(
    allTrees,
    homeless,
    parentToDescendantStats,
    bestParentToDescendantStats,
) = inferenceResult

In [12]:
# Hand the inferred trees to the HTML plotting helper.
# NOTE(review): presumably this writes visualization files under
# "../visualization/" labelled "stackExchangeBiology" — confirm in HTMLPLOT.
HTMLPLOT.vizualizeObjectTree(
    "../visualization/",
    allTrees,
    "stackExchangeBiology",
)