In [2]:
%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [39]:
import nltk
from tethne.readers import zotero
import matplotlib.pyplot as plt
from helpers import normalize_token, filter_token
import networkx as nx
from itertools import groupby

In [110]:
from nltk.corpus import stopwords

# WordNet-based lemmatizer instance.
# NOTE(review): not referenced in the cells visible here -- confirm it is still needed.
wordnet = nltk.WordNetLemmatizer()
# English stopword list; ``filter_token`` tests membership against it.
stoplist = stopwords.words('english')

In [580]:
def filter_token(token):
    """
    Decide whether ``token`` should be kept.

    The token is lower-cased first. It is kept only when it is not in
    ``stoplist``, consists entirely of alphabetic characters, and is
    longer than one character.

    Parameters
    ----------
    token : str

    Returns
    -------
    keep : bool
        ``True`` if the token passes all three checks, ``False`` otherwise.
    """
    lowered = token.lower()
    # Reject stopwords first, then anything containing non-letters.
    if lowered in stoplist:
        return False
    if not lowered.isalpha():
        return False
    # Single-character tokens are discarded.
    return len(lowered) > 1

1.5. Collocates: Scaling up

In a previous notebook, we used Pointwise Mutual Information to identify N-grams that were likely to constitute multi-word phrases. In this notebook, we'll use a similar procedure to examine patterns of co-occurrence among features on somewhat larger scales.


In [24]:
# Directory holding the plain-text corpus files (relative to this notebook).
text_root = '../../data/SystemsBiology/'
# The second argument is a regular expression over file ids. The dot before
# ``txt`` is escaped so that only names ending in a literal ".txt" match;
# the unescaped form would also accept any other character in that position.
documents = nltk.corpus.PlaintextCorpusReader(text_root, r'.+\.txt')

In [4]:
import pandas as pd

In [17]:
# Load the systems-biology terms CSV into a DataFrame.
# NOTE(review): ``df`` is not referenced again in the cells visible here -- confirm it is still needed.
df = pd.read_csv('../../data/systemsBiologyTerms.csv')

Windows


In [28]:
# Bigram association measures; its ``pmi`` method is passed to
# ``score_ngrams`` as the scoring function when bigrams are scored per year.
bigram_measures = nltk.collocations.BigramAssocMeasures()

In [625]:
# Key function: the year is the first element of each (year, token) pair.
sortfunc = lambda t: t[0]

# Tag every lower-cased token with the integer parsed from the first four
# characters of its file id (assumes file ids begin with a four-digit year --
# TODO confirm against the corpus), sort by that year so each year forms one
# contiguous run, and group. Python's sort is stable, so tokens keep their
# original order within a year.
#
# ``groupby`` yields one-shot iterators, so the groups are materialised into
# a list of (year, [(year, token), ...]) pairs. This lets the cells that
# consume ``tokens_by_year`` be re-run without silently seeing an exhausted
# iterator.
tokens_by_year = [
    (year, list(tagged_tokens))
    for year, tagged_tokens in groupby(
        sorted([(int(fileid[:4]), token.lower())
                for fileid in documents.fileids()
                for token in documents.words(fileids=[fileid])],
               key=sortfunc),
        sortfunc)
]

In [626]:
# Score windowed bigrams separately for each year; the result is a list of
# (year, [((word1, word2), pmi), ...]) pairs in the order the years are seen.
bigrams_by_year = []
for year, tagged_tokens in tokens_by_year:
    # Strip the year tag from each (year, token) pair. The tokens are pulled
    # out directly instead of unpacking ``zip(*tagged_tokens)`` into ``_``,
    # which would rebind the name IPython uses for the most recent output.
    tokens = tuple(tagged[1] for tagged in tagged_tokens)
    # window_size=5: pair words that co-occur within a five-token window,
    # not only immediately adjacent words.
    finder = nltk.collocations.BigramCollocationFinder.from_words(tokens, window_size=5)
    # Discard pairs seen fewer than two times.
    finder.apply_freq_filter(2)
    # Discard pairs containing any token that ``filter_token`` rejects.
    finder.apply_word_filter(lambda token: not filter_token(token))
    bigrams_by_year.append((year, finder.score_ngrams(bigram_measures.pmi)))

In [632]:
# Scored bigrams for the first year group, reversed so that the output is
# listed in ascending order of PMI (lowest-scoring pairs first).
bigrams_by_year[0][1][::-1]


Out[632]:
[((u'systems', u'systems'), -1.0400875179763336),
 ((u'biology', u'systems'), -0.5271362621705649),
 ((u'networks', u'systems'), -0.05140283120416811),
 ((u'model', u'systems'), 0.3636346680746776),
 ((u'biology', u'biology'), 0.5707774943563582),
 ((u'model', u'ab'), 0.7824637465158943),
 ((u'cell', u'ab'), 0.7824637465158943),
 ((u'ab', u'cell'), 0.7824637465158943),
 ((u'biological', u'ab'), 0.8438642911800365),
 ((u'ab', u'biological'), 0.8438642911800365),
 ((u'cell', u'biology'), 0.8765859238804445),
 ((u'functional', u'systems'), 0.9042030494373794),
 ((u'networks', u'networks'), 0.9372818555679991),
 ((u'biology', u'biological'), 0.9379864685445867),
 ((u'systems', u'understanding'), 0.9485971687958319),
 ((u'ab', u'networks'), 0.9523887479582065),
 ((u'systems', u'models'), 0.994400858408957),
 ((u'systems', u'disease'), 1.041706573187314),
 ((u'ab', u'expression'), 1.0454981523496887),
 ((u'systems', u'using'), 1.090616173668261),
 ((u'information', u'systems'), 1.090616173668261),
 ((u'biological', u'data'), 1.1008369442266304),
 ((u'approach', u'biology'), 1.1396203297142389),
 ((u'systems', u'ab'), 1.186096482522487),
 ((u'pathways', u'systems'), 1.1937096666323637),
 ((u'ab', u'biology'), 1.2136209111580119),
 ((u'cell', u'biological'), 1.2437948980686748),
 ((u'systems', u'health'), 1.248157450654741),
 ((u'functional', u'ab'), 1.3230321278785961),
 ((u'analysis', u'networks'), 1.3523193548468413),
 ((u'systems', u'response'), 1.3636346680746776),
 ((u'ab', u'gene'), 1.3977998962805689),
 ((u'ab', u'models'), 1.4132299368501755),
 ((u'functional', u'biology'), 1.4171543052431463),
 ((u'regulatory', u'systems'), 1.4250352127388197),
 ((u'important', u'systems'), 1.4250352127388197),
 ((u'new', u'biology'), 1.4615484246016006),
 ((u'study', u'systems'), 1.4891655501585337),
 ((u'systems', u'genomics'), 1.4891655501585355),
 ((u'genomics', u'systems'), 1.4891655501585355),
 ((u'approach', u'biological'), 1.5068293039024674),
 ((u'environmental', u'ab'), 1.5094452521094794),
 ((u'systems', u'new'), 1.5335596695169862),
 ((u'biological', u'network'), 1.5433551799275804),
 ((u'ab', u'systems'), 1.5486665619071935),
 ((u'computational', u'systems'), 1.556279746017072),
 ((u'analysis', u'data'), 1.6243989002836425),
 ((u'cellular', u'systems'), 1.6266690739084702),
 ((u'expression', u'ab'), 1.630460653070843),
 ((u'biology', u'ab'), 1.6990477383282556),
 ((u'ab', u'protein'), 1.7235700574623252),
 ((u'model', u'cell'), 1.7673568541256852),
 ((u'ab', u'analysis'), 1.7824637465158943),
 ((u'study', u'biological'), 1.7843632794313766),
 ((u'biological', u'networks'), 1.8287573987898291),
 ((u'expression', u'data'), 1.8874333061174369),
 ((u'genes', u'genes'), 1.890157943453973),
 ((u'systems', u'one'), 1.948597168795832),
 ((u'human', u'biology'), 2.0021168059643006),
 ((u'genomics', u'biology'), 2.002116805964304),
 ((u'biology', u'genomics'), 2.002116805964304),
 ((u'expression', u'analysis'), 2.0303912599594796),
 ((u'computational', u'biology'), 2.069231001822841),
 ((u'biology', u'computational'), 2.069231001822841),
 ((u'expression', u'genes'), 2.0917918046236235),
 ((u'biological', u'health'), 2.1283176806487383),
 ((u'cellular', u'biology'), 2.139620329714239),
 ((u'pathways', u'networks'), 2.182394353404531),
 ((u'networks', u'pathways'), 2.182394353404531),
 ((u'data', u'new'), 2.2093614010047986),
 ((u'signaling', u'systems'), 2.248157450654741),
 ((u'rights', u'systems'), 2.248157450654741),
 ((u'reserved', u'systems'), 2.248157450654741),
 ((u'dynamics', u'systems'), 2.248157450654741),
 ((u'ab', u'molecular'), 2.2799634059867113),
 ((u'approach', u'expression'), 2.293425665793274),
 ((u'ab', u'human'), 2.323032127878596),
 ((u'data', u'using'), 2.3513804058772276),
 ((u'model', u'model'), 2.3523193548468395),
 ((u'systems', u'integration'), 2.363634668074676),
 ((u'gene', u'analysis'), 2.3826930038903598),
 ((u'understanding', u'biological'), 2.4137198995109834),
 ((u'cell', u'genes'), 2.4137198995109834),
 ((u'networks', u'important'), 2.413719899510987),
 ((u'biological', u'studies'), 2.4364399760110693),
 ((u'biological', u'knowledge'), 2.4364399760110693),
 ((u'ab', u'genomic'), 2.4605356516285326),
 ((u'biology', u'approach'), 2.461548424601599),
 ((u'biology', u'one'), 2.4615484246016006),
 ((u'biology', u'new'), 2.4615484246016006),
 ((u'regulation', u'systems'), 2.4891655501585355),
 ((u'using', u'model'), 2.4943383597192703),
 ((u'understanding', u'networks'), 2.5222443562891534),
 ((u'biological', u'system'), 2.5557389043834124),
 ((u'models', u'networks'), 2.5680480459022785),
 ((u'approach', u'study'), 2.570959641322183),
 ((u'approach', u'functional'), 2.570959641322183),
 ((u'function', u'biological'), 2.580829885346244),
 ((u'model', u'based'), 2.597431852683375),
 ((u'based', u'model'), 2.597431852683375),
 ((u'genes', u'proteins'), 2.6063649774533815),
 ((u'network', u'functional'), 2.607485517347296),
 ((u'ab', u'genome'), 2.612538745073582),
 ((u'data', u'analysis'), 2.6243989002836425),
 ((u'systems', u'paper'), 2.62666907390847),
 ((u'simulations', u'systems'), 2.62666907390847),
 ((u'paper', u'systems'), 2.62666907390847),
 ((u'systems', u'approach'), 2.626669073908472),
 ((u'cellular', u'ab'), 2.630460653070843),
 ((u'approach', u'gene'), 2.6457274097241523),
 ((u'biology', u'growth'), 2.654193502543997),
 ((u'ab', u'scale'), 2.666986529095958),
 ((u'ab', u'mathematical'), 2.666986529095958),
 ((u'gene', u'network'), 2.682253285749267),
 ((u'model', u'experimental'), 2.708463165072118),
 ((u'systems', u'level'), 2.7262047474593825),
 ((u'chemical', u'systems'), 2.7262047474593825),
 ((u'reserved', u'biology'), 2.761108706460508),
 ((u'multicellular', u'systems'), 2.7786721673535215),
 ((u'process', u'ab'), 2.7824637465158943),
 ((u'effects', u'systems'), 2.8331199513758953),
 ((u'biology', u'discovery'), 2.8765859238804445),
 ((u'regulatory', u'genes'), 2.890157943453973),
 ((u'genes', u'high'), 2.890157943453973),
 ((u'tools', u'ab'), 2.907994628599754),
 ((u'including', u'ab'), 2.907994628599754),
 ((u'ab', u'dna'), 2.907994628599754),
 ((u'networks', u'one'), 2.937281855567999),
 ((u'systems', u'map'), 2.948597168795832),
 ((u'systems', u'diverse'), 2.948597168795832),
 ((u'role', u'systems'), 2.948597168795832),
 ((u'diverse', u'systems'), 2.948597168795832),
 ((u'metabolic', u'network'), 2.951439918564656),
 ((u'analysis', u'studies'), 2.9600019320680815),
 ((u'well', u'data'), 2.9614338875612134),
 ((u'gene', u'networks'), 2.967655504611516),
 ((u'protein', u'expression'), 2.971497570905912),
 ((u'study', u'biology'), 3.0021168059643006),
 ((u'networks', u'biochemical'), 3.0303912599594796),
 ((u'environmental', u'human'), 3.034906741081974),
 ((u'cells', u'ab'), 3.045498152349687),
 ((u'drug', u'ab'), 3.0454981523496887),
 ((u'ab', u'paper'), 3.0454981523496887),
 ((u'use', u'gene'), 3.060764909002998),
 ((u'analysis', u'using'), 3.0793008604404246),
 ((u'proteins', u'functional'), 3.085532814151941),
 ((u'human', u'proteins'), 3.085532814151941),
 ((u'light', u'genes'), 3.0917918046236235),
 ((u'important', u'approach'), 3.0917918046236235),
 ((u'expression', u'high'), 3.0917918046236235),
 ((u'model', u'many'), 3.104391841403258),
 ((u'gene', u'using'), 3.109674509483943),
 ((u'regulatory', u'network'), 3.1283176806487383),
 ((u'complex', u'networks'), 3.129926933510392),
 ((u'structure', u'networks'), 3.1299269335103954),
 ((u'gene', u'function'), 3.1347654904467763),
 ((u'systems', u'approaches'), 3.1412422467382264),
 ((u'complex', u'systems'), 3.1412422467382264),
 ((u'point', u'systems'), 3.141242246738228),
 ((u'experimental', u'data'), 3.1504677119512294),
 ((u'many', u'genes'), 3.1657923860674),
 ((u'genes', u'provide'), 3.1657923860674),
 ((u'environmental', u'disease'), 3.1724102648319086),
 ((u'genome', u'networks'), 3.182394353404531),
 ((u'used', u'study'), 3.1924480180684522),
 ((u'conditions', u'ab'), 3.1975012457947383),
 ((u'ab', u'recent'), 3.1975012457947383),
 ((u'emerging', u'systems'), 3.2116315746296245),
 ((u'metabolic', u'networks'), 3.2368421374269047),
 ((u'process', u'biological'), 3.243794898068673),
 ((u'study', u'protein'), 3.2490315464348214),
 ((u'quantitative', u'analysis'), 3.264856513596504),
 ((u'metabolism', u'model'), 3.264856513596504),
 ((u'high', u'data'), 3.2707619456689407),
 ((u'central', u'biology'), 3.2916234231592885),
 ((u'gene', u'protein'), 3.3237993148367906),
 ((u'using', u'approach'), 3.342335266274219),
 ((u'expression', u'using'), 3.342335266274219),
 ((u'pathway', u'model'), 3.352319354846843),
 ((u'systems', u'parallel'), 3.363634668074676),
 ((u'systems', u'cardiovascular'), 3.363634668074676),
 ((u'living', u'systems'), 3.363634668074676),
 ((u'central', u'systems'), 3.363634668074676),
 ((u'health', u'environmental'), 3.3788611422993338),
 ((u'one', u'gene'), 3.3826930038903615),
 ((u'understanding', u'regulatory'), 3.413719899510985),
 ((u'understanding', u'genetic'), 3.413719899510985),
 ((u'complex', u'biological'), 3.4364399760110693),
 ((u'model', u'organisms'), 3.4454287592383235),
 ((u'ab', u'major'), 3.4605356516285326),
 ((u'role', u'biology'), 3.4615484246016006),
 ((u'biology', u'map'), 3.4615484246016006),
 ((u'biology', u'applications'), 3.4615484246016006),
 ((u'understanding', u'human'), 3.477850236930699),
 ((u'understanding', u'processes'), 3.4778502369307027),
 ((u'networks', u'control'), 3.4778502369307027),
 ((u'health', u'based'), 3.4819546352634383),
 ((u'model', u'system'), 3.4943383597192703),
 ((u'problem', u'biological'), 3.5068293039024674),
 ((u'molecular', u'expression'), 3.5278909194302983),
 ((u'dynamic', u'systems'), 3.533559669516986),
 ((u'based', u'protein'), 3.5385381636298057),
 ((u'new', u'approaches'), 3.5449644327892393),
 ((u'level', u'model'), 3.5449644327892393),
 ((u'cell', u'structure'), 3.5449644327892393),
 ((u'cell', u'growth'), 3.5449644327892393),
 ((u'based', u'analysis'), 3.597431852683375),
 ((u'chemical', u'biological'), 3.6063649774533797),
 ((u'regulatory', u'proteins'), 3.6063649774533815),
 ((u'biochemical', u'networks'), 3.615353760680634),
 ((u'new', u'methods'), 3.6153537606806374),
 ((u'systems', u'essential'), 3.62666907390847),
 ((u'ab', u'problem'), 3.630460653070843),
 ((u'mathematical', u'model'), 3.6518796367057504),
 ((u'mathematical', u'analysis'), 3.6518796367057504),
 ((u'biology', u'approaches'), 3.654193502543995),
 ((u'point', u'biology'), 3.654193502543997),
 ((u'regulatory', u'pathways'), 3.658832397347517),
 ((u'using', u'approaches'), 3.6869834376616666),
 ((u'well', u'new'), 3.689354342124412),
 ((u'genetic', u'network'), 3.7132801813698926),
 ((u'effects', u'biological'), 3.7132801813698926),
 ((u'dynamics', u'biological'), 3.7132801813698926),
 ((u'emerging', u'biology'), 3.724582830435393),
 ((u'systems', u'cebs'), 3.7262047474593825),
 ((u'biochemical', u'network'), 3.744989041097231),
 ((u'biological', u'systems'), 3.7469633076261815),
 ((u'data', u'databases'), 3.749929782367502),
 ((u'new', u'research'), 3.767356854125685),
 ((u'model', u'parameters'), 3.767356854125685),
 ((u'regulatory', u'protein'), 3.7698637097362617),
 ((u'ab', u'years'), 3.7824637465158943),
 ((u'based', u'approaches'), 3.790076930625771),
 ((u'signaling', u'networks'), 3.821804638148061),
 ((u'biological', u'complexity'), 3.828757398789831),
 ((u'information', u'function'), 3.831373346996841),
 ((u'global', u'network'), 3.8445247146481467),
 ((u'quantitative', u'understanding'), 3.84981901431766),
 ((u'gene', u'regulatory'), 3.8591310478333494),
 ((u'models', u'used'), 3.867608327761186),
 ((u'biology', u'modular'), 3.8765859238804445),
 ((u'biology', u'cardiovascular'), 3.8765859238804445),
 ((u'new', u'experimental'), 3.878388166514428),
 ((u'data', u'public'), 3.887433306117437),
 ((u'regulatory', u'genetic'), 3.890157943453973),
 ((u'human', u'understanding'), 3.892887736209545),
 ((u'analysis', u'tools'), 3.892887736209545),
 ((u'metabolic', u'pathways'), 3.896992134542282),
 ((u'reaction', u'networks'), 3.937281855567999),
 ((u'organs', u'systems'), 3.9485971687958337),
 ((u'signaling', u'network'), 3.951439918564658),
 ((u'biological', u'processes'), 3.954288280873687),
 ((u'identify', u'genes'), 3.9542882808736888),
 ((u'genetic', u'interactions'), 3.9542882808736888),
 ((u'experimental', u'results'), 3.971497570905912),
 ((u'human', u'disease'), 3.985997140601027),
 ((u'ab', u'arrays'), 3.9889146239833178),
 ((u'genetic', u'networks'), 3.9986824002321413),
 ((u'expression', u'parameters'), 4.0303912599594796),
 ((u'environment', u'cell'), 4.0303912599594796),
 ((u'cell', u'environment'), 4.0303912599594796),
 ((u'analysis', u'coli'), 4.0303912599594796),
 ((u'human', u'environmental'), 4.034906741081974),
 ((u'patterns', u'gene'), 4.060764909003),
 ((u'genomics', u'approaches'), 4.085532814151941),
 ((u'individual', u'genes'), 4.0917918046236235),
 ((u'response', u'provide'), 4.104391841403256),
 ((u'two', u'experimental'), 4.123500664350962),
 ((u'new', u'science'), 4.129926933510395),
 ((u'human', u'genome'), 4.138000234046077),
 ((u'using', u'high'), 4.1407014051045685),
 ((u'field', u'systems'), 4.141242246738228),
 ((u'cell', u'regulatory'), 4.150685493677191),
 ((u'silico', u'analysis'), 4.182394353404529),
 ((u'understand', u'functional'), 4.192448018068454),
 ((u'network', u'behavior'), 4.192448018068454),
 ((u'metabolic', u'control'), 4.192448018068454),
 ((u'integration', u'data'), 4.209361401004797),
 ((u'systems', u'humans'), 4.2116315746296245),
 ((u'results', u'studies'), 4.223036337901876),
 ((u'used', u'properties'), 4.2368421374269065),
 ((u'scale', u'networks'), 4.2368421374269065),
 ((u'field', u'biology'), 4.239156003265151),
 ((u'understanding', u'function'), 4.2743168428455665),
 ((u'genome', u'organisms'), 4.275503757796013),
 ((u'mathematical', u'models'), 4.282645827040032),
 ((u'patterns', u'expression'), 4.293425665793274),
 ((u'changes', u'expression'), 4.293425665793274),
 ((u'genomics', u'research'), 4.307925235488389),
 ((u'network', u'simulations'), 4.329951541818389),
 ((u'disease', u'specific'), 4.329951541818389),
 ((u'structural', u'networks'), 4.352319354846843),
 ((u'reaction', u'model'), 4.352319354846843),
 ((u'starting', u'systems'), 4.363634668074676),
 ((u'platform', u'systems'), 4.363634668074676),
 ((u'advent', u'systems'), 4.363634668074676),
 ((u'values', u'ab'), 4.36742624723705),
 ((u'structure', u'based'), 4.375039431346925),
 ((u'computational', u'modeling'), 4.375039431346925),
 ((u'single', u'gene'), 4.38269300389036),
 ((u'identifying', u'gene'), 4.3826930038903615),
 ((u'biological', u'reaction'), 4.413719899510985),
 ((u'regulatory', u'networks'), 4.413719899510987),
 ((u'protein', u'interactions'), 4.418956547877132),
 ((u'modular', u'biology'), 4.461548424601599),
 ((u'define', u'metabolic'), 4.481954635263438),
 ((u'development', u'methods'), 4.499876543260699),
 ((u'human', u'studies'), 4.500570313430785),
 ((u'insight', u'biological'), 4.506829303902467),
 ((u'gene', u'regulation'), 4.508223885974219),
 ((u'metabolic', u'reactions'), 4.536402419285812),
 ((u'potential', u'health'), 4.536402419285814),
 ((u'death', u'genes'), 4.539250781594843),
 ((u'human', u'risk'), 4.570959641322183),
 ((u'point', u'gene'), 4.575338081832758),
 ((u'protein', u'levels'), 4.592985947652183),
 ((u'experimental', u'techniques'), 4.592985947652183),
 ((u'systems', u'biology'), 4.602146754774399),
 ((u'identification', u'genes'), 4.6063649774533815),
 ((u'high', u'level'), 4.6063649774533815),
 ((u'groups', u'genes'), 4.6063649774533815),
 ((u'genes', u'groups'), 4.6063649774533815),
 ((u'biological', u'cebs'), 4.6063649774533815),
 ((u'understanding', u'components'), 4.615353760680637),
 ((u'environmental', u'however'), 4.61986924180313),
 ((u'complex', u'organisms'), 4.63807383718072),
 ((u'complex', u'biochemical'), 4.63807383718072),
 ((u'health', u'disease'), 4.651879636705749),
 ((u'metabolic', u'engineering'), 4.65187963670575),
 ((u'development', u'novel'), 4.65187963670575),
 ((u'genome', u'metabolic'), 4.704347056599886),
 ((u'genomic', u'methods'), 4.708463165072118),
 ((u'important', u'understand'), 4.713280181369893),
 ((u'expression', u'regulation'), 4.7408846427644935),
 ((u'cellular', u'processes'), 4.7408846427644935),
 ((u'environmental', u'toxicology'), 4.757372765553065),
 ((u'components', u'system'), 4.757372765553065),
 ((u'induced', u'cell'), 4.767356854125683),
 ((u'new', u'bioinformatics'), 4.767356854125687),
 ((u'constraint', u'analysis'), 4.767356854125687),
 ((u'complete', u'model'), 4.767356854125687),
 ((u'human', u'health'), 4.7774105187896065),
 ((u'used', u'including'), 4.77741051878961),
 ((u'understand', u'processes'), 4.77741051878961),
 ((u'processes', u'integrated'), 4.77741051878961),
 ((u'ab', u'issue'), 4.782463746515894),
 ((u'ab', u'advent'), 4.782463746515894),
 ((u'focused', u'gene'), 4.7977305031692055),
 ((u'powerful', u'approach'), 4.807998838623034),
 ((u'individual', u'proteins'), 4.807998838623034),
 ((u'approach', u'toward'), 4.807998838623034),
 ((u'high', u'microarray'), 4.828757398789831),
 ((u'generate', u'biological'), 4.828757398789831),
 ((u'biological', u'second'), 4.828757398789831),
 ((u'metabolic', u'profiling'), 4.844524714648147),
 ((u'complex', u'reactions'), 4.844524714648147),
 ((u'public', u'based'), 4.860466258517167),
 ((u'genomic', u'research'), 4.860466258517167),
 ((u'platform', u'biology'), 4.8765859238804445),
 ((u'paradigm', u'biology'), 4.8765859238804445),
 ((u'circadian', u'biology'), 4.8765859238804445),
 ((u'biology', u'medicine'), 4.8765859238804445),
 ((u'advent', u'biology'), 4.8765859238804445),
 ((u'quality', u'data'), 4.887433306117437),
 ((u'functional', u'relationships'), 4.892887736209545),
 ((u'functional', u'food'), 4.892887736209545),
 ((u'national', u'environmental'), 4.909375858998114),
 ((u'theme', u'systems'), 4.948597168795833),
 ((u'systems', u'exponential'), 4.948597168795833),
 ((u'knowledge', u'discovery'), 4.960001932068082),
 ((u'environmental', u'health'), 4.963823643020488),
 ((u'experimental', u'design'), 4.97149757090591),
 ((u'may', u'provide'), 4.988914623983321),
 ((u'simulation', u'pathways'), 5.012469351962218),
 ((u'reductionist', u'approach'), 5.03039125995948),
 ((u'propose', u'approach'), 5.03039125995948),
 ((u'parallel', u'expression'), 5.03039125995948),
 ((u'model', u'fluctuations'), 5.03039125995948),
 ((u'expression', u'thousands'), 5.03039125995948),
 ((u'expression', u'profiles'), 5.03039125995948),
 ((u'molecular', u'level'), 5.0424640922600545),
 ((u'multiple', u'pathways'), 5.066917135984593),
 ((u'metabolic', u'computed'), 5.066917135984594),
 ((u'identifying', u'environmental'), 5.079300860440427),
 ((u'environmental', u'sciences'), 5.079300860440427),
 ((u'proteomic', u'approaches'), 5.085532814151942),
 ((u'human', u'population'), 5.085532814151942),
 ((u'differentially', u'genes'), 5.0917918046236235),
 ((u'sensitivity', u'analysis'), 5.129926933510394),
 ((u'map', u'proteins'), 5.129926933510395),
 ((u'use', u'proteomic'), 5.155922142043338),
 ((u'proteomic', u'methods'), 5.155922142043338),
 ((u'advances', u'genomics'), 5.155922142043338),
 ((u'expressed', u'genes'), 5.191327478174536),
 ((u'developmental', u'networks'), 5.200316261401792),
 ((u'data', u'obtained'), 5.209361401004797),
 ((u'areas', u'data'), 5.2093614010047995),
 ((u'signal', u'cells'), 5.223036337901877),
 ((u'reaction', u'network'), 5.236842137426905),
 ((u'health', u'sciences'), 5.2368421374269065),
 ((u'time', u'parameters'), 5.264856513596504),
 ((u'using', u'nmr'), 5.271945938382823),
 ((u'associated', u'information'), 5.271945938382823),
 ((u'light', u'carbon'), 5.293425665793275),
 ((u'expression', u'sets'), 5.293425665793275),
 ((u'expression', u'conventional'), 5.293425665793275),
 ((u'advances', u'methods'), 5.293425665793275),
 ((u'genomics', u'proteomics'), 5.30792523548839),
 ((u'network', u'units'), 5.329951541818388),
 ((u'cv', u'network'), 5.329951541818388),
 ((u'irradiation', u'cell'), 5.352319354846841),
 ((u'predicted', u'model'), 5.352319354846843),
 ((u'model', u'aspects'), 5.352319354846843),
 ((u'alternative', u'model'), 5.352319354846843),
 ((u'rights', u'ab'), 5.367426247237049),
 ((u'reserved', u'ab'), 5.367426247237049),
 ((u'ab', u'motivation'), 5.367426247237051),
 ((u'gene', u'expression'), 5.38269300389036),
 ((u'programs', u'gene'), 5.3826930038903615),
 ((u'computational', u'models'), 5.398123044459968),
 ((u'thousands', u'genes'), 5.413719899510985),
 ((u'genes', u'involved'), 5.413719899510985),
 ((u'understand', u'signal'), 5.429487215369303),
 ((u'diet', u'health'), 5.429487215369303),
 ((u'one', u'time'), 5.434781515038813),
 ((u'transduction', u'cells'), 5.445428759238324),
 ((u'transcription', u'networks'), 5.451855028397755),
 ((u'theme', u'biology'), 5.4615484246016015),
 ((u'biology', u'exponential'), 5.4615484246016015),
 ((u'biology', u'despite'), 5.4615484246016015),
 ((u'integrating', u'data'), 5.472395806838591),
 ((u'physiological', u'processes'), 5.477850236930702),
 ((u'environmental', u'conditions'), 5.4943383597192685),
 ((u'using', u'throughput'), 5.49433835971927),
 ((u'obtained', u'using'), 5.49433835971927),
 ((u'information', u'relevant'), 5.49433835971927),
 ((u'public', u'health'), 5.499876543260699),
 ((u'light', u'signaling'), 5.499876543260699),
 ((u'renal', u'function'), 5.5194293406821),
 ((u'scale', u'metabolic'), 5.536402419285812),
 ((u'mathematical', u'dynamics'), 5.536402419285815),
 ((u'organisms', u'including'), 5.570959641322184),
 ((u'known', u'pathways'), 5.597431852683375),
 ((u'programs', u'expression'), 5.615353760680637),
 ((u'integrative', u'approach'), 5.615353760680637),
 ((u'differential', u'expression'), 5.615353760680637),
 ((u'coordinated', u'expression'), 5.615353760680637),
 ((u'comprehensive', u'understanding'), 5.615353760680637),
 ((u'data', u'mining'), 5.624398900283643),
 ((u'national', u'health'), 5.651879636705749),
 ((u'mathematical', u'modeling'), 5.651879636705749),
 ((u'understand', u'transduction'), 5.65187963670575),
 ((u'novel', u'techniques'), 5.65187963670575),
 ((u'network', u'architecture'), 5.65187963670575),
 ((u'metabolic', u'toxicological'), 5.65187963670575),
 ((u'metabolic', u'fingerprint'), 5.65187963670575),
 ((u'integration', u'arrays'), 5.65187963670575),
 ((u'integrated', u'functions'), 5.65187963670575),
 ((u'structure', u'behavior'), 5.670495314873097),
 ((u'reaction', u'many'), 5.689354342124413),
 ((u'light', u'plants'), 5.708463165072118),
 ((u'thousands', u'protein'), 5.708463165072119),
 ((u'major', u'changes'), 5.708463165072119),
 ((u'individual', u'biochemical'), 5.708463165072119),
 ((u'heart', u'disease'), 5.708463165072119),
 ((u'exposure', u'disease'), 5.708463165072119),
 ((u'coli', u'cells'), 5.708463165072119),
 ((u'biochemical', u'simulations'), 5.708463165072119),
 ((u'toward', u'understanding'), 5.71488943423155),
 ((u'sensory', u'networks'), 5.71488943423155),
 ((u'problems', u'environmental'), 5.757372765553065),
 ((u'information', u'processing'), 5.757372765553065),
 ((u'component', u'analysis'), 5.767356854125686),
 ((u'cell', u'differentiation'), 5.767356854125686),
 ((u'specific', u'metabolites'), 5.77741051878961),
 ((u'signaling', u'set'), 5.77741051878961),
 ((u'identify', u'specific'), 5.77741051878961),
 ((u'gene', u'products'), 5.7977305031692055),
 ((u'toxicity', u'chemical'), 5.807998838623033),
 ((u'science', u'toxicology'), 5.807998838623033),
 ((u'results', u'presented'), 5.807998838623033),
 ((u'high', u'temporal'), 5.828757398789829),
 ((u'genes', u'together'), 5.828757398789829),
 ((u'antioxidant', u'pathways'), 5.860466258517167),
 ((u'activity', u'function'), 5.881999420066809),
 ((u'data', u'sets'), 5.887433306117437),
 ((u'comparative', u'functional'), 5.892887736209544),
 ((u'functional', u'foods'), 5.892887736209546),
 ((u'network', u'motifs'), 5.914914042539542),
 ((u'individual', u'reactions'), 5.914914042539545),
 ((u'health', u'risk'), 5.914914042539545),
 ((u'health', u'assessment'), 5.914914042539545),
 ((u'dynamics', u'simulations'), 5.914914042539545),
 ((u'transduction', u'pathways'), 5.919359947570735),
 ((u'linkage', u'analysis'), 5.937281855567997),
 ((u'small', u'number'), 5.937281855567999),
 ((u'greater', u'understanding'), 5.937281855567999),
 ((u'diverse', u'properties'), 5.937281855567999),
 ((u'signal', u'pathways'), 5.96000193206808),
 ((u'two', u'groups'), 5.960001932068082),
 ((u'simulation', u'signal'), 5.960001932068082),
 ((u'presented', u'two'), 5.960001932068082),
 ((u'science', u'rights'), 6.014449716090457),
 ((u'functional', u'genomics'), 6.018418618293403),
 ((u'bioinformatics', u'methods'), 6.030391259959479),
 ((u'cell', u'cycle'), 6.03039125995948),
 ((u'groups', u'time'), 6.042464092260055),
 ((u'large', u'number'), 6.062812737651856),
 ((u'genome', u'scale'), 6.066917135984593),
 ((u'silico', u'integrated'), 6.066917135984594),
 ((u'reconstructed', u'metabolic'), 6.066917135984594),
 ((u'metabolic', u'eukaryotic'), 6.066917135984594),
 ((u'biodegradation', u'network'), 6.066917135984594),
 ((u'institute', u'environmental'), 6.079300860440427),
 ((u'environmental', u'factors'), 6.079300860440427),
 ((u'environmental', u'agents'), 6.079300860440427),
 ((u'high', u'quality'), 6.0917918046236235),
 ((u'cardiovascular', u'function'), 6.104391841403257),
 ((u'coli', u'metabolism'), 6.112853420151451),
 ((u'functional', u'units'), 6.155922142043338),
 ((u'dna', u'technology'), 6.155922142043338),
 ((u'proteomic', u'patterns'), 6.155922142043339),
 ((u'modeling', u'process'), 6.18239435340453),
 ((u'based', u'upon'), 6.18239435340453),
 ((u'heterogeneous', u'data'), 6.2093614010047995),
 ((u'data', u'still'), 6.2093614010048),
 ((u'analyzing', u'data'), 6.2093614010048),
 ((u'cell', u'death'), 6.214815831096905),
 ((u'whole', u'studies'), 6.223036337901877),
 ((u'technology', u'powerful'), 6.223036337901877),
 ((u'genomic', u'era'), 6.223036337901877),
 ((u'metabolic', u'fingerprinting'), 6.236842137426905),
 ((u'used', u'generate'), 6.236842137426907),
 ((u'institute', u'health'), 6.236842137426907),
 ((u'central', u'metabolism'), 6.264856513596501),
 ((u'time', u'dose'), 6.264856513596504),
 ((u'death', u'regulatory'), 6.276216375761049),
 ((u'protein', u'complexes'), 6.293425665793273),
 ((u'major', u'problem'), 6.293425665793273),
 ((u'statistical', u'methods'), 6.293425665793274),
 ((u'methods', u'analyze'), 6.293425665793274),
 ((u'experimental', u'thus'), 6.293425665793274),
 ((u'experimental', u'observations'), 6.293425665793274),
 ((u'consistent', u'experimental'), 6.293425665793274),
 ((u'cellular', u'tissue'), 6.293425665793274),
 ((u'cellular', u'tensegrity'), 6.293425665793274),
 ((u'blue', u'light'), 6.293425665793274),
 ((u'carbon', u'light'), 6.293425665793275),
 ((u'study', u'circadian'), 6.30792523548839),
 ((u'human', u'populations'), 6.30792523548839),
 ((u'population', u'studies'), 6.322572011452789),
 ((u'modelling', u'signal'), 6.32257201145279),
 ((u'growth', u'state'), 6.32257201145279),
 ((u'biochemical', u'reactions'), 6.329951541818388),
 ((u'virtual', u'cell'), 6.35231935484684),
 ((u'model', u'checking'), 6.35231935484684),
 ((u'mediate', u'cell'), 6.35231935484684),
 ((u'symbolic', u'model'), 6.352319354846843),
 ((u'response', u'times'), 6.352319354846843),
 ((u'neural', u'networks'), 6.352319354846843),
 ((u'model', u'checker'), 6.352319354846843),
 ((u'crude', u'cell'), 6.352319354846843),
 ((u'cell', u'machinery'), 6.352319354846843),
 ((u'cell', u'lysates'), 6.352319354846843),
 ((u'benefits', u'cell'), 6.352319354846843),
 ((u'dna', u'arrays'), 6.362373019510764),
 ((u'provide', u'means'), 6.367426247237051),
 ((u'subset', u'genes'), 6.413719899510986),
 ((u'genetic', u'variation'), 6.413719899510986),
 ((u'collections', u'genes'), 6.413719899510986),
 ((u'understand', u'mechanisms'), 6.429487215369303),
 ((u'chemical', u'effects'), 6.429487215369303),
 ((u'systemic', u'biochemical'), 6.445428759238324),
 ((u'red', u'carbon'), 6.445428759238324),
 ((u'recent', u'advances'), 6.445428759238324),
 ((u'living', u'cells'), 6.445428759238324),
 ((u'carbon', u'red'), 6.445428759238324),
 ((u'comparative', u'genomics'), 6.4778502369307),
 ((u'factors', u'including'), 6.477850236930701),
 ((u'sources', u'information'), 6.49433835971927),
 ((u'elsevier', u'science'), 6.544964432789237),
 ((u'transcripts', u'proteins'), 6.544964432789239),
 ((u'transcription', u'cascades'), 6.544964432789239),
 ((u'omic', u'approaches'), 6.544964432789239),
 ((u'linked', u'diseases'), 6.544964432789239),
 ((u'linkage', u'studies'), 6.544964432789239),
 ((u'groups', u'single'), 6.544964432789239),
 ((u'functions', u'first'), 6.544964432789239),
 ((u'diet', u'diseases'), 6.544964432789239),
 ((u'animal', u'studies'), 6.544964432789239),
 ((u'genomic', u'proteomic'), 6.570959641322184),
 ((u'multicellular', u'silico'), 6.597431852683374),
 ((u'genome', u'project'), 6.597431852683374),
 ((u'genome', u'sequences'), 6.597431852683375),
 ((u'constraint', u'based'), 6.597431852683375),
 ((u'sciences', u'elsevier'), 6.615353760680635),
 ((u'food', u'plants'), 6.615353760680635),
 ((u'antioxidant', u'response'), 6.615353760680635),
 ((u'cv', u'properties'), 6.615353760680637),
 ((u'approach', u'characterizing'), 6.615353760680637),
 ((u'systemic', u'reactions'), 6.65187963670575),
 ((u'specific', u'profiles'), 6.65187963670575),
 ((u'signaling', u'molecules'), 6.65187963670575),
 ((u'higher', u'levels'), 6.65187963670575),
 ((u'health', u'risks'), 6.65187963670575),
 ((u'decisions', u'health'), 6.65187963670575),
 ((u'tools', u'biologists'), 6.670495314873097),
 ((u'many', u'aspects'), 6.689354342124413),
 ((u'organisms', u'humans'), 6.708463165072119),
 ((u'mammalian', u'cells'), 6.708463165072119),
 ((u'green', u'protein'), 6.708463165072119),
 ((u'design', u'parameter'), 6.708463165072119),
 ((u'linked', u'pathway'), 6.71488943423155),
 ((u'complex', u'interacting'), 6.737609510731635),
 ((u'public', u'databases'), 6.7408846427644935),
 ((u'sequencing', u'genome'), 6.767356854125684),
 ((u'red', u'light'), 6.767356854125684),
 ((u'microarray', u'technology'), 6.767356854125684),
 ((u'sciences', u'national'), 6.767356854125687),
 ((u'related', u'diseases'), 6.767356854125687),
 ((u'reductionist', u'functions'), 6.767356854125687),
 ((u'post', u'research'), 6.767356854125687),
 ((u'dynamic', u'simulation'), 6.767356854125687),
 ((u'dose', u'parameters'), 6.767356854125687),
 ((u'comparison', u'two'), 6.767356854125687),
 ((u'large', u'scale'), 6.77741051878961),
 ((u'various', u'species'), 6.807998838623032),
 ((u'transcription', u'factor'), 6.807998838623032),
 ((u'signal', u'mammalian'), 6.807998838623032),
 ((u'mechanisms', u'toxicity'), 6.807998838623032),
 ((u'growth', u'receptor'), 6.807998838623032),
 ((u'emerging', u'field'), 6.807998838623032),
 ((u'environmental', u'exposures'), 6.81626645460663),
 ((u'high', u'platforms'), 6.828757398789829),
 ((u'dose', u'time'), 6.849819014317658),
 ((u'multicellular', u'organisms'), 6.860466258517167),
 ((u'induced', u'death'), 6.892887736209544),
 ((u'sequencing', u'human'), 6.892887736209546),
 ((u'phenolic', u'metabolites'), 6.892887736209546),
 ((u'identify', u'characterize'), 6.892887736209546),
 ((u'human', u'susceptibility'), 6.892887736209546),
 ((u'functional', u'dependencies'), 6.892887736209546),
 ((u'dna', u'microarray'), 6.892887736209546),
 ((u'elsevier', u'rights'), 6.914914042539542),
 ((u'demonstrate', u'potential'), 6.914914042539545),
 ((u'analyze', u'dynamics'), 6.914914042539545),
 ((u'one', u'computer'), 6.937281855567999),
 ((u'new', u'discipline'), 6.937281855567999),
 ((u'allow', u'small'), 6.937281855567999),
 ((u'established', u'approaches'), 6.960001932068082),
 ((u'high', u'resolution'), 6.99868240023214),
 ((u'sequences', u'organisms'), 7.030391259959479),
 ((u'transduction', u'mammalian'), 7.03039125995948),
 ((u'phenolic', u'antioxidant'), 7.03039125995948),
 ((u'minimal', u'organisms'), 7.03039125995948),
 ((u'higher', u'order'), 7.03039125995948),
 ((u'far', u'light'), 7.03039125995948),
 ((u'diabetes', u'disease'), 7.03039125995948),
 ((u'conventional', u'parameters'), 7.03039125995948),
 ((u'contrast', u'carbon'), 7.03039125995948),
 ((u'biochemical', u'conversions'), 7.03039125995948),
 ((u'attenuated', u'light'), 7.03039125995948),
 ((u'environmental', u'pollutants'), 7.079300860440425),
 ((u'arabinose', u'system'), 7.079300860440427),
 ((u'together', u'provide'), 7.104391841403257),
 ((u'developmental', u'transcription'), 7.129926933510393),
 ((u'work', u'presented'), 7.1299269335103945),
 ((u'ultimately', u'allow'), 7.1299269335103945),
 ((u'sbml', u'level'), 7.1299269335103945),
 ((u'phytochemicals', u'food'), 7.1299269335103945),
 ((u'efforts', u'toward'), 7.1299269335103945),
 ((u'anaerobic', u'growth'), 7.1299269335103945),
 ((u'region', u'genome'), 7.18239435340453),
 ((u'proteomics', u'metabolomics'), 7.18239435340453),
 ((u'products', u'particular'), 7.18239435340453),
 ((u'pathways', u'theme'), 7.18239435340453),
 ((u'nutrition', u'research'), 7.18239435340453),
 ((u'modulating', u'pathways'), 7.18239435340453),
 ((u'availability', u'genome'), 7.18239435340453),
 ((u'light', u'induction'), 7.200316261401792),
 ((u'may', u'lead'), 7.2368421374269065),
 ((u'intracellular', u'signaling'), 7.2368421374269065),
 ((u'integrated', u'homeostasis'), 7.2368421374269065),
 ((u'functionality', u'levels'), 7.2368421374269065),
 ((u'protein', u'reporters'), 7.293425665793275),
 ((u'fluorescent', u'protein'), 7.293425665793275),
 ((u'presented', u'first'), 7.322572011452791),
 ((u'dietary', u'nutritional'), 7.322572011452791),
 ((u'shock', u'response'), 7.35231935484684),
 ((u'heat', u'response'), 7.35231935484684),
 ((u'single', u'constraints'), 7.352319354846842),
 ((u'reductionist', u'framework'), 7.352319354846842),
 ((u'produce', u'phenolic'), 7.352319354846842),
 ((u'novel', u'tool'), 7.352319354846842),
 ((u'map', u'cardiovascular'), 7.352319354846842),
 ((u'kinetic', u'parameters'), 7.352319354846842),
 ((u'integration', u'heterogeneous'), 7.352319354846842),
 ((u'female', u'rats'), 7.352319354846842),
 ((u'currently', u'available'), 7.352319354846842),
 ((u'cell', u'communications'), 7.352319354846842),
 ((u'phenolic', u'food'), 7.352319354846843),
 ((u'gene', u'group'), 7.3826930038903615),
 ((u'sensory', u'developmental'), 7.392961339344186),
 ((u'growth', u'factor'), 7.392961339344186),
 ((u'high', u'throughput'), 7.413719899510983),
 ((u'regulatory', u'machinery'), 7.413719899510986),
 ((u'equally', u'important'), 7.413719899510986),
 ((u'major', u'regulator'), 7.445428759238324),
 ((u'irradiation', u'death'), 7.4778502369307),
 ((u'produce', u'metabolites'), 7.477850236930702),
 ((u'omic', u'technologies'), 7.477850236930702),
 ((u'interactions', u'sugar'), 7.477850236930702),
 ((u'national', u'toxicogenomics'), 7.544964432789237),
 ((u'traditional', u'approaches'), 7.5449644327892385),
 ((u'metabolite', u'profiling'), 7.5449644327892385),
 ((u'long', u'cascades'), 7.5449644327892385),
 ((u'knowledge', u'base'), 7.5449644327892385),
 ((u'global', u'biodegradation'), 7.5449644327892385),
 ((u'science', u'ltd'), 7.544964432789239),
 ((u'post', u'genomic'), 7.615353760680635),
 ((u'grade', u'plants'), 7.615353760680635),
 ((u'cells', u'organs'), 7.615353760680635),
 ((u'relatively', u'part'), 7.6153537606806365),
 ((u'practice', u'toxicology'), 7.6153537606806365),
 ((u'patterns', u'ic'), 7.6153537606806365),
 ((u'deduction', u'cellular'), 7.6153537606806365),
 ((u'concentration', u'rate'), 7.6153537606806365),
 ((u'phosphate', u'pathway'), 7.615353760680637),
 ((u'light', u'grown'), 7.615353760680637),
 ((u'potential', u'risks'), 7.65187963670575),
 ((u'computation', u'integrated'), 7.65187963670575),
 ((u'cdna', u'arrays'), 7.65187963670575),
 ((u'adverse', u'effects'), 7.65187963670575),
 ((u'including', u'humans'), 7.7408846427644935),
 ((u'red', u'far'), 7.767356854125684),
 ((u'published', u'elsevier'), 7.767356854125684),
 ((u'drugs', u'chemicals'), 7.767356854125684),
 ((u'two', u'hybrid'), 7.767356854125687),
 ((u'single', u'polymorphisms'), 7.767356854125687),
 ((u'revised', u'modeling'), 7.767356854125687),
 ((u'red', u'induction'), 7.767356854125687),
 ((u'national', u'institute'), 7.767356854125687),
 ((u'makes', u'possible'), 7.767356854125687),
 ((u'computation', u'functions'), 7.767356854125687),
 ((u'chemicals', u'agents'), 7.767356854125687),
 ((u'attempts', u'central'), 7.767356854125687),
 ((u'assessments', u'chemicals'), 7.767356854125687),
 ((u'drug', u'discovery'), 7.837746182017084),
 ((u'purine', u'metabolism'), 7.849819014317659),
 ((u'carbon', u'blue'), 7.878388166514429),
 ((u'sensory', u'transcription'), 7.907534512173944),
 ((u'sciences', u'published'), 7.9372818555679965),
 ((u'self', u'organization'), 7.937281855567999),
 ((u'pentose', u'pathway'), 7.937281855567999),
 ((u'male', u'female'), 7.937281855567999),
 ((u'institute', u'sciences'), 7.937281855567999),
 ((u'current', u'status'), 7.937281855567999),
 ((u'analytical', u'framework'), 7.937281855567999),
 ((u'temporal', u'logic'), 8.03039125995948),
 ((u'recently', u'become'), 8.03039125995948),
 ((u'qtl', u'traits'), 8.03039125995948),
 ((u'grown', u'plants'), 8.03039125995948),
 ((u'elsevier', u'sas'), 8.03039125995948),
 ((u'elsevier', u'ltd'), 8.03039125995948),
 ((u'effectively', u'drug'), 8.03039125995948),
 ((u'design', u'reuse'), 8.03039125995948),
 ((u'carbon', u'attenuated'), 8.03039125995948),
 ((u'blue', u'red'), 8.03039125995948),
 ((u'signal', u'transduction'), 8.129926933510394),
 ((u'phenolic', u'phytochemicals'), 8.129926933510394),
 ((u'epidermal', u'growth'), 8.129926933510394),
 ((u'diet', u'related'), 8.129926933510394),
 ((u'bp', u'qtl'), 8.129926933510394),
 ((u'toward', u'greater'), 8.129926933510395),
 ((u'recombinant', u'proteins'), 8.129926933510395),
 ((u'phytochemicals', u'grade'), 8.129926933510395),
 ((u'nmr', u'fingerprint'), 8.129926933510395),
 ((u'exponential', u'growth'), 8.129926933510395),
 ((u'end', u'point'), 8.129926933510395),
 ((u'effect', u'dietary'), 8.129926933510395),
 ((u'comparison', u'groups'), 8.129926933510395),
 ((u'anaerobic', u'state'), 8.129926933510395),
 ((u'renal', u'nacl'), 8.18239435340453),
 ((u'proteomics', u'metabonomics'), 8.18239435340453),
 ((u'define', u'relationship'), 8.18239435340453),
 ((u'chemicals', u'stressors'), 8.18239435340453),
 ((u'bioinformatics', u'software'), 8.18239435340453),
 ((u'risk', u'assessments'), 8.200316261401792),
 ((u'sas', u'rights'), 8.236842137426905),
 ((u'sas', u'reserved'), 8.236842137426905),
 ((u'rights', u'reserved'), 8.236842137426905),
 ((u'ltd', u'rights'), 8.236842137426905),
 ((u'ltd', u'reserved'), 8.236842137426905),
 ((u'oxygen', u'levels'), 8.236842137426907),
 ((u'may', u'determinants'), 8.236842137426907),
 ((u'brownian', u'dynamics'), 8.236842137426907),
 ((u'risk', u'assessment'), 8.293425665793276),
 ((u'factor', u'receptor'), 8.293425665793276),
 ((u'temporal', u'resolution'), 8.352319354846841),
 ((u'small', u'molecule'), 8.352319354846841),
 ((u'single', u'nucleotide'), 8.352319354846841),
 ((u'sensitive', u'delay'), 8.352319354846841),
 ((u'revised', u'process'), 8.352319354846841),
 ((u'posttranslational', u'phosphorylation'), 8.352319354846841),
 ((u'physiological', u'evolutionary'), 8.352319354846841),
 ((u'peroxidase', u'reaction'), 8.352319354846841),
 ((u'oxidation', u'diseases'), 8.352319354846841),
 ((u'organization', u'corresponding'), 8.352319354846841),
 ((u'minimal', u'multicellular'), 8.352319354846841),
 ((u'increase', u'temperature'), 8.352319354846841),
 ((u'functionality', u'higher'), 8.352319354846841),
 ((u'computed', u'consequences'), 8.352319354846841),
 ((u'analytical', u'reductionist'), 8.352319354846841),
 ((u'academic', u'sciences'), 8.352319354846841),
 ((u'much', u'effort'), 8.392961339344186),
 ((u'linked', u'phosphate'), 8.392961339344186),
 ((u'differentially', u'expressed'), 8.392961339344186),
 ((u'reversible', u'responses'), 8.477850236930703),
 ((u'proteomic', u'metabonomic'), 8.477850236930703),
 ((u'death', u'machinery'), 8.477850236930703),
 ((u'irradiation', u'induced'), 8.50432244829189),
 ((u'far', u'red'), 8.50432244829189),
 ((u'yet', u'rapid'), 8.54496443278924),
 ((u'starting', u'point'), 8.54496443278924),
 ((u'sensitivity', u'coefficients'), 8.54496443278924),
 ((u'manage', u'linked'), 8.54496443278924),
 ((u'era', u'brought'), 8.54496443278924),
 ((u'best', u'toward'), 8.54496443278924),
 ((u'rate', u'constants'), 8.615353760680634),
 ((u'escherichia', u'coli'), 8.615353760680634),
 ((u'transcriptome', u'proteome'), 8.615353760680637),
 ((u'sub', u'cellular'), 8.615353760680637),
 ((u'step', u'stimuli'), 8.615353760680637),
 ((u'spectral', u'patterns'), 8.615353760680637),
 ((u'public', u'domain'), 8.615353760680637),
 ((u'optimization', u'problem'), 8.615353760680637),
 ((u'modifications', u'phosphorylation'), 8.615353760680637),
 ((u'metabonomic', u'patterns'), 8.615353760680637),
 ((u'like', u'stimuli'), 8.615353760680637),
 ((u'integrating', u'heterogeneous'), 8.615353760680637),
 ((u'help', u'explain'), 8.615353760680637),
 ((u'criterion', u'problem'), 8.615353760680637),
 ((u'carbon', u'potentiated'), 8.615353760680637),
 ((u'brownian', u'simulations'), 8.615353760680637),
 ((u'affecting', u'public'), 8.615353760680637),
 ((u'proline', u'linked'), 8.71488943423155),
 ((u'postgenomic', u'era'), 8.71488943423155),
 ((u'linked', u'pentose'), 8.71488943423155),
 ((u'national', u'center'), 8.767356854125683),
 ((u'mediate', u'induced'), 8.767356854125683),
 ((u'transcriptomics', u'proteomics'), 8.767356854125687),
 ((u'transcriptional', u'posttranscriptional'), 8.767356854125687),
 ((u'throughput', u'platforms'), 8.767356854125687),
 ((u'reference', u'database'), 8.767356854125687),
 ((u'past', u'years'), 8.767356854125687),
 ((u'next', u'years'), 8.767356854125687),
 ((u'multicellular', u'pharmacodynamics'), 8.767356854125687),
 ((u'mice', u'treated'), 8.767356854125687),
 ((u'inhibits', u'renal'), 8.767356854125687),
 ((u'common', u'polymorphisms'), 8.767356854125687),
 ((u'activated', u'irradiation'), 8.767356854125687),
 ((u'academic', u'published'), 8.767356854125687),
 ((u'play', u'role'), 8.937281855567996),
 ((u'des', u'sciences'), 8.937281855567996),
 ((u'broad', u'range'), 8.937281855567996),
 ((u'step', u'like'), 8.937281855568),
 ((u'oxidase', u'reaction'), 8.937281855568),
 ((u'organization', u'presumably'), 8.937281855568),
 ((u'meet', u'needs'), 8.937281855568),
 ((u'ic', u'affected'), 8.937281855568),
 ((u'food', u'grade'), 8.937281855568),
 ((u'female', u'progeny'), 8.937281855568),
 ((u'draft', u'map'), 8.937281855568),
 ((u'association', u'superoxide'), 8.937281855568),
 ((u'affected', u'affected'), 8.937281855568),
 ((u'subsets', u'mrnas'), 9.03039125995948),
 ((u'parameter', u'estimation'), 9.03039125995948),
 ((u'ncct', u'inhibition'), 9.03039125995948),
 ((u'motifs', u'amino'), 9.03039125995948),
 ((u'epidermal', u'receptor'), 9.03039125995948),
 ((u'blood', u'pressure'), 9.03039125995948),
 ((u'attenuated', u'blue'), 9.03039125995948),
 ((u'mass', u'spectrometry'), 9.129926933510394),
 ((u'correlated', u'bp'), 9.129926933510394),
 ((u'center', u'toxicogenomics'), 9.129926933510394),
 ((u'steady', u'state'), 9.129926933510395),
 ((u'rapid', u'reversible'), 9.129926933510395),
 ((u'phytochemicals', u'targeted'), 9.129926933510395),
 ((u'oxidation', u'linked'), 9.129926933510395),
 ((u'long', u'term'), 9.129926933510395),
 ((u'bp', u'seven'), 9.129926933510395),
 ((u'feedforward', u'loop'), 9.293425665793276),
 ((u'published', u'sas'), 9.352319354846841),
 ((u'posttranslational', u'modifications'), 9.352319354846841),
 ((u'mediate', u'irradiation'), 9.352319354846841),
 ((u'des', u'published'), 9.352319354846841),
 ((u'transcript', u'metabolite'), 9.352319354846843),
 ((u'sign', u'delay'), 9.352319354846843),
 ((u'positive', u'feedback'), 9.352319354846843),
 ((u'phosphorylation', u'sites'), 9.352319354846843),
 ((u'mutant', u'mice'), 9.352319354846843),
 ((u'multiprotein', u'complexes'), 9.352319354846843),
 ((u'male', u'pigs'), 9.352319354846843),
 ((u'male', u'guinea'), 9.352319354846843),
 ((u'made', u'past'), 9.352319354846843),
 ((u'eukaryotic', u'cerevisiae'), 9.352319354846843),
 ((u'dependent', u'concentration'), 9.352319354846843),
 ((u'consomic', u'rats'), 9.352319354846843),
 ((u'availability', u'sequences'), 9.352319354846843),
 ((u'aerobic', u'anaerobic'), 9.352319354846843),
 ((u'proline', u'pentose'), 9.522244356289153),
 ((u'heat', u'shock'), 9.615353760680634),
 ((u'epidermal', u'factor'), 9.615353760680634),
 ((u'white', u'blue'), 9.615353760680637),
 ((u'relieve', u'inhibition'), 9.615353760680637),
 ((u'placed', u'context'), 9.615353760680637),
 ((u'pentose', u'phosphate'), 9.615353760680637),
 ((u'modulating', u'antioxidant'), 9.615353760680637),
 ((u'extracting', u'integrating'), 9.615353760680637),
 ((u'cv', u'ill'), 9.615353760680637),
 ((u'consortium', u'paris'), 9.615353760680637),
 ((u'coherent', u'loop'), 9.615353760680637),
 ((u'coherent', u'feedforward'), 9.615353760680637),
 ((u'cerebral', u'ischemia'), 9.615353760680637),
 ((u'arterial', u'pressure'), 9.615353760680637),
 ((u'strong', u'correlations'), 9.767356854125687),
 ((u'mo', u'treated'), 9.767356854125687),
 ((u'mo', u'age'), 9.767356854125687),
 ((u'making', u'decisions'), 9.767356854125687),
 ((u'age', u'treated'), 9.767356854125687),
 ((u'activated', u'mediate'), 9.767356854125687),
 ((u'academic', u'des'), 9.767356854125687),
 ((u'urine', u'samples'), 9.937281855568),
 ((u'spectral', u'ic'), 9.937281855568),
 ((u'sign', u'sensitive'), 9.937281855568),
 ((u'saccharomyces', u'cerevisiae'), 9.937281855568),
 ((u'mapped', u'region'), 9.937281855568),
 ((u'intermediate', u'mapped'), 9.937281855568),
 ((u'experimentally', u'testable'), 9.937281855568),
 ((u'elementary', u'steps'), 9.937281855568),
 ((u'concentration', u'change'), 9.937281855568),
 ((u'guinea', u'pigs'), 10.352319354846841),
 ((u'etiolated', u'seedlings'), 10.352319354846841),
 ((u'timed', u'default'), 10.352319354846843),
 ((u'private', u'sector'), 10.352319354846843),
 ((u'presumably', u'corresponding'), 10.352319354846843),
 ((u'peroxidase', u'oxidase'), 10.352319354846843),
 ((u'osteoarthritic', u'pigs'), 10.352319354846843),
 ((u'osteoarthritic', u'guinea'), 10.352319354846843),
 ((u'nucleotide', u'polymorphisms'), 10.352319354846843),
 ((u'nuclear', u'resonance'), 10.352319354846843),
 ((u'nuclear', u'magnetic'), 10.352319354846843),
 ((u'mutations', u'relieve'), 10.352319354846843),
 ((u'manage', u'oxidation'), 10.352319354846843),
 ((u'green', u'fluorescent'), 10.352319354846843),
 ((u'amino', u'acids'), 10.352319354846843),
 ((u'unaffected', u'patients'), 10.937281855568),
 ((u'sea', u'urchin'), 10.937281855568),
 ((u'prove', u'valuable'), 10.937281855568),
 ((u'positional', u'cloning'), 10.937281855568),
 ((u'naturally', u'occurring'), 10.937281855568),
 ((u'monte', u'carlo'), 10.937281855568),
 ((u'magnetic', u'resonance'), 10.937281855568),
 ((u'ischemic', u'insult'), 10.937281855568),
 ((u'et', u'al'), 10.937281855568),
 ((u'enzyme', u'substrate'), 10.937281855568),
 ((u'drosophila', u'melanogaster'), 10.937281855568),
 ((u'crude', u'lysates'), 10.937281855568),
 ((u'criterion', u'optimization'), 10.937281855568),
 ((u'clean', u'abstractions'), 10.937281855568),
 ((u'biologic', u'agents'), 10.937281855568),
 ((u'thematic', u'issue'), 11.352319354846843),
 ((u'issue', u'comptes'), 11.352319354846843)]

In [630]:
# Distribution of the windowed-bigram PMI scores for the earliest year in the
# corpus (``bigrams_by_year`` is ordered by year).
first_year, first_year_bigrams = bigrams_by_year[0]
# Extract the scores with a comprehension instead of indexing ``zip(...)``:
# that form raises IndexError when there are no bigrams, and ``zip`` objects
# cannot be subscripted on Python 3.
pmi_scores = [score for gram, score in first_year_bigrams]
plt.hist(pmi_scores, bins=np.arange(-2, 12, 0.35))
plt.xlabel('PMI score')
plt.ylabel('Number of bigrams')
plt.title('Bigram PMI scores, %s' % first_year)
plt.show()



In [606]:
# Sanity check on the first year's graph: networkx reports the number of edges
# via size() and the number of nodes via order().
# NOTE(review): ``graphs`` is only built by the loop in the following cell, so
# on a fresh kernel this cell has to be run after that one.
graphs[0][1].size(), graphs[0][1].order()


Out[606]:
(7256, 2300)

In [644]:
graphs = []
centralities = []
for year, bigrams in bigrams_by_year:
    # Each one-year selection of the corpus is represented
    #  with a separate undirected graph.
    graph = nx.Graph()
    for gram, score in bigrams:
        # The threshold here is somewhat arbitrary.
        if score > 0. and gram[0] != gram[1]:   # Prevent self-loops.
            graph.add_edge(*gram, weight=score)
            
    centrality = nx.closeness_centrality(graph, normalized=True)#, weight='weight')

    graphs.append((year, graph))
    centralities.append((year, centrality))    
    
    print '\r', year,


2013

In [666]:
def _centrality_series(term_table, yearly_centralities, model_label):
    """
    Collect per-term closeness-centrality time series for one term category.

    Parameters
    ----------
    term_table : pandas.DataFrame
        Table with a ``model`` column (category label) and a ``term`` column.
    yearly_centralities : list
        ``(year, centrality)`` pairs, where ``centrality`` maps each token
        present in that year's graph to its closeness centrality.
    model_label : str
        Value of the ``model`` column selecting the terms to collect.

    Returns
    -------
    years_by_term : list of numpy.ndarray
        For each retained term, the years in which the term appears.
    values_by_term : list of numpy.ndarray
        For each retained term, its centrality in those same years.
    """
    years_by_term, values_by_term = [], []
    for term in term_table[term_table.model == model_label]['term']:
        term_years, term_values = [], []
        for year, centrality in yearly_centralities:
            # Years in which the term is absent from the graph are skipped
            # rather than recorded as zero.
            if term in centrality:
                term_years.append(year)
                term_values.append(centrality[term])

        # Drop terms that never occur at all (empty series) as well as terms
        # whose centrality never rises above zero.
        if term_values and max(term_values) > 0:
            years_by_term.append(np.array(term_years))
            values_by_term.append(np.array(term_values))
    return years_by_term, values_by_term


systems_years, systems_values = _centrality_series(df, centralities, 'systems-oriented')
neutral_years, neutral_values = _centrality_series(df, centralities, 'neutral')
biology_years, biology_values = _centrality_series(df, centralities, 'biology-oriented')

In [668]:
# Spot check: centrality against year for a single biology-oriented term
# (the third retained series).
plt.scatter(biology_years[2], biology_values[2])


Out[668]:
<matplotlib.collections.PathCollection at 0x154f16a10>

In [329]:
import pymc

In [346]:
# Number of yearly observations collected for one systems-oriented term.
systems_values[5].shape


Out[346]:
(11,)

In [527]:
def make_model(systems_data, neutral_data, biology_data):
    """
    Build the pymc nodes for a per-category linear trend in term centrality.

    Each category (systems, neutral, biology) gets its own slope ``beta_*``
    and intercept ``beta0_*`` with Uniform priors, plus one observed Poisson
    node per term series whose mean is
    ``beta * (X[i] - X[i].min() + 1) + beta0`` and whose observed value is
    ``1000 * Y[i]``.

    Parameters
    ----------
    systems_data, neutral_data, biology_data : tuple
        ``(X, Y)`` pairs of equal-length sequences; ``X[i]`` and ``Y[i]`` are
        the arrays of years and centralities for the i-th term of a category.

    Returns
    -------
    nodes : dict
        ``locals()`` of this function, i.e. every local name defined here
        (the unpacked inputs as well as the pymc nodes). Any extra local
        variable introduced in this body therefore ends up in the model dict.
    """
    # NOTE(review): nothing here keeps the linear mean positive, which a
    # Poisson rate needs to be - confirm the priors/data make this safe.
    systems_X, systems_Y = systems_data    
    neutral_X, neutral_Y = neutral_data
    biology_X, biology_Y = biology_data

    # Slopes: one per category, identical Uniform(-10, 10) priors.
    beta_systems = pymc.Uniform('beta_systems', -10., 10., value=0.01)
    beta_neutral = pymc.Uniform('beta_neutral', -10., 10., value=0.01)
    beta_biology = pymc.Uniform('beta_biology', -10., 10., value=0.01)

    # Intercepts: the systems prior is wider (upper bound 25 instead of 15)
    # and starts at 1 rather than 0.01.
    beta0_systems = pymc.Uniform('beta0_systems', -15., 25., value=1.)
    beta0_neutral = pymc.Uniform('beta0_neutral', -15., 15., value=0.01)
    beta0_biology = pymc.Uniform('beta0_biology', -15., 15., value=0.01)
    
    # One observed node per term. Years are shifted so that each term's first
    # year maps to 1; centralities are scaled by 1000 before being used as the
    # observed values.
    response_systems = pymc.Container([pymc.Poisson('response_systems_%i' % i, 
                                        mu=beta_systems*(systems_X[i]-systems_X[i].min()+1.) + beta0_systems, 
                                        value=1000.*systems_Y[i], observed=True)
                        for i in xrange(len(systems_X))])
    response_neutral = pymc.Container([pymc.Poisson('response_neutral_%i' % i, 
                                        mu=beta_neutral*(neutral_X[i]-neutral_X[i].min()+1.) + beta0_neutral, 
                                        value=1000.*neutral_Y[i], observed=True)
                        for i in xrange(len(neutral_X))])
    response_biology = pymc.Container([pymc.Poisson('response_biology_%i' % i, 
                                        mu=beta_biology*(biology_X[i]-biology_X[i].min()+1.) + beta0_biology, 
                                        value=1000.*biology_Y[i], observed=True)
                        for i in xrange(len(biology_X))])    
    
    return locals()

In [ ]:
def make_model(systems_data, neutral_data, biology_data):
    """
    Variant of the centrality-trend model that uses Normal priors.

    Running this cell rebinds ``make_model``, replacing the Uniform-prior
    definition from the preceding cell.

    Each category (systems, neutral, biology) gets a slope ``beta_*`` and an
    intercept ``beta0_*``, plus one observed Poisson node per term series with
    mean ``beta * (X[i] - X[i].min() + 1) + beta0`` and observed value
    ``1000 * Y[i]``.

    Parameters
    ----------
    systems_data, neutral_data, biology_data : tuple
        ``(X, Y)`` pairs of equal-length sequences; ``X[i]`` and ``Y[i]`` are
        the arrays of years and centralities for the i-th term of a category.

    Returns
    -------
    nodes : dict
        ``locals()`` of this function, i.e. every local name defined here
        (the unpacked inputs as well as the pymc nodes).
    """
    systems_X, systems_Y = systems_data
    neutral_X, neutral_Y = neutral_data
    biology_X, biology_Y = biology_data

    # NOTE(review): pymc.Normal takes its parents positionally as (mu, tau),
    # so the arguments below read as a mean and a precision, not as the
    # (lower, upper) bounds they were under pymc.Uniform - confirm that these
    # are the intended priors.
    beta_systems = pymc.Normal('beta_systems', -10., 10., value=0.01)
    beta_neutral = pymc.Normal('beta_neutral', -10., 10., value=0.01)
    beta_biology = pymc.Normal('beta_biology', -10., 10., value=0.01)

    beta0_systems = pymc.Normal('beta0_systems', -15., 25., value=1.)
    beta0_neutral = pymc.Normal('beta0_neutral', -15., 15., value=0.01)
    beta0_biology = pymc.Normal('beta0_biology', -15., 15., value=0.01)

    # One observed node per term. Years are shifted so that each term's first
    # year maps to 1; centralities are scaled by 1000 before being used as the
    # observed values.
    response_systems = pymc.Container([pymc.Poisson('response_systems_%i' % i,
                                        mu=beta_systems*(systems_X[i]-systems_X[i].min()+1.) + beta0_systems,
                                        value=1000.*systems_Y[i], observed=True)
                        for i in xrange(len(systems_X))])
    response_neutral = pymc.Container([pymc.Poisson('response_neutral_%i' % i,
                                        mu=beta_neutral*(neutral_X[i]-neutral_X[i].min()+1.) + beta0_neutral,
                                        value=1000.*neutral_Y[i], observed=True)
                        for i in xrange(len(neutral_X))])
    # Poisson, like the other two categories: a pymc.Normal node here was
    # given no ``tau`` parent and so could not be constructed.
    response_biology = pymc.Container([pymc.Poisson('response_biology_%i' % i,
                                        mu=beta_biology*(biology_X[i]-biology_X[i].min()+1.) + beta0_biology,
                                        value=1000.*biology_Y[i], observed=True)
                        for i in xrange(len(biology_X))])

    return locals()

In [669]:
# Feed the three (years, centralities) groups into the model and attach a
# pickle trace database named 'systemsbiology2.pickle'.
M = pymc.MCMC(
    make_model(systems_data=(systems_years, systems_values),
               neutral_data=(neutral_years, neutral_values),
               biology_data=(biology_years, biology_values)),
    db='pickle', dbname='systemsbiology2.pickle')

In [670]:
# 10,000 iterations, with burn=3000 and thin=50 controlling which draws are
# kept in the trace.
M.sample(iter=10000, burn=3000, thin=50)


 [-----------------100%-----------------] 10000 of 10000 complete in 63.7 sec

In [538]:
# Close the sampler's trace database.
# NOTE(review): the cells below still read traces from ``M`` - confirm that
# the pickle backend keeps them available after close(), or move this call to
# the end of the analysis.
M.db.close()

In [671]:
# Point estimates: mean of the full stored trace of each slope (beta) and
# intercept (beta0), one pair per term category.
systems_beta, systems_beta0 = (M.beta_systems.trace()[:].mean(),
                               M.beta0_systems.trace()[:].mean())
neutral_beta, neutral_beta0 = (M.beta_neutral.trace()[:].mean(),
                               M.beta0_neutral.trace()[:].mean())
biology_beta, biology_beta0 = (M.beta_biology.trace()[:].mean(),
                               M.beta0_biology.trace()[:].mean())

In [672]:
# Fitted trend lines. Each uses the same predictor as make_model (years
# counted from 1 at x.min()) and divides by 1000 to undo the scaling applied
# to the observed centralities there. The coefficients are looked up when the
# function is called, so re-running the posterior-mean cell updates the lines.

def systems_model(x):
    """Fitted centrality trend for systems-oriented terms over the years ``x``."""
    return (systems_beta*(x-x.min()+1.) + systems_beta0)/1000.


def neutral_model(x):
    """Fitted centrality trend for neutral terms over the years ``x``."""
    return (neutral_beta*(x-x.min()+1.) + neutral_beta0)/1000.


def biology_model(x):
    """Fitted centrality trend for biology-oriented terms over the years ``x``."""
    return (biology_beta*(x-x.min()+1.) + biology_beta0)/1000.

In [675]:
# One panel per term category: every term's centrality series as a scatter,
# overlaid with that category's fitted trend line.
plt.figure(figsize=(15, 5))
panels = [
    (131, 'systems-oriented', systems_years, systems_values, systems_model),
    # The neutral panel is drawn with neutral_model, its own fitted trend.
    (132, 'neutral', neutral_years, neutral_values, neutral_model),
    (133, 'biology-oriented', biology_years, biology_values, biology_model),
]
for position, label, years_by_term, values_by_term, trend in panels:
    plt.subplot(position)
    for term_years, term_values in zip(years_by_term, values_by_term):
        if term_values.max() > 0:
            plt.scatter(term_years, term_values)
    # The trend line is evaluated over the years of the first term only.
    plt.plot(years_by_term[0], trend(years_by_term[0]), lw=2)
    plt.title(label)
    plt.xlabel('Year')
    plt.ylabel('Closeness centrality')

plt.tight_layout()
plt.show()



In [676]:
# Draw pymc's built-in plots for the sampled parameters; the output lists the
# six beta / beta0 nodes it plots.
pymc.Matplot.plot(M)


Plotting beta_neutral
Plotting beta_biology
Plotting beta0_neutral
Plotting beta0_systems
Plotting beta_systems
Plotting beta0_biology

Sentences

Documents


In [35]:
# Term table read from systemsBiologyTerms.csv: one row per term together with
# the model category assigned to it.
df


Out[35]:
model term
0 neutral production
1 biology-oriented oxidative
2 biology-oriented therapy
3 biology-oriented ms
4 biology-oriented infection
5 biology-oriented microscopy
6 biology-oriented vaccine
7 neutral correlation
8 biology-oriented synthetic
9 biology-oriented omics
10 neutral personalized
11 systems-oriented design
12 neutral platform
13 neutral inference
14 neutral progression
15 biology-oriented epigenetic
16 biology-oriented treatment
17 systems-oriented boolean
18 biology-oriented fluorescence
19 biology-oriented evolutionary
20 systems-oriented bayesian
21 systems-oriented linear
22 systems-oriented optimization
23 biology-oriented lc
24 systems-oriented engineering
25 neutral heterogeneity
26 neutral dependent
27 biology-oriented clinical
28 biology-oriented phenotype
29 systems-oriented robust
... ... ...
270 biology-oriented behavior
271 biology-oriented organism
272 biology-oriented sbml
273 neutral paradigm
274 biology-oriented substrate
275 neutral function
276 biology-oriented biological
277 biology-oriented toxicology
278 neutral detection
279 biology-oriented response
280 biology-oriented physiological
281 systems-oriented systems
282 biology-oriented toxicity
283 neutral functional
284 systems-oriented modeling
285 biology-oriented kinetics
286 biology-oriented proteomic
287 biology-oriented regulatory
288 neutral diverse
289 biology-oriented genomic
290 biology-oriented proteomics
291 biology-oriented metabonomics
292 systems-oriented information
293 neutral emerging
294 biology-oriented transduction
295 biology-oriented genomics
296 systems-oriented signal
297 biology-oriented microarray
298 biology-oriented escherichia
299 biology-oriented coli

300 rows × 2 columns


In [ ]: