Figures from book chapter

Two figures from an earlier investigation



In [1]:

    
import os
import pandas as pd
from corpkit import *
%matplotlib inline
# Later cells index into `r` by name (e.g. r['riskpos']), so it has to exist
# before they run. The guard skips the reload when the kernel already holds
# `r`, which keeps re-running this cell cheap.
# NOTE(review): load_all_results is presumably provided by the corpkit star import - confirm.
if 'r' not in globals():
    r = load_all_results()



In [2]:

    
# Domain percentages for 1900 vs 2000; the first CSV column becomes the index.
first_fig = pd.read_csv(
    '1900-2000-comparison.csv',
    index_col=0)
first_fig









    Out[2]:






  
    
      
      1900
      2000
    
    
      
      
      
    
  
  
    
      economics
      17.7
      22.0
    
    
      politics
      21.1
      20.8
    
    
      health
      1.0
      13.4
    
    
      medicine
      0.0
      3.2
    
    
      sport
      11.5
      7.8
    
    
      arts
      3.8
      5.9
    
    
      legal
      1.9
      4.4
    
    
      security
      0.5
      3.2
    
    
      education
      0.0
      2.4
    
    
      war
      11.5
      2.7
    
    
      industrial relations/work
      1.4
      3.7
    
    
      crime
      2.4
      2.2
    
    
      media
      1.0
      2.0
    
    
      transport/infrastructure
      3.8
      1.2
    
    
      military
      4.3
      1.5
    
    
      social order
      3.3
      0.2
    
    
      technology
      0.0
      5.4
    
    
      environment
      0.0
      1.7
    
    
      disaster
      1.9
      0.5



In [3]:

    
# Colour bar chart of the 1900 vs 2000 domain comparison.
# The black-and-white variant of this figure passes black_and_white=True and
# saves as 'domains_june_final'.
plotter('Domains of risk discourse',
        first_fig,
        kind='bar',
        num_to_plot='all',
        x_label=False,
        figsize=(10, 6),
        style='bmh',
        show_totals='plot',
        save='domains_june_colour')









    



13:13:19: images/domains_june_colour.png created.



In [4]:

    
import os
import pandas as pd
# Read the key-term table and keep only the six terms to plot, in this order.
second_fig = pd.read_csv('key-terms-comparison.csv', index_col=0)[
    ['danger', 'threat', 'risk', 'disaster', 'harm', 'uncertainty']
]
#from corpkit import plotter
%matplotlib inline



In [5]:

    
# Colour plot of the selected key terms.
# The black-and-white variant passes black_and_white=True and saves as
# 'risk_related_june_final'.
plotter('Risk and related words over time',
        second_fig,
        num_to_plot='all',
        figsize=(10, 6),
        style='bmh',
        save='risk_related_june_colour')









    



13:14:05: images/risk_related_june_colour.png created.



In [2]:

    
# Risk words per word class as a percentage of all parsed words, leaving out
# the 1963 subcorpus.
pos = editor(r['riskpos'].results,
             '%',
             r['allwords'].totals,
             skip_subcorpora=1963)
plotter('Risk words by word class',
        pos.results,
        style='bmh',
        num_to_plot=3,
        figsize=(10, 6),
        black_and_white=True,
        save='risk_by_class_final',
        y_label='Percentage of all parsed words')









    



***Processing results***
========================

Skipping 1 subcorpora:
    1963

***Done!***
========================


20:46:08: images/risk_by_class_final.png created.






    Out[2]:





<module 'matplotlib.pyplot' from '/Library/Python/2.7/site-packages/matplotlib/pyplot.pyc'>



In [3]:

    
# Restrict to the four open word classes and express risk words as a
# percentage of the baseline counts for each class.
open_words = ['Noun', 'Verb', 'Adjective', 'Adverb']
maths_done = editor(r['riskpos'].results,
                    '%',
                    r['baseline'].results,
                    sort_by='total',
                    just_entries=open_words,
                    skip_subcorpora=[1963])









    



***Processing results***
========================

Skipping 1 subcorpora:
    1963

Keeping 4 entries:
    Noun
    Verb
    Adjective
    Adverb

***Done!***
========================



In [4]:

    
# Four renderings of the same data: line (B&W, colour) and stacked area
# (B&W, colour). Each one is saved under its own file name.
plotter('Percentage of open word classes that are risk words',
        maths_done.results,
        black_and_white=True,
        y_label='Percentage',
        figsize=(10, 6),
        style='bmh',
        num_to_plot=3,
        save='perc_open_final')
plotter('Percentage of open word classes that are risk words',
        maths_done.results,
        y_label='Percentage',
        figsize=(10, 6),
        num_to_plot=3,
        save='perc_open_final_colour')
plotter('Percentage of open word classes that are risk words',
        maths_done.results,
        y_label='Percentage',
        kind='area',
        legend_pos='lower right',
        stacked=True,
        style='bmh',
        black_and_white=True,
        figsize=(10, 6),
        save='perc_open_area_final')
plotter('Percentage of open word classes that are risk words',
        maths_done.results,
        y_label='Percentage',
        kind='area',
        legend_pos='lower right',
        stacked=True,
        figsize=(10, 6),
        save='perc_open_area_final_colour')









    



20:46:35: images/perc_open_final.png created.

20:46:36: images/perc_open_final_colour.png created.

20:46:37: images/perc_open_area_final.png created.

20:46:38: images/perc_open_area_final_colour.png created.






    Out[4]:





<module 'matplotlib.pyplot' from '/Library/Python/2.7/site-packages/matplotlib/pyplot.pyc'>



In [5]:

    
# Each role as a percentage of the interrogation's own totals; the '1963' row
# is dropped at plot time rather than inside editor().
ppm = editor(r['risk_as_part_process_mod'].results,
             '%',
             r['risk_as_part_process_mod'].totals)
plotter('Risk as participant, process and modifier',
        ppm.results.drop('1963'),
        style='bmh',
        black_and_white=True,
        figsize=(10, 6),
        save='ppm_final',
        y_label='Percentage of risk words in any experiential role')
plotter('Risk as participant, process and modifier',
        ppm.results.drop('1963'),
        figsize=(10, 6),
        save='ppm_final_colour',
        y_label='Percentage of risk words in any experiential role')









    



***Processing results***
========================

***Done!***
========================


20:47:30: images/ppm_final.png created.

20:47:31: images/ppm_final_colour.png created.






    Out[5]:





<module 'matplotlib.pyplot' from '/Library/Python/2.7/site-packages/matplotlib/pyplot.pyc'>



In [143]:

    
# Collapse governor entries into three experiential roles. Process is merged
# first, so dobj:take/run/pose are no longer separate entries when the
# broader dobj: pattern for Participant runs.
govs = r['all_govs']
ppm = r['risk_as_part_process_mod']
new_govs = editor(govs.results,
                  merge_entries=r'^(root:root|dobj:(take|run|pose))$',
                  newname='Process')
new_govs = editor(new_govs.results,
                  merge_entries=r'^(dobj|nsubj|nsubjpass|csubj|acomp|iobj|csubjpass):',
                  newname='Participant')
new_govs = editor(new_govs.results,
                  merge_entries=r'^(pobj|nn|amod|rcmod|vmod|tmod|npadvmod|advmod):',
                  newname='Modifier')
# Keep only the three merged roles, as a percentage of the original totals.
new_govs = editor(new_govs.results,
                  '%',
                  govs.totals,
                  sort_by='total',
                  just_entries=['Participant', 'Process', 'Modifier'])
plotter('Risk words by experiential role',
        new_govs.results.drop('1963'),
        style='bmh',
        figsize=(10, 6),
        black_and_white=True,
        y_label='Percentage of risk words in any experiential role',
        save='ppmfinal')









    



***Processing results***
========================

Merging 4 entries as "Process":
    dobj:take
    root:root
    dobj:pose
    dobj:run

***Done!***
========================


***Processing results***
========================

Merging 4060 entries as "Participant":
    nsubj:be
    dobj:reduce
    dobj:increase
    dobj:have
    dobj:carry
    dobj:face
    dobj:raise
    dobj:minimize
    dobj:assess
    dobj:create
... and 4050 more ... 

***Done!***
========================


***Processing results***
========================

Merging 5633 entries as "Modifier":
    pobj:at
    pobj:of
    pobj:about
    pobj:with
    pobj:to
    nn:factor
    pobj:for
    pobj:on
    pobj:in
    pobj:as
... and 5623 more ... 

***Done!***
========================


***Processing results***
========================

Keeping 3 entries:
    Participant
    Process
    Modifier

***Done!***
========================


12:04:47: images/ppmfinal.png created.

Using collapsed dependencies



In [ ]:

    
#coll_govs_with_pos = interrogator(corpus, 'g', r'(?i)\brisk', lemmatise = True, dep_type = 'collapsed', 
    # add_pos_to_g_d_option=True, quicksave = 'coll_govs_with_pos')
r = load_all_results()
# NOTE(review): the interrogation above is quicksaved as 'coll_govs_with_pos',
# while this lookup uses 'collapsed_govs_with_pos' - confirm the key exists in r.
govs = r['collapsed_govs_with_pos']
# (role name, regex) pairs, merged in this order: Process is merged before
# Participant, whose pattern also covers the dobj: entries.
exp_roles = [('Process', r'(?i)^(root:root|dobj:.*?:(run|take|pose)|prep_at:v[a-z]*:put|rcmod:.*|xcomp:.*)$'),
            ('Participant', r'(?i)^(xsubj|nsubj|nsubjpass|acomp|agent|appos|cop|dobj|iobj):.*$'),
            ('Modifier', r'(?i)^((advmod|vmod|amod|nn):.*$|(prep_[a-z]*:n|prep_[a-z]*:v))')]
for name, regex in exp_roles:
    govs = editor(govs.results, merge_entries = regex, newname = name)
# Convert the merged counts to percentages of the totals, leaving out 1963.
govs = editor(govs.results, '%', govs.totals, skip_subcorpora = 1963)
plotter('Experiential role', govs.results, y_label = 'Percentage of risk words in any experiential role',
    style = 'bmh', figsize = (10, 6), black_and_white = True)



In [ ]:

    
# currently problematic
# The title is a raw string so that the LaTeX-style \slash keeps its backslash
# without relying on Python passing unrecognised escapes through unchanged.
plotter(r'Risk as experiential subject\slash object', govs.results, style = 'bmh', 
        black_and_white = True, figsize = (10, 6))

Adjectival modifiers of risk



In [16]:

    
# Four selected adjectives as a percentage of the adj_modifiers totals,
# leaving out the 1963 subcorpus.
adjmods = editor(r['adj_modifiers'].results,
                 '%',
                 r['adj_modifiers'].totals,
                 just_entries=['high', 'calculated', 'great', 'potential'],
                 skip_subcorpora=1963)
plotter('Selected adjectives modifying nominal risk',
        adjmods.results,
        style='bmh',
        black_and_white=True,
        figsize=(10, 6),
        save='sel_adjs_final',
        y_label='Percentage of all adjectival risk words')









    



***Processing results***
========================

Skipping 1 subcorpora:
    1963

Keeping 4 entries:
    high
    calculated
    great
    potential

***Done!***
========================


21:24:49: images/sel_adjs_final.png created.






    Out[16]:





<module 'matplotlib.pyplot' from '/Library/Python/2.7/site-packages/matplotlib/pyplot.pyc'>

Risk processes



In [62]:

    
def load_result(savename, loaddir = 'data/saved_interrogations'):
    """Reload a pickled interrogation from `loaddir`.

    Args:
        savename: file name of the saved result; '.p' is appended when missing.
        loaddir: directory that holds the pickle files.

    Returns:
        A pandas DataFrame or Series exactly as it was pickled, or otherwise
        an 'interrogation' namedtuple whose fields depend on how many items
        were saved: 4 -> (query, results, totals, table),
        3 -> (query, results, totals), 2 -> (query, totals).

    Raises:
        ValueError: if the pickle holds neither a DataFrame/Series nor a
            container of 2, 3 or 4 items.
    """
    import collections
    import pickle
    import os
    import pandas

    # Field layout of the namedtuple, keyed by the number of pickled items.
    fields_by_length = {
        4: ['query', 'results', 'totals', 'table'],
        3: ['query', 'results', 'totals'],
        2: ['query', 'totals'],
    }

    if not savename.endswith('.p'):
        savename = savename + '.p'
    path = os.path.join(loaddir, savename)

    # NOTE: unpickling can execute arbitrary code; only load files you created.
    # The with-block closes the file handle even if unpickling fails.
    with open(path, 'rb') as handle:
        unpickled = pickle.load(handle)

    if isinstance(unpickled, (pandas.DataFrame, pandas.Series)):
        return unpickled

    try:
        fields = fields_by_length[len(unpickled)]
    except (TypeError, KeyError):
        # No len() at all, or a length with no known layout.
        raise ValueError('%s does not hold a DataFrame, a Series or a saved '
                         'interrogation of 2, 3 or 4 items' % path)

    outputnames = collections.namedtuple('interrogation', fields)
    return outputnames(*[unpickled[i] for i in range(len(fields))])

# 'processes' is loaded directly through load_result, then converted to
# percentages of its own totals with 1963 left out.
processes = load_result('processes')
proc_rel = editor(processes.results,
                  '%',
                  processes.totals,
                  skip_subcorpora=1963)









    



***Processing results***
========================

Skipping 1 subcorpora:
    1963

***Done!***
========================



In [116]:

    
# Black-and-white figure of the relative risk-process frequencies.
plotter('Risk processes',
        proc_rel.results,
        style='bmh',
        legend_pos='center left',
        save='risk_proc_final',
        black_and_white=True,
        figsize=(10, 6),
        y_label='Percentage of all risk processes')









    



23:26:55: images/risk_proc_final.png created.



In [18]:

    
# Modifier types as a percentage of the modifier totals, leaving out 1963.
modifiers = r['modifiers']
mods = editor(modifiers.results,
              '%',
              modifiers.totals,
              skip_subcorpora=[1963])
plotter('Types of risk modifiers',
        mods.results,
        style='bmh',
        black_and_white=True,
        figsize=(10, 6),
        legend_pos='lower right',
        save='mod_types_final')









    



***Processing results***
========================

Skipping 1 subcorpora:
    1963

***Done!***
========================


21:25:09: images/mod_types_final.png created.






    Out[18]:





<module 'matplotlib.pyplot' from '/Library/Python/2.7/site-packages/matplotlib/pyplot.pyc'>



In [203]:

    
# Pull the two saved interrogations used by the risker cells out of the
# loaded results mapping `r`.
subj_of_risk_process = r['subj_of_risk_process']
noun_lemmata = r['noun_lemmata']



In [204]:

    
# Subjects of risk processes as a percentage of the interrogation totals,
# collapsed to totals only and sorted by total, with 1963 left out.
rskrs = editor(subj_of_risk_process.results,
               '%',
               subj_of_risk_process.totals,
               just_totals=True,
               skip_subcorpora=1963,
               sort_by='total')









    



***Processing results***
========================

Skipping 1 subcorpora:
    1963

***Done!***
========================



In [205]:

    
# Bar chart limited to 12 riskers (num_to_plot).
plotter('Riskers, sorted by total frequency',
        rskrs.results,
        kind='bar',
        black_and_white=True,
        style='bmh',
        figsize=(10, 6),
        num_to_plot=12,
        y_label='Percentage of all riskers',
        show_totals='plot',
        save='riskers_total_final')









    



14:30:03: images/riskers_total_final.png created.



In [208]:

    
# For each selected noun, the risker count is taken as a percentage of the
# noun_lemmata results rather than of the risker totals.
entities = ['politician', 'candidate', 'governor', 'lawmaker',
            'person', 'man', 'woman', 'child', 'baby']
spec_riskers = editor(subj_of_risk_process.results,
                      '%',
                      noun_lemmata.results,
                      just_entries=entities,
                      skip_subcorpora=1963,
                      just_totals=True,
                      sort_by='total')
plotter('Percentage of common participants that are in the role of risker',
        spec_riskers.results,
        kind='bar',
        black_and_white=True,
        style='bmh',
        figsize=(10, 6),
        save='rel_risker_final',
        num_to_plot=12,
        y_label='Percentage of occurrences in the role of risker',
        show_totals='plot')









    



***Processing results***
========================

Skipping 1 subcorpora:
    1963

Keeping 9 entries:
    politician
    candidate
    governor
    lawmaker
    person
    man
    woman
    child
    baby

Threshold: 19


***Done!***
========================


14:38:27: images/rel_risker_final.png created.



In [2]:

    
# Pull the 'propernouns' interrogation out of the loaded results mapping `r`.
propernouns = r['propernouns']



In [11]:

    
# Selected proper nouns as a percentage of all proper nouns, leaving out 1963.
terms = ['vioxx', 'merck', 'aids', 'clinton', 'obama', 'europe', 'bush']
sel_nnp = editor(propernouns.results,
                 '%',
                 propernouns.totals,
                 just_entries=terms,
                 skip_subcorpora=1963,
                 sort_by='total')









    



***Processing results***
========================

Skipping 1 subcorpora:
    1963

Keeping 7 entries:
    vioxx
    merck
    aids
    clinton
    obama
    europe
    bush

***Done!***
========================



In [14]:

    
# The first title is a raw string so that the LaTeX \emph{...} markup keeps
# its backslash without relying on Python passing unrecognised escapes through.
# Only this first (colour) figure is saved; the second call has no `save`.
plotter(r'Health crises: comparing social actors that co-occur with risk in \emph{The New York Times}', sel_nnp.results, legend_pos = 'upper right',
       black_and_white = False, figsize = (10, 6), save = 'healthcomp', y_label = 'Percentage of all proper nouns')
plotter('Comparing social actors that co-occur with risk', sel_nnp.results, legend_pos = 'upper left',
       black_and_white = True, style = 'bmh', figsize = (10, 6))









    



14:43:28: images/healthcomp.png created.



In [72]:

    
# Display the unedited 'aids' column of the propernouns results.
propernouns.results.aids









    Out[72]:





1963     0
1987    87
1988    46
...
2012    12
2013     8
2014     2
Name: aids, Length: 29, dtype: int64



In [27]:

    
# Pull the 'risk_of' interrogation out of the loaded results mapping `r`.
risk_of = r['risk_of']



In [28]:

    
# 'risk of <noun>' results as a percentage of their totals, leaving out 1963.
rel_riskof = editor(risk_of.results,
                    '%',
                    risk_of.totals,
                    skip_subcorpora=1963)
plotter('Risk of (noun)',
        rel_riskof.results,
        style='fivethirtyeight',
        figsize=(10, 6),
        y_label='Percentage of all results',
        save='riskof_')









    



***Processing results***
========================

Skipping 1 subcorpora:
    1963

***Done!***
========================


15:25:20: images/riskof_.png created.



In [16]:

    
# Pull the health-subcorpus social-actor interrogation out of `r`.
social_act = r['health-social-actors-w-risk-in-sent']



In [17]:

    
# Display the query settings stored with the saved interrogation.
social_act.query









    Out[17]:





{'datatype': dtype('int64'),
 'dep_type': 'basic-dependencies',
 'dictionary': 'bnc.p',
 'function': 'interrogator',
 'function_filter': False,
 'lemmatag': False,
 'lemmatise': True,
 'option': 'words',
 'path': 'data/nyt/topics/health',
 'phrases': True,
 'plaintext': False,
 'query': 'NP <# (/NN.?/ !< /(?i).?\\brisk.?\\b/) >> (ROOT << /(?i).?\\brisk.?\\b/)',
 'quicksave': 'health-social-actors-w-risk-in-sent.p',
 'spelling': False,
 'table_size': 50,
 'time_ended': '2015-06=13 23:25:29',
 'time_started': '2015-06-13 23:22:05',
 'titlefilter': True,
 'translated_option': 't'}



In [18]:

    
# Keep six everyday participants. No '%' operation is applied here, so the
# plotted values are whatever social_act.results holds for these entries.
everyday = editor(social_act.results,
                  just_entries=['man', 'woman', 'person', 'child', 'consumer', 'baby'])
plotter('Everyday participants in the health subcorpus',
        everyday.results,
        style='bmh',
        black_and_white=True,
        figsize=(10, 6),
        save='everyday_health_final')









    



***Processing results***
========================

Keeping 6 entries:
    man
    woman
    person
    child
    consumer
    baby

***Done!***
========================


14:53:58: images/everyday_health_final.png created.



In [19]:

    
# Institutional participants as a percentage of the social_act totals.
inst = ['insurer',
        'hospital',
        'commercial insurer',
        'business',
        'insurance company',
        'health maintenance organization',
        'blue cross',
        'disease control',
        'employer']
institutional = editor(social_act.results,
                       '%',
                       social_act.totals,
                       just_entries=inst)









    



***Processing results***
========================

Keeping 9 entries:
    insurer
    hospital
    commercial insurer
    business
    insurance company
    health maintenance organization
    blue cross
    disease control
    employer

***Done!***
========================



In [21]:

    
# Colour plot of the institutional participants; no file is saved here.
plotter('Decline of health institution risks in the NYT, 1987-2014',
        institutional.results,
        figsize=(10, 6),
        y_label='Percentage of all participants',
        black_and_white=False)



In [ ]:

    
#parts = load_result('health-part')
#peop = ['person', 'man', 'woman', 'child', 'baby', 'consumer']
#he_pa = editor(parts.results, just_entries = peop)
#plotter('Everyday participants in the health subcorpus', he_pa.results, 
    #y_label = 'Percentage of all participants',
       #figsize = (10, 6), black_and_white = True, style = 'bmh')

21:55:04: adj_modifiers.p loaded as adj_modifiers. 21:55:04: adj_riskwords.p loaded as adj_riskwords. 21:55:06: all_govs.p loaded as all_govs. 21:55:06: allwords.p loaded as allwords. 21:55:06: at_risk_things.p loaded as at_risk_things. 21:55:06: baseline.p loaded as baseline. 21:55:06: edited_adj_riskwords.p loaded as edited_adj_riskwords. 21:55:06: health-processes.p loaded as health-processes. 21:55:11: health-social-actors-w-risk-in-sent.p loaded as health-social-actors-w-risk-in-sent. 21:55:13: health_kwds.p loaded as health_kwds. 21:55:37: health_kwds_compared_to_nyt.p loaded as health_kwds_compared_to_nyt. 21:55:39: health_ngrams.p loaded as health_ngrams. 21:55:41: kwds.p loaded as kwds. 21:55:41: modals.p loaded as modals. 21:55:41: modals_lemmatised.p loaded as modals_lemmatised. 21:55:41: modifiers.p loaded as modifiers. 21:55:41: n_atrisk_n.p loaded as n_atrisk_n. 21:55:52: ngms.p loaded as ngms. 21:56:12: noun_lemmata.p loaded as noun_lemmata. 21:56:12: predicators.p loaded as predicators. 21:56:12: proc_w_risk_part.p loaded as proc_w_risk_part. 21:56:12: processes.p failed to load. Try using load_result to find out the matter. 21:56:24: propernouns.p loaded as propernouns. 21:56:24: relationals.p loaded as relationals. 21:56:24: risk_as_part_process_mod.p loaded as risk_as_part_process_mod. 21:56:24: risk_objects.p loaded as risk_objects. 21:56:24: risk_of.p loaded as risk_of. 21:56:25: risk_verbing.p loaded as risk_verbing. 21:56:25: riskpos.p loaded as riskpos. 21:56:25: riskwords.p loaded as riskwords. 21:56:26: sayers.p loaded as sayers. 21:56:26: social-actors-w-risk-in-sent.p loaded as social-actors-w-risk-in-sent. 21:56:26: subj_of_risk_process.p loaded as subj_of_risk_process. 21:56:26: terror_heart.p loaded as terror_heart. 21:56:27: uniques.p loaded as uniques. 21:56:27: x_subj_of_risk_process.p loaded as x_subj_of_risk_process.



In [135]:

    
# Sort the health n-grams with sort_by='increase' and plot ten of them as
# absolute frequencies.
health_ngrams = r['health_ngrams']
inc = editor(health_ngrams.results, sort_by='increase')
plotter('n-grams, increasing',
        inc.results,
        num_to_plot=10,
        figsize=(10, 6),
        y_label='Absolute frequency',
        style='bmh',
        black_and_white=True,
        save='ngram_health_inc_final')









    



***Processing results***
========================

***Done!***
========================


23:40:16: images/ngram_health_inc_final.png created.



In [144]:

    
# Sort with sort_by='decrease', then skip the entries at positions 8, 9 and 10
# of the sorted results. The recorded run reports these as 'license makefile',
# 'src stoplists' and 'dist inputdirectory'.
dec = editor(health_ngrams.results, sort_by='decrease')
dec = editor(dec.results, skip_entries=[8, 9, 10])









    



***Processing results***
========================

***Done!***
========================


***Processing results***
========================

Skipping 3 entries:
    license makefile
    src stoplists
    dist inputdirectory

***Done!***
========================



In [128]:

    
# Black-and-white plot of ten n-grams from the 'decrease'-sorted results.
plotter('n-grams, decreasing',
        dec.results,
        num_to_plot=10,
        figsize=(10, 6),
        style='bmh',
        black_and_white=True,
        legend_pos='upper right',
        save='ngram_health_dec_final')









    



23:32:30: images/ngram_health_dec_final.png created.



In [25]:

    
# Participants as a percentage of the social_act totals, sorted with
# sort_by='increase'.
inc_part = editor(social_act.results,
                  '%',
                  social_act.totals,
                  sort_by='increase')









    



***Processing results***
========================

***Done!***
========================



In [129]:

    
# Black-and-white plot of ten participants from the 'increase'-sorted results.
plotter('Participants in the health subcorpus, increasing',
        inc_part.results,
        num_to_plot=10,
        figsize=(10, 6),
        style='bmh',
        black_and_white=True,
        save='part_health_inc_final')









    



23:33:13: images/part_health_inc_final.png created.



In [22]:

    
# (theme label, entries to merge under that label). The labels become the
# entry names that are kept and plotted in the themes figure.
# NOTE(review): the institutional list earlier in this notebook spells one
# entry 'health maintenance organization'; confirm that
# 'HMO/health maintenance organisation' below matches an entry in the results.
cats = [('Infectious disease', ['aids', 'aid', 'aids virus', 'aids patient', 'transmission', 'flu', 'influenza']),
('Life world', ['person', 'man', 'woman', 'child', 'baby', 'consumer']),
('Institutions', ['empire', 'hospital', 'commercial', 'business', 'insurance company', 'HMO/health maintenance organisation', 'blue cross', 'disease control', 'employer', 'insurer', 'health insurance association', 'insurance industry', 'office']),
('Non-infectious disease', ['breast cancer', 'cancer', 'heart disease', 'diabetes', 'heart attack', 'prostate cancer', 'stroke', 'ovarian cancer', 'obesity']),
('Science and research',  ['study', 'researcher', 'finding', 'new study', 'author', 'university', 'expert'])]



In [23]:

    
# themes_all keeps the unmerged interrogation so that its totals can serve as
# the denominator after the entries have been merged into themes.
themes_all = r['health-social-actors-w-risk-in-sent']
themes = r['health-social-actors-w-risk-in-sent']
for name, cat in cats:
    themes = editor(themes.results, merge_entries=cat, newname=name)
themes = editor(themes.results,
                '%',
                themes_all.totals,
                just_entries=[n for n, t in cats],
                sort_by='total')









    



***Processing results***
========================

Merging 6 entries as "Infectious disease":
    aid
    aids virus
    aids patient
    transmission
    flu
    influenza

***Done!***
========================


***Processing results***
========================

Merging 6 entries as "Life world":
    person
    man
    woman
    child
    baby
    consumer

***Done!***
========================


***Processing results***
========================

Merging 12 entries as "Intitutions":
    empire
    hospital
    commercial
    business
    insurance company
    blue cross
    disease control
    employer
    insurer
    health insurance association
... and 2 more ... 

***Done!***
========================


***Processing results***
========================

Merging 9 entries as "Non-infectious disease":
    breast cancer
    cancer
    heart disease
    diabetes
    heart attack
    prostate cancer
    stroke
    ovarian cancer
    obesity

***Done!***
========================


***Processing results***
========================

Merging 7 entries as "Science and research":
    study
    researcher
    finding
    new study
    author
    university
    expert

***Done!***
========================


***Processing results***
========================

Keeping 5 entries:
    Infectious disease
    Life world
    Intitutions
    Non-infectious disease
    Science and research

***Done!***
========================



In [26]:

    
# Colour figure of the merged themes, saved as 'themes-health'.
plotter('Major themes in articles discussing health risks in the NYT',
        themes.results,
        num_to_plot=10,
        figsize=(10, 6),
        style='fivethirtyeight',
        save='themes-health')









    



15:06:03: images/themes-health.png created.

Accurate counts for experiential roles

Three experiential roles

First, let's get the copula count:



In [119]:

    
#r = load_all_results()
# Merge every 'cop:' entry into a single 'Cop' entry and keep just that
# column: one copula count per subcorpus.
deps = load_result('collapsed_deps')
copula = editor(deps.results,
                merge_entries=r'^cop:',
                newname='Cop').results['Cop']









    



***Processing results***
========================

Merging 6 entries as "Cop":
    cop:be
    cop:seem
    cop:become
    cop:remain
    cop:appear
    cop:stay

***Done!***
========================

Now we can do a first pass over the data. The problem is that root includes Attribute/Value.



In [21]:

    
from collections import OrderedDict

# all_riskwords keeps the unmerged interrogation; govs is rebound to the
# merged result by the loop below.
all_riskwords = load_result('coll_govs_with_pos')
#allwords = r['allwords']
govs = load_result('coll_govs_with_pos')
# Built from (name, regex) pairs: an OrderedDict created from a dict literal
# takes whatever order the plain dict iterates in, so the merge order would
# not be guaranteed. Process is merged first because the Participant pattern
# also covers dobj: entries.
exp_roles = OrderedDict([
    ('Process', r'(?i)^(root:root|dobj:.*?:(run|take|pose)|prep_at:v[a-z]*:put)$'),
    ('Participant', r'(?i)^((xsubj|nsubj|nsubjpass|acomp|agent|appos|dobj|iobj):.*|prep_[a-z]*:v.*?:.*)$'),
    ('Modifier', r'(?i)^((advmod|vmod|amod|nn):.*$|(prep_[a-z]*:n))'),
])
for name, regex in exp_roles.items():
    govs = editor(govs.results, merge_entries = regex, newname = name)









    



***Processing results***
========================

Merging 26 entries as "Process":
    root:root
    dobj:vb:take
    dobj:vbg:take
    prep_at:vb:put
    dobj:vb:pose
    dobj:vbd:take
    prep_at:vbg:put
    dobj:vbp:run
    dobj:vbz:pose
    dobj:vbz:run
... and 16 more ... 

***Done!***
========================


***Processing results***
========================

Merging 11081 entries as "Modifier":
    nn:nns:factor
    nn:nn:management
    nn:nn:factor
    amod:nns:investment
    amod:nn:business
    amod:nn:behavior
    prep_at:nns:people
    nn:nn:assessment
    prep_of:nn:level
    amod:nns:loan
... and 11071 more ... 

***Done!***
========================


***Processing results***
========================

Merging 16244 entries as "Participant":
    nsubj:vbz:be
    prep_at:vbp:be
    dobj:vb:reduce
    prep_at:vbz:be
    prep_at:vb:be
    prep_at:vbd:be
    nsubj:vbd:be
    dobj:vb:increase
    nsubj:vbp:be
    dobj:vbz:carry
... and 16234 more ... 

***Done!***
========================



In [151]:

    
# Counts before the copula adjustment. The parenthesised form prints the
# same text on Python 2 and also runs on Python 3.
print(govs.results['Process'])
print(govs.results['Participant'])









    



1963     243
1987    1195
1988    1130
...
2012    1238
2013    1305
2014     840
Name: Process, Length: 29, dtype: int64
1963     606
1987    3379
1988    3265
...
2012    4546
2013    4432
2014    2791
Name: Participant, Length: 29, dtype: int64

Now we need to subtract the copula counts from Process and add them to Participant:



In [152]:

    
# Move the copula counts out of Process and into Participant.
# NOTE: this edits govs.results in place, so running the cell a second time
# shifts the copula counts again. Re-create govs before re-running.
govs.results['Process'] = govs.results['Process'] - copula
govs.results['Participant'] = govs.results['Participant'] + copula



In [153]:

    
# Counts after the copula adjustment. The parenthesised form prints the
# same text on Python 2 and also runs on Python 3.
print(govs.results['Process'])
print(govs.results['Participant'])









    



1963    181
1987    830
1988    786
...
2012    931
2013    974
2014    661
Name: Process, Length: 29, dtype: int64
1963     668
1987    3744
1988    3609
...
2012    4853
2013    4763
2014    2970
Name: Participant, Length: 29, dtype: int64

And now get relative frequencies:



In [154]:

    
#allwd = interrogator('data/nyt/years', 'c', 'any')



In [22]:

    
# Load the saved 'coll_govs_with_pos' result; its `.totals` are what the
# following cells pass to editor() as the denominator for the '%' operation.
all_riskwords = load_result('coll_govs_with_pos')



In [141]:

    
# Relative frequency of each merged role against the risk-word totals,
# restricted to the three experiential roles and leaving out the 1963 subcorpus.
rel_govs = editor(
    govs.results, '%', all_riskwords.totals,
    just_entries=exp_roles.keys(),
    sort_by='total',
    skip_subcorpora=1963,
)









    



***Processing results***
========================

Skipping 1 subcorpora:
    1963

Keeping 3 entries:
    Process
    Modifier
    Participant

***Done!***
========================



In [199]:

    
# Absolute counts for the three roles, sorted by 'increase'.
# keep_stats=True leaves the statistics rows (including 'p') in the result so
# that they can be read back in the next cells.
p_check = editor(
    govs.results,
    just_entries=exp_roles.keys(),
    skip_subcorpora=1963,
    sort_by='increase',
    keep_stats=True,
)









    



***Processing results***
========================

Skipping 1 subcorpora:
    1963

Keeping 3 entries:
    Process
    Modifier
    Participant

Process [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] [830, 786, 820, 818, 789, 744, 703, 677, 693, 682, 717, 843, 846, 941, 852, 930, 924, 900, 847, 848, 901, 1064, 1077, 1084, 1015, 931, 974, 661] 8.60946907499 0.00127794870432
Modifier [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] [1552, 1517, 1448, 1573, 1466, 1286, 1422, 1519, 1586, 1446, 1453, 1787, 1687, 1630, 1724, 2066, 1967, 1969, 2051, 1940, 1988, 2252, 2051, 2139, 2218, 2100, 1998, 1227] 24.9622331691 6.47585372162e-05
Participant [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] [3744, 3609, 3793, 3849, 3412, 3421, 3530, 3566, 3566, 3594, 3544, 3976, 3917, 4053, 4324, 5336, 5039, 5084, 5104, 5066, 4856, 5098, 4848, 5030, 5594, 4853, 4763, 2970] 58.8344280241 0.000190708300737
***Done!***
========================



In [201]:

    
# Show the 'p' row of the result: one p-value per role.
# `.loc` is the label-based indexer; `.ix` is deprecated and no longer exists
# in current pandas releases.
p_check.results.loc['p']









    Out[201]:





Participant    0.000191
Modifier       0.000065
Process        0.001278
Name: p, dtype: float64



In [194]:

    
# Append each role's p-value (five decimal places) to its column label, then
# remove the statistics rows so that only the yearly counts remain.
# NOTE: this edits p_check.results in place and drops the 'p' row it reads from,
# so the cell cannot be re-run without first re-creating p_check.
statfields = ['slope', 'intercept', 'r', 'p', 'stderr']
newnames = ['%s (p=%s)' % (role, format(p_check.results[role]['p'], '.5f'))
            for role in list(p_check.results.columns)]
p_check.results.columns = newnames
p_check.results.drop(statfields, axis=0, inplace=True)
p_check.results









    Out[194]:






  
    
      
      Participant (p=0.00019)
      Modifier (p=0.00006)
      Process (p=0.00128)
    
  
  
    
      1987
       3744
       1552
        830
    
    
      1988
       3609
       1517
        786
    
    
      1989
       3793
       1448
        820
    
    
      1990
       3849
       1573
        818
    
    
      1991
       3412
       1466
        789
    
    
      ...
      ...
      ...
      ...
    
    
      2010
       5030
       2139
       1084
    
    
      2011
       5594
       2218
       1015
    
    
      2012
       4853
       2100
        931
    
    
      2013
       4763
       1998
        974
    
    
      2014
       2970
       1227
        661
    
  

28 rows × 3 columns



In [197]:

    
# Turn the relabelled counts into relative frequencies against the risk-word
# totals, then draw one subplot per role and save the figure.
rel_govs = editor(
    p_check.results, '%', all_riskwords.totals,
    sort_by='total',
    skip_subcorpora=1963,
)
plotter(
    'Experiential role of risk words',
    rel_govs.results,
    y_label='Percentage of risk words in any experiential role',
    style='bmh',
    figsize=(10, 6),
    subplots=True,
    save='subplot-three-roles-final',
)









    



***Processing results***
========================

Skipping 0 subcorpora:
    

***Done!***
========================


12:11:34: images/subplotthreerolesfinal.png created.

Participant type



In [69]:

    
# Patterns that split participant entries by the dependency label at the
# start of each entry.
part_roles = {'Experiential subject': r'^(nsubj|agent|xsubj):',
              'Experiential object': r'^(nsubjpass|dobj|iobj|acomp):'}

# Processes have to be removed first; only then are the participant entries kept.
parts = load_result('coll_govs_with_pos')
without_processes = editor(parts.results, skip_entries=exp_roles['Process'])
parts = editor(without_processes.results, just_entries=exp_roles['Participant'])

# Collapse the entries matched by each pattern into one column named after the role.
for name, regex in part_roles.items():
    parts = editor(parts.results, merge_entries=regex, newname=name)









    



***Processing results***
========================

Skipping 26 entries:
    root:root
    dobj:vb:take
    dobj:vbg:take
    prep_at:vb:put
    dobj:vb:pose
    dobj:vbd:take
    prep_at:vbg:put
    dobj:vbp:run
    dobj:vbz:pose
    dobj:vbz:run
... and 16 more ... 

***Done!***
========================


***Processing results***
========================

Keeping 16244 entries:
    nsubj:vbz:be
    prep_at:vbp:be
    dobj:vb:reduce
    prep_at:vbz:be
    prep_at:vb:be
    prep_at:vbd:be
    nsubj:vbd:be
    dobj:vb:increase
    nsubj:vbp:be
    dobj:vbz:carry
... and 16234 more ... 

***Done!***
========================


***Processing results***
========================

Merging 3671 entries as "Experiential subject":
    nsubj:vbz:be
    nsubj:vbd:be
    nsubj:vbp:be
    nsubj:jj:high
    nsubj:jj:great
    nsubj:jjr:greater
    nsubj:jj:low
    nsubj:jj:small
    nsubj:jjr:higher
    nsubj:jj:real
... and 3661 more ... 

***Done!***
========================


***Processing results***
========================

Merging 3931 entries as "Experiential object":
    dobj:vb:reduce
    dobj:vb:increase
    dobj:vbz:carry
    dobj:vbp:have
    dobj:vbz:increase
    dobj:vbd:have
    dobj:vbg:reduce
    dobj:vb:minimize
    dobj:vbd:increase
    dobj:vbz:have
... and 3921 more ... 

***Done!***
========================

Add copula



In [70]:

    
# Add the copula counts to the 'Experiential object' column.
# NOTE: the column is rewritten from its own current value, so running this
# cell a second time adds the copula counts a second time.
object_counts = parts.results['Experiential object']
parts.results['Experiential object'] = object_counts + copula

Finish up



In [71]:

    
# Relative frequencies for the two participant roles (1963 left out),
# followed by a black-and-white line plot.
rel_parts = editor(
    parts.results, '%', all_riskwords.totals,
    just_entries=part_roles.keys(),
    skip_subcorpora=1963,
    sort_by='total',
)

plotter(
    'Risk by participant role',
    rel_parts.results,
    y_label='Percentage of risk words in any experiential role',
    style='bmh',
    figsize=(10, 6),
    black_and_white=True,
)









    



***Processing results***
========================

Skipping 1 subcorpora:
    1963

Keeping 2 entries:
    Experiential subject
    Experiential object

***Done!***
========================

Risk processes



In [11]:

    
# One pattern per risk process. Apart from the exact 'root:root' entry, the
# middle field `v.*?` accepts any tag that starts with "v".
proc_roles = {'to risk': r'^root:root$',
              'to take risk': r'^dobj:v.*?:take$',
              'to run risk': r'^dobj:v.*?:run$',
              'to pose risk': r'^dobj:v.*?:pose$',
              'to put at risk': r'^prep_at:v.*?:put$'}

# Keep only the entries matched by the Process pattern and leave out 1963.
procs = load_result('coll_govs_with_pos')
procs = editor(procs.results, skip_subcorpora=1963, just_entries=exp_roles['Process'])

# Merge the entries matched by each pattern into one column named after the process.
for name, regex in proc_roles.items():
    procs = editor(procs.results, merge_entries=regex, newname=name)









    



***Processing results***
========================

Skipping 1 subcorpora:
    1963

Keeping 2829 entries:
    root:root
    dobj:vb:take
    dobj:vbg:take
    prep_at:vb:put
    dobj:vb:pose
    dobj:vbd:take
    prep_at:vbg:put
    dobj:vbp:run
    dobj:vbz:pose
    dobj:vbz:run
... and 2819 more ... 

***Done!***
========================


***Processing results***
========================

Merging 1 entries as "to risk":
    root:root

***Done!***
========================


***Processing results***
========================

Merging 6 entries as "to put at risk":
    prep_at:vb:put
    prep_at:vbg:put
    prep_at:vbd:put
    prep_at:vbn:put
    prep_at:vbz:put
    prep_at:vbp:put

***Done!***
========================


***Processing results***
========================

Merging 6 entries as "to take risk":
    dobj:vb:take
    dobj:vbg:take
    dobj:vbd:take
    dobj:vbp:take
    dobj:vbz:take
    dobj:vbn:take

***Done!***
========================


***Processing results***
========================

Merging 6 entries as "to run risk":
    dobj:vbp:run
    dobj:vbz:run
    dobj:vb:run
    dobj:vbg:run
    dobj:vbd:run
    dobj:vbn:run

***Done!***
========================


***Processing results***
========================

Merging 6 entries as "to pose risk":
    dobj:vb:pose
    dobj:vbz:pose
    dobj:vbd:pose
    dobj:vbp:pose
    dobj:vbg:pose
    dobj:vbn:pose

***Done!***
========================

Remove the copula counts from "to risk"



In [14]:

    
# Subtract the copula counts from the 'to risk' column.
# NOTE: the column is rewritten from its own current value, so running this
# cell a second time subtracts the copula counts a second time.
to_risk_counts = procs.results['to risk']
procs.results['to risk'] = to_risk_counts - copula



In [31]:

    
# Relative frequencies for the five risk processes, then a black-and-white plot.
rel_procs = editor(
    procs.results, '%', all_riskwords.totals,
    just_entries=proc_roles.keys(),
    sort_by='total',
)
plotter(
    'Risk by processes',
    rel_procs.results,
    y_label='Percentage of risk words in any experiential role',
    style='bmh',
    figsize=(10, 6),
    black_and_white=True,
)









    



***Processing results***
========================

Keeping 5 entries:
    to risk
    to put at risk
    to take risk
    to run risk
    to pose risk

***Done!***
========================

Risk as modifier



In [27]:

    
# Patterns that split modifier entries by the dependency label at the start
# of each entry.
mod_roles = {'Nominal pre-head': r'^nn:',
             'Nominal post-head': r'^(prep_[a-z]*:n|vmod:)',
             # Disabled entry, kept for reference. Its pattern has an unmatched ')'
             # and would need fixing before being switched back on.
             #'Head of PP complement': r'^prep_[a-z]*:v)',
             'Adverbial modifier': r'^advmod:',
             'Adjectival modifier': r'^amod:'}

# Drop the process entries, then keep only the entries matched by the Modifier pattern.
mods = load_result('coll_govs_with_pos')
without_processes = editor(mods.results, skip_entries=exp_roles['Process'])
mods = editor(without_processes.results, just_entries=exp_roles['Modifier'])

# Merge the entries matched by each pattern into one column named after the role.
for name, regex in mod_roles.items():
    mods = editor(mods.results, merge_entries=regex, newname=name)









    



***Processing results***
========================

Skipping 26 entries:
    root:root
    dobj:vb:take
    dobj:vbg:take
    prep_at:vb:put
    dobj:vb:pose
    dobj:vbd:take
    prep_at:vbg:put
    dobj:vbp:run
    dobj:vbz:pose
    dobj:vbz:run
... and 16 more ... 

***Done!***
========================


***Processing results***
========================

Keeping 11081 entries:
    nn:nns:factor
    nn:nn:management
    nn:nn:factor
    amod:nns:investment
    amod:nn:business
    amod:nn:behavior
    prep_at:nns:people
    nn:nn:assessment
    prep_of:nn:level
    amod:nns:loan
... and 11071 more ... 

***Done!***
========================


***Processing results***
========================

Merging 6685 entries as "Nominal post-head":
    prep_at:nns:people
    prep_of:nn:level
    prep_of:nn:lot
    prep_in:nn:increase
    prep_of:nn:kind
    prep_of:nn:degree
    prep_at:nns:woman
    prep_at:nns:child
    prep_of:nn:amount
    prep_of:nn:assessment
... and 6675 more ... 

***Done!***
========================


***Processing results***
========================

Merging 2280 entries as "Adjectival modifier":
    amod:nns:investment
    amod:nn:business
    amod:nn:behavior
    amod:nns:loan
    amod:nns:asset
    amod:nn:strategy
    amod:nn:move
    amod:nns:security
    amod:nn:proposition
    amod:nn:venture
... and 2270 more ... 

***Done!***
========================


***Processing results***
========================

Merging 2028 entries as "Nominal pre-head":
    nn:nns:factor
    nn:nn:management
    nn:nn:factor
    nn:nn:assessment
    nn:nn:taker
    nn:nn:analysis
    nn:nn:officer
    nn:nns:taker
    nn:nnp:business
    nn:nns:assessment
... and 2018 more ... 

***Done!***
========================


***Processing results***
========================

Merging 88 entries as "Adverbial modifier":
    advmod:vbz:be
    advmod:vb:make
    advmod:vbn:base
    advmod:vbn:consider
    advmod:vb:be
    advmod:vbn:borrow
    advmod:vb:play
    advmod:vbn:call
    advmod:vb:reduce
    advmod:nns:loan
... and 78 more ... 

***Done!***
========================



In [ ]:



In [29]:

    
# Relative frequencies for the four modifier roles, leaving out the 1963 subcorpus.
rel_mods = editor(mods.results, '%', all_riskwords.totals, just_entries = mod_roles.keys(), 
                  sort_by = 'total', skip_subcorpora = 1963)
# Black-and-white version, displayed only.
plotter('Risk by modifier type', rel_mods.results, y_label = 'Percentage of risk words in any experiential role',
        style = 'bmh', figsize = (10, 6), black_and_white = True)
# Colour version, also written to disk via `save`.
# The trailing semicolon stops the notebook from echoing the call's return value
# (a matplotlib module repr containing a local installation path) as cell output.
plotter('Risk by modifier type', rel_mods.results, y_label = 'Percentage of risk words in any experiential role', 
        figsize = (10, 6), save = 'risk_by_mod_type_colour');









    



***Processing results***
========================

Skipping 1 subcorpora:
    1963

Keeping 4 entries:
    Nominal post-head
    Adjectival modifier
    Nominal pre-head
    Adverbial modifier

***Done!***
========================


21:35:52: images/risk_by_mod_type_colour.png created.






    Out[29]:





<module 'matplotlib.pyplot' from '/Library/Python/2.7/site-packages/matplotlib/pyplot.pyc'>

Participant roles



In [13]:

    
# Load the saved 'collapsed_deps' result and preview its first ten entries.
deps = load_result('collapsed_deps')
quickview(deps, 10)
# Merge every entry starting with "cop:" into a single 'Cop' column and keep
# that column as `copula`.
cop_merged = editor(deps.results, merge_entries=r'^cop:', newname='Cop')
copula = cop_merged.results['Cop']









    



  0: root:say
  1: det:the
  2: root:be
  3: det:a
  4: root:have
  5: cop:be
  6: root:risk
  7: root:take
  8: aux:to
  9: root:make

***Processing results***
========================

Merging 6 entries as "Cop":
    cop:be
    cop:seem
    cop:become
    cop:remain
    cop:appear
    cop:stay

***Done!***
========================



In [7]:

    
# The same saved result is loaded three times under three names; `govs` and
# `material` are each edited separately in the next cell.
saved_result = 'coll_govs_with_pos'
govs = load_result(saved_result)
all_govs = load_result(saved_result)
material = load_result(saved_result)



In [10]:

    
# `processes` supplies the mental / relational / verbal patterns that are
# interpolated into the participant-role patterns below.
from dictionaries.process_types import processes
# Case-insensitive patterns assigning an entry to one of three experiential roles.
exp_roles = {'Process': r'(?i)^(root:root|dobj:.*?:(run|take|pose)|prep_at:v[a-z]*:put|rcmod:.*|xcomp:.*)$',
             'Participant': r'(?i)^(xsubj|nsubj|nsubjpass|acomp|agent|appos|cop|dobj|iobj):.*$',
             'Modifier': r'(?i)^((advmod|vmod|amod|nn):.*$|(prep_[a-z]*:n|prep_[a-z]*:v))'}

# Participant roles: a subject-like or object-like dependency label, a middle
# field starting with "v", then whatever the process-type pattern expands to.
# NOTE(review): these patterns are not anchored, and the output recorded for this
# cell lists the same 2171 entries for both "Senser" and "Sayer" and 3852 entries
# for both "Phenomenon" and "Verbiage", so they do not appear to separate the
# process types - confirm what processes.mental / .relational / .verbal contain.
exp_part_roles = {
    'Senser': r'(nsubj|xsubj|agent):v.*?:%s' % processes.mental,
    'Phenomenon': r'(iobj|dobj|nsubjpass|acomp):v.*?:%s' % processes.mental,
    'Token': r'(nsubj|xsubj|agent):v.*?:%s' % processes.relational,
    'Value': r'(iobj|dobj|nsubjpass|acomp):v.*?:%s' % processes.relational,
    'Sayer': r'(nsubj|xsubj|agent):v.*?:%s' % processes.verbal,
    'Verbiage': r'(iobj|dobj|nsubjpass|acomp):v.*?:%s' % processes.verbal}

# Remove the process entries from both working copies before splitting participants.
govs = editor(govs.results, skip_entries = exp_roles['Process'])
material = editor(material.results, skip_entries = exp_roles['Process'])

# Merge each participant role into its own series (always starting from `govs`),
# and remove the matched entries from `material` so that only unmatched ones are left.
serieses = []
for name, regex in exp_part_roles.items():
    tmp = editor(govs.results, merge_entries = regex, newname = name)
    serieses.append(tmp.results[name])
    material = editor(material.results, skip_entries = regex)

# Whatever is left in `material` is treated as material-process participants:
# subject-like entries become 'Actor', object-like entries become 'Goal, range'.

material = editor(material.results, merge_entries = r'(nsubj|xsubj|agent):v.*?:', newname = 'Actor')
serieses.append(material.results['Actor'])
material = editor(material.results, merge_entries = r'(iobj|dobj|nsubjpass|acomp):v.*?:', newname = 'Goal, range')
serieses.append(material.results['Goal, range'])

# TODO: add copula (nothing in this cell does so yet)









    



***Processing results***
========================

Skipping 2829 entries:
    root:root
    dobj:vb:take
    dobj:vbg:take
    prep_at:vb:put
    dobj:vb:pose
    dobj:vbd:take
    prep_at:vbg:put
    dobj:vbp:run
    dobj:vbz:pose
    dobj:vbz:run
... and 2819 more ... 

***Done!***
========================


***Processing results***
========================

Skipping 2829 entries:
    root:root
    dobj:vb:take
    dobj:vbg:take
    prep_at:vb:put
    dobj:vb:pose
    dobj:vbd:take
    prep_at:vbg:put
    dobj:vbp:run
    dobj:vbz:pose
    dobj:vbz:run
... and 2819 more ... 

***Done!***
========================


***Processing results***
========================

Merging 2171 entries as "Senser":
    nsubj:vbz:be
    nsubj:vbd:be
    nsubj:vbp:be
    nsubj:vbd:say
    nsubj:vbz:increase
    nsubj:vbn:increase
    nsubj:vb:be
    nsubj:vbp:outweigh
    nsubj:vbg:lose
    nsubj:vbn:involve
... and 2161 more ... 

***Done!***
========================


***Processing results***
========================

Skipping 2171 entries:
    nsubj:vbz:be
    nsubj:vbd:be
    nsubj:vbp:be
    nsubj:vbd:say
    nsubj:vbz:increase
    nsubj:vbn:increase
    nsubj:vb:be
    nsubj:vbp:outweigh
    nsubj:vbg:lose
    nsubj:vbn:involve
... and 2161 more ... 

***Done!***
========================


***Processing results***
========================

Merging 3852 entries as "Phenomenon":
    dobj:vb:reduce
    dobj:vb:increase
    dobj:vbz:carry
    dobj:vbp:have
    dobj:vbz:increase
    dobj:vbd:have
    dobj:vbg:reduce
    dobj:vb:minimize
    dobj:vbd:increase
    dobj:vbz:have
... and 3842 more ... 

***Done!***
========================


***Processing results***
========================

Skipping 3852 entries:
    dobj:vb:reduce
    dobj:vb:increase
    dobj:vbz:carry
    dobj:vbp:have
    dobj:vbz:increase
    dobj:vbd:have
    dobj:vbg:reduce
    dobj:vb:minimize
    dobj:vbd:increase
    dobj:vbz:have
... and 3842 more ... 

***Done!***
========================


***Processing results***
========================

Merging 3811 entries as "Value":
    dobj:vb:reduce
    dobj:vb:increase
    dobj:vbz:carry
    dobj:vbp:have
    dobj:vbz:increase
    dobj:vbd:have
    dobj:vbg:reduce
    dobj:vb:minimize
    dobj:vbd:increase
    dobj:vbz:have
... and 3801 more ... 

***Done!***
========================


***Processing results***
========================

Skipping 0 entries:
    

***Done!***
========================


***Processing results***
========================

Merging 2151 entries as "Token":
    nsubj:vbz:be
    nsubj:vbd:be
    nsubj:vbp:be
    nsubj:vbd:say
    nsubj:vbz:increase
    nsubj:vbn:increase
    nsubj:vb:be
    nsubj:vbp:outweigh
    nsubj:vbg:lose
    nsubj:vbn:involve
... and 2141 more ... 

***Done!***
========================


***Processing results***
========================

Skipping 0 entries:
    

***Done!***
========================


***Processing results***
========================

Merging 3852 entries as "Verbiage":
    dobj:vb:reduce
    dobj:vb:increase
    dobj:vbz:carry
    dobj:vbp:have
    dobj:vbz:increase
    dobj:vbd:have
    dobj:vbg:reduce
    dobj:vb:minimize
    dobj:vbd:increase
    dobj:vbz:have
... and 3842 more ... 

***Done!***
========================


***Processing results***
========================

Skipping 0 entries:
    

***Done!***
========================


***Processing results***
========================

Merging 2171 entries as "Sayer":
    nsubj:vbz:be
    nsubj:vbd:be
    nsubj:vbp:be
    nsubj:vbd:say
    nsubj:vbz:increase
    nsubj:vbn:increase
    nsubj:vb:be
    nsubj:vbp:outweigh
    nsubj:vbg:lose
    nsubj:vbn:involve
... and 2161 more ... 

***Done!***
========================


***Processing results***
========================

Skipping 0 entries:
    

***Done!***
========================


***Processing results***
========================

Merging 0 entries as "Actor":
    

***Done!***
========================


***Processing results***
========================

Merging 1 entries as "Goal, range":
    dobj:vbp:|

***Done!***
========================



In [89]:

    
# Display the participant-role frame (one column per role, one row per year).
# NOTE(review): this cell sits above the cell that assigns `df`, so it relies on
# `df` already being present in the kernel - confirm the intended cell order.
df









    Out[89]:






  
    
      
      Senser
      Phenomenon
      Value
      Token
      Verbiage
      Sayer
      Actor
      Goal, range
    
  
  
    
      1963
        3
        74
        74
        3
       17
        52
        41
        184
    
    
      1987
       18
       221
       221
       18
       47
       284
       252
       1192
    
    
      1988
       16
       204
       204
       16
       37
       268
       240
       1236
    
    
      1989
       20
       228
       228
       20
       59
       331
       239
       1321
    
    
      1990
       23
       263
       263
       23
       57
       303
       216
       1236
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      2010
       13
       263
       263
       13
       69
       423
       295
       1898
    
    
      2011
       25
       308
       308
       25
       87
       482
       381
       2033
    
    
      2012
       18
       261
       261
       18
       58
       376
       289
       1870
    
    
      2013
        7
       259
       259
        7
       57
       367
       272
       1871
    
    
      2014
       10
       154
       154
       10
       25
       247
       181
       1157
    
  

29 rows × 8 columns



In [143]:

    
# Put the merged role series side by side, one column per participant role.
df = pd.concat(serieses, axis=1)
# NOTE(review): `r` is only assigned by the `load_all_results()` line that is
# commented out in the first cell, so this lookup needs `r` to be defined some
# other way on a fresh kernel - confirm.
tots = r['all_govs']
# Relative frequencies against the totals of that result, then drop 1963 and plot.
as_percent = editor(df, '%', tots.totals, sort_by='total')
df = editor(as_percent.results, skip_subcorpora=1963)
plotter('Participant types for risk words', df.results, figsize=(16, 7))









    



***Processing results***
========================

***Done!***
========================


***Processing results***
========================

Skipping 1 subcorpora:
    1963

***Done!***
========================



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:

    
# Combined version of the participant-role workflow: load, define patterns,
# merge each role, and treat the remainder as material-process participants.
# Relies on `processes` (from dictionaries.process_types) having been imported
# by an earlier cell.
govs = load_result('coll_govs_with_pos')
all_govs = load_result('coll_govs_with_pos')
material = load_result('coll_govs_with_pos')

# Case-insensitive patterns assigning an entry to one of three experiential roles.
exp_roles = {'Process': r'(?i)^(root:root|dobj:.*?:(run|take|pose)|prep_at:v[a-z]*:put|rcmod:.*|xcomp:.*)$',
             'Participant': r'(?i)^(xsubj|nsubj|nsubjpass|acomp|agent|appos|cop|dobj|iobj):.*$',
             'Modifier': r'(?i)^((advmod|vmod|amod|nn):.*$|(prep_[a-z]*:n|prep_[a-z]*:v))'}

# Subject-like versus object-like entries whose middle field starts with "v".
# The first key previously read 'Experiential subect'; it is now spelled the same
# way as the 'Experiential subject' label used for the participant-type figure.
exp_kinds = {'Experiential subject': r'(nsubj|xsubj|agent):v', 
             'Experiential object': r'(iobj|dobj|nsubjpass|acomp):v'}

# Participant roles: a subject-like or object-like dependency label, a middle
# field starting with "v", then whatever the process-type pattern expands to.
# NOTE(review): these patterns are not anchored - confirm that the patterns in
# processes.mental / .relational / .verbal are specific enough on their own.
exp_part_roles = {
    'Senser': r'(nsubj|xsubj|agent):v.*?:%s' % processes.mental,
    'Phenomenon': r'(iobj|dobj|nsubjpass|acomp):v.*?:%s' % processes.mental,
    'Token': r'(nsubj|xsubj|agent):v.*?:%s' % processes.relational,
    'Value': r'(iobj|dobj|nsubjpass|acomp):v.*?:%s' % processes.relational,
    'Sayer': r'(nsubj|xsubj|agent):v.*?:%s' % processes.verbal,
    'Verbiage': r'(iobj|dobj|nsubjpass|acomp):v.*?:%s' % processes.verbal}

# Remove the process entries from both working copies before splitting participants.
govs = editor(govs.results, skip_entries = exp_roles['Process'])
material = editor(material.results, skip_entries = exp_roles['Process'])

# Merge each participant role into its own series (always starting from `govs`),
# and remove the matched entries from `material` so that only unmatched ones are left.
serieses = []
for name, regex in exp_part_roles.items():
    tmp = editor(govs.results, merge_entries = regex, newname = name)
    serieses.append(tmp.results[name])
    material = editor(material.results, skip_entries = regex)

# Whatever is left in `material` is treated as material-process participants:
# subject-like entries become 'Actor', object-like entries become 'Goal, range'.

material = editor(material.results, merge_entries = r'(nsubj|xsubj|agent):v.*?:', newname = 'Actor')
serieses.append(material.results['Actor'])
material = editor(material.results, merge_entries = r'(iobj|dobj|nsubjpass|acomp):v.*?:', newname = 'Goal, range')
serieses.append(material.results['Goal, range'])

	1900	2000

economics	17.7	22.0
politics	21.1	20.8
health	1.0	13.4
medicine	0.0	3.2
sport	11.5	7.8
arts	3.8	5.9
legal	1.9	4.4
security	0.5	3.2
education	0.0	2.4
war	11.5	2.7
industrial relations/work	1.4	3.7
crime	2.4	2.2
media	1.0	2.0
transport/infrastructure	3.8	1.2
military	4.3	1.5
social order	3.3	0.2
technology	0.0	5.4
environment	0.0	1.7
disaster	1.9	0.5

	Participant (p=0.00019)	Modifier (p=0.00006)	Process (p=0.00128)
1987	3744	1552	830
1988	3609	1517	786
1989	3793	1448	820
1990	3849	1573	818
1991	3412	1466	789
...	...	...	...
2010	5030	2139	1084
2011	5594	2218	1015
2012	4853	2100	931
2013	4763	1998	974
2014	2970	1227	661

	Senser	Phenomenon	Value	Token	Verbiage	Sayer	Actor	Goal, range
1963	3	74	74	3	17	52	41	184
1987	18	221	221	18	47	284	252	1192
1988	16	204	204	16	37	268	240	1236
1989	20	228	228	20	59	331	239	1321
1990	23	263	263	23	57	303	216	1236
...	...	...	...	...	...	...	...	...
2010	13	263	263	13	69	423	295	1898
2011	25	308	308	25	87	482	381	2033
2012	18	261	261	18	58	376	289	1870
2013	7	259	259	7	57	367	272	1871
2014	10	154	154	10	25	247	181	1157