In [1]:
    
import os
import pandas as pd
from corpkit import *
%matplotlib inline
#r = load_all_results()
    
In [2]:
    
# Read the comparison table from CSV, using its first column as the row index.
first_fig = pd.read_csv('1900-2000-comparison.csv', index_col = 0)
# Bare expression so the notebook renders the frame as the cell output.
first_fig
    
    Out[2]:
In [3]:
    
# Colour bar chart of the comparison table. The black-and-white variant was the
# same call with black_and_white = True and save = 'domains_june_final'.
plotter('Domains of risk discourse', first_fig,
        kind = 'bar',
        num_to_plot = 'all',
        x_label = False,
        figsize = (10,6),
        style = 'bmh',
        show_totals = 'plot',
        save = 'domains_june_colour')
    
    
    
In [4]:
    
import os
import pandas as pd
second_fig = pd.read_csv('key-terms-comparison.csv', index_col = 0)
second_fig = second_fig[['danger', 'threat', 'risk', 'disaster', 'harm', 'uncertainty']]
#from corpkit import plotter
%matplotlib inline
    
In [5]:
    
# Colour chart of the selected key terms. The black-and-white variant was the
# same call with black_and_white = True and save = 'risk_related_june_final'.
plotter('Risk and related words over time', second_fig,
        num_to_plot = 'all',
        figsize = (10, 6),
        style = 'bmh',
        save = 'risk_related_june_colour')
    
    
    
In [2]:
    
# Relate the risk-word counts per word class to the totals for all words,
# leaving out the 1963 subcorpus, then plot the top three classes.
riskpos_counts = r['riskpos'].results
allword_totals = r['allwords'].totals
pos = editor(riskpos_counts, '%', allword_totals, skip_subcorpora = 1963)
plotter('Risk words by word class', pos.results,
        num_to_plot = 3,
        y_label = 'Percentage of all parsed words',
        style = 'bmh',
        figsize = (10, 6),
        black_and_white = True,
        save = 'risk_by_class_final')
    
    
    Out[2]:
    
In [3]:
    
# Only the four open word classes are compared against the baseline counts.
open_words = ['Noun', 'Verb', 'Adjective', 'Adverb']
maths_done = editor(r['riskpos'].results, '%', r['baseline'].results,
                    just_entries = open_words,
                    skip_subcorpora = [1963],
                    sort_by = 'total')
    
    
In [4]:
    
# Four renderings of the same data: line and stacked-area charts, each in a
# black-and-white ('bmh' style) and a default colour version.
open_class_title = 'Percentage of open word classes that are risk words'
line_opts = dict(y_label = 'Percentage', figsize = (10, 6), num_to_plot = 3)
area_opts = dict(y_label = 'Percentage', kind = 'area', legend_pos = 'lower right',
                 stacked = True, figsize = (10, 6))

plotter(open_class_title, maths_done.results,
        black_and_white = True, style = 'bmh', save = 'perc_open_final', **line_opts)
plotter(open_class_title, maths_done.results,
        save = 'perc_open_final_colour', **line_opts)
plotter(open_class_title, maths_done.results,
        black_and_white = True, style = 'bmh', save = 'perc_open_area_final', **area_opts)
plotter(open_class_title, maths_done.results,
        save = 'perc_open_area_final_colour', **area_opts)
    
    
    Out[4]:
    
    
    
    
In [5]:
    
# Relative frequencies of the participant/process/modifier interrogation,
# plotted twice (black-and-white and colour) without the '1963' row.
ppm_saved = r['risk_as_part_process_mod']
ppm = editor(ppm_saved.results, '%', ppm_saved.totals)
ppm_title = 'Risk as participant, process and modifier'
ppm_ylabel = 'Percentage of risk words in any experiential role'
plotter(ppm_title, ppm.results.drop('1963'),
        y_label = ppm_ylabel,
        style = 'bmh',
        black_and_white = True,
        figsize = (10, 6),
        save = 'ppm_final')
plotter(ppm_title, ppm.results.drop('1963'),
        y_label = ppm_ylabel,
        figsize = (10, 6),
        save = 'ppm_final_colour')
    
    
    Out[5]:
    
    
In [143]:
    
govs = r['all_govs']
ppm = r['risk_as_part_process_mod']
# Merge governor entries into three roles, one after another; each merge
# works on the result of the previous one, so the order is significant.
role_merges = [
    ('Process', r'^(root:root|dobj:(take|run|pose))$'),
    ('Participant', r'^(dobj|nsubj|nsubjpass|csubj|acomp|iobj|csubjpass):'),
    ('Modifier', r'^(pobj|nn|amod|rcmod|vmod|tmod|npadvmod|advmod):'),
]
new_govs = govs
for role, pattern in role_merges:
    new_govs = editor(new_govs.results, merge_entries = pattern, newname = role)
# Keep only the merged roles, relative to the original totals.
new_govs = editor(new_govs.results, '%', govs.totals,
                  sort_by = 'total',
                  just_entries = ['Participant', 'Process', 'Modifier'])
plotter('Risk words by experiential role', new_govs.results.drop('1963'),
        y_label = 'Percentage of risk words in any experiential role',
        style = 'bmh',
        figsize = (10, 6),
        black_and_white = True,
        save = 'ppmfinal')
    
    
    
In [ ]:
    
#coll_govs_with_pos = interrogator(corpus, 'g', r'(?i)\brisk', lemmatise = True, dep_type = 'collapsed', 
    # add_pos_to_g_d_option=True, quicksave = 'coll_govs_with_pos')
r = load_all_results()
# This line was indented in the original, which is an IndentationError at top level.
govs = r['collapsed_govs_with_pos']
# (role name, regex) pairs, merged in this order: each merge operates on the
# output of the previous one, so Process entries are claimed first.
exp_roles = [('Process', r'(?i)^(root:root|dobj:.*?:(run|take|pose)|prep_at:v[a-z]*:put|rcmod:.*|xcomp:.*)$'),
            ('Participant', r'(?i)^(xsubj|nsubj|nsubjpass|acomp|agent|appos|cop|dobj|iobj):.*$'),
            ('Modifier', r'(?i)^((advmod|vmod|amod|nn):.*$|(prep_[a-z]*:n|prep_[a-z]*:v))')]
for name, regex in exp_roles:
    govs = editor(govs.results, merge_entries = regex, newname = name)
# Relative frequencies against the totals, leaving out the 1963 subcorpus.
govs = editor(govs.results, '%', govs.totals, skip_subcorpora = 1963)
plotter('Experiential role', govs.results, y_label = 'Percentage of risk words in any experiential role',
    style = 'bmh', figsize = (10, 6), black_and_white = True)
    
In [ ]:
    
# currently problematic
# Raw string: '\s' is not a valid escape sequence in a normal string literal,
# and the backslash must reach the plotter unchanged.
plotter(r'Risk as experiential subject\slash object', govs.results, style = 'bmh', 
        black_and_white = True, figsize = (10, 6))
    
In [16]:
    
# Four selected adjectives, relative to the interrogation totals, without 1963.
adj_saved = r['adj_modifiers']
adjmods = editor(adj_saved.results, '%', adj_saved.totals,
                 just_entries = ['high', 'calculated', 'great', 'potential'],
                 skip_subcorpora = 1963)
plotter('Selected adjectives modifying nominal risk', adjmods.results,
        y_label = 'Percentage of all adjectival risk words',
        style = 'bmh',
        black_and_white = True,
        figsize = (10, 6),
        save = 'sel_adjs_final')
    
    
    Out[16]:
    
In [62]:
    
def load_result(savename, loaddir = 'data/saved_interrogations'):
    """Reload a pickled interrogation from `loaddir`.

    Parameters:
        savename: file name of the pickle; '.p' is appended when missing.
        loaddir: directory that holds the saved pickles.

    Returns:
        The unpickled object itself when it is a pandas DataFrame or Series.
        Otherwise an 'interrogation' namedtuple built from the first items of
        the unpickled sequence, with fields chosen by its length:
        4 -> (query, results, totals, table), 3 -> (query, results, totals),
        2 -> (query, totals).

    Raises:
        ValueError: the unpickled object has a length other than 2, 3 or 4
            (previously this surfaced as an UnboundLocalError).
    """
    import collections
    import pickle
    import os
    import pandas
    if not savename.endswith('.p'):
        savename = savename + '.p'
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    # The context manager closes the file handle, which the old code leaked.
    with open(os.path.join(loaddir, savename), 'rb') as pickled_file:
        unpickled = pickle.load(pickled_file)

    if isinstance(unpickled, (pandas.DataFrame, pandas.Series)):
        return unpickled

    fields_by_length = {
        4: ['query', 'results', 'totals', 'table'],
        3: ['query', 'results', 'totals'],
        2: ['query', 'totals'],
    }
    length = len(unpickled)
    if length not in fields_by_length:
        raise ValueError('%s holds %d items; expected 2, 3 or 4' % (savename, length))
    fields = fields_by_length[length]
    outputnames = collections.namedtuple('interrogation', fields)
    return outputnames(*[unpickled[position] for position in range(length)])
# Load 'processes' directly with load_result (the load_all_results log further
# down reports that processes.p failed to load that way).
processes = load_result('processes')
# Relate the results to the totals, leaving out the 1963 subcorpus.
proc_rel = editor(processes.results, '%', processes.totals, skip_subcorpora = 1963)
    
    
In [116]:
    
# Black-and-white chart of the relative process frequencies.
plotter('Risk processes', proc_rel.results,
        y_label = 'Percentage of all risk processes',
        style = 'bmh',
        black_and_white=True,
        figsize = (10, 6),
        legend_pos = 'center left',
        save = 'risk_proc_final')
    
    
    
In [18]:
    
# Modifier types relative to their totals, leaving out the 1963 subcorpus.
modifiers = r['modifiers']
mods = editor(modifiers.results, '%', modifiers.totals,
              skip_subcorpora = [1963])
plotter('Types of risk modifiers', mods.results,
        style = 'bmh',
        black_and_white = True,
        figsize = (10, 6),
        legend_pos = 'lower right',
        save = 'mod_types_final')
    
    
    Out[18]:
    
In [203]:
    
# Pull the two saved interrogations used by the "riskers" cells out of `r`.
subj_of_risk_process = r['subj_of_risk_process']
noun_lemmata = r['noun_lemmata']
    
In [204]:
    
# Totals-only relative frequencies of risk-process subjects, without 1963.
rskrs = editor(subj_of_risk_process.results, '%', subj_of_risk_process.totals,
               sort_by = 'total',
               skip_subcorpora = 1963,
               just_totals = True)
    
    
In [205]:
    
# Bar chart of the twelve most frequent riskers.
plotter('Riskers, sorted by total frequency', rskrs.results,
        kind = 'bar',
        num_to_plot = 12,
        y_label = 'Percentage of all riskers',
        show_totals = 'plot',
        black_and_white = True,
        style = 'bmh',
        figsize = (10, 6),
        save = 'riskers_total_final')
    
    
    
In [208]:
    
# Selected participants: risker counts relative to the noun-lemma counts.
entities = ['politician', 'candidate', 'governor', 'lawmaker', 'person', 'man', 'woman', 'child', 'baby']
spec_riskers = editor(subj_of_risk_process.results, '%', noun_lemmata.results,
                      just_entries = entities,
                      just_totals = True,
                      skip_subcorpora = 1963,
                      sort_by = 'total')
plotter('Percentage of common participants that are in the role of risker', spec_riskers.results,
        kind = 'bar',
        num_to_plot = 12,
        y_label = 'Percentage of occurrences in the role of risker',
        show_totals = 'plot',
        black_and_white = True,
        style = 'bmh',
        figsize = (10, 6),
        save = 'rel_risker_final')
    
    
    
In [2]:
    
# Saved proper-noun interrogation, taken from the loaded results `r`.
propernouns = r['propernouns']
    
In [11]:
    
# Proper nouns to compare, relative to the proper-noun totals, without 1963.
terms = ['vioxx', 'merck', 'aids', 'clinton', 'obama', 'europe', 'bush']
sel_nnp = editor(propernouns.results, '%', propernouns.totals,
                 just_entries = terms,
                 skip_subcorpora = 1963,
                 sort_by = 'total')
    
    
In [14]:
    
# Colour version, saved as 'healthcomp'. The title is a raw string because
# '\e' is not a valid escape sequence and the backslash must stay literal.
plotter(r'Health crises: comparing social actors that co-occur with risk in \emph{The New York Times}', sel_nnp.results, legend_pos = 'upper right',
       black_and_white = False, figsize = (10, 6), save = 'healthcomp', y_label = 'Percentage of all proper nouns')
# Black-and-white version, displayed only (no save argument).
plotter('Comparing social actors that co-occur with risk', sel_nnp.results, legend_pos = 'upper left',
       black_and_white = True, style = 'bmh', figsize = (10, 6))
    
    
    
    
In [72]:
    
# Display the 'aids' entry of the proper-noun results.
propernouns.results.aids
    
    Out[72]:
In [27]:
    
# Saved 'risk_of' interrogation, taken from the loaded results `r`.
risk_of = r['risk_of']
    
In [28]:
    
# 'risk of ...' results relative to their totals, leaving out 1963.
rel_riskof = editor(risk_of.results, '%', risk_of.totals,
                    skip_subcorpora = 1963)
plotter('Risk of (noun)', rel_riskof.results,
        y_label = 'Percentage of all results',
        style = 'fivethirtyeight',
        figsize = (10, 6),
        save = 'riskof_')
    
    
    
In [16]:
    
# Saved interrogation of social actors in the health subcorpus, from `r`.
social_act = r['health-social-actors-w-risk-in-sent']
    
In [17]:
    
# Display the query stored alongside this interrogation.
social_act.query
    
    Out[17]:
In [18]:
    
# Keep only the everyday participants; no relative-frequency step here.
everyday_people = ['man', 'woman', 'person', 'child', 'consumer', 'baby']
everyday = editor(social_act.results, just_entries = everyday_people)
plotter('Everyday participants in the health subcorpus', everyday.results,
        style = 'bmh',
        black_and_white=True,
        figsize = (10, 6),
        save = 'everyday_health_final')
    
    
    
In [19]:
    
# Institutional participants, relative to the totals of all participants.
inst = [
    'insurer',
    'hospital',
    'commercial insurer',
    'business',
    'insurance company',
    'health maintenance organization',
    'blue cross',
    'disease control',
    'employer',
]
institutional = editor(social_act.results, '%', social_act.totals,
                       just_entries = inst)
    
    
In [21]:
    
# Colour chart of the institutional participants (displayed, not saved).
plotter('Decline of health institution risks in the NYT, 1987-2014',
        institutional.results,
        y_label = 'Percentage of all participants',
        figsize = (10, 6),
        black_and_white = False)
    
    
In [ ]:
    
#parts = load_result('health-part')
#peop = ['person', 'man', 'woman', 'child', 'baby', 'consumer']
#he_pa = editor(parts.results, just_entries = peop)
#plotter('Everyday participants in the health subcorpus', he_pa.results, 
    #y_label = 'Percentage of all participants',
       #figsize = (10, 6), black_and_white = True, style = 'bmh')
    
21:55:04: adj_modifiers.p loaded as adj_modifiers. 21:55:04: adj_riskwords.p loaded as adj_riskwords. 21:55:06: all_govs.p loaded as all_govs. 21:55:06: allwords.p loaded as allwords. 21:55:06: at_risk_things.p loaded as at_risk_things. 21:55:06: baseline.p loaded as baseline. 21:55:06: edited_adj_riskwords.p loaded as edited_adj_riskwords. 21:55:06: health-processes.p loaded as health-processes. 21:55:11: health-social-actors-w-risk-in-sent.p loaded as health-social-actors-w-risk-in-sent. 21:55:13: health_kwds.p loaded as health_kwds. 21:55:37: health_kwds_compared_to_nyt.p loaded as health_kwds_compared_to_nyt. 21:55:39: health_ngrams.p loaded as health_ngrams. 21:55:41: kwds.p loaded as kwds. 21:55:41: modals.p loaded as modals. 21:55:41: modals_lemmatised.p loaded as modals_lemmatised. 21:55:41: modifiers.p loaded as modifiers. 21:55:41: n_atrisk_n.p loaded as n_atrisk_n. 21:55:52: ngms.p loaded as ngms. 21:56:12: noun_lemmata.p loaded as noun_lemmata. 21:56:12: predicators.p loaded as predicators. 21:56:12: proc_w_risk_part.p loaded as proc_w_risk_part. 21:56:12: processes.p failed to load. Try using load_result to find out the matter. 21:56:24: propernouns.p loaded as propernouns. 21:56:24: relationals.p loaded as relationals. 21:56:24: risk_as_part_process_mod.p loaded as risk_as_part_process_mod. 21:56:24: risk_objects.p loaded as risk_objects. 21:56:24: risk_of.p loaded as risk_of. 21:56:25: risk_verbing.p loaded as risk_verbing. 21:56:25: riskpos.p loaded as riskpos. 21:56:25: riskwords.p loaded as riskwords. 21:56:26: sayers.p loaded as sayers. 21:56:26: social-actors-w-risk-in-sent.p loaded as social-actors-w-risk-in-sent. 21:56:26: subj_of_risk_process.p loaded as subj_of_risk_process. 21:56:26: terror_heart.p loaded as terror_heart. 21:56:27: uniques.p loaded as uniques. 21:56:27: x_subj_of_risk_process.p loaded as x_subj_of_risk_process.
In [135]:
    
# n-grams from the health subcorpus, ordered by the 'increase' sort.
health_ngrams = r['health_ngrams']
inc = editor(health_ngrams.results, sort_by = 'increase')
plotter('n-grams, increasing', inc.results,
        num_to_plot=10,
        y_label = 'Absolute frequency',
        style = 'bmh',
        black_and_white = True,
        figsize = (10, 6),
        save = 'ngram_health_inc_final')
    
    
    
In [144]:
    
# Sort by 'decrease' first, then skip entries 8, 9 and 10 of the sorted result.
dec_sorted = editor(health_ngrams.results, sort_by = 'decrease')
dec = editor(dec_sorted.results, skip_entries = [8, 9, 10])
    
    
In [128]:
    
# Black-and-white chart of the ten leading decreasing n-grams.
plotter('n-grams, decreasing', dec.results,
        num_to_plot=10,
        style = 'bmh',
        black_and_white = True,
        figsize = (10, 6),
        legend_pos = 'upper right',
        save = 'ngram_health_dec_final')
    
    
    
In [25]:
    
# Participants relative to their totals, ordered by the 'increase' sort.
inc_part = editor(social_act.results, '%', social_act.totals, sort_by = 'increase')
    
    
In [129]:
    
# Black-and-white chart of the ten leading increasing participants.
plotter('Participants in the health subcorpus, increasing', inc_part.results,
        num_to_plot=10,
        style = 'bmh',
        black_and_white = True,
        figsize = (10, 6),
        save = 'part_health_inc_final')
    
    
    
In [22]:
    
# (theme label, entries merged under that label) pairs. The label is used as
# the merged entry name and therefore appears in the figure legend.
# Fixed the label typo 'Intitutions' -> 'Institutions'.
cats = [('Infectious disease', ['aids', 'aid', 'aids virus', 'aids patient', 'transmission', 'flu', 'influenza']),
('Life world', ['person', 'man', 'woman', 'child', 'baby', 'consumer']),
('Institutions', ['empire', 'hospital', 'commercial', 'business', 'insurance company', 'HMO/health maintenance organisation', 'blue cross', 'disease control', 'employer', 'insurer', 'health insurance association', 'insurance industry', 'office']),
('Non-infectious disease', ['breast cancer', 'cancer', 'heart disease', 'diabetes', 'heart attack', 'prostate cancer', 'stroke', 'ovarian cancer', 'obesity']),
('Science and research',  ['study', 'researcher', 'finding', 'new study', 'author', 'university', 'expert'])]
    
In [23]:
    
# `themes_all` keeps the original totals; `themes` accumulates the merges.
themes_all = r['health-social-actors-w-risk-in-sent']
themes = r['health-social-actors-w-risk-in-sent']
# Collapse each category's entries into one entry named after the category.
for name, cat in cats:
    themes = editor(themes.results, merge_entries = cat, newname = name)
# Keep only the merged categories, relative to the original totals.
theme_names = [n for n, t in cats]
themes = editor(themes.results, '%', themes_all.totals,
                just_entries = theme_names,
                sort_by = 'total')
    
    
In [26]:
    
# Colour chart of the merged themes.
plotter('Major themes in articles discussing health risks in the NYT',
        themes.results,
        num_to_plot=10,
        style = 'fivethirtyeight',
        figsize = (10, 6),
        save = 'themes-health')
    
    
    
In [119]:
    
#r = load_all_results()
# Load the saved 'collapsed_deps' interrogation.
deps = load_result('collapsed_deps')
# Merge every entry starting with 'cop:' into one 'Cop' entry and keep just it;
# later cells add this to and subtract it from other results.
copula = editor(deps.results, merge_entries = r'^cop:', newname = 'Cop').results['Cop']
    
    
Now we can do a first pass over the data. The problem is that `root` includes Attribute/Value.
In [21]:
    
from collections import OrderedDict
all_riskwords = load_result('coll_govs_with_pos')
#allwords = r['allwords']
govs = load_result('coll_govs_with_pos')
# Build the OrderedDict from a list of pairs. Passing a plain dict literal, as
# before, loses the written order on Python 2, and the merges below are
# order-sensitive: each one works on the output of the previous, and the
# Participant pattern (dobj:.*) overlaps the Process pattern (dobj:...:take).
exp_roles = OrderedDict([
    ('Process', r'(?i)^(root:root|dobj:.*?:(run|take|pose)|prep_at:v[a-z]*:put)$'),
    ('Participant', r'(?i)^((xsubj|nsubj|nsubjpass|acomp|agent|appos|dobj|iobj):.*|prep_[a-z]*:v.*?:.*)$'),
    ('Modifier', r'(?i)^((advmod|vmod|amod|nn):.*$|(prep_[a-z]*:n))'),
])
#exp_roles = OrderedDict(sorted(exp_roles.items(), key=lambda t: t[0]), reverse)
for name, regex in exp_roles.items():
    govs = editor(govs.results, merge_entries = regex, newname = name)
    
    
In [151]:
    
# Function-call form of print: identical output on Python 2 for a single
# argument, and valid syntax on Python 3.
print(govs.results['Process'])
print(govs.results['Participant'])
    
    
Now we need to subtract the copula results from Process and add them to Participant:
In [152]:
    
# Move the copula counts from Process to Participant.
# NOTE(review): this updates govs.results in place from its own values, so
# running the cell a second time applies the adjustment twice.
govs.results['Process'] = govs.results['Process'] - copula
govs.results['Participant'] = govs.results['Participant'] + copula
    
In [153]:
    
# Function-call form of print: identical output on Python 2 for a single
# argument, and valid syntax on Python 3.
print(govs.results['Process'])
print(govs.results['Participant'])
    
    
And now get relative frequencies:
In [154]:
    
#allwd = interrogator('data/nyt/years', 'c', 'any')
    
In [22]:
    
# Reload the unmerged interrogation; later cells use its totals as the denominator.
all_riskwords = load_result('coll_govs_with_pos')
    
In [141]:
    
# The three merged roles relative to the unmerged totals, without 1963.
role_names = exp_roles.keys()
rel_govs = editor(govs.results, '%', all_riskwords.totals,
                  just_entries = role_names,
                  skip_subcorpora = 1963,
                  sort_by = 'total')
    
    
In [199]:
    
# Same three roles, sorted by 'increase', keeping the statistics rows.
p_check = editor(govs.results,
                 just_entries = exp_roles.keys(),
                 sort_by = 'increase',
                 keep_stats = True,
                 skip_subcorpora = 1963)
    
    
In [201]:
    
# Display the row labelled 'p' (presumably the p-values kept by keep_stats).
# .loc replaces .ix, which is deprecated and removed in current pandas; for a
# string row label the two select the same row.
p_check.results.loc['p']
    
    Out[201]:
In [194]:
    
# Append each column's 'p' value to its name, then remove the statistics rows.
# This edits p_check.results in place, so the cell cannot be re-run as is.
statfields = ['slope', 'intercept', 'r', 'p', 'stderr']
newnames = ['%s (p=%s)' % (col, format(p_check.results[col]['p'], '.5f'))
            for col in list(p_check.results.columns)]
p_check.results.columns = newnames
p_check.results.drop(statfields, axis = 0, inplace = True)
p_check.results
    
    Out[194]:
In [197]:
    
# Relative frequencies of the relabelled roles, one subplot per role.
rel_govs = editor(p_check.results, '%', all_riskwords.totals,
                  sort_by = 'total',
                  skip_subcorpora = 1963)
plotter('Experiential role of risk words', rel_govs.results,
        y_label = 'Percentage of risk words in any experiential role',
        subplots = True,
        style = 'bmh',
        figsize = (10, 6),
        save = 'subplot-three-roles-final')
    
    
    
In [69]:
    
part_roles = {'Experiential subject': r'^(nsubj|agent|xsubj):',
              'Experiential object': r'^(nsubjpass|dobj|iobj|acomp):'}
all_entries = load_result('coll_govs_with_pos')
# Processes must be removed before the Participant entries are selected.
without_processes = editor(all_entries.results, skip_entries = exp_roles['Process'])
parts = editor(without_processes.results, just_entries = exp_roles['Participant'])
# Merge the remaining entries into subject-like and object-like roles.
for name, regex in part_roles.items():
    parts = editor(parts.results, merge_entries = regex, newname = name)
    
    
Add the copula counts to the experiential object:
In [70]:
    
# NOTE(review): in-place update; running this cell twice adds copula twice.
parts.results['Experiential object'] = parts.results['Experiential object'] + copula
    
Finish up
In [71]:
    
# The two participant roles relative to the unmerged totals, without 1963.
rel_parts = editor(parts.results, '%', all_riskwords.totals,
                   just_entries = part_roles.keys(),
                   skip_subcorpora = 1963,
                   sort_by = 'total')
plotter('Risk by participant role', rel_parts.results,
        y_label = 'Percentage of risk words in any experiential role',
        style = 'bmh',
        black_and_white = True,
        figsize = (10, 6))
    
    
    
In [11]:
    
# One regex per risk process construction.
proc_roles = {'to risk': r'^root:root$',
              'to take risk': r'^dobj:v.*?:take$',
              'to run risk': r'^dobj:v.*?:run$',
              'to pose risk': r'^dobj:v.*?:pose$',
              'to put at risk': r'^prep_at:v.*?:put$'}
# Start from the Process entries only, leaving out the 1963 subcorpus.
process_entries = load_result('coll_govs_with_pos')
procs = editor(process_entries.results,
               just_entries = exp_roles['Process'],
               skip_subcorpora = 1963)
for name, regex in proc_roles.items():
    procs = editor(procs.results, merge_entries = regex, newname = name)
    
    
Remove the copula counts from the 'to risk' process:
In [14]:
    
# NOTE(review): in-place update; running this cell twice subtracts copula twice.
procs.results['to risk'] = procs.results['to risk'] - copula
    
In [31]:
    
# Process constructions relative to the unmerged totals.
rel_procs = editor(procs.results, '%', all_riskwords.totals,
                   just_entries = proc_roles.keys(),
                   sort_by = 'total')
plotter('Risk by processes', rel_procs.results,
        y_label = 'Percentage of risk words in any experiential role',
        style = 'bmh',
        black_and_white = True,
        figsize = (10, 6))
    
    
    
In [27]:
    
# One regex per modifier type.
mod_roles = {'Nominal pre-head': r'^nn:',
              'Nominal post-head': r'^(prep_[a-z]*:n|vmod:)',
              #'Head of PP complement': r'^prep_[a-z]*:v)',
              'Adverbial modifier': r'^advmod:',
              'Adjectival modifier': r'^amod:'}
# Drop the Process entries first, then keep only the Modifier entries.
modifier_source = load_result('coll_govs_with_pos')
non_process = editor(modifier_source.results, skip_entries = exp_roles['Process'])
mods = editor(non_process.results, just_entries = exp_roles['Modifier'])
for name, regex in mod_roles.items():
    mods = editor(mods.results, merge_entries = regex, newname = name)
    
    
In [ ]:
    
    
In [29]:
    
# Modifier types relative to the unmerged totals, without 1963; plotted in
# black-and-white (displayed only) and in colour (saved).
rel_mods = editor(mods.results, '%', all_riskwords.totals,
                  just_entries = mod_roles.keys(),
                  skip_subcorpora = 1963,
                  sort_by = 'total')
mod_title = 'Risk by modifier type'
mod_ylabel = 'Percentage of risk words in any experiential role'
plotter(mod_title, rel_mods.results,
        y_label = mod_ylabel,
        style = 'bmh',
        black_and_white = True,
        figsize = (10, 6))
plotter(mod_title, rel_mods.results,
        y_label = mod_ylabel,
        figsize = (10, 6),
        save = 'risk_by_mod_type_colour')
    
    
    Out[29]:
    
    
In [13]:
    
# Same load-and-merge as the earlier 'collapsed_deps' cell, plus a quickview call.
deps = load_result('collapsed_deps')
quickview(deps, 10)
# Merge every entry starting with 'cop:' into one 'Cop' entry and keep just it.
copula = editor(deps.results, merge_entries = r'^cop:', newname = 'Cop').results['Cop']
    
    
In [7]:
    
# Three separate loads of the same saved interrogation, so each name is bound
# to its own unpickled object.
govs = load_result('coll_govs_with_pos')
all_govs = load_result('coll_govs_with_pos')
material = load_result('coll_govs_with_pos')
    
In [10]:
    
from dictionaries.process_types import processes
# Regexes assigning each governor entry to a broad experiential role.
exp_roles = {'Process': r'(?i)^(root:root|dobj:.*?:(run|take|pose)|prep_at:v[a-z]*:put|rcmod:.*|xcomp:.*)$',
             'Participant': r'(?i)^(xsubj|nsubj|nsubjpass|acomp|agent|appos|cop|dobj|iobj):.*$',
             'Modifier': r'(?i)^((advmod|vmod|amod|nn):.*$|(prep_[a-z]*:n|prep_[a-z]*:v))'}
# Templates for subject-like and object-like dependencies on a verbal governor;
# %s is filled from `processes` (presumably a regex of verbs of that type).
subject_of = r'(nsubj|xsubj|agent):v.*?:%s'
object_of = r'(iobj|dobj|nsubjpass|acomp):v.*?:%s'
exp_part_roles = {
    'Senser': subject_of % processes.mental,
    'Phenomenon': object_of % processes.mental,
    'Token': subject_of % processes.relational,
    'Value': object_of % processes.relational,
    'Sayer': subject_of % processes.verbal,
    'Verbiage': object_of % processes.verbal}
# Remove the Process entries from both working copies.
process_regex = exp_roles['Process']
govs = editor(govs.results, skip_entries = process_regex)
material = editor(material.results, skip_entries = process_regex)
# Merge each participant role into a single series, and strip the same
# entries from `material` so that only unclassified ones remain there.
serieses = []
for name, regex in exp_part_roles.items():
    merged = editor(govs.results, merge_entries = regex, newname = name)
    serieses.append(merged.results[name])
    material = editor(material.results, skip_entries = regex)
# Whatever is left over is treated as material: subjects, then objects.
leftovers = [('Actor', r'(nsubj|xsubj|agent):v.*?:'),
             ('Goal, range', r'(iobj|dobj|nsubjpass|acomp):v.*?:')]
for leftover_name, leftover_regex in leftovers:
    material = editor(material.results, merge_entries = leftover_regex, newname = leftover_name)
    serieses.append(material.results[leftover_name])
# add copula
    
    
In [89]:
    
# NOTE(review): `df` is first assigned in the cell after this one, so this
# display raises NameError when the notebook is run top to bottom.
df
    
    Out[89]:
In [143]:
    
# Put the per-role series side by side, relate them to the 'all_govs' totals,
# leave out 1963, and plot. `df` ends up bound to the final editor result.
tots = r['all_govs']
participant_counts = pd.concat(serieses, axis = 1)
participant_rel = editor(participant_counts, '%', tots.totals, sort_by = 'total')
df = editor(participant_rel.results, skip_subcorpora = 1963)
plotter('Participant types for risk words', df.results,
        figsize = (16, 7))
    
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
# Unexecuted draft that repeats the participant-type cell above, adding the
# `exp_kinds` dict (which nothing in this cell reads).
# NOTE(review): `processes` is not imported here; this relies on the earlier
# `from dictionaries.process_types import processes`.
govs = load_result('coll_govs_with_pos')
all_govs = load_result('coll_govs_with_pos')
material = load_result('coll_govs_with_pos')
# Regexes assigning each governor entry to a broad experiential role.
exp_roles = {'Process': r'(?i)^(root:root|dobj:.*?:(run|take|pose)|prep_at:v[a-z]*:put|rcmod:.*|xcomp:.*)$',
             'Participant': r'(?i)^(xsubj|nsubj|nsubjpass|acomp|agent|appos|cop|dobj|iobj):.*$',
             'Modifier': r'(?i)^((advmod|vmod|amod|nn):.*$|(prep_[a-z]*:n|prep_[a-z]*:v))'}
# NOTE(review): the key 'Experiential subect' looks like a typo for 'subject'.
exp_kinds = {'Experiential subect': r'(nsubj|xsubj|agent):v', 
             'Experiential object': r'(iobj|dobj|nsubjpass|acomp):v'}
# Subject-like / object-like dependency patterns combined with each process type.
exp_part_roles = {
    'Senser': r'(nsubj|xsubj|agent):v.*?:%s' % processes.mental,
    'Phenomenon': r'(iobj|dobj|nsubjpass|acomp):v.*?:%s' % processes.mental,
    'Token': r'(nsubj|xsubj|agent):v.*?:%s' % processes.relational,
    'Value': r'(iobj|dobj|nsubjpass|acomp):v.*?:%s' % processes.relational,
    'Sayer': r'(nsubj|xsubj|agent):v.*?:%s' % processes.verbal,
    'Verbiage': r'(iobj|dobj|nsubjpass|acomp):v.*?:%s' % processes.verbal}
# Remove the Process entries from both working copies.
govs = editor(govs.results, skip_entries = exp_roles['Process'])
material = editor(material.results, skip_entries = exp_roles['Process'])
# merge each participant role, and remove these entries from the material result
serieses = []
for name, regex in exp_part_roles.items():
    tmp = editor(govs.results, merge_entries = regex, newname = name)
    serieses.append(tmp.results[name])
    material = editor(material.results, skip_entries = regex)
# the leftover becomes material
material = editor(material.results, merge_entries = r'(nsubj|xsubj|agent):v.*?:', newname = 'Actor')
serieses.append(material.results['Actor'])
material = editor(material.results, merge_entries = r'(iobj|dobj|nsubjpass|acomp):v.*?:', newname = 'Goal, range')
serieses.append(material.results['Goal, range'])