Figures from book chapter

Two figures from an earlier investigation


In [1]:
import os
import pandas as pd
from corpkit import *
%matplotlib inline
#r = load_all_results()

In [2]:
# Read the 1900 vs 2000 domain comparison table; the first CSV column becomes the index.
first_fig = pd.read_csv('1900-2000-comparison.csv', index_col = 0)
# Bare expression so the notebook displays the table.
first_fig


Out[2]:
1900 2000
economics 17.7 22.0
politics 21.1 20.8
health 1.0 13.4
medicine 0.0 3.2
sport 11.5 7.8
arts 3.8 5.9
legal 1.9 4.4
security 0.5 3.2
education 0.0 2.4
war 11.5 2.7
industrial relations/work 1.4 3.7
crime 2.4 2.2
media 1.0 2.0
transport/infrastructure 3.8 1.2
military 4.3 1.5
social order 3.3 0.2
technology 0.0 5.4
environment 0.0 1.7
disaster 1.9 0.5

In [3]:
# Colour bar chart of all risk-discourse domains (recorded output: images/domains_june_colour.png).
# For the black-and-white version, add black_and_white = True and use save = 'domains_june_final'.
domains_opts = dict(kind = 'bar', num_to_plot = 'all', x_label = False,
                    figsize = (10, 6), style = 'bmh', show_totals = 'plot')
plotter('Domains of risk discourse', first_fig, save = 'domains_june_colour', **domains_opts)


13:13:19: images/domains_june_colour.png created.

In [4]:
import os
import pandas as pd
# Load the key-term comparison table and keep only these six columns, in this order.
key_terms = ['danger', 'threat', 'risk', 'disaster', 'harm', 'uncertainty']
second_fig = pd.read_csv('key-terms-comparison.csv', index_col = 0)[key_terms]
#from corpkit import plotter
%matplotlib inline

In [5]:
# Colour chart of the selected key terms (recorded output: images/risk_related_june_colour.png).
# For the black-and-white version, add black_and_white = True and use save = 'risk_related_june_final'.
plotter('Risk and related words over time',
        second_fig,
        num_to_plot = 'all',
        figsize = (10, 6),
        style = 'bmh',
        save = 'risk_related_june_colour')


13:14:05: images/risk_related_june_colour.png created.

In [2]:
# `r` (the collection of saved interrogations, indexed by name) is only assigned in a
# commented-out line of the first cell, so on a fresh kernel this cell would stop with
# a NameError. Load it here when it is not already in the namespace.
if 'r' not in globals():
    r = load_all_results()

# Apply the '%' operation to the risk word-class results against the all-words totals,
# leaving out the 1963 subcorpus.
pos = editor(r['riskpos'].results, '%', r['allwords'].totals, skip_subcorpora = 1963)

# Black-and-white chart limited to three entries (num_to_plot = 3).
plotter('Risk words by word class', pos.results , style = 'bmh', num_to_plot = 3,
        figsize = (10, 6), black_and_white = True, save = 'risk_by_class_final', 
        y_label = 'Percentage of all parsed words')


***Processing results***
========================

Skipping 1 subcorpora:
    1963

***Done!***
========================


20:46:08: images/risk_by_class_final.png created.
Out[2]:
<module 'matplotlib.pyplot' from '/Library/Python/2.7/site-packages/matplotlib/pyplot.pyc'>

In [3]:
# Open word classes to keep; every other entry is discarded by just_entries.
open_words = ['Noun', 'Verb', 'Adjective', 'Adverb']

# '%' of the risk word-class results against the baseline results, sorted by total
# and with the 1963 subcorpus left out.
maths_done = editor(r['riskpos'].results,
                    '%',
                    r['baseline'].results,
                    sort_by = 'total',
                    just_entries = open_words,
                    skip_subcorpora = [1963])


***Processing results***
========================

Skipping 1 subcorpora:
    1963

Keeping 4 entries:
    Noun
    Verb
    Adjective
    Adverb

***Done!***
========================


In [4]:
# All four figures share one title and one data set; only the rendering options differ.
open_class_title = 'Percentage of open word classes that are risk words'
open_class_data = maths_done.results

# Line charts: black-and-white, then default colours.
plotter(open_class_title, open_class_data, black_and_white = True, y_label = 'Percentage',
        figsize = (10, 6), style = 'bmh', num_to_plot = 3, save = 'perc_open_final')
plotter(open_class_title, open_class_data, y_label = 'Percentage',
        figsize = (10, 6), num_to_plot = 3, save = 'perc_open_final_colour')

# Stacked area charts: black-and-white, then default colours.
plotter(open_class_title, open_class_data, y_label = 'Percentage', kind = 'area',
        legend_pos = 'lower right', stacked = True, style = 'bmh',
        black_and_white = True, figsize = (10, 6), save = 'perc_open_area_final')
plotter(open_class_title, open_class_data, y_label = 'Percentage', kind = 'area',
        legend_pos = 'lower right', stacked = True,
        figsize = (10, 6), save = 'perc_open_area_final_colour')


20:46:35: images/perc_open_final.png created.

20:46:36: images/perc_open_final_colour.png created.

20:46:37: images/perc_open_area_final.png created.

20:46:38: images/perc_open_area_final_colour.png created.
Out[4]:
<module 'matplotlib.pyplot' from '/Library/Python/2.7/site-packages/matplotlib/pyplot.pyc'>

In [5]:
# '%' of the participant/process/modifier results against that interrogation's own totals.
ppm_source = r['risk_as_part_process_mod']
ppm = editor(ppm_source.results, '%', ppm_source.totals)

ppm_title = 'Risk as participant, process and modifier'
ppm_ylabel = 'Percentage of risk words in any experiential role'

# Black-and-white version; the '1963' entry is dropped before plotting.
plotter(ppm_title, ppm.results.drop('1963'), style = 'bmh', black_and_white = True,
        figsize = (10, 6), save = 'ppm_final', y_label = ppm_ylabel)
# Default-colour version of the same figure.
plotter(ppm_title, ppm.results.drop('1963'), figsize = (10, 6),
        save = 'ppm_final_colour', y_label = ppm_ylabel)


***Processing results***
========================

***Done!***
========================


20:47:30: images/ppm_final.png created.

20:47:31: images/ppm_final_colour.png created.
Out[5]:
<module 'matplotlib.pyplot' from '/Library/Python/2.7/site-packages/matplotlib/pyplot.pyc'>

In [143]:
govs = r['all_govs']

# Collapse the governor entries into three experiential roles. Order matters:
# 'Process' runs first so that dobj:take/run/pose are merged before the broader
# 'Participant' pattern (which also matches the dobj: prefix) is applied.
experiential_roles = [
    ('Process', r'^(root:root|dobj:(take|run|pose))$'),
    ('Participant', r'^(dobj|nsubj|nsubjpass|csubj|acomp|iobj|csubjpass):'),
    ('Modifier', r'^(pobj|nn|amod|rcmod|vmod|tmod|npadvmod|advmod):')]

new_govs = govs
for role_name, role_regex in experiential_roles:
    new_govs = editor(new_govs.results, merge_entries = role_regex, newname = role_name)

# Apply '%' against the original totals and keep only the three merged roles.
new_govs = editor(new_govs.results, '%', govs.totals, sort_by = 'total', just_entries = ['Participant', 'Process', 'Modifier'])

# The '1963' entry is dropped before plotting.
plotter('Risk words by experiential role', new_govs.results.drop('1963'), 
        style = 'bmh', figsize = (10, 6), black_and_white = True, y_label = 'Percentage of risk words in any experiential role', save = 'ppmfinal')


***Processing results***
========================

Merging 4 entries as "Process":
    dobj:take
    root:root
    dobj:pose
    dobj:run

***Done!***
========================


***Processing results***
========================

Merging 4060 entries as "Participant":
    nsubj:be
    dobj:reduce
    dobj:increase
    dobj:have
    dobj:carry
    dobj:face
    dobj:raise
    dobj:minimize
    dobj:assess
    dobj:create
... and 4050 more ... 

***Done!***
========================


***Processing results***
========================

Merging 5633 entries as "Modifier":
    pobj:at
    pobj:of
    pobj:about
    pobj:with
    pobj:to
    nn:factor
    pobj:for
    pobj:on
    pobj:in
    pobj:as
... and 5623 more ... 

***Done!***
========================


***Processing results***
========================

Keeping 3 entries:
    Participant
    Process
    Modifier

***Done!***
========================


12:04:47: images/ppmfinal.png created.

Using collapsed dependencies


In [ ]:
# Interrogation that produced the data (kept for provenance, not re-run here):
#coll_govs_with_pos = interrogator(corpus, 'g', r'(?i)\brisk', lemmatise = True, dep_type = 'collapsed', 
    # add_pos_to_g_d_option=True, quicksave = 'coll_govs_with_pos')
# NOTE(review): the quicksave name above is 'coll_govs_with_pos' but the key loaded
# below is 'collapsed_govs_with_pos' -- confirm which saved result is intended.
r = load_all_results()
# This line was previously indented under the assignment above, which is an IndentationError.
govs = r['collapsed_govs_with_pos']

# (role name, regex) pairs; matching entries are merged under the role name, in this order.
exp_roles = [('Process', r'(?i)^(root:root|dobj:.*?:(run|take|pose)|prep_at:v[a-z]*:put|rcmod:.*|xcomp:.*)$'),
            ('Participant', r'(?i)^(xsubj|nsubj|nsubjpass|acomp|agent|appos|cop|dobj|iobj):.*$'),
            ('Modifier', r'(?i)^((advmod|vmod|amod|nn):.*$|(prep_[a-z]*:n|prep_[a-z]*:v))')]
for name, regex in exp_roles:
    govs = editor(govs.results, merge_entries = regex, newname = name)

# Apply '%' against the totals, leaving out the 1963 subcorpus.
govs = editor(govs.results, '%', govs.totals, skip_subcorpora = 1963)
plotter('Experiential role', govs.results, y_label = 'Percentage of risk words in any experiential role',
    style = 'bmh', figsize = (10, 6), black_and_white = True)

In [ ]:
# currently problematic
# NOTE(review): the reason is not recorded here. The title is now a raw string so the
# backslash in '\slash' is passed through literally, rather than relying on Python
# leaving the unrecognised escape '\s' untouched (newer Python versions warn about it).
plotter(r'Risk as experiential subject\slash object', govs.results, style = 'bmh', 
        black_and_white = True, figsize = (10, 6))

Adjectival modifiers of risk


In [16]:
# '%' of four selected adjectives against the adjectival-modifier totals, without 1963.
adj_source = r['adj_modifiers']
selected_adjectives = ['high', 'calculated', 'great', 'potential']
adjmods = editor(adj_source.results, '%', adj_source.totals,
                 just_entries = selected_adjectives,
                 skip_subcorpora = 1963)

plotter('Selected adjectives modifying nominal risk',
        adjmods.results,
        style = 'bmh',
        black_and_white = True,
        figsize = (10, 6),
        save = 'sel_adjs_final',
        y_label = 'Percentage of all adjectival risk words')


***Processing results***
========================

Skipping 1 subcorpora:
    1963

Keeping 4 entries:
    high
    calculated
    great
    potential

***Done!***
========================


21:24:49: images/sel_adjs_final.png created.
Out[16]:
<module 'matplotlib.pyplot' from '/Library/Python/2.7/site-packages/matplotlib/pyplot.pyc'>

Risk processes


In [62]:
def load_result(savename, loaddir = 'data/saved_interrogations'):
    """Reload a saved interrogation from a pickle file.

    Parameters
    ----------
    savename : str
        Name of the saved result; '.p' is appended when it is missing.
    loaddir : str
        Directory that contains the pickle file.

    Returns
    -------
    pandas.DataFrame, pandas.Series or namedtuple
        DataFrames and Series are returned unchanged. Anything else is wrapped
        in an 'interrogation' namedtuple whose fields depend on its length:
        4 -> (query, results, totals, table)
        3 -> (query, results, totals)
        2 -> (query, totals)

    Raises
    ------
    ValueError
        If the unpickled object has a length other than 2, 3 or 4
        (previously this surfaced as an UnboundLocalError).
    """
    import collections
    import pickle
    import os
    import pandas

    if not savename.endswith('.p'):
        savename = savename + '.p'

    # NOTE(security): unpickling can execute arbitrary code; only load trusted files.
    # The context manager closes the file handle even when unpickling fails.
    with open(os.path.join(loaddir, savename), 'rb') as handle:
        unpickled = pickle.load(handle)

    if isinstance(unpickled, (pandas.DataFrame, pandas.Series)):
        return unpickled

    fields_by_length = {
        4: ['query', 'results', 'totals', 'table'],
        3: ['query', 'results', 'totals'],
        2: ['query', 'totals'],
    }
    size = len(unpickled)
    if size not in fields_by_length:
        raise ValueError('Cannot build an interrogation from %r: expected 2, 3 or 4 '
                         'items, got %d' % (savename, size))

    fields = fields_by_length[size]
    interrogation = collections.namedtuple('interrogation', fields)
    return interrogation(*[unpickled[position] for position in range(size)])

# Load the saved 'processes' interrogation from the default directory.
processes = load_result('processes')
# Apply '%' against its own totals, leaving out the 1963 subcorpus.
proc_rel = editor(processes.results, '%', processes.totals, skip_subcorpora = 1963)


***Processing results***
========================

Skipping 1 subcorpora:
    1963

***Done!***
========================


In [116]:
# Black-and-white chart of the relative risk-process figures
# (recorded output: images/risk_proc_final.png).
plotter('Risk processes',
        proc_rel.results,
        style = 'bmh',
        legend_pos = 'center left',
        save = 'risk_proc_final',
        black_and_white=True,
        figsize = (10, 6),
        y_label = 'Percentage of all risk processes')


23:26:55: images/risk_proc_final.png created.

In [18]:
modifiers = r['modifiers']

# '%' of the modifier results against their own totals, without the 1963 subcorpus.
mods = editor(modifiers.results,
              '%',
              modifiers.totals,
              skip_subcorpora = [1963])

plotter('Types of risk modifiers',
        mods.results,
        style = 'bmh',
        black_and_white = True,
        figsize = (10, 6),
        legend_pos = 'lower right',
        save = 'mod_types_final')


***Processing results***
========================

Skipping 1 subcorpora:
    1963

***Done!***
========================


21:25:09: images/mod_types_final.png created.
Out[18]:
<module 'matplotlib.pyplot' from '/Library/Python/2.7/site-packages/matplotlib/pyplot.pyc'>

In [203]:
# Fetch two saved interrogations from the loaded results `r`; both feed the risker
# figures in the following cells.
subj_of_risk_process = r['subj_of_risk_process']
noun_lemmata = r['noun_lemmata']

In [204]:
# '%' of the subjects of risk processes against their own totals, reduced to totals
# (just_totals), sorted by total and without the 1963 subcorpus.
rskrs = editor(subj_of_risk_process.results,
               '%',
               subj_of_risk_process.totals,
               just_totals = True,
               skip_subcorpora = 1963,
               sort_by = 'total')


***Processing results***
========================

Skipping 1 subcorpora:
    1963

***Done!***
========================


In [205]:
# Black-and-white bar chart limited to twelve entries (num_to_plot = 12);
# recorded output: images/riskers_total_final.png.
plotter('Riskers, sorted by total frequency',
        rskrs.results,
        kind = 'bar',
        black_and_white = True,
        style = 'bmh',
        figsize = (10, 6),
        num_to_plot = 12,
        y_label = 'Percentage of all riskers',
        show_totals = 'plot',
        save = 'riskers_total_final')


14:30:03: images/riskers_total_final.png created.

In [208]:
# Participants to compare; every other entry is discarded by just_entries.
entities = ['politician', 'candidate', 'governor', 'lawmaker', 'person', 'man', 'woman', 'child', 'baby']

# '%' of the risker results against the noun-lemma results for those entities,
# reduced to totals and without the 1963 subcorpus.
spec_riskers = editor(subj_of_risk_process.results,
                      '%',
                      noun_lemmata.results,
                      just_entries = entities,
                      skip_subcorpora = 1963,
                      just_totals = True,
                      sort_by = 'total')

plotter('Percentage of common participants that are in the role of risker',
        spec_riskers.results,
        kind = 'bar',
        black_and_white = True,
        style = 'bmh',
        figsize = (10, 6),
        save = 'rel_risker_final',
        num_to_plot = 12,
        y_label = 'Percentage of occurrences in the role of risker',
        show_totals = 'plot')


***Processing results***
========================

Skipping 1 subcorpora:
    1963

Keeping 9 entries:
    politician
    candidate
    governor
    lawmaker
    person
    man
    woman
    child
    baby

Threshold: 19


***Done!***
========================


14:38:27: images/rel_risker_final.png created.

In [2]:
# Saved proper-noun interrogation, looked up by name in the loaded results `r`.
propernouns = r['propernouns']

In [11]:
# Proper nouns to compare; every other entry is discarded by just_entries.
terms = ['vioxx', 'merck', 'aids', 'clinton', 'obama', 'europe', 'bush']

# '%' of the proper-noun results against their own totals, sorted by total and
# without the 1963 subcorpus.
sel_nnp = editor(propernouns.results,
                 '%',
                 propernouns.totals,
                 just_entries = terms,
                 skip_subcorpora = 1963,
                 sort_by = 'total')


***Processing results***
========================

Skipping 1 subcorpora:
    1963

Keeping 7 entries:
    vioxx
    merck
    aids
    clinton
    obama
    europe
    bush

***Done!***
========================


In [14]:
# Colour version, saved as 'healthcomp'. The title is a raw string so that the
# backslash in '\emph' is passed through literally instead of relying on Python
# leaving the unrecognised escape '\e' untouched (newer Python versions warn about it).
plotter(r'Health crises: comparing social actors that co-occur with risk in \emph{The New York Times}', sel_nnp.results, legend_pos = 'upper right',
       black_and_white = False, figsize = (10, 6), save = 'healthcomp', y_label = 'Percentage of all proper nouns')
# Black-and-white version; no `save` argument is passed for this one.
plotter('Comparing social actors that co-occur with risk', sel_nnp.results, legend_pos = 'upper left',
       black_and_white = True, style = 'bmh', figsize = (10, 6))


14:43:28: images/healthcomp.png created.

In [72]:
# Inspect the per-subcorpus values for the 'aids' entry.
propernouns.results['aids']


Out[72]:
1963     0
1987    87
1988    46
...
2012    12
2013     8
2014     2
Name: aids, Length: 29, dtype: int64

In [27]:
# Saved 'risk_of' interrogation, looked up by name in the loaded results `r`.
risk_of = r['risk_of']

In [28]:
# '%' of the 'risk of' results against their own totals, without the 1963 subcorpus.
rel_riskof = editor(risk_of.results,
                    '%',
                    risk_of.totals,
                    skip_subcorpora = 1963)

plotter('Risk of (noun)',
        rel_riskof.results,
        style = 'fivethirtyeight',
        figsize = (10, 6),
        y_label = 'Percentage of all results',
        save = 'riskof_')


***Processing results***
========================

Skipping 1 subcorpora:
    1963

***Done!***
========================


15:25:20: images/riskof_.png created.

In [16]:
# Saved interrogation of the health subcorpus, looked up by name in the loaded results `r`.
social_act = r['health-social-actors-w-risk-in-sent']

In [17]:
# Display the query settings stored with this interrogation.
social_act.query


Out[17]:
{'datatype': dtype('int64'),
 'dep_type': 'basic-dependencies',
 'dictionary': 'bnc.p',
 'function': 'interrogator',
 'function_filter': False,
 'lemmatag': False,
 'lemmatise': True,
 'option': 'words',
 'path': 'data/nyt/topics/health',
 'phrases': True,
 'plaintext': False,
 'query': 'NP <# (/NN.?/ !< /(?i).?\\brisk.?\\b/) >> (ROOT << /(?i).?\\brisk.?\\b/)',
 'quicksave': 'health-social-actors-w-risk-in-sent.p',
 'spelling': False,
 'table_size': 50,
 'time_ended': '2015-06=13 23:25:29',
 'time_started': '2015-06-13 23:22:05',
 'titlefilter': True,
 'translated_option': 't'}

In [18]:
# Keep only the everyday participants; no '%' operation is applied here.
everyday_entries = ['man', 'woman', 'person', 'child', 'consumer', 'baby']
everyday = editor(social_act.results, just_entries = everyday_entries)

plotter('Everyday participants in the health subcorpus',
        everyday.results,
        style = 'bmh',
        black_and_white=True,
        figsize = (10, 6),
        save = 'everyday_health_final')


***Processing results***
========================

Keeping 6 entries:
    man
    woman
    person
    child
    consumer
    baby

***Done!***
========================


14:53:58: images/everyday_health_final.png created.