In [115]:
import os, ConfigParser, mediacloud, datetime
import pandas as pd

In [2]:
# load mediacloud and topic_id from config file
config = ConfigParser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Check that list so a missing app.config fails
# here with a clear message instead of a later NoSectionError.
if not config.read('app.config'):
    raise IOError("could not read 'app.config' in %s" % os.getcwd())

# both values live in the [mediacloud] section of app.config
key = config.get('mediacloud','key')
topic_id = config.get('mediacloud', 'topic_id')

In [42]:
# instantiate mediacloud api
# mc is the client the later cells call (topicMediaMap, topicStoryList)
mc = mediacloud.api.MediaCloud(key)
# AdminMediaCloud client built from the same key; no cell visible in this
# notebook references mca
mca = mediacloud.api.AdminMediaCloud(key)

1. Background Info

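We're looking at the US Presidential Election topic in Media Cloud. That's topic ID #1404. This is a set of stories published from Apr 30, 2015 to Nov 7, 2016, queried on the names of the major presidential candidates. (Note: the seed query shown below restricts publish_date to 2016-09-30 through 2016-11-08, which does not match this range; TODO confirm which is correct.) The topic is queried from the following media source sets: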

The seed query is:

+( fiorina ( scott and walker ) ( ben and carson ) trump ( cruz and -victor ) kasich rubio (jeb and bush) clinton sanders ) AND (+publish_date:[2016-09-30T00:00:00Z TO 2016-11-08T23:59:59Z]) AND ((tags_id_media:9139487 OR tags_id_media:9139458 OR tags_id_media:2453107 OR tags_id_stories:9139487 OR tags_id_stories:9139458 OR tags_id_stories:2453107))

I think this is the same dataset used for this CJR report, "Breitbart-led right-wing media ecosystem altered broader media agenda", but I'm not totally sure.

2. Network Structure

Run this section to request a gexf file representing the unweighted, directed network of media outlets in this dataset. Nodes represent different media outlets; edges represent inlinks and outlinks between outlets.


In [122]:
# Request the topic's media map from Media Cloud and keep a copy on disk.
# The request takes a minute or two, but you should only need to do it once.

network = mc.topicMediaMap(topic_id)

# save the response as-is so the next cell can reload it without the API
with open('network.gexf', 'wb') as gexf_file:
    gexf_file.write(network)

In [123]:
# if you've already generated network.gexf, run this cell to import it

# The file is written with mode 'wb', so read it back with 'rb': this keeps
# `network` byte-for-byte identical (and the same type) whether it came from
# the API call or from disk, with no newline translation on any platform.
with open('network.gexf', 'rb') as f:
    network = f.read()

In [ ]:

3. Contagion Data

Now we want to see how a term/framing/quote propagates through our network. To do that, we need to search the stories in our topic (#1404) for mentions of a given term/framing/quote. Let's start with the term "alt-right".


In [61]:
# this is the query we're interested in. put the term(s) you want to search for here.
# fetch_all_stories passes it to mc.topicStoryList as the `q` argument; the
# quoted phrases OR'd together are spelling variants of the same term.
query = '( "alt-right" OR "alt right" OR "alternative right" )'

In [119]:
# define function fetch stories from topic, based on query

def fetch_all_stories(query, topic_id):
    """Fetch every story in a topic that matches ``query`` into a DataFrame.

    Pages through ``mc.topicStoryList`` (``mc`` is the notebook-level Media
    Cloud client), following each page's ``link_ids['next']`` token until a
    page carries no such token or comes back with no stories.

    Parameters
    ----------
    query : str
        Query string passed to the API as ``q``.
    topic_id : str or int
        Topic to search; passed to ``mc.topicStoryList`` unchanged.

    Returns
    -------
    pandas.DataFrame
        One row per story with the columns ``stories_id``, ``media_id``,
        ``media_name``, ``publish_date``, ``media_inlink_count``,
        ``outlink_count``, ``title`` and ``url``. Has zero rows (but all
        eight columns) when nothing matches.

    Raises
    ------
    KeyError
        If a returned story record lacks one of the fields above, or a page
        has no ``'stories'`` entry.
    """
    fields = ['stories_id', 'media_id', 'media_name', 'publish_date',
              'media_inlink_count', 'outlink_count', 'title', 'url']
    # one list per output column, filled page by page
    columns = {field: [] for field in fields}

    link_id = None  # None means "first page": no paging token is sent
    while True:
        params = {'q': query}
        if link_id is not None:
            params['link_id'] = link_id
        page = mc.topicStoryList(topic_id, **params)

        page_stories = page['stories']
        for field in fields:
            columns[field].extend([story[field] for story in page_stories])

        # A result that fits on one page has no 'next' token, so look it up
        # defensively instead of indexing (which raised KeyError before).
        link_id = (page.get('link_ids') or {}).get('next')
        if not page_stories or link_id is None:
            break

    return pd.DataFrame(columns)

In [120]:
# collect every story in the topic matching `query` into one DataFrame
stories = fetch_all_stories(query, topic_id)

In [124]:
# write to csv
# (index=False is not passed, so the DataFrame index is written as well)
stories.to_csv('stories_mentioning_altright.csv', encoding='utf-8')

We can get the same data for some other terms...


In [125]:
# same fetch-and-save steps as for the alt-right query, for the "nasty woman" variants.
# NOTE(review): this rebinds the notebook-level `query`, so re-running the
# earlier `stories = fetch_all_stories(query, topic_id)` cell after this one
# would fetch these terms instead of the alt-right ones.
query = '( "nasty woman" OR "nasty women" OR "nastywomen" OR "nastywoman" )'
stories_nastywomen = fetch_all_stories(query, topic_id)
stories_nastywomen.to_csv('stories_mentioning_nastywomen.csv', encoding='utf-8')

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]: