data-collection



In [1]:
import sys
import json

try:
  from urllib.parse import urlencode
  from urllib.request import Request, urlopen
except ImportError:
  from urllib import urlencode
  from urllib2 import Request, urlopen

APP_ID  = 'YOUR_APP_ID'
APP_KEY = 'YOUR_APP_KEY'

def call(endpoint, params):
    """Issue a GET request against the AYLIEN News API and return the parsed JSON.

    Parameters
    ----------
    endpoint : str
        Path segment appended to the API base URL (e.g. ``'stories'``).
    params : dict
        Query parameters. A value may be a scalar or a list; list values are
        expanded into repeated ``key=value`` pairs.

    Returns
    -------
    dict
        The decoded JSON response body.

    Raises
    ------
    urllib.error.HTTPError / URLError
        Propagated unchanged on transport or HTTP-level failure.
    """
    protocol_host_path = 'https://api.newsapi.aylien.com/api/v1'
    headers = {
        'X-AYLIEN-NewsAPI-Application-ID': APP_ID,
        'X-AYLIEN-NewsAPI-Application-Key': APP_KEY}
    # Flatten {k: [v1, v2]} into repeated pairs; scalars are wrapped in a
    # one-element list. (Fixes the old `and/or` pseudo-ternary, which turned
    # an empty list value into a literal "[]" query parameter instead of
    # omitting the key.)
    pairs = [(k, v)
             for k, vs in params.items()
             for v in (vs if isinstance(vs, list) else [vs])]
    query_string = urlencode(pairs)
    url = protocol_host_path + "/" + endpoint + "?" + query_string
    request = Request(url)
    for k, v in headers.items():
        request.add_header(k, v)
    response = urlopen(request)
    try:
        # Decode explicitly: py2 returns str, py3 returns bytes.
        return json.loads(response.read().decode('utf-8'))
    finally:
        # Always release the underlying socket, even if decoding fails.
        response.close()

def getStoriesForAuthor(author, count=50, pages=1, prevResults=None, cursor="*"):
    """Fetch up to ``pages`` pages of English story titles by a given author.

    Pages through the AYLIEN News API ``stories`` endpoint using cursor-based
    pagination, accumulating the ``"stories"`` entries of each response.

    Parameters
    ----------
    author : str
        Author name to match (``author.name`` filter).
    count : int
        Stories requested per page (``per_page``).
    pages : int
        Maximum number of pages to fetch; 0 or negative fetches nothing.
    prevResults : list or None
        Previously accumulated stories to prepend; ``None`` means start empty.
        (Was a mutable ``[]`` default — a classic shared-state hazard.)
    cursor : str
        Pagination cursor; ``"*"`` starts from the beginning.

    Returns
    -------
    list
        ``prevResults`` followed by every story fetched.
    """
    # Copy so a caller-supplied list is never mutated.
    stories = list(prevResults) if prevResults is not None else []
    # Iterative pagination replaces the original recursion: same results,
    # no stack growth proportional to `pages`.
    remaining = pages
    while remaining >= 1:
        remaining -= 1
        parameters = {
            'author.name': author,
            'per_page': count,
            'return[]': "title",
            'language': "en",
            'cursor': cursor
        }
        results = call('stories', parameters)
        stories = stories + results["stories"]
        cursor = results["next_page_cursor"]
    return stories

In [2]:
# Fetch up to 7 pages x 100 stories of titles by this author (network call).
author1 = getStoriesForAuthor("Akin Oyedele", count=100, pages=7)

In [3]:
# Fetch up to 7 pages x 100 stories of titles by this author (network call).
author2 = getStoriesForAuthor("Carly Ledbetter", count=100, pages=7)

In [4]:
# Py2/py3-compatible pickle import, matching the urllib compat pattern used
# at the top of this file. (`cPickle` exists only on Python 2; the bare
# `import cPickle` crashed on Python 3.)
try:
    import cPickle as pickle
except ImportError:
    import pickle

# Persist the collected story lists; `with` guarantees the file handles are
# flushed and closed (the originals were opened inline and never closed).
with open("author1.p", "wb") as fh:
    pickle.dump(author1, fh)
with open("author2.p", "wb") as fh:
    pickle.dump(author2, fh)