In [5]:
import json
import urllib2
from IPython.display import Image
In [6]:
class TopicSummarizer(object):
    """
    Stepwise processor that uses the Wikipedia API to summarize a topic.

    Instantiate with the topic name, call ``.process()``, then
    ``.get_results()``::

        summary = TopicSummarizer('Coffee').process().get_results()

    ``process()`` issues two HTTP requests (one for the text extract, one
    for the page thumbnail) and stores the request URLs, the decoded JSON
    responses and the extracted values on the instance.  ``get_results()``
    reads attributes that only exist after ``process()`` has run, so calling
    it earlier raises ``AttributeError``.
    """

    def __init__(self, topic):
        """Store *topic* (the page title to look up) as a unicode string."""
        self.topic = unicode(topic)

    def process(self):
        """Fetch the text extract and the thumbnail URL.

        Returns ``self`` so that calls can be chained.
        """
        self._fetch_text()
        self._fetch_thumbnail()
        return self

    def get_results(self, as_text=False):
        """Return what ``process()`` collected.

        With ``as_text=True`` the result is a single string of the form
        ``"<topic> summary: <extract>"``.  Otherwise a ``TopicSummary``
        carrying the topic, the thumbnail URL and the extract is returned;
        its thumbnail URL is ``None`` when the API response contained no
        thumbnail entry for the page.
        """
        if as_text:
            return self.topic + ' summary: ' + self._text
        return TopicSummary(self.topic, self._thumb_url, self._text)

    def _quoted_title(self):
        """Return the topic percent-encoded for use as a query-string value.

        Without this, a title containing spaces, ``&``, ``#`` or non-ASCII
        characters would corrupt the request URL (or fail while formatting
        the template on Python 2).
        """
        try:
            from urllib import quote  # Python 2, which this file targets
        except ImportError:
            from urllib.parse import quote  # Python 3 fallback
        # Encode to UTF-8 first so non-ASCII titles are quoted byte-wise;
        # safe='' additionally escapes '/', which quote() leaves alone by
        # default.
        return quote(self.topic.encode('utf-8'), safe='')

    @staticmethod
    def _first_page(api_resp):
        """Return the first entry of ``api_resp['query']['pages']``.

        The mapping's keys are not used; only the first value is returned.
        Raises ``KeyError`` if the response lacks ``query``/``pages`` and
        ``IndexError`` if the mapping is empty.
        """
        return list(api_resp['query']['pages'].values())[0]

    def _fetch_text(self):
        """Request the text extract and store it in ``self._text``.

        Raises ``KeyError`` when the returned page entry has no ``extract``
        field (presumably when no page with this title exists).
        """
        self._text_api_url = TEXT_URL_TMPL.format(title=self._quoted_title())
        self._text_resp = self._get_url_json(self._text_api_url)
        self._text = self._first_page(self._text_resp)['extract']

    def _fetch_thumbnail(self):
        """Request the page thumbnail and store its URL in ``self._thumb_url``.

        A page entry without a ``thumbnail`` field (presumably a page with
        no image) yields ``None`` instead of raising, so the text summary
        stays usable.
        """
        self._thumb_api_url = THUMB_URL_TMPL.format(title=self._quoted_title())
        self._thumb_resp = self._get_url_json(self._thumb_api_url)
        page = self._first_page(self._thumb_resp)
        self._thumb_url = page.get('thumbnail', {}).get('source')

    def _get_url_json(self, url):
        """GET *url* and return its body decoded from JSON."""
        resp = urllib2.urlopen(url)
        try:
            resp_body = resp.read()
        finally:
            # Release the connection even if reading fails.
            resp.close()
        return json.loads(resp_body)
class TopicSummary(object):
    """Plain value holder for one summarized topic.

    Attributes:
        topic: the topic name the summary was built for.
        thumb_url: URL of the thumbnail image for the topic.
        text: the summary text.
    """

    def __init__(self, topic, thumb_url, text):
        """Store the three values unchanged on the instance."""
        self.topic, self.thumb_url, self.text = topic, thumb_url, text

    def __repr__(self):
        """Return ``ClassName(topic, thumb_url, text)`` using each value's repr."""
        fields = (self.topic, self.thumb_url, self.text)
        return '{0}({1!r}, {2!r}, {3!r})'.format(type(self).__name__, *fields)
# Wikipedia API request templates; ``{title}`` is filled in with the topic.
# Both ask for a JSON response (format=json).

# Extract query (prop=extracts). Per the MediaWiki TextExtracts parameters:
# exsentences=2 limits the extract to two sentences, explaintext=1 asks for
# plain text, and exintro=1 restricts it to the intro section.
TEXT_URL_TMPL = (
    'https://en.wikipedia.org/w/api.php'
    '?action=query&prop=extracts&exsentences=2'
    '&titles={title}&explaintext=1&exintro=1&format=json'
)

# Page-image query (prop=pageimages), used to obtain the thumbnail URL.
THUMB_URL_TMPL = (
    'https://en.wikipedia.org/w/api.php'
    '?action=query&prop=pageimages'
    '&titles={title}&format=json'
)
In [7]:
# Build a summarizer for the "Coffee" topic; no request is made yet.
summarizer = TopicSummarizer('Coffee')
# Run both fetches (text extract and thumbnail). process() returns the
# summarizer itself, which is why this cell has an Out[] value.
summarizer.process()
Out[7]:
In [8]:
# Default as_text=False: returns a TopicSummary (topic, thumb_url, text).
coffee_summary = summarizer.get_results()
In [9]:
# Show the extracted summary text.
print(coffee_summary.text)
# Last expression of the cell: an IPython Image built from the thumbnail
# URL, which the notebook presumably renders as the cell's Out[] value.
Image(url=coffee_summary.thumb_url)
Out[9]: