In [122]:
from __future__ import print_function
import requests
from textblob import TextBlob
from bs4 import BeautifulSoup
from collections import namedtuple
In [144]:
# Result record: ``feel`` is the numeric sentiment score, ``title`` the page title.
sentiment = namedtuple('Sentiment', ['feel', 'title'])

# Strings that are pure layout noise in a scraped row and carry no text.
_LAYOUT_STRINGS = (' ', '\n', '\t')


def _row_strings(row):
    """Return the text fragments of a table row, minus bare whitespace tokens."""
    return [s for s in row.strings if s not in _LAYOUT_STRINGS]


def get_page_sentiment(page_content):
    """Score the comment sentiment of a Hacker News item page.

    Every ``<tr class="athing">`` row is collected; the first row is taken
    as the story title and each remaining row as one comment.  Each comment
    body is split into sentences with TextBlob and the sentence polarities
    are summed over the whole page.

    Args:
        page_content: HTML markup of the item page.

    Returns:
        A ``sentiment(feel, title)`` namedtuple.  ``feel`` is the summed
        sentence polarity divided by the number of comment rows (i.e. the
        average *per comment*, not per sentence).  It is ``0.0`` when the
        page has no comments, and the title is ``''`` when the page has no
        matching rows at all.
    """
    # Name the parser explicitly: letting bs4 guess makes the parse tree
    # depend on which parsers happen to be installed, and triggers a warning.
    soup = BeautifulSoup(page_content, 'html.parser')
    rows = soup.find_all('tr', {'class': 'athing'})
    if not rows:
        # Nothing recognisable on the page (previously an IndexError).
        return sentiment(0.0, '')

    title = ''.join(_row_strings(rows[0]))
    comment_rows = rows[1:]
    if not comment_rows:
        # Story without comments (previously a ZeroDivisionError).
        return sentiment(0.0, title)

    polarities = []
    for row in comment_rows:
        fragments = _row_strings(row)
        # The row layout is assumed to be: author, timestamp, body..., reply
        # link.  Slicing (rather than indexing) keeps short rows from raising.
        content = ''.join(fragments[2:-1])
        polarities.extend(
            sentence.sentiment.polarity
            for sentence in TextBlob(content).sentences
        )

    # float() keeps this a true division on Python 2 even when no sentence
    # was found and the sum is the integer 0.
    return sentiment(sum(polarities) / float(len(comment_rows)), title)
In [145]:
def print_sentiment(page_url, timeout=10):
    """Fetch an item page and print its title followed by its sentiment score.

    Args:
        page_url: URL of the page to download and score.
        timeout: Seconds to wait for the server before giving up.  Without
            a timeout ``requests`` may block indefinitely.

    Raises:
        requests.exceptions.RequestException: if the request times out,
            fails, or the server answers with a 4xx/5xx status.
    """
    response = requests.get(page_url, timeout=timeout)
    # Fail loudly on an HTTP error so an error page is never scored as if it
    # were a discussion thread.
    response.raise_for_status()
    feel, title = get_page_sentiment(response.text)
    print('{}: {}'.format(title, feel))
In [146]:
# Score the Hacker News thread with item id 9746405 (performs a live HTTP request).
print_sentiment('https://news.ycombinator.com/item?id=9746405')
In [147]:
# Score the Hacker News thread with item id 9746866 (performs a live HTTP request).
print_sentiment('https://news.ycombinator.com/item?id=9746866')
In [148]:
# Score the Hacker News thread with item id 9746846 (performs a live HTTP request).
print_sentiment('https://news.ycombinator.com/item?id=9746846')
In [ ]: