Ta-Stephan: Didn't grab the number of comments for each URL.

Grade = 9/10


In [1]:
import requests
from bs4 import BeautifulSoup

In [2]:
# Grab the Reddit Homepage
# A browser-style User-Agent header is sent along with the request.
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
# Always pass a timeout: without one, requests can wait indefinitely on a
# stalled connection and hang the notebook.
response = requests.get("http://www.reddit.com", headers=headers, timeout=30)
# Fail right here on an HTTP error status instead of parsing an error page
# and silently ending up with an empty list of stories further down.
response.raise_for_status()

In [3]:
# Feed the downloaded HTML text into BeautifulSoup, parsing it with the
# 'html.parser' backend, so the page can be searched by tag and class.
reddit = BeautifulSoup(response.text, 'html.parser')

In [4]:
#reddit

In [5]:
# Collect every <div class="clearleft"> element; the post containers are
# located relative to these (as their next <div> sibling) in the next cell.
one_sibling_up = reddit.find_all(name='div', attrs={'class': 'clearleft'})

In [6]:
#because only every other clearleft has a post in it:
# Look up each following <div> exactly once (the lookup used to run twice per
# tag: once to filter and once to collect) and keep only the ones that exist.
posts = [div
         for div in (tag.find_next_sibling('div') for tag in one_sibling_up)
         if div]

In [7]:
def vote_count_int(x):
    """Convert the text of a post's score element into an integer vote count.

    Parameters
    ----------
    x : str, int or None
        The scraped score text. '•' is the placeholder that appears in place
        of a number (presumably while the score is still hidden - confirm
        against the page markup).

    Returns
    -------
    int
        The vote count. 0 is returned for the '•' placeholder, for an empty
        or whitespace-only string, and for None (which a tag's ``.string``
        yields when the element has no single text child).

    Raises
    ------
    ValueError
        If the text is neither a placeholder nor a valid integer literal.
    """
    if x is None:
        return 0
    if isinstance(x, str):
        # Tolerate stray whitespace/newlines around the scraped text so that
        # a padded placeholder is still recognised.
        x = x.strip()
        if x in ('', '•'):
            return 0
    return int(x)

In [8]:
# Build one dict per front-page post: subreddit, title, timestamp, URL, votes.
all_stories = []
for article in posts:
    # Search for each required element once per post and reuse the result
    # (each lookup used to be repeated for the None-check and the assignment).
    article_subreddit = article.find("a", {'class': 'subreddit hover may-blank' })
    article_title = article.find("a", {'class': 'title may-blank ' })
    article_timestamp = article.find("time", {'class' : 'live-timestamp'})
    article_score = article.find("div", {'class': 'score unvoted' })

    # Skip any post that lacks one of the required elements. A missing score
    # used to `break` out of the loop, which silently dropped every remaining
    # post; it is now skipped like the other missing-element cases.
    required = (article_subreddit, article_title, article_timestamp, article_score)
    if any(element is None for element in required):
        continue

    # Deliberately not named `time`: that would leave a string bound to the
    # same global name as the `time` module imported later in the notebook.
    article_time = article_timestamp.get('datetime')
    # The title link's href is the story URL. Self posts carry a
    # site-relative path (e.g. '/r/<subreddit>/comments/...').
    article_URL = article_title.get('href')

    #Dictionary
    article_subreddit_dict = {
        'subreddit': article_subreddit.string,
        'title': article_title.string,
        'time': article_time,
        'URL': article_URL,
        'votes': vote_count_int(article_score.string),
    }
    #Dict List
    all_stories.append(article_subreddit_dict)

In [9]:
# Display the full list of scraped story dicts to eyeball the loop's result.
all_stories


Out[9]:
[{'URL': 'http://imgur.com/a/bwTqG',
  'subreddit': '/r/aww',
  'time': '2016-06-30T19:46:44+00:00',
  'title': 'How to stay cool this Summer',
  'votes': 0},
 {'URL': 'http://imgur.com/VCe1OQA',
  'subreddit': '/r/aww',
  'time': '2016-06-30T19:47:38+00:00',
  'title': 'My new 4 MO foster pupper. Name suggestions and breed guesses welcome! (crossposted on /r/idmydog) https://imgur.com/gallery/qdhZh',
  'votes': 0},
 {'URL': 'https://soundcloud.com/ebdot/with-me-prod-b-james',
  'subreddit': '/r/Music',
  'time': '2016-06-30T19:48:07+00:00',
  'title': 'Eugene Bdot - With Me [Hip-Hop]',
  'votes': 0},
 {'URL': '/r/Showerthoughts/comments/4qnlxu/when_anything_is_moving_there_is_a_moment_when/',
  'subreddit': '/r/Showerthoughts',
  'time': '2016-06-30T19:47:44+00:00',
  'title': "When anything is moving, there is a moment when that object will be at a precise location it can't be set to.",
  'votes': 0},
 {'URL': 'http://imgur.com/T0IwPUa',
  'subreddit': '/r/mildlyinteresting',
  'time': '2016-06-30T19:42:31+00:00',
  'title': 'These resealable bags came in a resealable bag',
  'votes': 0},
 {'URL': 'http://i.imgur.com/dzaJiay.jpg',
  'subreddit': '/r/aww',
  'time': '2016-06-30T19:45:33+00:00',
  'title': 'Meet Veruca - she\'s red/white and "bratty for daddy"',
  'votes': 0},
 {'URL': '/r/tifu/comments/4qnljz/tifu_by_breaking_into_my_friends_house/',
  'subreddit': '/r/tifu',
  'time': '2016-06-30T19:45:28+00:00',
  'title': "TIFU by breaking into my friend's house",
  'votes': 0},
 {'URL': 'https://i.reddituploads.com/7cc9db7b5a614a399843b8bf542abede?fit=max&h=1536&w=1536&s=d84c5b2920c0aeecfdcf911f6b556d53',
  'subreddit': '/r/mildlyinteresting',
  'time': '2016-06-30T19:42:13+00:00',
  'title': 'This sheet of notebook paper came upside down.',
  'votes': 0},
 {'URL': '/r/personalfinance/comments/4qnlge/i_have_us_3000_in_a_us_checking_account_but_i/',
  'subreddit': '/r/personalfinance',
  'time': '2016-06-30T19:44:54+00:00',
  'title': "I have US$ 3000 in a U.S. checking account, but I don't live there anymore. How could I invest it?",
  'votes': 0},
 {'URL': '/r/Showerthoughts/comments/4qme47/microwave_ovens_need_a_midnight_snack_button_that/',
  'subreddit': '/r/Showerthoughts',
  'time': '2016-06-30T15:53:21+00:00',
  'title': "Microwave ovens need a 'Midnight Snack' button that disables the dings and beeps.",
  'votes': 4415},
 {'URL': 'http://imgur.com/7cWmfxU',
  'subreddit': '/r/mildlyinteresting',
  'time': '2016-06-30T15:25:24+00:00',
  'title': 'My can of spray sealant leaked, then sealed itself.',
  'votes': 4613},
 {'URL': 'http://i.imgur.com/i3XneMt.gifv',
  'subreddit': '/r/gifs',
  'time': '2016-06-30T13:37:31+00:00',
  'title': 'Do cows get excited?',
  'votes': 5681},
 {'URL': 'http://imgur.com/inia1oA',
  'subreddit': '/r/funny',
  'time': '2016-06-30T14:11:08+00:00',
  'title': 'Every few years Tom Hanks plays a slightly more serious Captain.',
  'votes': 4602},
 {'URL': 'http://www.theinertia.com/health/heres-a-company-growing-real-meat-from-stem-cells/',
  'subreddit': '/r/Futurology',
  'time': '2016-06-30T15:13:31+00:00',
  'title': 'The company “Memphis Meats” has successfully grown real, authentic tasting meat from animal stem cells.',
  'votes': 3696},
 {'URL': 'http://www.foxcarolina.com/story/32308903/deputies-police-investigating-nightclub-shooting',
  'subreddit': '/r/news',
  'time': '2016-06-30T14:45:26+00:00',
  'title': 'Night club shooting in SC after dispute; lawful concealed carrier draws firearm and stops shooter',
  'votes': 3923},
 {'URL': 'http://imgur.com/rk0kTW9',
  'subreddit': '/r/GetMotivated',
  'time': '2016-06-30T12:53:13+00:00',
  'title': "[image] Don't cling to past mistakes",
  'votes': 5348},
 {'URL': 'http://i.imgur.com/nrS7U5d.gifv',
  'subreddit': '/r/aww',
  'time': '2016-06-30T12:21:21+00:00',
  'title': 'Instant Regret trying to catch that tail',
  'votes': 6082},
 {'URL': 'https://m.youtube.com/watch?v=CO6qLC4cL8E',
  'subreddit': '/r/movies',
  'time': '2016-06-30T13:12:03+00:00',
  'title': 'Why Him - Official Red Band Trailer - Bryan Cranston, James Franco',
  'votes': 3907},
 {'URL': '/r/Jokes/comments/4qlu0w/dead_again/',
  'subreddit': '/r/Jokes',
  'time': '2016-06-30T14:06:14+00:00',
  'title': 'Dead again..',
  'votes': 3117},
 {'URL': 'http://www.theguardian.com/politics/live/2016/jun/30/brexit-live-theresa-may-and-boris-johnson-set-to-announce-leadership-bids?CMP=twt_gu',
  'subreddit': '/r/worldnews',
  'time': '2016-06-30T10:58:04+00:00',
  'title': 'Boris Johnson says he will not run for Tory party leadership',
  'votes': 5509},
 {'URL': 'http://imgur.com/J4V33lU',
  'subreddit': '/r/gaming',
  'time': '2016-06-30T10:35:23+00:00',
  'title': 'Good Guy Dead Space',
  'votes': 5291},
 {'URL': '/r/personalfinance/comments/4qlr51/my_employer_requires_me_to_stay_after_close_to/',
  'subreddit': '/r/personalfinance',
  'time': '2016-06-30T13:49:15+00:00',
  'title': "My employer requires me to stay after close to lock up, count drawers, etc. but says she isn't required to pay me for that time spent? Help?",
  'votes': 2728},
 {'URL': 'http://i.imgur.com/gLeEN4z.png',
  'subreddit': '/r/creepy',
  'time': '2016-06-30T13:29:52+00:00',
  'title': 'Terrors of the Deep by Bacius9',
  'votes': 2841},
 {'URL': 'http://www.nj.com/camden/index.ssf/2016/06/teacher_who_suggested_he_student_get_baked_to_lose.html#incart_river_home',
  'subreddit': '/r/nottheonion',
  'time': '2016-06-30T15:58:18+00:00',
  'title': "Teacher who wanted to 'get baked' with 'hot' students losing job",
  'votes': 1800},
 {'URL': 'http://i.imgur.com/2gV4Utu.jpg',
  'subreddit': '/r/Art',
  'time': '2016-06-30T16:11:22+00:00',
  'title': 'Pencil Color Eagle By Katy Lipscomb - 2015',
  'votes': 1670},
 {'URL': 'https://www.bookspring.org/2016/03/troubling-study-reading-aloud/',
  'subreddit': '/r/books',
  'time': '2016-06-30T12:00:31+00:00',
  'title': 'New report shows that fewer than half (46%) of parents read aloud to their children every day, and only 34% do so for at least 15 minutes',
  'votes': 3523},
 {'URL': 'http://i.imgur.com/kQiPDqt.jpg',
  'subreddit': '/r/photoshopbattles',
  'time': '2016-06-30T11:42:22+00:00',
  'title': 'PsBattle: Cyclist says hi to Boris Johnson',
  'votes': 3279},
 {'URL': '/r/askscience/comments/4ql6ak/askscience_ama_im_professor_brian_hare_a_pioneer/',
  'subreddit': '/r/askscience',
  'time': '2016-06-30T11:33:16+00:00',
  'title': "AskScience AMA: I’m Professor Brian Hare, a pioneer of canine cognition research, here to discuss the inner workings of a dog’s brain, including how they see the world and the cognitive skills that influence your dog's personality and behavior. AMA!",
  'votes': 3386},
 {'URL': 'http://espn.go.com/espnw/culture/feature/article/16593240/paulette-leaphart-walked-topless-biloxi-washington-talk-realities-cancer-survivors',
  'subreddit': '/r/TwoXChromosomes',
  'time': '2016-06-30T13:11:22+00:00',
  'title': 'A breast cancer survivor walked topless from Mississippi to DC. Screw pink ribbons. This is real breast cancer "awareness"',
  'votes': 2338},
 {'URL': 'https://en.wikipedia.org/wiki/Tilly_Smith',
  'subreddit': '/r/todayilearned',
  'time': '2016-06-30T09:48:40+00:00',
  'title': 'TIL a 10 year old British girl saved nearly a hundred foreign tourists at Maikhao Beach in Thailand by warning beachgoers minutes before the arrival of the 2004 Indian Ocean earthquake tsunami. She learned about tsunamis in school just two weeks prior to the event.',
  'votes': 4152},
 {'URL': '/r/OldSchoolCool/comments/4qlnjv/on_the_way_to_the_summer_jam_at_watkins_glen_ny/',
  'subreddit': '/r/OldSchoolCool',
  'time': '2016-06-30T13:28:02+00:00',
  'title': 'On the way to the Summer Jam at Watkins Glen NY in 1973.',
  'votes': 2044},
 {'URL': 'https://www.youtube.com/watch?v=m8ft15cTCIk',
  'subreddit': '/r/videos',
  'time': '2016-06-30T08:19:40+00:00',
  'title': 'How to get a baby to clean the house',
  'votes': 5046},
 {'URL': 'http://i.imgur.com/vep0rpK.png',
  'subreddit': '/r/space',
  'time': '2016-06-30T06:16:27+00:00',
  'title': "I looked up the Mars rover's odometer out of curiosity and was a bit surprised by the results. Here's an overlay of its Martian journey between August 2012 and June 2016, superimposed on Earth.",
  'votes': 5683}]

In [10]:
import pandas as pd

In [11]:
#convert to Pandas

In [12]:
#date string import

In [13]:
# Turn the list of story dicts into a DataFrame: one row per story, with the
# dict keys (URL, subreddit, time, title, votes) as columns.
stories_df = pd.DataFrame(all_stories)
# Preview the first two rows as a sanity check.
stories_df.head(2)


Out[13]:
URL subreddit time title votes
0 http://imgur.com/a/bwTqG /r/aww 2016-06-30T19:46:44+00:00 How to stay cool this Summer 0
1 http://imgur.com/VCe1OQA /r/aww 2016-06-30T19:47:38+00:00 My new 4 MO foster pupper. Name suggestions an... 0

In [14]:
import time

In [15]:
# Date stamp used in the output filename: month number, abbreviated month
# name, day of month (e.g. '06-Jun-30').
# %b is the documented directive for the abbreviated month name; the %h used
# previously is a platform-specific alias that Python does not guarantee.
datestring = time.strftime("%m-%b-%d")
datestring


Out[15]:
'06-Jun-30'

In [16]:
#creating .csv file

In [17]:
# Name the output file after today's date stamp, then write the stories table
# to it as CSV without the DataFrame index column.
filename = "".join(["reddit-frontpage-", datestring, ".csv"])
stories_df.to_csv(path_or_buf=filename, index=False)

In [18]:
# Read back the file that was just written. Reuse `filename` rather than a
# hard-coded date so this check works on whatever day the notebook is run.
pd.read_csv(filename)


Out[18]:
URL subreddit time title votes
0 http://imgur.com/a/bwTqG /r/aww 2016-06-30T19:46:44+00:00 How to stay cool this Summer 0
1 http://imgur.com/VCe1OQA /r/aww 2016-06-30T19:47:38+00:00 My new 4 MO foster pupper. Name suggestions an... 0
2 https://soundcloud.com/ebdot/with-me-prod-b-james /r/Music 2016-06-30T19:48:07+00:00 Eugene Bdot - With Me [Hip-Hop] 0
3 /r/Showerthoughts/comments/4qnlxu/when_anythin... /r/Showerthoughts 2016-06-30T19:47:44+00:00 When anything is moving, there is a moment whe... 0
4 http://imgur.com/T0IwPUa /r/mildlyinteresting 2016-06-30T19:42:31+00:00 These resealable bags came in a resealable bag 0
5 http://i.imgur.com/dzaJiay.jpg /r/aww 2016-06-30T19:45:33+00:00 Meet Veruca - she's red/white and "bratty for ... 0
6 /r/tifu/comments/4qnljz/tifu_by_breaking_into_... /r/tifu 2016-06-30T19:45:28+00:00 TIFU by breaking into my friend's house 0
7 https://i.reddituploads.com/7cc9db7b5a614a3998... /r/mildlyinteresting 2016-06-30T19:42:13+00:00 This sheet of notebook paper came upside down. 0
8 /r/personalfinance/comments/4qnlge/i_have_us_3... /r/personalfinance 2016-06-30T19:44:54+00:00 I have US$ 3000 in a U.S. checking account, bu... 0
9 /r/Showerthoughts/comments/4qme47/microwave_ov... /r/Showerthoughts 2016-06-30T15:53:21+00:00 Microwave ovens need a 'Midnight Snack' button... 4415
10 http://imgur.com/7cWmfxU /r/mildlyinteresting 2016-06-30T15:25:24+00:00 My can of spray sealant leaked, then sealed it... 4613
11 http://i.imgur.com/i3XneMt.gifv /r/gifs 2016-06-30T13:37:31+00:00 Do cows get excited? 5681
12 http://imgur.com/inia1oA /r/funny 2016-06-30T14:11:08+00:00 Every few years Tom Hanks plays a slightly mor... 4602
13 http://www.theinertia.com/health/heres-a-compa... /r/Futurology 2016-06-30T15:13:31+00:00 The company “Memphis Meats” has successfully g... 3696
14 http://www.foxcarolina.com/story/32308903/depu... /r/news 2016-06-30T14:45:26+00:00 Night club shooting in SC after dispute; lawfu... 3923
15 http://imgur.com/rk0kTW9 /r/GetMotivated 2016-06-30T12:53:13+00:00 [image] Don't cling to past mistakes 5348
16 http://i.imgur.com/nrS7U5d.gifv /r/aww 2016-06-30T12:21:21+00:00 Instant Regret trying to catch that tail 6082
17 https://m.youtube.com/watch?v=CO6qLC4cL8E /r/movies 2016-06-30T13:12:03+00:00 Why Him - Official Red Band Trailer - Bryan Cr... 3907
18 /r/Jokes/comments/4qlu0w/dead_again/ /r/Jokes 2016-06-30T14:06:14+00:00 Dead again.. 3117
19 http://www.theguardian.com/politics/live/2016/... /r/worldnews 2016-06-30T10:58:04+00:00 Boris Johnson says he will not run for Tory pa... 5509
20 http://imgur.com/J4V33lU /r/gaming 2016-06-30T10:35:23+00:00 Good Guy Dead Space 5291
21 /r/personalfinance/comments/4qlr51/my_employer... /r/personalfinance 2016-06-30T13:49:15+00:00 My employer requires me to stay after close to... 2728
22 http://i.imgur.com/gLeEN4z.png /r/creepy 2016-06-30T13:29:52+00:00 Terrors of the Deep by Bacius9 2841
23 http://www.nj.com/camden/index.ssf/2016/06/tea... /r/nottheonion 2016-06-30T15:58:18+00:00 Teacher who wanted to 'get baked' with 'hot' s... 1800
24 http://i.imgur.com/2gV4Utu.jpg /r/Art 2016-06-30T16:11:22+00:00 Pencil Color Eagle By Katy Lipscomb - 2015 1670
25 https://www.bookspring.org/2016/03/troubling-s... /r/books 2016-06-30T12:00:31+00:00 New report shows that fewer than half (46%) of... 3523
26 http://i.imgur.com/kQiPDqt.jpg /r/photoshopbattles 2016-06-30T11:42:22+00:00 PsBattle: Cyclist says hi to Boris Johnson 3279
27 /r/askscience/comments/4ql6ak/askscience_ama_i... /r/askscience 2016-06-30T11:33:16+00:00 AskScience AMA: I’m Professor Brian Hare, a pi... 3386
28 http://espn.go.com/espnw/culture/feature/artic... /r/TwoXChromosomes 2016-06-30T13:11:22+00:00 A breast cancer survivor walked topless from M... 2338
29 https://en.wikipedia.org/wiki/Tilly_Smith /r/todayilearned 2016-06-30T09:48:40+00:00 TIL a 10 year old British girl saved nearly a ... 4152
30 /r/OldSchoolCool/comments/4qlnjv/on_the_way_to... /r/OldSchoolCool 2016-06-30T13:28:02+00:00 On the way to the Summer Jam at Watkins Glen N... 2044
31 https://www.youtube.com/watch?v=m8ft15cTCIk /r/videos 2016-06-30T08:19:40+00:00 How to get a baby to clean the house 5046
32 http://i.imgur.com/vep0rpK.png /r/space 2016-06-30T06:16:27+00:00 I looked up the Mars rover's odometer out of c... 5683

In [ ]: