In [1]:
import requests
from bs4 import BeautifulSoup

In [7]:
#grab the NYT homepage
response = requests.get("http://www.nytimes.com")

In [8]:
#feed t into BSP
doc = BeautifulSoup(response.text, 'html.parser')

In [9]:
#get out the stories 
stories = doc.find_all("article", {'class': 'story'})
len(stories)


Out[9]:
141

In [29]:
all_stories = []
#grab the headlines and bylines
for story in stories:
    headline = story.find('h2', {'class':'story-heading'})
    if headline:
        this_story= {'headlines': headline_text}
        headline_text = headline.text.strip()
        byline = story.find('p', {'class': 'byline'})
        if byline:
            by_line_text = byline.text.strip()
        all_stories.append(this_story)    
    
all_stories


Out[29]:
[{'headlines': 'John Leguizamo’s Walls May Talk, but They Don’t Criticize'},
 {'headlines': 'Trump Fires His Campaign Chief in Pivot to General Race'},
 {'headlines': 'What Trump Learned From Joe McCarthy’s Top Aide'},
 {'headlines': 'Clinton Seeks Running Mate to Click With Her, Not Compete'},
 {'headlines': '3 New York Police Officials Arrested on Corruption Charges'},
 {'headlines': 'Senators Question Doping Agency on Handling of Russia'},
 {'headlines': ''},
 {'headlines': 'Is This Heaven? No, It’s Cleveland 11:52 AM ET'},
 {'headlines': 'Sports of The Times: James Makes N.B.A. History 8:13 AM ET'},
 {'headlines': 'Your Monday Briefing'},
 {'headlines': 'The Smartphone Way to Inner Calm'},
 {'headlines': 'The Benefits of Exercise Explained'},
 {'headlines': 'Venezuelans Storm Stores as Hunger Grips the Nation'},
 {'headlines': 'Venezuela Casts a Long Shadow on Elections in Spain'},
 {'headlines': 'Justices Turn Away Challenge to Connecticut Gun Law'},
 {'headlines': 'Orlando Shooter Said U.S. Should ‘Stop Bombing’ Syria 11:04 AM ET'},
 {'headlines': '4 Key Questions as Senate Considers New Gun Measures'},
 {'headlines': 'A Look at the Islamic State’s Victories and Losses'},
 {'headlines': 'What Does the First Day of Summer Look Like?'},
 {'headlines': 'Ex-C.E.O. of Volkswagen Is Under Investigation 9:54 AM ET'},
 {'headlines': 'Kabul Bombing Kills Security Contractors, Officials Say 12:18 PM ET'},
 {'headlines': 'Dustin Johnson Wins U.S. Open Despite Controversy'},
 {'headlines': 'Brooklyn Residents Seek Answers for Killing'},
 {'headlines': 'TV Recaps: ‘Game of Thrones’  |  ‘Veep’  | ‘Silicon Valley’NYT Now'},
 {'headlines': 'Two Astonishing Views of O.J. Simpson'},
 {'headlines': 'In Coral Spawning, Hope for Endangered Reefs'},
 {'headlines': 'A Quest to Get Americans to Care About Rugby'},
 {'headlines': 'Obama Needs to Protect the Iran Deal'},
 {'headlines': 'Editorial: The Broken Promise of Closing Guantánamo'},
 {'headlines': 'Blow: The G.O.P.’s Cynical Gay Ploy'},
 {'headlines': 'Cohen: Jo Cox and Britain’s Place in Europe 7:03 AM ET'},
 {'headlines': 'Krugman: A Tale of Two Parties'},
 {'headlines': 'Join us on Facebook »'},
 {'headlines': 'The ‘American Tragedy’ of Vietnam'},
 {'headlines': 'Cleveland Is Believeland'},
 {'headlines': 'Op-Ed: What the President Can’t Do for the Economy'},
 {'headlines': 'What Sent a Reporter Back to First Grade — Even During Maternity Leave'},
 {'headlines': 'Watch The Orlando Shooting Story Take Shape'},
 {'headlines': 'What Sent a Reporter Back to First Grade — Even During Maternity Leave'},
 {'headlines': 'Play Today’s Puzzle'},
 {'headlines': 'Play Today’s Puzzle'},
 {'headlines': 'Hockey Problems'},
 {'headlines': 'Decades Later, Hydrogen Bomb’s Damage Lingers'},
 {'headlines': 'The Stone: The Violence of Forgetting'},
 {'headlines': 'What Sent a Reporter Back to First Grade'},
 {'headlines': 'Consequences of a Brexit'},
 {'headlines': 'Finding Beauty in the Ordinary'},
 {'headlines': 'Not Forgotten: Anderson Cooper on His Father'},
 {'headlines': 'The Sisterhood of Political Progeny'},
 {'headlines': 'Op-Ed: The ‘American Tragedy’ of Vietnam'},
 {'headlines': 'GIF Shows Evolution of Orlando Breaking News'},
 {'headlines': 'Op-Ed: Obama Needs to Protect the Iran Deal'},
 {'headlines': 'The Sound of Music Is in His Heart'},
 {'headlines': 'The Law School Bust'},
 {'headlines': 'E.U. Countries Warn Britain on ‘Brexit’: You’ll Pay if You Leave Us'},
 {'headlines': 'Anti-Immigration Poster Denounced by a Top ‘Brexit’ Advocate'},
 {'headlines': 'Wall St. Rises as Fears of a ‘Brexit’ Ease'},
 {'headlines': 'Martin Winterkorn, Ex-C.E.O. of Volkswagen, Is Under Investigation'},
 {'headlines': 'Editorial: Heading Off the Next Extremist'},
 {'headlines': 'Frank Bruni: The Republicans’ Big Hot Mess'},
 {'headlines': 'Orlando Gunman Told Police That U.S. Should ‘Stop Bombing’ Syria and Iraq'},
 {'headlines': '4 Key Questions as Senate Considers New Gun Safety Measures'},
 {'headlines': 'Airbnb Vows to Fight Racism, but Its Users Can’t Sue to Prompt Fairness'},
 {'headlines': 'Europe’s Emergency Workers Turn to Drones to Save Lives'},
 {'headlines': '‘Made in L.A.,’ at the Hammer, Excavates Hollywood’s Past'},
 {'headlines': 'Books of The Times: Review: Natashia Deón’s ‘Grace,’ a Tale of Slavery, Its Ghosts and Legacy'},
 {'headlines': 'Donald Trump’s June Stumbles Mirror Those of Mitt Romney'},
 {'headlines': 'What Donald Trump Learned From Joseph McCarthy’s Right-Hand Man'},
 {'headlines': 'A Face in the Crowd at Moncler Gamme Bleu: Big Sean'},
 {'headlines': 'Table for Three: Cecile Richards, Barbara Bush and the Sisterhood of Political Progeny'},
 {'headlines': 'Anton Yelchin, ‘Star Trek’ Actor, Dies at 27'},
 {'headlines': '‘Finding Dory’ Sets Box Office Record for Pixar'},
 {'headlines': '3 N.Y.P.D. Commanders Are Arrested on Corruption Charges'},
 {'headlines': 'Yearbook Project Collects Stories of Children Killed in Shootings'},
 {'headlines': 'Cavaliers 93, Warriors 89 | Cleveland wins series, 4-3: Cavaliers Defeat Warriors to Win Their First N.B.A. Title'},
 {'headlines': 'Sports of The Times: A Long-Sought Title Belongs to LeBron James'},
 {'headlines': 'Leslie Odom Jr. to Leave ‘Hamilton’ on July 9'},
 {'headlines': 'Review: ‘I’ll Say She Is’ Revives a Marx Brothers Revue'},
 {'headlines': 'An Unwelcome Tourist Arrives in New Jersey: Clinging Jellyfish'},
 {'headlines': 'ScienceTake: The Grackle’s Secret to Success'},
 {'headlines': 'Prince Be, Who Infused Rap With Mysticism, Dies at 46'},
 {'headlines': 'Donald Shea, Officer Who Captured Infamous Bank Robber, Dies at 90'},
 {'headlines': 'Review: In ‘Suited,’ Searching for Clothes That Truly Fit'},
 {'headlines': 'Anton Yelchin, ‘Star Trek’ Actor, Dies at 27'},
 {'headlines': 'Personal Health: No Such Thing as a Healthy Smoker'},
 {'headlines': 'Food Banks Take on a Contributor to Diabetes: Themselves'},
 {'headlines': '4 Roller Coasters That Put the Theme in Theme Park'},
 {'headlines': 'Carry-On: What David Sedaris Can’t Travel Without'},
 {'headlines': 'Books of The Times: Review: Natashia Deón’s ‘Grace,’ a Tale of Slavery, Its Ghosts and Legacy'},
 {'headlines': 'Nonfiction: Susan Faludi’s ‘In the Darkroom’'},
 {'headlines': 'An Expensive Law Degree, and No Place to Use It'},
 {'headlines': 'Race/Related: Moving to Make Amends, Georgetown President Meets With Descendant of Slaves'},
 {'headlines': 'City Kitchen: A Creamy, Sweet Tribute to Summer'},
 {'headlines': 'A Good Appetite: Hot Honey Shrimp Is Spicy, Sweet and Speedy'},
 {'headlines': 'Opinion: Donald Trump’s Place'},
 {'headlines': 'Editorial: Heading Off the Next Extremist'},
 {'headlines': 'Checklist for Winning a Bidding War'},
 {'headlines': 'What I Love: John Leguizamo’s Walls May Talk, but They Don’t Criticize'},
 {'headlines': 'The 2016 Race: Yes, Political Ads Are Still Important, Even for Donald Trump'},
 {'headlines': 'The New Health Care: Why You Should Exercise (No, Not to Lose Weight)'},
 {'headlines': 'Feature: Can Netflix Survive in the New World It Created?'},
 {'headlines': 'Notebook: Why ‘Transcending Race’ Is a Lie'},
 {'headlines': 'Driven: Mazda’s CX-9: A Crossover That’s a Treat for Parents'},
 {'headlines': 'Wheels: Skeptics of Self-Driving Cars Span Generations'},
 {'headlines': '15 Minutes With Ai Weiwei'},
 {'headlines': 'Men’s Fashion Shows: Raf Simons in Florence'},
 {'headlines': 'Tune In to The Times: The Times on the Air: Terrorism, Trump and Trolls'},
 {'headlines': 'Looking Back: 1948-2016 | A Times Art Treasure Comes to an Omaha Library'},
 {'headlines': 'Checklist for Winning a Bidding War'},
 {'headlines': 'Search for Homes for Sale or Rent'},
 {'headlines': 'Sell Your Home'}]

In [5]:
#ave the headlines and bylines to a timestamped CSV

In [20]:
!pip install pandas


Requirement already satisfied (use --upgrade to upgrade): pandas in /usr/local/lib/python3.5/site-packages
Requirement already satisfied (use --upgrade to upgrade): pytz>=2011k in /usr/local/lib/python3.5/site-packages (from pandas)
Requirement already satisfied (use --upgrade to upgrade): python-dateutil>=2 in /usr/local/lib/python3.5/site-packages (from pandas)
Requirement already satisfied (use --upgrade to upgrade): numpy>=1.7.0 in /usr/local/lib/python3.5/site-packages (from pandas)
Requirement already satisfied (use --upgrade to upgrade): six>=1.5 in /usr/local/lib/python3.5/site-packages (from python-dateutil>=2->pandas)

In [24]:
import pandas as pd

In [30]:
stories_df = pd.DataFrame(all_stories)
stories_df.head()


Out[30]:
headlines
0 John Leguizamo’s Walls May Talk, but They Don’...
1 Trump Fires His Campaign Chief in Pivot to Gen...
2 What Trump Learned From Joe McCarthy’s Top Aide
3 Clinton Seeks Running Mate to Click With Her, ...
4 3 New York Police Officials Arrested on Corrup...

In [32]:
stories_df.to_csv("nyt-data.cvs")

In [33]:
import time

In [37]:
datastring =  time.strftime("%Y-%m-%d-%I-%M-%p")
datastring


Out[37]:
'2016-06-20-12-46-PM'

In [40]:
filename = "nyt-data-" + datastring + ".csv"
stories_df.to_csv(filename, index=False)

In [ ]: