Title: Beautiful Soup Basic HTML Scraping
Slug: beautiful_soup_html_basics
Summary: Beautiful Soup Basic HTML Scraping
Date: 2016-05-01 12:00
Category: Python
Tags: Web Scraping
Authors: Chris Albon
In [79]:
# Import required modules
import requests
from bs4 import BeautifulSoup
In [80]:
# Create a variable with the url
# NOTE(review): the author is listed as Chris Albon, so this host may be a
# typo for chrisalbon.com; confirm before relying on the results
url = 'http://chrisralbon.com'
# Use requests to get the contents; the timeout stops the cell from
# hanging indefinitely if the server never responds
r = requests.get(url, timeout=10)
# Raise an exception on an HTTP error status (4xx/5xx) rather than
# silently parsing an error page
r.raise_for_status()
# Get the text of the contents
html_content = r.text
# Convert the html content into a beautiful soup object using the lxml parser
soup = BeautifulSoup(html_content, 'lxml')
In [81]:
# View the title tag of the soup object (the whole tag, markup included)
soup.title
Out[81]:
In [82]:
# View only the text string contained within the title tag
soup.title.string
Out[82]:
In [83]:
# View the first paragraph tag of the soup
soup.p
Out[83]:
In [84]:
# View the name of the tag that is the parent of the title tag
soup.title.parent.name
Out[84]:
In [85]:
# View the first anchor (link) tag of the soup
soup.a
Out[85]:
In [86]:
# Find all the anchor (link) tags and view the first five
soup.find_all('a')[0:5]
Out[86]:
In [87]:
# View the string within the first paragraph tag
# NOTE(review): no output is shown for this cell, so this presumably
# evaluated to None (Beautiful Soup's .string is None when a tag has more
# than one child); confirm against the page
soup.p.string
In [88]:
# Find all the h2 heading tags and view the first five
soup.find_all('h2')[0:5]
Out[88]:
In [89]:
# Find all the anchor (link) tags and view the first five
# (same expression as cell In [86])
soup.find_all('a')[0:5]
Out[89]: