In [1]:
# Todo:
# 1. Get the HTML from the iSchool faculty directory
# 2. Parse the HTML and extract the faculty contact info
# 3. Write the contact info to a CSV file for marketing
import requests
from bs4 import BeautifulSoup
import csv
def get_html(url, timeout=30):
    """Download the page at ``url`` and return its body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, ``requests`` may block indefinitely on an
            unresponsive host.

    Returns:
        The response body decoded to ``str``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of handing an error page
    # to the HTML parser as if it were the directory.
    response.raise_for_status()
    return response.text
def extract_info(html):
    """Parse the faculty directory HTML into a list of contact records.

    Looks inside the first ``.faculty-list`` element and builds one record
    per ``.media`` element found there.

    Args:
        html: Markup of the directory page.

    Returns:
        A list of dictionaries with the keys ``"name"`` (text of the
        entry's ``h4``), ``"title"`` (text of its ``h5``) and ``"email"``
        (text of its ``mailto:`` link). A field whose element is missing
        is an empty string. The list is empty when the page has no
        ``.faculty-list`` element.
    """
    soup = BeautifulSoup(html, "lxml")
    containers = soup.select(".faculty-list")
    if not containers:
        # Empty or unexpected markup: nothing to extract.
        return []

    faculty = []
    for item in containers[0].select('.media'):
        name_tag = item.find("h4")
        title_tag = item.find("h5")
        fac = {
            "name": name_tag.text if name_tag is not None else "",
            "title": title_tag.text if title_tag is not None else "",
            "email": "",
        }
        for link in item.find_all("a"):
            # Anchors without an href (e.g. named anchors) must not raise.
            if "mailto:" in link.get("href", ""):
                fac["email"] = link.text
        faculty.append(fac)
    return faculty
def write_csv(filename, facultylist, encoding="utf-8"):
    """Write faculty records to a CSV file with a header row.

    Args:
        filename: Path of the file to create or overwrite.
        facultylist: Iterable of dictionaries holding exactly the keys
            ``"name"``, ``"title"`` and ``"email"``.
        encoding: Text encoding of the output file. An explicit default is
            used so names with non-ASCII characters are written the same
            way on every platform instead of depending on the locale.

    Returns:
        The string ``"success"`` once the file has been written.

    Raises:
        ValueError: If a record contains a key other than the three
            expected field names (raised by ``csv.DictWriter``).
    """
    fieldnames = ['name', 'title', 'email']
    # newline='' lets the csv module control line endings itself.
    with open(filename, 'w', newline='', encoding=encoding) as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(facultylist)
    return "success"
# Run the three steps in order: download, parse, save.
filename = "faculty.csv"
webpage = get_html('https://ischool.syr.edu/people/directories/faculty/')
faculty = extract_info(webpage)
result = write_csv(filename, faculty)
# write_csv reports completion with the literal string "success".
if result == "success":
    print("File %s written to disk" % filename)
In [ ]: