In [1]:

    
# Todo:

# 1. Get the HTML from the iSchool faculty directory
# 2. Parse the HTML and extract faculty contact info
# 3. Write the contact info to a CSV file for marketing

import requests
from bs4 import BeautifulSoup
import csv

def get_html(url, timeout=10):
    """Download a web page and return its body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, ``requests.get`` may block indefinitely.

    Returns:
        The decoded response body (``response.text``).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on error statuses instead of handing an error page
    # to the parser as if it were the real document.
    response.raise_for_status()
    return response.text

def extract_info(html):
    """Parse faculty directory HTML into a list of contact records.

    Every ``.media`` element inside the first ``.faculty-list`` element
    becomes one record. The name is read from the entry's first ``<h4>``,
    the title from its first ``<h5>``, and the email from the text of a
    link whose ``href`` contains ``mailto:`` (if several match, the last
    one wins).

    Args:
        html: HTML source of the directory page.

    Returns:
        A list of dicts with the keys ``"name"``, ``"title"`` and
        ``"email"``. A field that cannot be found is an empty string.

    Raises:
        IndexError: If the document has no ``.faculty-list`` element.
    """
    soup = BeautifulSoup(html, "lxml")
    # Only the first matching container is scanned.
    listing = soup.select(".faculty-list")[0]
    faculty = []
    for item in listing.select(".media"):
        name_tag = item.find("h4")
        title_tag = item.find("h5")
        fac = {
            # find() returns None when the heading is absent; fall back to
            # "" so one incomplete entry does not abort the whole parse.
            "name": name_tag.text if name_tag is not None else "",
            "title": title_tag.text if title_tag is not None else "",
            "email": "",
        }
        for link in item.find_all("a"):
            # .get() avoids a KeyError on anchors without an href attribute.
            if "mailto:" in link.get("href", ""):
                fac["email"] = link.text
        faculty.append(fac)
    return faculty
    
def write_csv(filename, facultylist):
    """Write faculty records to a CSV file with a header row.

    Args:
        filename: Path of the file to create; an existing file is
            overwritten.
        facultylist: Iterable of dicts keyed by ``"name"``, ``"title"``
            and ``"email"``.

    Returns:
        The string ``"success"`` once the file has been written.

    Raises:
        ValueError: If a record contains a key other than the three
            expected columns (``csv.DictWriter`` default behaviour).
    """
    fieldnames = ['name', 'title', 'email']
    # An explicit UTF-8 encoding keeps non-ASCII names intact regardless of
    # the platform's default locale; newline='' lets the csv module manage
    # line endings itself.
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(facultylist)
    return "success"

# Run the three steps from the plan above: fetch, parse, save.
filename = "faculty.csv"
webpage = get_html('https://ischool.syr.edu/people/directories/faculty/')
faculty = extract_info(webpage)
result = write_csv(filename, faculty)
# write_csv signals completion by returning the string "success".
if result == "success":
    print("File %s written to disk" % filename)









    



File faculty.csv written to disk

Watch Me Code 1: Beautiful Soup 4