Gather Presidential Debate Transcripts from http://www.debates.org/

This script downloads transcripts of debates hosted at debates.org


In [1]:
import os
from os import path
import time

from bs4 import BeautifulSoup
import requests

In [2]:
#page with list of debates
debates_url= "http://www.debates.org/index.php?page=debate-transcripts"

In [3]:
outdir = path.join(path.curdir, "pres_debates")

#check if path exists and create if not
if not path.exists(outdir):
    os.makedirs(outdir)

In [4]:
#load 
debates_page = BeautifulSoup(
    requests.get(debates_url).content
)

In [5]:
debates_list = debates_page.find('div', attrs={"id": "content-sm"}).find_all('a')

In [8]:
for debate in debates_list:
    url = debate.attrs['href']
    
    # One of the links goes to translations for the years debate, we'll skip that one
    if "translate" in debate.text:
        continue
    
    # Some of the links are relative, this checks for that
    if not url.startswith('http'):
        url = "http://www.debates.org/" + url
    
    title = debate.text.replace(" ", "_")
    outpath= path.join(outdir, title + ".txt")
    outfile = open(outpath, "w")
    
    print "Downloading {} at {}". format(title, url)
    
    debate_text = BeautifulSoup(
        requests.get(url).content
    ).find('div', attrs={"id": "content-sm"})
    
    outfile.write(str(debate_text))
    outfile.close()
    
    time.sleep(1)


Downloading October_3,_2012:_The_First_Obama-Romney_Presidential_Debate at http://www.debates.org/index.php?page=october-3-2012-debate-transcript
Downloading October_11,_2012:_The_Biden-Ryan_Vice_Presidential_Debate at http://www.debates.org/index.php?page=october-11-2012-the-biden-romney-vice-presidential-debate
Downloading October_16,_2012:_The_Second_Obama-Romney_Presidential_Debate at http://www.debates.org/index.php?page=october-1-2012-the-second-obama-romney-presidential-debate
Downloading October_22,_2012:_The_Third_Obama-Romney_Presidential_Debate at http://www.debates.org/index.php?page=october-22-2012-the-third-obama-romney-presidential-debate
Downloading September_26,_2008:_The_First_McCain-Obama_Presidential_Debate at http://www.debates.org/index.php?page=2008-debate-transcript
Downloading October_2,_2008:_The_Biden-Palin_Vice_Presidential_Debate at http://www.debates.org/index.php?page=2008-debate-transcript-2
Downloading October_7,_2008:_The_Second_McCain-Obama_Presidential_Debate at http://www.debates.org/index.php?page=october-7-2008-debate-transcrip
Downloading October_15,_2008:_The_Third_McCain-Obama_Presidential_Debate at http://www.debates.org/index.php?page=october-15-2008-debate-transcript
Downloading October_13,_2004:_The_Third_Bush-Kerry_Presidential_Debate at http://www.debates.org/index.php?page=october-13-2004-debate-transcript
Downloading October_8,_2004:_The_Second_Bush-Kerry_Presidential_Debate at http://www.debates.org/index.php?page=october-8-2004-debate-transcript
Downloading October_5,_2004:_The_Cheney-Edwards_Vice_Presidential_Debate at http://www.debates.org/index.php?page=october-5-2004-transcript
Downloading September_30,_2004:_The_First_Bush-Kerry_Presidential_Debate at http://www.debates.org/index.php?page=september-30-2004-debate-transcript
Downloading October_3,_2000:_The_First_Gore-Bush_Presidential_Debate at http://www.debates.org/index.php?page=october-3-2000-transcript
Downloading October_5,_2000:_The_Lieberman-Cheney_Vice_Presidential_Debate at http://www.debates.org/index.php?page=october-5-2000-debate-transcript
Downloading October_11,_2000:_The_Second_Gore-Bush_Presidential_Debate at http://www.debates.org/index.php?page=october-11-2000-debate-transcript
Downloading October_17,_2000:_The_Third_Gore-Bush_Presidential_Debate at http://www.debates.org/index.php?page=october-17-2000-debate-transcript
Downloading October_6,_1996:_The_First_Clinton-Dole_Presidential_Debate at http://www.debates.org/index.php?page=october-6-1996-debate-transcript
Downloading October_9,_1996:_The_Gore-Kemp_Vice_Presidential_Debate at http://www.debates.org/index.php?page=october-9-1996-debate-transcript
Downloading October_16,_1996:_The_Second_Clinton-Dole_Presidential_Debate at http://www.debates.org/index.php?page=october-16-1996-debate-transcript
Downloading First_half_of_Debate at http://www.debates.org/index.php?page=october-11-1992-first-half-debate-transcript
Downloading Second_half_of_Debate at http://www.debates.org/index.php?page=october-11-1992-second-half-debate-transcript
Downloading October_13,_1992:_The_Gore-Quayle-Stockdale_Vice_Presidential_Debate at http://www.debates.org/index.php?page=october-13-1992-debate-transcript
Downloading First_half_of_Debate at http://www.debates.org/index.php?page=october-15-1992-first-half-debate-transcript
Downloading Second_half_of_Debate at http://www.debates.org/index.php?page=october-15-1992-second-half-debate-transcript
Downloading October_19,_1992:_The_Third_Clinton-Bush-Perot_Presidential_Debate at http://www.debates.org/index.php?page=october-19-1992-debate-transcript
Downloading September_25,_1988:_The_First_Bush-Dukakis_Presidential_Debate at http://www.debates.org/index.php?page=september-25-1988-debate-transcript
Downloading October_5,_1988:_The_Bentsen-Quayle_Vice_Presidential_Debate at http://www.debates.org/index.php?page=october-5-1988-debate-transcripts
Downloading October_13,_1988:_The_Second_Bush-Dukakis_Presidential_Debate at http://www.debates.org/index.php?page=october-13-1988-debate-transcript
Downloading October_7,_1984:_The_First_Reagan-Mondale_Presidential_Debate at http://www.debates.org/index.php?page=october-7-1984-debate-transcript
Downloading October_11,_1984:_The_Bush-Ferraro_Vice_Presidential_Debate at http://www.debates.org/index.php?page=october-11-1984-debate-transcript
Downloading October_21,_1984:_The_Second_Reagan-Mondale_Presidential_Debate at http://www.debates.org/index.php?page=october-21-1984-debate-transcript
Downloading September_21,_1980:_The_Anderson-Reagan_Presidential_Debate at http://www.debates.org/index.php?page=september-21-1980-debate-transcript
Downloading October_28,_1980:_The_Carter-Reagan_Presidential_Debate at http://www.debates.org/index.php?page=october-28-1980-debate-transcript
Downloading September_23,_1976:_The_First_Carter-Ford_Presidential_Debate at http://www.debates.org/index.php?page=september-23-1976-debate-transcript
Downloading October_6,_1976:_The_Second_Carter-Ford_Presidential_Debate at http://www.debates.org/index.php?page=october-6-1976-debate-transcript
Downloading October_22,_1976:_The_Third_Carter-Ford_Presidential_Debate at http://www.debates.org/index.php?page=october-22-1976-debate-transcript
Downloading September_26,_1960:_The_First_Kennedy-Nixon_Presidential_Debate at http://www.debates.org/index.php?page=september-26-1960-debate-transcript
Downloading October_7,_1960:_The_Second_Kennedy-Nixon_Presidential_Debate at http://www.debates.org/index.php?page=october-7-1960-debate-transcript
Downloading October_13,_1960:_The_Third_Kennedy-Nixon_Presidential_Debate at http://www.debates.org/index.php?page=october-13-1960-debate-transcript
Downloading October_21,_1960:_The_Fourth_Kennedy-Nixon_Presidential_Debate at http://www.debates.org/index.php?page=october-21-1960-debate-transcript