notebook.community

Edit and run



In [1]:

    
DOC = '''Supreme Court Oral Argument Predictor (SCOAP)

Creates models for predicting outcomes of Supreme Court oral
arguments. Pulls justice-specific phrases associated with
winning and losing arguments.

LICENSE:    MIT
AUTHOR:     theonaunheim@gmail.com
COPYRIGHT:  2017, Theo Naunheim
VERSION:    0.4.3
MODIFIED:   2017-03-26
DATA DIR:   .scoap
REQUIRES:   Jupyter Notebook and Xpdf/Poppler

WARNING:    THIS SCRIPT DOWNLOADS AND PROCESSES A LARGE
            VOLUME OF MATERIAL. IT IS COMPUTATIONALLY
            EXPENSIVE AND TAKES A NON-NEGLIGIBLE AMOUNT
            OF TIME AND BANDWIDTH.

'''



In [2]:

    
# Standard library imports
import asyncio
import copy
import itertools
import os
import re
import string
import sys
import zipfile

# Web/data imports
import bs4
import numpy as np
import pandas as pd
import requests

# Scikit learn imports
import sklearn
import sklearn.feature_extraction
import sklearn.metrics
import sklearn.model_selection
import sklearn.linear_model
import sklearn.naive_bayes
import sklearn.pipeline
import sklearn.svm
import sklearn.ensemble



In [3]:

    
# Constants and constant-ish things.

# Debug flag cuts down amount of data used.
DEBUG = False

# Website URLs for downloads
TRANSCRIPT_INFO = 'https://www.supremecourt.gov/oral_arguments/argument_transcript/'
TRANSCRIPT_DOWNLOADS = 'https://www.supremecourt.gov/oral_arguments/'
SCDB_CSV_DOWNLOAD_LINK = 'http://scdb.wustl.edu/_brickFiles/2016_01/SCDB_2016_01_justiceCentered_Docket.csv.zip'

# Transcript years for dynamic URL creation
START_YEAR = 2006
END_YEAR = 2017

# OS-specific path for PDF to text extraction utility.
if os.name == 'nt':
    PDF2TEXT_PATH = r'C:\Program Files\Xpdf\pdftotext.exe'
elif os.name == 'posix':
    PDF2TEXT_PATH = '/usr/bin/pdftotext'
else:
    raise Exception('This script requires Xpdf/Poppler utility pdftotext to run.')

# Paths for SCOAP specific data.
DATA_FOLDER = os.path.join(os.path.expanduser('~'), '.scoap')
SCDB_ZIP_NAME = SCDB_CSV_DOWNLOAD_LINK.rpartition('/')[2]
SCDB_CSV_NAME = SCDB_ZIP_NAME.rpartition('.')[0]
SCDB_ZIP_PATH = os.path.join(DATA_FOLDER, SCDB_ZIP_NAME)
SCDB_CSV_PATH = SCDB_ZIP_PATH.rpartition('.')[0]

# The current term justices and cases we wish to analyze.
CURRENT_JUSTICES = ['Roberts', 'Kennedy', 'Thomas', 'Ginsburg', 'Breyer', 'Alito', 'Sotomayor', 'Kagan']
CURRENT_CASES = ['15-214', '15-1031', '15-497', '15-1189', '16-369',
                 '16-254', '15-118', '15-1248', '16-32', '15-1194',
                 '16-54', '15-9260', '16-149', '16-1256', '15-1500',
                 '15-1391', '15-1406', '15-827', '15-1498', '16-348',
                 '15-1293', '15-1358', '15-8544', '15-797', '15-1204',
                 '15-680', '15-1262', '14-1538', '15-649', '15-866',
                 '15-513', '15-927', '15-423', '15-1251', '15-1111',
                 '14-1055', '15-1191', '15-537', '15-5991', '15-628', 
                 '15-8049', '14-9496', '15-777', '15-606', '15-7250',]

# Voting relationships for OT15, courtesy of http://www.scotusblog.com/statistics/
VOTING_RELATIONSHIPS = {"KENNEDY"  :{"KENNEDY":1.00,"SCALIA":0.82,"THOMAS":0.71,"KAGAN":0.95,"ROBERTS":0.88,"GINSBURG":0.84,"ALITO":0.82,"BREYER":0.91,"SOTOMAYOR":0.79},
                        "SCALIA"   :{"KENNEDY":0.82,"SCALIA":1.00,"THOMAS":0.88,"KAGAN":0.82,"ROBERTS":0.88,"GINSBURG":0.71,"ALITO":0.94,"BREYER":0.82,"SOTOMAYOR":0.65},
                        "THOMAS"   :{"KENNEDY":0.71,"SCALIA":0.88,"THOMAS":1.00,"KAGAN":0.67,"ROBERTS":0.75,"GINSBURG":0.62,"ALITO":0.78,"BREYER":0.67,"SOTOMAYOR":0.64},
                        "KAGAN"    :{"KENNEDY":0.95,"SCALIA":0.82,"THOMAS":0.67,"KAGAN":1.00,"ROBERTS":0.87,"GINSBURG":0.87,"ALITO":0.81,"BREYER":0.92,"SOTOMAYOR":0.81},
                        "ROBERTS"  :{"KENNEDY":0.88,"SCALIA":0.88,"THOMAS":0.75,"KAGAN":0.87,"ROBERTS":1.00,"GINSBURG":0.78,"ALITO":0.84,"BREYER":0.84,"SOTOMAYOR":0.77},
                        "GINSBURG" :{"KENNEDY":0.84,"SCALIA":0.71,"THOMAS":0.62,"KAGAN":0.87,"ROBERTS":0.78,"GINSBURG":1.00,"ALITO":0.73,"BREYER":0.86,"SOTOMAYOR":0.88},
                        "ALITO"    :{"KENNEDY":0.82,"SCALIA":0.94,"THOMAS":0.78,"KAGAN":0.81,"ROBERTS":0.84,"GINSBURG":0.73,"ALITO":1.00,"BREYER":0.77,"SOTOMAYOR":0.64},
                        "BREYER"   :{"KENNEDY":0.91,"SCALIA":0.82,"THOMAS":0.67,"KAGAN":0.92,"ROBERTS":0.84,"GINSBURG":0.86,"ALITO":0.77,"BREYER":1.00,"SOTOMAYOR":0.83},
                        "SOTOMAYOR":{"KENNEDY":0.79,"SCALIA":0.65,"THOMAS":0.64,"KAGAN":0.81,"ROBERTS":0.77,"GINSBURG":0.88,"ALITO":0.64,"BREYER":0.83,"SOTOMAYOR":1.00}}



In [4]:

    
# Define function.
def create_dataframe():
    '''Create a skeleton for our df.'''
    df = pd.DataFrame(columns=['CASE',
                               'DOCKET',
                               'ARGUMENT_YEAR',
                               'ARGUMENT_LINK',
                               'ARGUMENT_PATH',])
    return df


# Run function.
arg_df = create_dataframe()



In [5]:

    
# Define function.
def get_argument_metadata(df, start=START_YEAR - 1, end=END_YEAR + 1):
    '''This fetches oral argument location metadata.'''
    # For each year
    for year in range(start, end):
        # Create web address and download data 
        address = TRANSCRIPT_INFO + str(year)
        r = requests.get(address)
        # Parse data
        try:
            soup = bs4.BeautifulSoup(r.text, 'lxml')
            table = soup.find('table', 'table datatables')
            for row in table.findAll('tr'):
                link = row.find('a')
                case = row.find('span')
                # Write table info to dataframe.
                if link:
                    link_text = link.text[:-2].lower()
                    case_text = case.text
                    link_tail = link.attrs['href'].lstrip('../')
                    full_link = TRANSCRIPT_DOWNLOADS + link_tail
                    # Write to frame
                    path = os.path.join(DATA_FOLDER, link_text, 'argument.pdf')
                    df = df.append({'CASE': case_text,
                                    'DOCKET': link_text,
                                    'ARGUMENT_LINK': full_link,
                                    'ARGUMENT_PATH': path,
                                    'ARGUMENT_YEAR': str(year)}, ignore_index=True)
        except AttributeError:
            print('Attribute error. Probably an empty page.')
    return df


# Run function.
arg_df = get_argument_metadata(arg_df)









    



Attribute error. Probably an empty page.



In [6]:

    
# Show dataframe for clarity.
arg_df.head(3)









    Out[6]:






  
    
      
      CASE
      DOCKET
      ARGUMENT_YEAR
      ARGUMENT_LINK
      ARGUMENT_PATH
    
  
  
    
      0
      Hudson v. Michigan (Reargued)
      04-1360
      2005
      https://www.supremecourt.gov/oral_arguments/ar...
      /home/theo/.scoap/04-1360/argument.pdf
    
    
      1
      Washington v. Recuenco
      05-83
      2005
      https://www.supremecourt.gov/oral_arguments/ar...
      /home/theo/.scoap/05-83/argument.pdf
    
    
      2
      Burlington N. & S. F. R. Co. v. White
      05-259
      2005
      https://www.supremecourt.gov/oral_arguments/ar...
      /home/theo/.scoap/05-259/argument.pdf



In [7]:

    
# Debug to shorten time during testing
if DEBUG:
    arg_df = arg_df.iloc[-10:].copy()



In [8]:

    
# Define function.
def make_directories(row):
    '''All cases get their own folder.'''
    try:
        path = os.path.join(DATA_FOLDER, row['DOCKET'])
        os.makedirs(path)
    except FileExistsError:
        pass

    
# Apply function. Output unnecessary.
_ = arg_df.apply(make_directories, axis=1)



In [9]:

    
# Define function.
def download_pdfs(row):
    '''Get PDFs and put in the folder if necessary.'''
    # If there's a link and no file, download.
    if row['ARGUMENT_LINK'] is not np.NaN:
        if os.path.exists(row['ARGUMENT_PATH']):
            return False
        r = requests.get(row['ARGUMENT_LINK'], stream=True)
        with open(row['ARGUMENT_PATH'], 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024): 
                if chunk:
                    f.write(chunk)

                    
# Apply function. No assinment required.
_ = arg_df.apply(download_pdfs, axis=1)



In [10]:

    
arg_df.head(3)









    Out[10]:






  
    
      
      CASE
      DOCKET
      ARGUMENT_YEAR
      ARGUMENT_LINK
      ARGUMENT_PATH
    
  
  
    
      0
      Hudson v. Michigan (Reargued)
      04-1360
      2005
      https://www.supremecourt.gov/oral_arguments/ar...
      /home/theo/.scoap/04-1360/argument.pdf
    
    
      1
      Washington v. Recuenco
      05-83
      2005
      https://www.supremecourt.gov/oral_arguments/ar...
      /home/theo/.scoap/05-83/argument.pdf
    
    
      2
      Burlington N. & S. F. R. Co. v. White
      05-259
      2005
      https://www.supremecourt.gov/oral_arguments/ar...
      /home/theo/.scoap/05-259/argument.pdf



In [11]:

    
# Define functions.
async def get_text(pdf_path): 
    '''This function is a coroutine for a single pdf2text.py instance.'''
    # Create the subprocess, redirect the standard output into a pipe
    process = await asyncio.create_subprocess_exec(PDF2TEXT_PATH,
                                                   pdf_path,
                                                   '-',
                                                   stdout=asyncio.subprocess.PIPE,
                                                   stderr=asyncio.subprocess.PIPE) 
    # Read output
    data = await process.communicate() 
    # Have process exit and return data.
    await process.wait()
    # Decode cp1252 for windows
    try:
        decoded_data = data[0].decode('cp1252')
    # And UTF-8 for Linux.
    except:
        decoded_data = data[0].decode()
    return decoded_data


async def get_all_text(pdf_paths):
    '''This gathers the pdf2text.py results.'''
    # Create list for return results.
    result_list = []
    # Create a list of tasks
    input_len = len(pdf_paths)
    num_chunks = (input_len // 10) + 1
    chunked_input = np.array_split(pdf_paths, num_chunks)
    # Now run each of the chunks in parallel to speed things up.
    for chunk in chunked_input:
        # Create tasks
        tasks = [get_text(path) for path in chunk]
        # Run all the tasks in parallel
        results = await asyncio.gather(*tasks)
        # Put the zipped (path, results) in result list
        for path, result in zip(chunk, results):
            result_list.append((path, result))
    return result_list


def add_arguments(df):
    '''Adds argument text to df.'''
    # Get unique PDFs
    unique_pdfs = df['ARGUMENT_PATH'].unique()
    # Windows only supports proactorloop.
    if os.name == 'nt':
        loop = asyncio.ProactorEventLoop()
    elif os.name == 'posix':
        loop = asyncio.SelectorEventLoop()
    else:
        loop == None
    asyncio.set_event_loop(loop)
    # Run our coroutine to extract text.
    arg_data = loop.run_until_complete(get_all_text(unique_pdfs))
    # Loop no longer necessary.
    loop.close()
    # Create dataframe for data.
    tdf = pd.DataFrame.from_records(arg_data, columns=['ARGUMENT_PATH', 'TEXT'])
    # Join to input df and fill na.
    df = df.merge(tdf, how='left', on='ARGUMENT_PATH').fillna('')
    return df


# Run function
arg_df = add_arguments(arg_df)



In [12]:

    
# Show dataframe for clarity.
arg_df.head(3)









    Out[12]:






  
    
      
      CASE
      DOCKET
      ARGUMENT_YEAR
      ARGUMENT_LINK
      ARGUMENT_PATH
      TEXT
    
  
  
    
      0
      Hudson v. Michigan (Reargued)
      04-1360
      2005
      https://www.supremecourt.gov/oral_arguments/ar...
      /home/theo/.scoap/04-1360/argument.pdf
      1\n\nIN THE SUPREME COURT OF THE UNITED STATES...
    
    
      1
      Washington v. Recuenco
      05-83
      2005
      https://www.supremecourt.gov/oral_arguments/ar...
      /home/theo/.scoap/05-83/argument.pdf
      1\n\nIN THE SUPREME COURT OF THE UNITED STATES...
    
    
      2
      Burlington N. & S. F. R. Co. v. White
      05-259
      2005
      https://www.supremecourt.gov/oral_arguments/ar...
      /home/theo/.scoap/05-259/argument.pdf
      1\n\nIN THE SUPREME COURT OF THE UNITED STATES...



In [13]:

    
# Define function.
def cut_unnecessary_text(df):
    '''This function cuts low information text from transcript.'''
    # First chop off the caption ('PROCEEDINGS' or 'P R O C E E D I N G S')
    capture_string = r'P\s?R\s?O\s?C\s?E\s?E\s?D\s?I\s?N\s?G\s?S([\s\S]*\Z)'
    df['TEXT'] = df['TEXT'].str.extract(capture_string,
                                        expand=False,
                                        flags=re.MULTILINE)
    # First we specify the patterns we don't want
    patterns_to_cut = [

        # Cut carriage returns and form feeds because f*** those guys.
        r'[\r\f]',

        # Remove tables at end ##:## 4 within no more than 100 chars of Alderson
        (r'\s*' +
         r'Alderson Reporting Company' + 
         # period because a.m. messes it up.
         r'[\s\S.]{0,75}\d?\d:\d?\d' * 3 +
         r'[\s\S]*' +
         r'\Z'),
        
        # Remove [2004 - 2005] footer
        r'1111 14th[\s\S]{0,100}20005',
        
        # Remove [2006 - 2016] header/footer unofficial
        r'Alderson[\s\S]{0,100}Review',

        # Remove [2006 - 2016] header/footer official
        r'Alderson[\s\S]{0,100}[oO]fficial',
    
        # Remove Genric Alderson 
        r'Alderson Reporting Company',

        # Cut court reporter annotations
        r'[(\[][\s\S]{0,100}[)\]]',

        # Cut line numbers, page numbers, all other low-information numbers
        r'[0-9]',

        # Cut PAGE
        r'[Pp][Aa][Gg][Ee]',

    ]    

    # Replace above patterns with empty space.
    for pattern in patterns_to_cut:
        df['TEXT'] = df['TEXT'].str.replace(pat=pattern,
                                            repl='',
                                            flags=re.MULTILINE)
    return df


# Run function.
arg_df = cut_unnecessary_text(arg_df)



In [14]:

    
# Show df for clarity
arg_df.head(3)









    Out[14]:






  
    
      
      CASE
      DOCKET
      ARGUMENT_YEAR
      ARGUMENT_LINK
      ARGUMENT_PATH
      TEXT
    
  
  
    
      0
      Hudson v. Michigan (Reargued)
      04-1360
      2005
      https://www.supremecourt.gov/oral_arguments/ar...
      /home/theo/.scoap/04-1360/argument.pdf
      \n\n\n\n\n\n\n\n\nCHIEF JUSTICE ROBERTS:\n\nWe...
    
    
      1
      Washington v. Recuenco
      05-83
      2005
      https://www.supremecourt.gov/oral_arguments/ar...
      /home/theo/.scoap/05-83/argument.pdf
      \n\n\n\n\n\n\n\nCHIEF JUSTICE ROBERTS:\n\nWe'l...
    
    
      2
      Burlington N. & S. F. R. Co. v. White
      05-259
      2005
      https://www.supremecourt.gov/oral_arguments/ar...
      /home/theo/.scoap/05-259/argument.pdf
      \n\n\n\n\n\nCHIEF JUSTICE ROBERTS:\n\nWe'll he...



In [15]:

    
# Define function.
def create_heading_columns(df):
    '''This function finds the section headings for each case'''
    # Create Petitioner oral argument heading col
    pet_arg_pattern = ''.join([r'(',
                               r'ORAL ARGUMENT[\S\s]{,200}',
                               r'(?:PETITIONER|APPELLANT)S?',
                               # As appointed by this court optional
                               r'(?:[\S\s]{,50}THIS COURT)?',
                               r')'])
    df['PET_ARG_HEADING'] = df['TEXT'].str.extract(pet_arg_pattern,
                                                   expand=False,
                                                   flags=re.MULTILINE).fillna('')
        
    # Create Respondent oral argument heading col
    res_arg_pattern = ''.join([r'(ORAL ARGUMENT[\S\s]{,200}',
                               r'(?:RESPONDENT|APPELLEE)S?',
                               # As appointed by this court optional
                               r'(?:[\S\s]{,50}THIS COURT)?',
                               r')'])
    df['RES_ARG_HEADING'] = df['TEXT'].str.extract(res_arg_pattern,
                                                   expand=False,
                                                   flags=re.MULTILINE).fillna('')
    
    # Create Petitioner rebuttal heading col
    pet_reb_pattern = ''.join([r'(REBUTTAL ARGUMENT[\S\s]{,200}',
                               r'(?:PETITIONER|APPELLANT)S?',
                               # As appointed by this court optional
                               r'(?:[\S\s]{,50}THIS COURT)?',
                               r')'])
    df['PET_REB_HEADING'] = df['TEXT'].str.extract(pet_reb_pattern,
                                                   expand=False,
                                                   flags=re.MULTILINE).fillna('')
    
    return df


# TODO:
# IN ##-#### optional ... r'(?:[\S\s]{,10}IN[\S\s]{,5}-)?'

# Run function
arg_df = create_heading_columns(arg_df).fillna('')



In [16]:

    
# Define function.
def extract_petitioner_arg(df):
    '''Pulls out petitioner argument using section headers.'''

    # Create extraction (between pet arg heading and res arg heading)
    df['PET_ARG_REGEX'] = df.apply(lambda row: ''.join([row['PET_ARG_HEADING'],
                                                        r'([\S\s]*?)',
                                                        r'(?:ORAL)']),
                                   axis=1)
    
    # Extract and create petitioner argument column
    df['PETITIONER_ARGUMENT'] = df.apply(lambda row: re.findall(row['PET_ARG_REGEX'],
                                                                row['TEXT'],
                                                                flags=re.MULTILINE),
                                         axis=1)
    
    # If no match, empty string. Else, take match's.
    df['PETITIONER_ARGUMENT'] = df['PETITIONER_ARGUMENT'].map(lambda matches: ''.join(matches))
    
    return df


# Run function.
arg_df = extract_petitioner_arg(arg_df)



In [17]:

    
# Define function.
def extract_respondent_arg(df):
    '''Pulls out respondent argument using previously generated section heads.'''
    # Get respondent argument (between res arg heading and pet reb heading)
    df['RES_ARG_REGEX'] = df.apply(lambda row: ''.join([row['RES_ARG_HEADING'],
                                                        r'([\S\s]*?)',
                                                        r'(?:REBUTTAL)|(?:ORAL)']),
                                   axis=1)

    df['RESPONDENT_ARGUMENT'] = df.apply(lambda row: re.findall(row['RES_ARG_REGEX'],
                                                                row['TEXT'],
                                                                flags=re.MULTILINE),
                                         axis=1)
    
    # If no match, empty string. Else, take match's.
    df['RESPONDENT_ARGUMENT'] = df['RESPONDENT_ARGUMENT'].map(lambda matches: ''.join(matches))
    
    return df


# Run function.
arg_df = extract_respondent_arg(arg_df)



In [18]:

    
# Define function.
def extract_petitioner_reb(df):
    '''Pulls out petitioner rebuttal using previously generated section heads.'''
    # Get petitioner rebuttal (between pet reb heading and res reb heading)
    df['PET_REB_REGEX'] = df.apply(lambda row: ''.join([row['PET_REB_HEADING'],
                                                        r'([\S\s]*?)',
                                                        r'(?:\Z)']),
                                   axis=1)

    df['PETITIONER_REBUTTAL'] = df.apply(lambda row: re.search(row['PET_REB_REGEX'],
                                                               row['TEXT'],
                                                               flags=re.MULTILINE).group(1),
                                         axis=1)
    
    return df


# Run function.
arg_df = extract_petitioner_reb(arg_df)



In [19]:

    
# TODO. If transcript omits info (e.g. SAMSUNG/WAXMAN 15-777), no match.
len(arg_df)









    Out[19]:





854



In [20]:

    
# Show dataframe for clarity
arg_df.head(3)









    Out[20]:






  
    
      
      CASE
      DOCKET
      ARGUMENT_YEAR
      ARGUMENT_LINK
      ARGUMENT_PATH
      TEXT
      PET_ARG_HEADING
      RES_ARG_HEADING
      PET_REB_HEADING
      PET_ARG_REGEX
      PETITIONER_ARGUMENT
      RES_ARG_REGEX
      RESPONDENT_ARGUMENT
      PET_REB_REGEX
      PETITIONER_REBUTTAL
    
  
  
    
      0
      Hudson v. Michigan (Reargued)
      04-1360
      2005
      https://www.supremecourt.gov/oral_arguments/ar...
      /home/theo/.scoap/04-1360/argument.pdf
      \n\n\n\n\n\n\n\n\nCHIEF JUSTICE ROBERTS:\n\nWe...
      ORAL ARGUMENT OF DAVID A. MORAN\n\n\n\nON BEHA...
      ORAL ARGUMENT OF TIMOTHY A. BAUGHMAN\n\n\n\nON...
      REBUTTAL ARGUMENT OF DAVID A. MORAN\n\n\n\nON ...
      ORAL ARGUMENT OF DAVID A. MORAN\n\n\n\nON BEHA...
      \n\n\n\n\nMR. MORAN:\n\nMr. Chief Justice, and...
      ORAL ARGUMENT OF TIMOTHY A. BAUGHMAN\n\n\n\nON...
      \n\n\n\n\n\nMR. BAUGHMAN:\n\nThank you, Mr. Mo...
      REBUTTAL ARGUMENT OF DAVID A. MORAN\n\n\n\nON ...
      \n\n\n\nMR. MORAN:\n\n\n\nThank you --\n\nCHIE...
    
    
      1
      Washington v. Recuenco
      05-83
      2005
      https://www.supremecourt.gov/oral_arguments/ar...
      /home/theo/.scoap/05-83/argument.pdf
      \n\n\n\n\n\n\n\nCHIEF JUSTICE ROBERTS:\n\nWe'l...
      ORAL ARGUMENT OF JAMES M. WHISMAN\n\n\n\nON BE...
      ORAL ARGUMENT OF GREGORY C. LINK\n\n\n\nON BEH...
      REBUTTAL ARGUMENT OF JAMES M. WHISMAN\n\n\n\nO...
      ORAL ARGUMENT OF JAMES M. WHISMAN\n\n\n\nON BE...
      \n\n\n\n\n\nMR. WHISMAN:\n\nMr. Chief Justice,...
      ORAL ARGUMENT OF GREGORY C. LINK\n\n\n\nON BEH...
      \n\n\n\n\nAnd on\n\nMR. LINK:\n\nMr. Chief Jus...
      REBUTTAL ARGUMENT OF JAMES M. WHISMAN\n\n\n\nO...
      \n\n\n\nJUSTICE SCALIA:\n\nThank you, counsel....
    
    
      2
      Burlington N. & S. F. R. Co. v. White
      05-259
      2005
      https://www.supremecourt.gov/oral_arguments/ar...
      /home/theo/.scoap/05-259/argument.pdf
      \n\n\n\n\n\nCHIEF JUSTICE ROBERTS:\n\nWe'll he...
      ORAL ARGUMENT OF CARTER G. PHILLIPS\n\n\n\nON ...
      ORAL ARGUMENT OF DONALD A. DONATI\n\n\n\nON BE...
      REBUTTAL ARGUMENT OF CARTER G. PHILLIPS\n\n\n\...
      ORAL ARGUMENT OF CARTER G. PHILLIPS\n\n\n\nON ...
      \n\n\n\n\n\nMR. PHILLIPS:\n\nThank you, Mr. Ch...
      ORAL ARGUMENT OF DONALD A. DONATI\n\n\n\nON BE...
      \n\n\n\n\n\nMR. DONATI:\n\nMr. Chief Justice, ...
      REBUTTAL ARGUMENT OF CARTER G. PHILLIPS\n\n\n\...
      \n\n\n\nMR. PHILLIPS:\n\nThank you, Mr. Chief ...



In [21]:

    
bak = arg_df.copy()



In [22]:

    
def split_arguments(df):
    '''Split argument into a series of comments.'''    
    # Must be double quote because raw string addressing ' for O'Connor
    comment_pattern = r"([A-Z.'\s]{5,25}:\s[\s\S]*?)(?=[A-Z'.\s]{5,25}[:\Z])"
    for column in ['PETITIONER_ARGUMENT',
                   'RESPONDENT_ARGUMENT',
                   'PETITIONER_REBUTTAL']:
        # We only want periods in the middle of names.
        df[column] = df[column].str.findall(comment_pattern)
    return df


# Run functions
arg_df = split_arguments(arg_df)



In [23]:

    
def tuplify_cell(cell_value):
    '''Helper function for tuplify_argument().'''
    return_value = []
    for comment in cell_value:
        justice, _, comment = comment.partition(':')
        return_value.append(tuple([justice.replace('.', '').strip(),
                                   comment.strip()]))
    return return_value


def tuplify_arguments(df):
    '''Turn question strings into (justice, text) tuples.'''
    for column in ['PETITIONER_ARGUMENT',
                   'RESPONDENT_ARGUMENT',
                   'PETITIONER_REBUTTAL']:
        df[column] = df[column].map(tuplify_cell)
    return df.fillna('')


# Run function
arg_df = tuplify_arguments(arg_df)



In [24]:

    
def condense_cell(cell_value):
    '''Helper function for condense_argument().'''
    return_dict = {}
    for input_tuple in cell_value:
        justice, comment = input_tuple
        try:
            return_dict[justice].append(comment)
        except KeyError:
            return_dict[justice] = [comment]
    return return_dict


def condense_arguments(df):
    '''Turn args into: {'justice': ['comment 1', 'comment 2']}'''
    for column in ['PETITIONER_ARGUMENT',
                   'RESPONDENT_ARGUMENT',
                   'PETITIONER_REBUTTAL']:
        df[column] = df[column].map(condense_cell)
    return df


arg_df = condense_arguments(arg_df)



In [25]:

    
arg_df.head(3)









    Out[25]:






  
    
      
      CASE
      DOCKET
      ARGUMENT_YEAR
      ARGUMENT_LINK
      ARGUMENT_PATH
      TEXT
      PET_ARG_HEADING
      RES_ARG_HEADING
      PET_REB_HEADING
      PET_ARG_REGEX
      PETITIONER_ARGUMENT
      RES_ARG_REGEX
      RESPONDENT_ARGUMENT
      PET_REB_REGEX
      PETITIONER_REBUTTAL
    
  
  
    
      0
      Hudson v. Michigan (Reargued)
      04-1360
      2005
      https://www.supremecourt.gov/oral_arguments/ar...
      /home/theo/.scoap/04-1360/argument.pdf
      \n\n\n\n\n\n\n\n\nCHIEF JUSTICE ROBERTS:\n\nWe...
      ORAL ARGUMENT OF DAVID A. MORAN\n\n\n\nON BEHA...
      ORAL ARGUMENT OF TIMOTHY A. BAUGHMAN\n\n\n\nON...
      REBUTTAL ARGUMENT OF DAVID A. MORAN\n\n\n\nON ...
      ORAL ARGUMENT OF DAVID A. MORAN\n\n\n\nON BEHA...
      {'JUSTICE SCALIA': ['Counsel, what -- what do ...
      ORAL ARGUMENT OF TIMOTHY A. BAUGHMAN\n\n\n\nON...
      {'JUSTICE SCALIA': ['I'd be happy to argue tha...
      REBUTTAL ARGUMENT OF DAVID A. MORAN\n\n\n\nON ...
      {'CHIEF JUSTICE ROBERTS': ['You think there --...
    
    
      1
      Washington v. Recuenco
      05-83
      2005
      https://www.supremecourt.gov/oral_arguments/ar...
      /home/theo/.scoap/05-83/argument.pdf
      \n\n\n\n\n\n\n\nCHIEF JUSTICE ROBERTS:\n\nWe'l...
      ORAL ARGUMENT OF JAMES M. WHISMAN\n\n\n\nON BE...
      ORAL ARGUMENT OF GREGORY C. LINK\n\n\n\nON BEH...
      REBUTTAL ARGUMENT OF JAMES M. WHISMAN\n\n\n\nO...
      ORAL ARGUMENT OF JAMES M. WHISMAN\n\n\n\nON BE...
      {'JUSTICE SCALIA': ['Was deadly weapon still a...
      ORAL ARGUMENT OF GREGORY C. LINK\n\n\n\nON BEH...
      {'JUSTICE SCALIA': ['It didn't
It said the

Ho...
      REBUTTAL ARGUMENT OF JAMES M. WHISMAN\n\n\n\nO...
      {'JUSTICE SCALIA': ['Thank you, counsel.

Mr. ...
    
    
      2
      Burlington N. & S. F. R. Co. v. White
      05-259
      2005
      https://www.supremecourt.gov/oral_arguments/ar...
      /home/theo/.scoap/05-259/argument.pdf
      \n\n\n\n\n\nCHIEF JUSTICE ROBERTS:\n\nWe'll he...
      ORAL ARGUMENT OF CARTER G. PHILLIPS\n\n\n\nON ...
      ORAL ARGUMENT OF DONALD A. DONATI\n\n\n\nON BE...
      REBUTTAL ARGUMENT OF CARTER G. PHILLIPS\n\n\n\...
      ORAL ARGUMENT OF CARTER G. PHILLIPS\n\n\n\nON ...
      {'JUSTICE SCALIA': ['But  has the language and...
      ORAL ARGUMENT OF DONALD A. DONATI\n\n\n\nON BE...
      {'JUSTICE SCALIA': ['I'm -- I'm a supervisor, ...
      REBUTTAL ARGUMENT OF CARTER G. PHILLIPS\n\n\n\...
      {'MR PHILLIPS': ['Thank you, Mr. Chief Justice...



In [26]:

    
# Define function creating secondary df.
def create_scdb_df():
    '''Download data frome the SCDB and instantiate dataframe.'''

    # If we've already download database, just load.
    if os.path.exists(SCDB_CSV_PATH):
        pass
    else:
        # Get data
        r = requests.get(SCDB_CSV_DOWNLOAD_LINK, stream=True)
        with open(SCDB_ZIP_PATH, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024): 
                if chunk:
                    f.write(chunk)
        # Unzip context manager
        with zipfile.ZipFile(SCDB_ZIP_PATH) as zip_file:
            # Read data context manager
            with zip_file.open(SCDB_CSV_NAME) as pseudo_file:
                data = pseudo_file.read()
                # Write data with context manager and write to csv.
                with open(SCDB_CSV_PATH, 'wb+') as f:
                    f.write(data)
    
    # Now create dataframe from csv.
    case_df = pd.read_csv(SCDB_CSV_PATH, encoding='latin-1')

    return case_df


# Run df.
case_df = create_scdb_df()



In [27]:

    
# Show case dataframe for clarity
case_df.head(3)









    Out[27]:






  
    
      
      caseId
      docketId
      caseIssuesId
      voteId
      dateDecision
      decisionType
      usCite
      sctCite
      ledCite
      lexisCite
      ...
      majVotes
      minVotes
      justice
      justiceName
      vote
      opinion
      direction
      majority
      firstAgreement
      secondAgreement
    
  
  
    
      0
      1946-001
      1946-001-01
      1946-001-01-01
      1946-001-01-01-01-01
      11/18/1946
      1
      329 U.S. 1
      67 S. Ct. 6
      91 L. Ed. 3
      1946 U.S. LEXIS 1724
      ...
      8
      1
      86
      HHBurton
      2.0
      1.0
      1.0
      1.0
      NaN
      NaN
    
    
      1
      1946-001
      1946-001-01
      1946-001-01-01
      1946-001-01-01-01-02
      11/18/1946
      1
      329 U.S. 1
      67 S. Ct. 6
      91 L. Ed. 3
      1946 U.S. LEXIS 1724
      ...
      8
      1
      84
      RHJackson
      1.0
      1.0
      2.0
      2.0
      NaN
      NaN
    
    
      2
      1946-001
      1946-001-01
      1946-001-01-01
      1946-001-01-01-01-03
      11/18/1946
      1
      329 U.S. 1
      67 S. Ct. 6
      91 L. Ed. 3
      1946 U.S. LEXIS 1724
      ...
      8
      1
      81
      WODouglas
      1.0
      1.0
      2.0
      2.0
      NaN
      NaN
    
  

3 rows × 61 columns



In [28]:

    
cut_arg_df = arg_df[['DOCKET',
                     'CASE', 
                     'PETITIONER_ARGUMENT',
                     #'RESPONDENT_ARGUMENT',
                     'PETITIONER_REBUTTAL']]

cut_arg_df.head(3)









    Out[28]:






  
    
      
      DOCKET
      CASE
      PETITIONER_ARGUMENT
      PETITIONER_REBUTTAL
    
  
  
    
      0
      04-1360
      Hudson v. Michigan (Reargued)
      {'JUSTICE SCALIA': ['Counsel, what -- what do ...
      {'CHIEF JUSTICE ROBERTS': ['You think there --...
    
    
      1
      05-83
      Washington v. Recuenco
      {'JUSTICE SCALIA': ['Was deadly weapon still a...
      {'JUSTICE SCALIA': ['Thank you, counsel.

Mr. ...
    
    
      2
      05-259
      Burlington N. & S. F. R. Co. v. White
      {'JUSTICE SCALIA': ['But  has the language and...
      {'MR PHILLIPS': ['Thank you, Mr. Chief Justice...



In [29]:

    
cut_case_df = case_df[['docket', 'majority', 'partyWinning', 'justiceName']]
cut_case_df.columns = ['DOCKET', 'majority', 'partyWinning', 'JUSTICE']

cut_case_df.head(3)









    Out[29]:






  
    
      
      DOCKET
      majority
      partyWinning
      JUSTICE
    
  
  
    
      0
      24
      1.0
      1.0
      HHBurton
    
    
      1
      24
      2.0
      1.0
      RHJackson
    
    
      2
      24
      2.0
      1.0
      WODouglas



In [30]:

    
# Join case_df and arg_df to create joint dataframe jdf.
jdf = pd.merge(cut_arg_df,
               cut_case_df,
               how='left',
               on='DOCKET')

# Drop (Reargued) because it creates dupes.
contains_reargue = jdf['CASE'].str.contains('Reargue')
jdf = jdf[~contains_reargue]



In [31]:

    
# Show joint dataframe for clarity (all tail end will be np.NaN)
jdf.head(3)









    Out[31]:






  
    
      
      DOCKET
      CASE
      PETITIONER_ARGUMENT
      PETITIONER_REBUTTAL
      majority
      partyWinning
      JUSTICE
    
  
  
    
      9
      05-83
      Washington v. Recuenco
      {'JUSTICE SCALIA': ['Was deadly weapon still a...
      {'JUSTICE SCALIA': ['Thank you, counsel.

Mr. ...
      2.0
      1.0
      JGRoberts
    
    
      10
      05-83
      Washington v. Recuenco
      {'JUSTICE SCALIA': ['Was deadly weapon still a...
      {'JUSTICE SCALIA': ['Thank you, counsel.

Mr. ...
      1.0
      1.0
      JPStevens
    
    
      11
      05-83
      Washington v. Recuenco
      {'JUSTICE SCALIA': ['Was deadly weapon still a...
      {'JUSTICE SCALIA': ['Thank you, counsel.

Mr. ...
      2.0
      1.0
      AScalia



In [32]:

    
'''

From documentation on 'partyWinning' column:

http://scdb.wustl.edu/documentation.php?var=partyWinning

0: no favorable disposition for petitioning party apparent
1: petitioning party received a favorable disposition
2: favorable disposition for petitioning party unclear 

We want to be able to separate those who won from those who did not
win. Consequently, we drop all cases where the decision was
ambiguous, or the winner was not apparent.

We then convert the 0 to False and the 1 to True, which gives
us a True/False 'PETITIONER_WINS' column.

PETITIONER_WINS
0 -> False
1 -> True

If the petitioner wins, it is because it was the decision of the
majority of the court. We can accurately describe the nature of this
column as 'PETITIONER_WINS_MAJORITY'.

'''

jdf = jdf[jdf['partyWinning'] != 2.0].copy()
jdf['PETITIONER_WINS_MAJORITY'] = jdf['partyWinning'].astype(bool)



In [33]:

    
'''

From documentation on 'majority' columns:

http://scdb.wustl.edu/documentation.php?var=majority

1: dissent
2: majority 

We want to convert this into a 'VOTED_WITH_MAJORITY' column.
To do this we subtract one from each and every value so that
dissent becomes 0 and majority becomes 1.

majority
0: dissent (result 1 - 1)
1: majority (result from 2 - 1)

Then we convert the 0 to False and 1 to True, so that we have a
'VOTED_WITH_MAJORITY' column.

VOTED_WITH_MAJORITY
0 -> False
1 -> True

'''

jdf['majority_minus_one'] = jdf['majority'] - 1
jdf['VOTED_WITH_MAJORITY'] = jdf['majority_minus_one'].astype(bool)
jdf









    Out[33]:






  
    
      
      DOCKET
      CASE
      PETITIONER_ARGUMENT
      PETITIONER_REBUTTAL
      majority
      partyWinning
      JUSTICE
      PETITIONER_WINS_MAJORITY
      majority_minus_one
      VOTED_WITH_MAJORITY
    
  
  
    
      9
      05-83
      Washington v. Recuenco
      {'JUSTICE SCALIA': ['Was deadly weapon still a...
      {'JUSTICE SCALIA': ['Thank you, counsel.

Mr. ...
      2.0
      1.0
      JGRoberts
      True
      1.0
      True
    
    
      10
      05-83
      Washington v. Recuenco
      {'JUSTICE SCALIA': ['Was deadly weapon still a...
      {'JUSTICE SCALIA': ['Thank you, counsel.

Mr. ...
      1.0
      1.0
      JPStevens
      True
      0.0
      False
    
    
      11
      05-83
      Washington v. Recuenco
      {'JUSTICE SCALIA': ['Was deadly weapon still a...
      {'JUSTICE SCALIA': ['Thank you, counsel.

Mr. ...
      2.0
      1.0
      AScalia
      True
      1.0
      True
    
    
      12
      05-83
      Washington v. Recuenco
      {'JUSTICE SCALIA': ['Was deadly weapon still a...
      {'JUSTICE SCALIA': ['Thank you, counsel.

Mr. ...
      2.0
      1.0
      AMKennedy
      True
      1.0
      True
    
    
      13
      05-83
      Washington v. Recuenco
      {'JUSTICE SCALIA': ['Was deadly weapon still a...
      {'JUSTICE SCALIA': ['Thank you, counsel.

Mr. ...
      2.0
      1.0
      DHSouter
      True
      1.0
      True
    
    
      14
      05-83
      Washington v. Recuenco
      {'JUSTICE SCALIA': ['Was deadly weapon still a...
      {'JUSTICE SCALIA': ['Thank you, counsel.

Mr. ...
      2.0
      1.0
      CThomas
      True
      1.0
      True
    
    
      15
      05-83
      Washington v. Recuenco
      {'JUSTICE SCALIA': ['Was deadly weapon still a...
      {'JUSTICE SCALIA': ['Thank you, counsel.

Mr. ...
      1.0
      1.0
      RBGinsburg
      True
      0.0
      False
    
    
      16
      05-83
      Washington v. Recuenco
      {'JUSTICE SCALIA': ['Was deadly weapon still a...
      {'JUSTICE SCALIA': ['Thank you, counsel.

Mr. ...
      2.0
      1.0
      SGBreyer
      True
      1.0
      True
    
    
      17
      05-83
      Washington v. Recuenco
      {'JUSTICE SCALIA': ['Was deadly weapon still a...
      {'JUSTICE SCALIA': ['Thank you, counsel.

Mr. ...
      2.0
      1.0
      SAAlito
      True
      1.0
      True
    
    
      18
      05-259
      Burlington N. & S. F. R. Co. v. White
      {'JUSTICE SCALIA': ['But  has the language and...
      {'MR PHILLIPS': ['Thank you, Mr. Chief Justice...
      2.0
      0.0
      JGRoberts
      False
      1.0
      True
    
    
      19
      05-259
      Burlington N. & S. F. R. Co. v. White
      {'JUSTICE SCALIA': ['But  has the language and...
      {'MR PHILLIPS': ['Thank you, Mr. Chief Justice...
      2.0
      0.0
      JPStevens
      False
      1.0
      True
    
    
      20
      05-259
      Burlington N. & S. F. R. Co. v. White
      {'JUSTICE SCALIA': ['But  has the language and...
      {'MR PHILLIPS': ['Thank you, Mr. Chief Justice...
      2.0
      0.0
      AScalia
      False
      1.0
      True
    
    
      21
      05-259
      Burlington N. & S. F. R. Co. v. White
      {'JUSTICE SCALIA': ['But  has the language and...
      {'MR PHILLIPS': ['Thank you, Mr. Chief Justice...
      2.0
      0.0
      AMKennedy
      False
      1.0
      True
    
    
      22
      05-259
      Burlington N. & S. F. R. Co. v. White
      {'JUSTICE SCALIA': ['But  has the language and...
      {'MR PHILLIPS': ['Thank you, Mr. Chief Justice...
      2.0
      0.0
      DHSouter
      False
      1.0
      True
    
    
      23
      05-259
      Burlington N. & S. F. R. Co. v. White
      {'JUSTICE SCALIA': ['But  has the language and...
      {'MR PHILLIPS': ['Thank you, Mr. Chief Justice...
      2.0
      0.0
      CThomas
      False
      1.0
      True
    
    
      24
      05-259
      Burlington N. & S. F. R. Co. v. White
      {'JUSTICE SCALIA': ['But  has the language and...
      {'MR PHILLIPS': ['Thank you, Mr. Chief Justice...
      2.0
      0.0
      RBGinsburg
      False
      1.0
      True
    
    
      25
      05-259
      Burlington N. & S. F. R. Co. v. White
      {'JUSTICE SCALIA': ['But  has the language and...
      {'MR PHILLIPS': ['Thank you, Mr. Chief Justice...
      2.0
      0.0
      SGBreyer
      False
      1.0
      True
    
    
      26
      05-259
      Burlington N. & S. F. R. Co. v. White
      {'JUSTICE SCALIA': ['But  has the language and...
      {'MR PHILLIPS': ['Thank you, Mr. Chief Justice...
      2.0
      0.0
      SAAlito
      False
      1.0
      True
    
    
      27
      05-352
      United States v. Gonzalez-Lopez
      {'JUSTICE SCALIA': ['When did -- when did we

...
      {'JUSTICE STEVENS': ['Well, Mr. Dreeben, do yo...
      2.0
      0.0
      JPStevens
      False
      1.0
      True
    
    
      28
      05-352
      United States v. Gonzalez-Lopez
      {'JUSTICE SCALIA': ['When did -- when did we

...
      {'JUSTICE STEVENS': ['Well, Mr. Dreeben, do yo...
      2.0
      0.0
      AScalia
      False
      1.0
      True
    
    
      29
      05-352
      United States v. Gonzalez-Lopez
      {'JUSTICE SCALIA': ['When did -- when did we

...
      {'JUSTICE STEVENS': ['Well, Mr. Dreeben, do yo...
      1.0
      0.0
      AMKennedy
      False
      0.0
      False
    
    
      30
      05-352
      United States v. Gonzalez-Lopez
      {'JUSTICE SCALIA': ['When did -- when did we

...
      {'JUSTICE STEVENS': ['Well, Mr. Dreeben, do yo...
      2.0
      0.0
      DHSouter
      False
      1.0
      True
    
    
      31
      05-352
      United States v. Gonzalez-Lopez
      {'JUSTICE SCALIA': ['When did -- when did we

...
      {'JUSTICE STEVENS': ['Well, Mr. Dreeben, do yo...
      1.0
      0.0
      CThomas
      False
      0.0
      False
    
    
      32
      05-352
      United States v. Gonzalez-Lopez
      {'JUSTICE SCALIA': ['When did -- when did we

...
      {'JUSTICE STEVENS': ['Well, Mr. Dreeben, do yo...
      2.0
      0.0
      RBGinsburg
      False
      1.0
      True
    
    
      33
      05-352
      United States v. Gonzalez-Lopez
      {'JUSTICE SCALIA': ['When did -- when did we

...
      {'JUSTICE STEVENS': ['Well, Mr. Dreeben, do yo...
      2.0
      0.0
      SGBreyer
      False
      1.0
      True
    
    
      34
      05-352
      United States v. Gonzalez-Lopez
      {'JUSTICE SCALIA': ['When did -- when did we

...
      {'JUSTICE STEVENS': ['Well, Mr. Dreeben, do yo...
      1.0
      0.0
      JGRoberts
      False
      0.0
      False
    
    
      35
      05-352
      United States v. Gonzalez-Lopez
      {'JUSTICE SCALIA': ['When did -- when did we

...
      {'JUSTICE STEVENS': ['Well, Mr. Dreeben, do yo...
      1.0
      0.0
      SAAlito
      False
      0.0
      False
    
    
      36
      05-5992
      Zedner v. United States
      {'JUSTICE SCALIA': ['If the

The delay -- the ...
      {'JUSTICE SCALIA': ['People can't

And the

I ...
      2.0
      1.0
      JPStevens
      True
      1.0
      True
    
    
      37
      05-5992
      Zedner v. United States
      {'JUSTICE SCALIA': ['If the

The delay -- the ...
      {'JUSTICE SCALIA': ['People can't

And the

I ...
      2.0
      1.0
      AScalia
      True
      1.0
      True
    
    
      38
      05-5992
      Zedner v. United States
      {'JUSTICE SCALIA': ['If the

The delay -- the ...
      {'JUSTICE SCALIA': ['People can't

And the

I ...
      2.0
      1.0
      AMKennedy
      True
      1.0
      True
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      7147
      15-1406
      Goodyear Tire & Rubber Co. v. Haeger
      {'JUSTICE KAGAN': ['And,

Mr. Bergeron, could ...
      {'JUSTICE SOTOMAYOR': ['There is something --
...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
    
      7148
      15-827
      Endrew F. v. Douglas County School Dist. RE-1
      {'MR FISHER': ['Mr. Chief Justice, and may it
...
      {'MR FISHER': ['Three points, Your Honors.

Tw...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
    
      7149
      15-1498
      Lynch v. Dimaya
      {'MR KNEEDLER': ['Mr. Chief Justice, and may i...
      {'MR KNEEDLER': ['First, we explained in our o...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
    
      7150
      16-348
      Midland Funding, LLC v. Johnson
      {'JUSTICE KAGAN': ['Before you get to that



...
      {'MR SHANMUGAM': ['Thank you,






Mr. Chief ...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
    
      7151
      15-1293
      Lee v. Tam
      {'MR STEWART': ['Thank you, Mr. Chief Justice,...
      {'MR STEWART': ['Let make three quick points.
...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
    
      7152
      15-1358
      Ziglar v. Abbasi
      {'GENERAL GERSHENGORN': ['Mr. Chief Justice, a...
      {'GENERAL GERSHENGORN': ['But if

Thank you, M...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
    
      7153
      15-8544
      Beckles v. United States
      {'CHIEF JUSTICE ROBERTS': ['right?



of depar...
      {'MS BERGMANN': ['I'd like to make -- start

w...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
    
      7154
      15-797
      Moore v. Texas
      {'JUSTICE KAGAN': ['', 'We wouldn't need that,...
      {'CHIEF JUSTICE ROBERTS': [''], 'MR SLOAN': ['...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
    
      7155
      15-1204
      Jennings v. Rodriguez
      {'JUSTICE KAGAN': ['sorry', 'I was going to sh...
      {'GENERAL GERSHENGORN': ['Justice.

Thank you,...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
    
      7156
      15-680
      Bethune-Hill v. Virginia State Bd. of Elections
      {'JUSTICE KAGAN': ['Mr. Elias, could I make su...
      {'MR ELIAS': ['Mr. Chief Justice, and may it

...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
    
      7157
      15-1262
      McCrory v. Harris
      {'JUSTICE KAGAN': ['Mr. Clement, that passage
...
      {'MR CLEMENT': ['A few points in rebuttal.



...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
    
      7158
      14-1538
      Life Technologies Corp. v. Promega Corp.
      {'JUSTICE KAGAN': ['that from?

Where -- where...
      {'MR PHILLIPS': ['Thank you, Mr. Chief Justice...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
    
      7159
      15-649
      Czyzewski v. Jevic Holding Corp.
      {'JUSTICE KAGAN': ['', 'isn't mentioned somepl...
      {'MS SPINELLI': ['Respondents' position fails
...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
    
      7160
      15-497
      Fry v. Napoleon Community Schools
      {'JUSTICE KAGAN': ['Could -- could I ask about...
      {'MR BAGENSTOS': ['Thank you, Mr. Chief

Justi...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
    
      7161
      15-866
      Star Athletica, L.L.C. v. Varsity Brands, Inc.
      {'JUSTICE KAGAN': ['How is your argument



di...
      {'JUSTICE KAGAN': ['Well, can't the school jus...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
    
      7162
      15-513
      State Farm Fire & Casualty Co. v. United State...
      {'JUSTICE KAGAN': ['', '', 'couple of times --...
      {'MS SULLIVAN': ['When you write the opinion

...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
    
      7163
      15-927
      SCA Hygiene Products Aktiebolag v. First Quali...
      {'JUSTICE SOTOMAYOR': ['They have some cases

...
      {'JUSTICE KAGAN': ['Well, Mr. Black, I take it...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
    
      7164
      15-423
      Bolivarian Republic of Venezuela v. Helmerich ...
      {'JUSTICE KAGAN': ['In the last provision of

...
      {'MS STETSON': ['Ms. Carroll describes this

c...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
    
      7165
      15-1251
      NLRB v. SW General, Inc.
      {'GENERAL GERSHENGORN': ['Mr. Chief Justice, a...
      {'GENERAL GERSHENGORN': ['Justice.

Thank you,...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
    
      7166
      15-1111
      Bank of America Corp. v. Miami
      {'MR KATYAL': ['Thank you, Mr. Chief Justice,
...
      {'MR KATYAL': ['Four points, Your Honor.



Fi...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
    
      7167
      14-1055
      Lightfoot v. Cendant Mortgage
      {'CHIEF JUSTICE ROBERTS': ['So a dozen

You do...
      {'JUSTICE KAGAN': ['personal jurisdiction.



...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
    
      7168
      15-1191
      Lynch v. Morales-Santana
      {'MR KNEEDLER': ['Mr. Chief Justice, and may i...
      {'MR KNEEDLER': ['Thank you, Mr. Chief Justice...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
    
      7169
      15-537
      Bravo-Fernandez v. United States
      {'JUSTICE KAGAN': ['Ms. Blatt, it does seem to...
      {'MS BLATT': ['Thanks.

Thank you, Mr. Chief

...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
    
      7170
      15-5991
      Shaw v. United States
      {'JUSTICE KAGAN': ['So, Ms. Bell, I guess -- I...
      {'MS BELL': ['Thank you, Mr. Chief Justice.


...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
    
      7171
      15-628
      Salman v. United States
      {'JUSTICE KAGAN': ['The only case

Ms. Shapiro...
      {'CHIEF JUSTICE ROBERTS': ['Finish your

sente...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
    
      7172
      15-8049
      Buck v. Davis
      {'JUSTICE KAGAN': ['But, for example, last yea...
      {'MS SWARNS': ['This Court has long recognized...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
    
      7173
      14-9496
      Manuel v. Joliet
      {'JUSTICE KAGAN': ['Mr. Eisenhammer, why shoul...
      {'MR EISENHAMMER': ['Thank you.



Just to ans...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
    
      7174
      15-777
      Samsung Electronics Co., v. Apple Inc.
      {'JUSTICE KAGAN': ['Could I really quickly mak...
      {'JUSTICE BREYER': ['The problem, of course, i...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
    
      7175
      15-606
      Pena-Rodriguez v. Colorado
      {'MR FISHER': ['Mr. Chief Justice, and may it
...
      {'MR FISHER': ['Thank you.

I'd like to make

...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
    
      7176
      15-7250
      Manrique v. United States
      {'JUSTICE KAGAN': ['Mr. Rashkind --', 'Mr. Ras...
      {'MR RASHKIND': ['Thank you, Your Honor.



If...
      NaN
      NaN
      NaN
      True
      NaN
      True
    
  

7132 rows × 10 columns



In [34]:

    
'''

We can determine whether a petitioner won over a specific justice based on:

1. Whether the petitioner won over a majority, and
2. Whether the specific justice was a part of that majority.

If the answer to both of these questions is the same (that is, either
both the answers are Yes or both the answers are no), then the
petitioner won over the justice.

Logically:

    P_WINS_MAJ,  J_VOTES_MAJ = P_WINS_J
    
        If petitioner wins majority and justice voted with majority, the petitioner won over the justice

    P_WINS_MAJ, ~J_VOTES_MAJ = P_LOSES_J
    
        If petitioner wins majority and justice NOT a part of the majority, petitioner did not win justice

   ~P_WINS_MAJ,  J_VOTES_MAJ = P_LOSES_J
   
        If petitioner does NOT win majority and justice voted in majority, petitioner did not win justice

   ~P_WINS_MAJ,  ~J_VOTES_MAJ = P_WINS_J
   
        If petitioner does NOT win majority and justice voted NOT in majority, petitioner won justice


'''

def determine_vote(row):
    # If petitioner wins majority
    if row['PETITIONER_WINS_MAJORITY']:
        # Pet wins majority AND justice voted with majority
        if row['VOTED_WITH_MAJORITY']:
            return True
        # Pet wins majority AND justice voted against majority
        else:
            return False
    # If petitioner loses majority
    else:
        # Pet loses majority AND justice voted with majority
        if row['VOTED_WITH_MAJORITY']:
            return False
        # Pet loses majority AND justice voted against majority
        else:
            return True

# Voted with majority
jdf['VOTED_FOR_PETITIONER'] = jdf.apply(determine_vote, axis=1)



In [35]:

    
jdf.head(3)









    Out[35]:






  
    
      
      DOCKET
      CASE
      PETITIONER_ARGUMENT
      PETITIONER_REBUTTAL
      majority
      partyWinning
      JUSTICE
      PETITIONER_WINS_MAJORITY
      majority_minus_one
      VOTED_WITH_MAJORITY
      VOTED_FOR_PETITIONER
    
  
  
    
      9
      05-83
      Washington v. Recuenco
      {'JUSTICE SCALIA': ['Was deadly weapon still a...
      {'JUSTICE SCALIA': ['Thank you, counsel.

Mr. ...
      2.0
      1.0
      JGRoberts
      True
      1.0
      True
      True
    
    
      10
      05-83
      Washington v. Recuenco
      {'JUSTICE SCALIA': ['Was deadly weapon still a...
      {'JUSTICE SCALIA': ['Thank you, counsel.

Mr. ...
      1.0
      1.0
      JPStevens
      True
      0.0
      False
      False
    
    
      11
      05-83
      Washington v. Recuenco
      {'JUSTICE SCALIA': ['Was deadly weapon still a...
      {'JUSTICE SCALIA': ['Thank you, counsel.

Mr. ...
      2.0
      1.0
      AScalia
      True
      1.0
      True
      True



In [36]:

    
# Demonstration dataframe
pd.DataFrame(data={'Justice Votes With Majority': ['Petitioner Wins Justice',
                                                   'Petitioner Loses Justice'], 
                   'Justice Votes Againt  Majority': ['Petitioner Loses Justice',
                                                      'Petitioner Wins Justice']},
             index=['Petitioner Wins Majority',
                    'Petitioner Loses Majority'])









    Out[36]:






  
    
      
      Justice Votes Againt  Majority
      Justice Votes With Majority
    
  
  
    
      Petitioner Wins Majority
      Petitioner Loses Justice
      Petitioner Wins Justice
    
    
      Petitioner Loses Majority
      Petitioner Wins Justice
      Petitioner Loses Justice



In [37]:

    
jdf[['CASE', 'JUSTICE', 'VOTED_FOR_PETITIONER']].dropna().head(9)









    Out[37]:






  
    
      
      CASE
      JUSTICE
      VOTED_FOR_PETITIONER
    
  
  
    
      9
      Washington v. Recuenco
      JGRoberts
      True
    
    
      10
      Washington v. Recuenco
      JPStevens
      False
    
    
      11
      Washington v. Recuenco
      AScalia
      True
    
    
      12
      Washington v. Recuenco
      AMKennedy
      True
    
    
      13
      Washington v. Recuenco
      DHSouter
      True
    
    
      14
      Washington v. Recuenco
      CThomas
      True
    
    
      15
      Washington v. Recuenco
      RBGinsburg
      False
    
    
      16
      Washington v. Recuenco
      SGBreyer
      True
    
    
      17
      Washington v. Recuenco
      SAAlito
      True



In [38]:

    
# Define function.
def trim_columns(df):

    # Trim columns
    df = df[['DOCKET',
             'CASE',
             'JUSTICE',
             'PETITIONER_ARGUMENT',
             #'RESPONDENT_ARGUMENT',
             'PETITIONER_REBUTTAL',
             'VOTED_FOR_PETITIONER']]
    
    return df
    
    
# Run function.
jdf = trim_columns(jdf)



In [39]:

    
# Show joint dataframe for clarity.
jdf.head(3)









    Out[39]:






  
    
      
      DOCKET
      CASE
      JUSTICE
      PETITIONER_ARGUMENT
      PETITIONER_REBUTTAL
      VOTED_FOR_PETITIONER
    
  
  
    
      9
      05-83
      Washington v. Recuenco
      JGRoberts
      {'JUSTICE SCALIA': ['Was deadly weapon still a...
      {'JUSTICE SCALIA': ['Thank you, counsel.

Mr. ...
      True
    
    
      10
      05-83
      Washington v. Recuenco
      JPStevens
      {'JUSTICE SCALIA': ['Was deadly weapon still a...
      {'JUSTICE SCALIA': ['Thank you, counsel.

Mr. ...
      False
    
    
      11
      05-83
      Washington v. Recuenco
      AScalia
      {'JUSTICE SCALIA': ['Was deadly weapon still a...
      {'JUSTICE SCALIA': ['Thank you, counsel.

Mr. ...
      True



In [40]:

    
# Define filter function.
def filter_justice_data(row):
    '''Converts SCDB: RHJackson to JACKSON, which can be pulled from JUSTICE JACKSON.
    
       Then for JACKSON:
             {'JUSTICE JACKSON': [1, 2],
              'JUSTICE ROBERTS': [2, 3]}

       Becomes [1,2] for JACKSON's row.
    
    '''
    
    # Handle SCDB justice names. Based on capitalization
    # which messes up SDOConnor -> Connor
    # SDOConner should be OCONNER, not CONNER.
    if row['JUSTICE'] == 'SDOConnor':
        row['JUSTICE'] = 'SDOconnor'
    # Pick first lower case letter and start name one previous
    lower_mask = [letter.islower() for letter in row['JUSTICE']]
    first_lower = lower_mask.index(True)
    one_prior = first_lower - 1
    row['JUSTICE'] = row['JUSTICE'][one_prior:].upper()
            
    # Handle text columns
    for index in ['PETITIONER_ARGUMENT',
                  #'RESPONDENT_ARGUMENT',
                  'PETITIONER_REBUTTAL',]:
        # Find if justice name is in any of the keys.
        # 1 if found in string, 0 if not.
        # [1, 0, 0] -> True
        justice_represented = any([key.count(row['JUSTICE']) for key in row[index].keys()])
        # If represented, fill with value.
        if justice_represented:
            for key in row[index].keys():
                if row['JUSTICE'] in key and 'JUSTICE' in key:
                    try:
                        row[index] = row[index][key]
                    except TypeError:
                        # Fallback to edit distance?
                        pass
            # If a number has not been placed in the cell, place zero.
            if type(row[index]) == dict:
                row[index] = []
        # If not represented
        else:
            row[index] = []

    return row


# Apply function. If justice is NA ... not yet decided
jdf = (jdf.dropna(subset=['JUSTICE'])
          .apply(filter_justice_data, axis=1))



In [41]:

    
# Show joint dataframe for clarity
jdf.head(3)









    Out[41]:






  
    
      
      DOCKET
      CASE
      JUSTICE
      PETITIONER_ARGUMENT
      PETITIONER_REBUTTAL
      VOTED_FOR_PETITIONER
    
  
  
    
      9
      05-83
      Washington v. Recuenco
      ROBERTS
      [Is the jury given a\n\ncopy of the information?]
      [Although under\n\nAnd those are the\n\n\n\nso...
      True
    
    
      10
      05-83
      Washington v. Recuenco
      STEVENS
      [Could you -- could you\n\n\n\nclarify one thi...
      [But then we'd have Justice\n\n\n\nScalia's ca...
      False
    
    
      11
      05-83
      Washington v. Recuenco
      SCALIA
      [Was deadly weapon still an\n\n\n\nenhancement...
      [Thank you, counsel.\n\nMr. Whisman, can I -- ...
      True



In [42]:

    
# Write argument data df to csv
arg_data_csv_path = os.path.join(os.path.expanduser('~'),
                                 '.scoap',
                                 'argument_data.csv')
jdf.to_csv(arg_data_csv_path, encoding='utf-8')



In [43]:

    
# Create text_df
text_df = pd.melt(jdf,
                  id_vars=['JUSTICE',
                           'DOCKET',
                           'VOTED_FOR_PETITIONER'],
                  value_vars=['PETITIONER_ARGUMENT',
                              #'RESPONDENT_ARGUMENT',
                              'PETITIONER_REBUTTAL'],
                  var_name='ARG_TYPE',
                  value_name='TEXT')

text_df.head(3)









    Out[43]:






  
    
      
      JUSTICE
      DOCKET
      VOTED_FOR_PETITIONER
      ARG_TYPE
      TEXT
    
  
  
    
      0
      ROBERTS
      05-83
      True
      PETITIONER_ARGUMENT
      [Is the jury given a\n\ncopy of the information?]
    
    
      1
      STEVENS
      05-83
      False
      PETITIONER_ARGUMENT
      [Could you -- could you\n\n\n\nclarify one thi...
    
    
      2
      SCALIA
      05-83
      True
      PETITIONER_ARGUMENT
      [Was deadly weapon still an\n\n\n\nenhancement...



In [44]:

    
# Define function
def reorient_args(row):
    '''Apply function to make respondent arguments useful.
        
    WARNING: HAND-WAVY, UNSCIENTIFIC FEATURE ENGINEERING BELOW.
    THIS ACTUALLY DECREASES ACCURACY AT PRESENT.
    
    We hamfistedly force the petitioner argument, respondent
    argument, and petitioner rebuttal into a single type of
    entry. Where before we had:
    
    JUSTICE, PET_ARG, PETITIONER_WINS
    JUSTICE, RES_ARG, PETITIONER_WINS
    JUSTICE, PET_REB, PETITIONER_WINS
    
    We will now have:
    
    JUSTICE, PET_ARG, QUESTIONEE_WON
    JUSTICE, RES_ARG, QUESTIONEE_WON
    JUSTICE, PET_REB, QUESTIONEE_WON
    
    The first notable change is that we transform PETITIONER_WINS
    to QUESTIONEE_WON. Previously, we could see what text is
    associated with petitioner wins because target PET_WINS was
    framed in terms of the petitioner. It was previously useless
    for respondent comments. We can get around this by reframing
    the target in terms of "Did the party to whom the justice
    directed the comment win?" instead of "Did the petitioner
    win?".
    
    This requires a big assumption: namely that petitioner
    arguments, respondent arguments, and petitioner rebuttals
    are roughly interchangable. In other words, we are presuming
    that justices will use similar terms i.e. "Your argument is
    bad and you should feel bad" whether it's the petitioner or
    it's the respondent.
    
    This theoretically results in some loss of prediction quality:
    negative words directed at a respondent may be markedly
    different in quality from those directed at a petitioner.
    However, this trades off with the fact that we have uroughly
    doubled the number of samples.
    '''

    vote_pet = row['VOTED_FOR_PETITIONER']
    arg_type = row['ARG_TYPE']
    
    if arg_type in ['RESPONDENT_ARGUMENT']:
        if vote_pet is True:
            voted_for_speaker = False
        else:
            voted_for_speaker = True

    if arg_type in ['PETITIONER_ARGUMENT', 'PETITIONER_REBUTTAL']:
        if vote_pet is True:
            voted_for_speaker = True
        else:
            voted_for_speaker = False

    return voted_for_speaker


# Run function
text_df['QUESTIONEE_WON'] = text_df.apply(reorient_args, axis=1)
text_df









    Out[44]:






  
    
      
      JUSTICE
      DOCKET
      VOTED_FOR_PETITIONER
      ARG_TYPE
      TEXT
      QUESTIONEE_WON
    
  
  
    
      0
      ROBERTS
      05-83
      True
      PETITIONER_ARGUMENT
      [Is the jury given a\n\ncopy of the information?]
      True
    
    
      1
      STEVENS
      05-83
      False
      PETITIONER_ARGUMENT
      [Could you -- could you\n\n\n\nclarify one thi...
      False
    
    
      2
      SCALIA
      05-83
      True
      PETITIONER_ARGUMENT
      [Was deadly weapon still an\n\n\n\nenhancement...
      True
    
    
      3
      KENNEDY
      05-83
      True
      PETITIONER_ARGUMENT
      [On -- on that point, I have\n\n\n\n-- I have ...
      True
    
    
      4
      SOUTER
      05-83
      True
      PETITIONER_ARGUMENT
      [, Now, did you have to prove\n\n\n\nthat beca...
      True
    
    
      5
      THOMAS
      05-83
      True
      PETITIONER_ARGUMENT
      []
      True
    
    
      6
      GINSBURG
      05-83
      False
      PETITIONER_ARGUMENT
      [I thought the deadly weapon\n\n\n\n-- the def...
      False
    
    
      7
      BREYER
      05-83
      True
      PETITIONER_ARGUMENT
      []
      True
    
    
      8
      ALITO
      05-83
      True
      PETITIONER_ARGUMENT
      []
      True
    
    
      9
      ROBERTS
      05-259
      False
      PETITIONER_ARGUMENT
      [It has been endorsed\n\nby the EEOC, though]
      False
    
    
      10
      STEVENS
      05-259
      False
      PETITIONER_ARGUMENT
      [May I ask you this\n\n\n\nhypothetical?\n\n\n...
      False
    
    
      11
      SCALIA
      05-259
      False
      PETITIONER_ARGUMENT
      [But  has the language and\n\n\n\n doesn't.\n\...
      False
    
    
      12
      KENNEDY
      05-259
      False
      PETITIONER_ARGUMENT
      [from the forklift forever or a year?\n\n\n\nt...
      False
    
    
      13
      SOUTER
      05-259
      False
      PETITIONER_ARGUMENT
      [Okay, but if that argument\n\nis sound -, Yes...
      False
    
    
      14
      THOMAS
      05-259
      False
      PETITIONER_ARGUMENT
      []
      False
    
    
      15
      GINSBURG
      05-259
      False
      PETITIONER_ARGUMENT
      [They say one lunch, but\n\n\n\nnot if there's...
      False
    
    
      16
      BREYER
      05-259
      False
      PETITIONER_ARGUMENT
      [Well, the answer would be\n\n\n\nbecause Cong...
      False
    
    
      17
      ALITO
      05-259
      False
      PETITIONER_ARGUMENT
      [But he says, you know, as you\n\nIsn't a chan...
      False
    
    
      18
      STEVENS
      05-352
      False
      PETITIONER_ARGUMENT
      [How do you reconcile your\n\n\n\nposition wit...
      False
    
    
      19
      SCALIA
      05-352
      False
      PETITIONER_ARGUMENT
      [When did -- when did we\n\n\n\nfirst hold tha...
      False
    
    
      20
      KENNEDY
      05-352
      True
      PETITIONER_ARGUMENT
      [But, of\n\nWell, that was my -- even\n\n\n\ni...
      True
    
    
      21
      SOUTER
      05-352
      False
      PETITIONER_ARGUMENT
      [It's a\n\nThe only issue in that case\n\n\n\n...
      False
    
    
      22
      THOMAS
      05-352
      True
      PETITIONER_ARGUMENT
      []
      True
    
    
      23
      GINSBURG
      05-352
      False
      PETITIONER_ARGUMENT
      [But in this -- in this\n\n\n\ncase, Mr. Dreeb...
      False
    
    
      24
      BREYER
      05-352
      False
      PETITIONER_ARGUMENT
      []
      False
    
    
      25
      ROBERTS
      05-352
      True
      PETITIONER_ARGUMENT
      [Well, but what if --\n\n\n\njust to take an e...
      True
    
    
      26
      ALITO
      05-352
      True
      PETITIONER_ARGUMENT
      [It's actually easier than\n\nWell, why would ...
      True
    
    
      27
      STEVENS
      05-5992
      True
      PETITIONER_ARGUMENT
      []
      True
    
    
      28
      SCALIA
      05-5992
      True
      PETITIONER_ARGUMENT
      [If the\n\nThe delay -- the delay was\n\n\n\nn...
      True
    
    
      29
      KENNEDY
      05-5992
      True
      PETITIONER_ARGUMENT
      [Prior to that time,\nIt couldn't\n\nYou're no...
      True
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      14116
      ALITO
      14-280
      False
      PETITIONER_REBUTTAL
      []
      False
    
    
      14117
      SOTOMAYOR
      14-280
      True
      PETITIONER_REBUTTAL
      []
      True
    
    
      14118
      KAGAN
      14-280
      True
      PETITIONER_REBUTTAL
      []
      True
    
    
      14119
      ROBERTS
      14-7505
      True
      PETITIONER_REBUTTAL
      []
      True
    
    
      14120
      SCALIA
      14-7505
      True
      PETITIONER_REBUTTAL
      []
      True
    
    
      14121
      KENNEDY
      14-7505
      True
      PETITIONER_REBUTTAL
      []
      True
    
    
      14122
      THOMAS
      14-7505
      True
      PETITIONER_REBUTTAL
      []
      True
    
    
      14123
      GINSBURG
      14-7505
      True
      PETITIONER_REBUTTAL
      []
      True
    
    
      14124
      BREYER
      14-7505
      True
      PETITIONER_REBUTTAL
      []
      True
    
    
      14125
      ALITO
      14-7505
      False
      PETITIONER_REBUTTAL
      []
      False
    
    
      14126
      SOTOMAYOR
      14-7505
      True
      PETITIONER_REBUTTAL
      []
      True
    
    
      14127
      KAGAN
      14-7505
      True
      PETITIONER_REBUTTAL
      []
      True
    
    
      14128
      ROBERTS
      14-857
      True
      PETITIONER_REBUTTAL
      []
      True
    
    
      14129
      SCALIA
      14-857
      True
      PETITIONER_REBUTTAL
      []
      True
    
    
      14130
      KENNEDY
      14-857
      False
      PETITIONER_REBUTTAL
      []
      False
    
    
      14131
      THOMAS
      14-857
      False
      PETITIONER_REBUTTAL
      []
      False
    
    
      14132
      GINSBURG
      14-857
      False
      PETITIONER_REBUTTAL
      []
      False
    
    
      14133
      BREYER
      14-857
      False
      PETITIONER_REBUTTAL
      []
      False
    
    
      14134
      ALITO
      14-857
      True
      PETITIONER_REBUTTAL
      []
      True
    
    
      14135
      SOTOMAYOR
      14-857
      False
      PETITIONER_REBUTTAL
      []
      False
    
    
      14136
      KAGAN
      14-857
      False
      PETITIONER_REBUTTAL
      []
      False
    
    
      14137
      ROBERTS
      14-840
      True
      PETITIONER_REBUTTAL
      []
      True
    
    
      14138
      SCALIA
      14-840
      False
      PETITIONER_REBUTTAL
      []
      False
    
    
      14139
      KENNEDY
      14-840
      True
      PETITIONER_REBUTTAL
      []
      True
    
    
      14140
      THOMAS
      14-840
      False
      PETITIONER_REBUTTAL
      []
      False
    
    
      14141
      GINSBURG
      14-840
      True
      PETITIONER_REBUTTAL
      []
      True
    
    
      14142
      BREYER
      14-840
      True
      PETITIONER_REBUTTAL
      []
      True
    
    
      14143
      ALITO
      14-840
      True
      PETITIONER_REBUTTAL
      []
      True
    
    
      14144
      SOTOMAYOR
      14-840
      True
      PETITIONER_REBUTTAL
      []
      True
    
    
      14145
      KAGAN
      14-840
      True
      PETITIONER_REBUTTAL
      []
      True
    
  

14146 rows × 6 columns



In [45]:

    
# Define function
def create_text_df(df):
    '''Clean up text.'''
    
    # ' '.join[question_1, question_2, question_3] so single string
    df['TEXT'] = df['TEXT'].map(lambda item: ' '.join(item))

    # Create string of punctuation chars to remove (but not '-')
    punctuation = string.punctuation.replace('-', '').replace('/', '')
        
    # Remove punctuation via [.!?,;:] regex
    df['TEXT'] = df['TEXT'].str.replace('[' +
                                        punctuation +
                                        ']',
                                        # Replacement value
                                        '')

    # Remove double dash pattern
    df['TEXT'] = df['TEXT'].str.replace('--',
                                        # Replacement value
                                        '')
    
    # Get rid of all items without text.
    df = df.loc[df['TEXT'].str.strip().str.len() > 0,:]

    return df


# Run function
text_df = create_text_df(text_df)
text_df









    Out[45]:






  
    
      
      JUSTICE
      DOCKET
      VOTED_FOR_PETITIONER
      ARG_TYPE
      TEXT
      QUESTIONEE_WON
    
  
  
    
      0
      ROBERTS
      05-83
      True
      PETITIONER_ARGUMENT
      Is the jury given a\n\ncopy of the information
      True
    
    
      1
      STEVENS
      05-83
      False
      PETITIONER_ARGUMENT
      Could you  could you\n\n\n\nclarify one thing ...
      False
    
    
      2
      SCALIA
      05-83
      True
      PETITIONER_ARGUMENT
      Was deadly weapon still an\n\n\n\nenhancement ...
      True
    
    
      3
      KENNEDY
      05-83
      True
      PETITIONER_ARGUMENT
      On  on that point I have\n\n\n\n I have one qu...
      True
    
    
      4
      SOUTER
      05-83
      True
      PETITIONER_ARGUMENT
      Now did you have to prove\n\n\n\nthat because...
      True
    
    
      6
      GINSBURG
      05-83
      False
      PETITIONER_ARGUMENT
      I thought the deadly weapon\n\n\n\n the defini...
      False
    
    
      9
      ROBERTS
      05-259
      False
      PETITIONER_ARGUMENT
      It has been endorsed\n\nby the EEOC though
      False
    
    
      10
      STEVENS
      05-259
      False
      PETITIONER_ARGUMENT
      May I ask you this\n\n\n\nhypothetical\n\n\n\n...
      False
    
    
      11
      SCALIA
      05-259
      False
      PETITIONER_ARGUMENT
      But  has the language and\n\n\n\n doesnt\n\n\n...
      False
    
    
      12
      KENNEDY
      05-259
      False
      PETITIONER_ARGUMENT
      from the forklift forever or a year\n\n\n\nthe...
      False
    
    
      13
      SOUTER
      05-259
      False
      PETITIONER_ARGUMENT
      Okay but if that argument\n\nis sound - Yes bu...
      False
    
    
      15
      GINSBURG
      05-259
      False
      PETITIONER_ARGUMENT
      They say one lunch but\n\n\n\nnot if theres a ...
      False
    
    
      16
      BREYER
      05-259
      False
      PETITIONER_ARGUMENT
      Well the answer would be\n\n\n\nbecause Congre...
      False
    
    
      17
      ALITO
      05-259
      False
      PETITIONER_ARGUMENT
      But he says you know as you\n\nIsnt a change i...
      False
    
    
      18
      STEVENS
      05-352
      False
      PETITIONER_ARGUMENT
      How do you reconcile your\n\n\n\nposition with...
      False
    
    
      19
      SCALIA
      05-352
      False
      PETITIONER_ARGUMENT
      When did  when did we\n\n\n\nfirst hold that t...
      False
    
    
      20
      KENNEDY
      05-352
      True
      PETITIONER_ARGUMENT
      But of\n\nWell that was my  even\n\n\n\nin the...
      True
    
    
      21
      SOUTER
      05-352
      False
      PETITIONER_ARGUMENT
      Its a\n\nThe only issue in that case\n\n\n\nis...
      False
    
    
      23
      GINSBURG
      05-352
      False
      PETITIONER_ARGUMENT
      But in this  in this\n\n\n\ncase Mr Dreeben we...
      False
    
    
      25
      ROBERTS
      05-352
      True
      PETITIONER_ARGUMENT
      Well but what if \n\n\n\njust to take an examp...
      True
    
    
      26
      ALITO
      05-352
      True
      PETITIONER_ARGUMENT
      Its actually easier than\n\nWell why would it ...
      True
    
    
      28
      SCALIA
      05-5992
      True
      PETITIONER_ARGUMENT
      If the\n\nThe delay  the delay was\n\n\n\nnot ...
      True
    
    
      29
      KENNEDY
      05-5992
      True
      PETITIONER_ARGUMENT
      Prior to that time\nIt couldnt\n\nYoure not qu...
      True
    
    
      30
      SOUTER
      05-5992
      True
      PETITIONER_ARGUMENT
      And it makes very clear\n\nNo but isnt  isnt t...
      True
    
    
      32
      GINSBURG
      05-5992
      True
      PETITIONER_ARGUMENT
      For\n\nWhat about Mr Zas that\n\n\n\nthe court...
      True
    
    
      33
      BREYER
      05-5992
      True
      PETITIONER_ARGUMENT
      Yes but on  on that one\n\n\n\nthe  the Second...
      True
    
    
      34
      ROBERTS
      05-5992
      True
      PETITIONER_ARGUMENT
      of justice finding for all time\n\n\n\nbeginni...
      True
    
    
      36
      STEVENS
      05-18
      False
      PETITIONER_ARGUMENT
      Do you think the right\n\n\n\nincludes any cos...
      False
    
    
      37
      SCALIA
      05-18
      True
      PETITIONER_ARGUMENT
      I thought the GAO study\n\n\n\nincluded not ju...
      True
    
    
      38
      KENNEDY
      05-18
      True
      PETITIONER_ARGUMENT
      But\n\nI  I would have thought\n\n\n\nthat you...
      True
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      13149
      SCALIA
      13-550
      True
      PETITIONER_REBUTTAL
      But if  if we agree that \n\nthere had to be...
      True
    
    
      13150
      KENNEDY
      13-550
      True
      PETITIONER_REBUTTAL
      Where  where are you \n\nreading from Well y...
      True
    
    
      13152
      GINSBURG
      13-550
      True
      PETITIONER_REBUTTAL
      You also said did you \n\n\n\nnot that  that...
      True
    
    
      13153
      BREYER
      13-550
      True
      PETITIONER_REBUTTAL
      Do you think both parties \n\n\n\nagree that t...
      True
    
    
      13154
      ALITO
      13-550
      True
      PETITIONER_REBUTTAL
      If we forget what happened \n\n\n\nat earlier ...
      True
    
    
      13155
      SOTOMAYOR
      13-550
      True
      PETITIONER_REBUTTAL
      One of the things that I \n\n\n\nwas looking f...
      True
    
    
      13156
      KAGAN
      13-550
      True
      PETITIONER_REBUTTAL
      And if I could just ask \n\n\n\nagain Mr Frede...
      True
    
    
      13382
      ROBERTS
      13-1174
      True
      PETITIONER_REBUTTAL
      Well hear argument \n\n\n\nfirst this morning ...
      True
    
    
      13383
      SCALIA
      13-1174
      True
      PETITIONER_REBUTTAL
      But these statutes do not \n\n\n\nspeak of con...
      True
    
    
      13384
      KENNEDY
      13-1174
      True
      PETITIONER_REBUTTAL
      But  but your  your \n\n\n\nanswer is that...
      True
    
    
      13386
      GINSBURG
      13-1174
      True
      PETITIONER_REBUTTAL
      Doesnt Rule  say \n\nunless otherwise provided...
      True
    
    
      13387
      BREYER
      13-1174
      True
      PETITIONER_REBUTTAL
      How do you do that \n\nBecause not  not all ...
      True
    
    
      13388
      ALITO
      13-1174
      True
      PETITIONER_REBUTTAL
      Well but that \nAnd that is Petitioners bri...
      True
    
    
      13389
      SOTOMAYOR
      13-1174
      True
      PETITIONER_REBUTTAL
      Could you tell me \n\n\n\nfirst what differenc...
      True
    
    
      13390
      KAGAN
      13-1174
      True
      PETITIONER_REBUTTAL
      Are you then saying that \n\n\n\ntheres no pra...
      True
    
    
      13409
      ROBERTS
      13-1075
      True
      PETITIONER_REBUTTAL
      We will hear \n\n\n\nargument next this mornin...
      True
    
    
      13410
      SCALIA
      13-1075
      True
      PETITIONER_REBUTTAL
      Didnt  didnt the \n\n\n\nprivate right of a...
      True
    
    
      13411
      KENNEDY
      13-1075
      False
      PETITIONER_REBUTTAL
      Were  were those \nDidnt our cases \n...
      False
    
    
      13413
      GINSBURG
      13-1075
      False
      PETITIONER_REBUTTAL
      I thought at least if a \n\n\n\nstatute passed...
      False
    
    
      13414
      BREYER
      13-1075
      False
      PETITIONER_REBUTTAL
      Well if that  if thats \n\nall  If in fact...
      False
    
    
      13416
      SOTOMAYOR
      13-1075
      False
      PETITIONER_REBUTTAL
      You are equating \n\n\n\nreasonable cause or ...
      False
    
    
      13417
      KAGAN
      13-1075
      False
      PETITIONER_REBUTTAL
      Ms  Ms Prelogar Im \n\n\n\nwondering what y...
      False
    
    
      13427
      ROBERTS
      13-435
      True
      PETITIONER_REBUTTAL
      Well hear argument \n\n\n\nnext this morning i...
      True
    
    
      13428
      SCALIA
      13-435
      True
      PETITIONER_REBUTTAL
      Do you think that  Justice Scalia \nDo you t...
      True
    
    
      13429
      KENNEDY
      13-435
      True
      PETITIONER_REBUTTAL
      If we adopt your position \n\n\n\nwhich I take...
      True
    
    
      13431
      GINSBURG
      13-435
      True
      PETITIONER_REBUTTAL
      So youre saying that \n\nthis evidence what we...
      True
    
    
      13432
      BREYER
      13-435
      True
      PETITIONER_REBUTTAL
      But suppose it is actually \n\n\n\ndisputed an...
      True
    
    
      13433
      ALITO
      13-435
      True
      PETITIONER_REBUTTAL
      Well that may be true but \n\n\n\ndo you deny ...
      True
    
    
      13434
      SOTOMAYOR
      13-435
      True
      PETITIONER_REBUTTAL
      Whats wrong with that \n\n\n\nTheres an assump...
      True
    
    
      13435
      KAGAN
      13-435
      True
      PETITIONER_REBUTTAL
      Well Mr Shanmugam \n\nsuppose  Mr Shanmugam ...
      True
    
  

5225 rows × 6 columns



In [46]:

    
# Create test/train split for text data
split = sklearn.model_selection.train_test_split

# Split test and train.
train_text_df, test_text_df = split(text_df, test_size = 0.2)
train_text_df = train_text_df.copy()
test_text_df = test_text_df.copy()



In [47]:

    
train_text_df.head(3)









    Out[47]:






  
    
      
      JUSTICE
      DOCKET
      VOTED_FOR_PETITIONER
      ARG_TYPE
      TEXT
      QUESTIONEE_WON
    
  
  
    
      4984
      SOTOMAYOR
      11-1351
      True
      PETITIONER_ARGUMENT
      Well one of the\n\n\n\nstrongest arguments by ...
      True
    
    
      4969
      SCALIA
      11-9953
      False
      PETITIONER_ARGUMENT
      Mr Bourke was  was it\n\nwithin the control of...
      False
    
    
      7278
      ROBERTS
      04-10566
      False
      PETITIONER_REBUTTAL
      Where you have a treaty that becomes part of\n...
      False



In [48]:

    
train_text_df['JUSTICE'].unique()









    Out[48]:





array(['SOTOMAYOR', 'SCALIA', 'ROBERTS', 'GINSBURG', 'ALITO', 'STEVENS',
       'KENNEDY', 'BREYER', 'KAGAN', 'SOUTER', 'THOMAS'], dtype=object)



In [49]:

    
# Define function
def create_pipelines(df):
    '''Creates pipelines for each justice.'''
    # Basic setup.
    gb = text_df.groupby('JUSTICE')
    justices = df['JUSTICE'].unique()
    dataframes = [gb.get_group(justice) for justice in justices]
    nb_pipelines = []
    sgd_pipelines = []
    rf_pipelines = []
    
    # Probably a vectorized way to do this.
    for justice, dataframe in zip(justices, dataframes):

        # Make aliases
        Pipe = sklearn.pipeline.Pipeline
        Vectorizer = sklearn.feature_extraction.text.CountVectorizer
        Transformer = sklearn.feature_extraction.text.TfidfTransformer
        MultiNB = sklearn.naive_bayes.MultinomialNB
        SGD = sklearn.linear_model.SGDClassifier
        RF = sklearn.ensemble.RandomForestClassifier

        # Reuseable arguments.
        vectorizer_params = {'ngram_range': (3, 5),
                             'min_df': 10}
        
        transformer_params = {'use_idf': True}

        ############# Multinomial Naive Bayes classifier
        nb_pipeline = Pipe([('vectorizer', Vectorizer(**vectorizer_params)),
                            ('transformer', Transformer(**transformer_params)),
                            ('classifier', MultiNB()),])
        try:
            nb_pipeline = nb_pipeline.fit(dataframe['TEXT'],
                                          dataframe['QUESTIONEE_WON'])
        except (ValueError, AttributeError):
            nb_pipeline = None
        nb_pipelines.append(nb_pipeline)

        ############ Gradient descent SGD
        sgd_pipeline = Pipe([('vectorizer', Vectorizer(**vectorizer_params)),
                             ('transformer', Transformer(**transformer_params)),
                             ('classifier', SGD(loss='log', penalty='l2')),])    
        try:
            sgd_pipeline = sgd_pipeline.fit(dataframe['TEXT'],
                                            dataframe['QUESTIONEE_WON'])
        except (ValueError, AttributeError):
            sgd_pipeline = None
        sgd_pipelines.append(sgd_pipeline)
        
        ############ RF
        rf_pipeline = Pipe([('vectorizer', Vectorizer(**vectorizer_params)),
                            ('transformer', Transformer(**transformer_params)),
                            ('classifier', RF(n_estimators=100))])    
        try:
            rf_pipeline = rf_pipeline.fit(dataframe['TEXT'],
                                          dataframe['QUESTIONEE_WON'])
        except (ValueError, AttributeError):
            rf_pipeline = None
        rf_pipelines.append(rf_pipeline)
    
    return [item for item in zip(justices,
                                 nb_pipelines, 
                                 sgd_pipelines, 
                                 rf_pipelines)]


# Create test and train pipelines
pipelines = create_pipelines(train_text_df)



In [50]:

    
pipelines[0][1]









    Out[50]:





Pipeline(steps=[('vectorizer', CountVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=1.0, max_features=None, min_df=10,
        ngram_range=(3, 5), preprocessor=None, stop_words=None,
      ...f=False, use_idf=True)), ('classifier', MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True))])



In [51]:

    
# Define function for creating an argument for add_predictions()
def create_model_dict(model_pipelines):
    '''For convenience we create an associative array of models.'''
    model_dict = {}
    for justice, nb_pipe, sgd_pipe, rf_pipe in model_pipelines:
        # Nested dicts
        model_dict[justice] = {}
        model_dict[justice]['SGD'] = sgd_pipe
        model_dict[justice]['NB'] = nb_pipe
        model_dict[justice]['RF'] = rf_pipe 
    return model_dict


# Run function
model_dict = create_model_dict(pipelines)



In [52]:

    
# Define function to add predictions to test frame
def add_predictions(row, model_dict, model_type):
    '''Apply() function for adding predictions'''
    justice_name = row['JUSTICE']
    try:
        model = model_dict[justice_name][model_type]
        prediction = model.predict([row['TEXT']])[0]
    # If no model, predict will not be an attribute.
    # No justice, no peace (also no model).
    except (KeyError, AttributeError):
        return np.NaN
    return prediction


test_text_df['NB_PREDICTION'] = test_text_df.apply(add_predictions,
                                                   args=(model_dict, 'NB'),
                                                   axis=1).astype(bool)

test_text_df['SGD_PREDICTION'] = test_text_df.apply(add_predictions,
                                                    args=(model_dict, 'SGD'),
                                                    axis=1).astype(bool)

test_text_df['RF_PREDICTION'] = test_text_df.apply(add_predictions,
                                                   args=(model_dict, 'RF'),
                                                   axis=1).astype(bool)



In [53]:

    
test_text_df.head(3)









    Out[53]:






  
    
      
      JUSTICE
      DOCKET
      VOTED_FOR_PETITIONER
      ARG_TYPE
      TEXT
      QUESTIONEE_WON
      NB_PREDICTION
      SGD_PREDICTION
      RF_PREDICTION
    
  
  
    
      8900
      STEVENS
      06-666
      True
      PETITIONER_REBUTTAL
      We are\n\nWe are talking about not\n\njust a d...
      True
      False
      False
      False
    
    
      8551
      KENNEDY
      06-11429
      False
      PETITIONER_REBUTTAL
      It is simply our\n\nWell but there has to be\n...
      False
      True
      True
      False
    
    
      4644
      ROBERTS
      11-798
      True
      PETITIONER_ARGUMENT
      That matters  that\n\n\n\nmatters in your view...
      True
      True
      True
      True



In [54]:

    
# Check output for clarity
test_text_df.head(3)

# Assess accuracy
score = sklearn.metrics.accuracy_score
test_text_df = test_text_df.dropna()

# Conduct scoring
nb_score = score(test_text_df['QUESTIONEE_WON'],
                 test_text_df['NB_PREDICTION'])
sgd_score = score(test_text_df['QUESTIONEE_WON'],
                  test_text_df['SGD_PREDICTION'])
rf_score = score(test_text_df['QUESTIONEE_WON'],
                 test_text_df['RF_PREDICTION'])

# Format as string
base_string = '''
\n
The Naive Bayes model scored {:.1%}.\n\n
The Stochastic Gradient Decent model scored {:.1%}.\n\n
The Random Forest model scored {:.1%}.\n\n

This can't be real. TODO.
'''

print(base_string.format(nb_score, sgd_score, rf_score))









    





The Naive Bayes model scored 72.9%.


The Stochastic Gradient Decent model scored 83.7%.


The Random Forest model scored 94.6%.



This can't be real. TODO.



In [55]:

    
score = sklearn.metrics.roc_auc_score(test_text_df['QUESTIONEE_WON'].values,
                                      test_text_df['RF_PREDICTION'].values)

score









    Out[55]:





0.94360588874499463



In [56]:

    
# Define function
def get_nb_phrases(nb_pipeline, number):
    '''Pull relevant phrases from model'''
    nb_vec = nb_pipeline.named_steps['vectorizer']
    nb_clf = nb_pipeline.named_steps['classifier']
    nb_names = nb_vec.get_feature_names()
    # nb_clf.feature_log_prob_[0] is for False (voted against party)
    # nb_clf.feature_log_prob_[1] is for True (voted for party)
    nb_probs = nb_clf.feature_log_prob_[1]
    nb_series = pd.Series({name: prob
                           for name, prob
                           in zip(nb_names, nb_probs)})
    # Turn into series
    top_values_nb = nb_series.sort_values(ascending=False).head(number).copy()
    top_values_nb.name = 'Top Naive Bayes Log Prob'
    bottom_values_nb = nb_series.sort_values(ascending=True).head(number).copy()
    bottom_values_nb.name = 'Bottom Naive Bayes Log Prob'
    return (top_values_nb, bottom_values_nb)


# Define function
def get_sgd_phrases(sgd_pipeline, number):
    '''Pull phrases from model.'''
    sgd_clf = sgd_pipeline.named_steps['classifier']
    sgd_vec = sgd_pipeline.named_steps['vectorizer']
    sgd_names = sgd_vec.get_feature_names()
    # sgd_clf.coef_[0] is for False (voted against party)
    sgd_probs = sgd_clf.coef_[0]
    sgd_series = pd.Series({name: prob
                            for name, prob
                            in zip(sgd_names, sgd_probs)})
    # Turn into series.
    top_values_sgd = sgd_series.sort_values(ascending=False).head(number).copy()
    top_values_sgd.name = 'Top SGD Log Prob'
    bottom_values_sgd = sgd_series.sort_values(ascending=True).head(number).copy()
    bottom_values_sgd.name = 'Bottom SGD Log Prob'
    return(top_values_sgd, bottom_values_sgd)


# Define function
def get_rf_phrases(rf_pipeline, number):
    '''Pull phrases from model. Importances are both top and bottom items.'''
    rf_clf = rf_pipeline.named_steps['classifier']
    rf_vec = rf_pipeline.named_steps['vectorizer']
    rf_names = rf_vec.get_feature_names()
    # rf_clf.geature_importances
    rf_probs = rf_clf.feature_importances_
    rf_series = pd.Series({name: prob
                           for name, prob
                           in zip(rf_names, rf_probs)})
    # Turn into series.
    top_values_rf = rf_series.sort_values(ascending=False).head(number).copy()
    top_values_rf.name = 'Top RF Feature Imp'
    bottom_values_rf = rf_series.sort_values(ascending=True).head(number).copy()
    bottom_values_rf.name = 'Bottom RF Feature Imp'
    return(top_values_rf, bottom_values_rf)



In [57]:

    
# Define function
def create_phrase_series(pipelines, number=500):
    '''Top and bottom phrase DFs. Two columns per justice in each (SGD & NB models).'''
    
    # Create data holding dicts
    return_value = []
    
    # Iterate through pipelines to get data we need.
    for justice, nb_pipeline, sgd_pipeline, rf_pipeline in pipelines:
        
        # Skip any empty pipelines (insufficient comments)
        if any([nb_pipeline is None, 
                sgd_pipeline is None,
                rf_pipeline is None]):
            continue

        # Get actual phrases
        top_values_nb, bottom_values_nb = get_nb_phrases(nb_pipeline,
                                                         number)
        top_values_sgd, bottom_values_sgd = get_sgd_phrases(sgd_pipeline,
                                                            number)
        top_values_rf, bottom_values_rf = get_rf_phrases(rf_pipeline,
                                                         number)

        # Add to return value
        return_value.append({'justice': justice,
                             'TOP_NB': top_values_nb,
                             'BOTTOM_NB': bottom_values_nb,
                             'TOP_SGD': top_values_sgd,
                             'BOTTOM_SGD': bottom_values_sgd,
                             'TOP_RF': top_values_rf,
                             'BOTTOM_RF': bottom_values_rf})

    # Return list of dicts.
    return return_value
        
    
# Run function
justice_data = create_phrase_series(pipelines)



In [58]:

    
# Show sample data for clarity
pd.DataFrame(justice_data[6]['BOTTOM_RF']).head(3)









    Out[58]:






  
    
      
      Bottom RF Feature Imp
    
  
  
    
      would have to
      0.000326
    
    
      to make the
      0.000343
    
    
      are talking about
      0.000351



In [59]:

    
# Define function
def create_frequency_dfs(justice_data, text_df):
    '''This function takes bottom phrases and computes frequency.'''
        
    # Results
    bottom_phrase_results = []
    
    for data_dict in justice_data:
        
        # Get justice name
        justice = data_dict['justice']

        # Get bottom values
        bottom_nb_phrases = data_dict['BOTTOM_NB']
        bottom_sgd_phrases = data_dict['BOTTOM_SGD']
        bottom_rf_phrases = data_dict['BOTTOM_RF']
        bottom_phrases = (bottom_nb_phrases.append(bottom_sgd_phrases)
                                           .append(bottom_rf_phrases)
                                           .drop_duplicates()
                                           .index.values)
        
        # Create won and lost dataframes.
        won_df = text_df[(text_df['JUSTICE'] == justice) &
                         (text_df['QUESTIONEE_WON'] == True)]
        lost_df = text_df[(text_df['JUSTICE'] == justice) &
                          (text_df['QUESTIONEE_WON'] == False)]
        
        # To string
        won_string = won_df['TEXT'].str.lower().str.cat(sep=' ')
        lost_string = lost_df['TEXT'].str.lower().str.cat(sep=' ')

   
        # Calculate bottom phrases
        for phrase in bottom_phrases:
            won_count = 0
            lost_count = 0
            
            # Get counts
            won_count += won_string.count(phrase)
            lost_count += lost_string.count(phrase)
            all_count = won_count + lost_count
            if all_count == 0:
                percentage = np.NaN
            else:
                percentage = won_count / all_count
            # Stick in results (list of dicts)
            bottom_phrase_results.append({'JUSTICE': justice,
                                          'PHRASE': phrase,
                                          'AT_WINNER_COUNT': won_count,
                                          'AT_LOSER_COUNT': lost_count,
                                          'AT_WINNER_PERCENT': percentage})

    # Create bottom dataframe
    bottom_df = pd.DataFrame(bottom_phrase_results)
    bottom_df = bottom_df.set_index(['JUSTICE', 'PHRASE'])
    bottom_df = bottom_df[['AT_WINNER_COUNT', 'AT_LOSER_COUNT', 'AT_WINNER_PERCENT']]
    bottom_df['AT_LOSER_PERCENT'] = 1 - bottom_df['AT_WINNER_PERCENT']

    return bottom_df


# Run function
bottom_freq_df = create_frequency_dfs(justice_data, text_df)
bottom_freq_df.head(5).dropna()









    Out[59]:






  
    
      
      
      AT_WINNER_COUNT
      AT_LOSER_COUNT
      AT_WINNER_PERCENT
      AT_LOSER_PERCENT
    
    
      JUSTICE
      PHRASE
      
      
      
      
    
  
  
    
      SOTOMAYOR
      why should we
      2
      10
      0.166667
      0.833333
    
    
      could you tell me what
      3
      5
      0.375000
      0.625000
    
    
      that there are
      2
      6
      0.250000
      0.750000
    
    
      are you suggesting
      2
      7
      0.222222
      0.777778
    
    
      between the two
      2
      5
      0.285714
      0.714286



In [60]:

    
# Write bottom_freq_df to file
bottom_csv_path = os.path.join(DATA_FOLDER, 'bottom_phrases.csv')
bottom_freq_df.to_csv(bottom_csv_path, encoding='utf-8')



In [61]:

    
# Define function
def create_tabulation_df():
    '''(Case, justice, arg_vect) x  model.'''
    
    # Make justice/model multiindex for columns
    cases = CURRENT_CASES
    # Really should have standardized this earlier
    justices = [justice.upper() for justice in CURRENT_JUSTICES]
    arg_types = ['PETITIONER_ARGUMENT',
                 'RESPONDENT_ARGUMENT',
                 'PETITIONER_REBUTTAL']
    models = ['NB', 'SGD', 'RF']
    cja_index = pd.MultiIndex.from_product([cases, justices, arg_types])

    # Make dataframe
    tabulation_df = pd.DataFrame(index=cja_index, columns=models, data=np.NaN)
    return tabulation_df


# Run function
tabulation_df = create_tabulation_df()



In [62]:

    
# Demo for clarity ... should be empty.
tabulation_df.head(3)









    Out[62]:






  
    
      
      
      
      NB
      SGD
      RF
    
  
  
    
      15-214
      ROBERTS
      PETITIONER_ARGUMENT
      NaN
      NaN
      NaN
    
    
      RESPONDENT_ARGUMENT
      NaN
      NaN
      NaN
    
    
      PETITIONER_REBUTTAL
      NaN
      NaN
      NaN



In [63]:

    
def make_current_df(arg_df):
    # Create a lookup dataframe.
    lookup_df = arg_df[arg_df['DOCKET'].isin(CURRENT_CASES)]
    output_rows = []
    input_rows = [row.to_dict() for index, row in lookup_df.iterrows()]
    for justice in CURRENT_JUSTICES:
        for row_dict in input_rows:
            dict_copy = copy.deepcopy(row_dict)
            dict_copy['JUSTICE'] = justice
            output_rows.append(dict_copy)
    return pd.DataFrame.from_dict(output_rows)

# Create new df
current_df = make_current_df(arg_df)
# Run previous functions
current_df = current_df.apply(filter_justice_data, axis=1)
current_df.head(3)









    Out[63]:






  
    
      
      ARGUMENT_LINK
      ARGUMENT_PATH
      ARGUMENT_YEAR
      CASE
      DOCKET
      JUSTICE
      PETITIONER_ARGUMENT
      PETITIONER_REBUTTAL
      PET_ARG_HEADING
      PET_ARG_REGEX
      PET_REB_HEADING
      PET_REB_REGEX
      RESPONDENT_ARGUMENT
      RES_ARG_HEADING
      RES_ARG_REGEX
      TEXT
    
  
  
    
      0
      https://www.supremecourt.gov/oral_arguments/ar...
      /home/theo/.scoap/15-214/argument.pdf
      2016
      Murr v. Wisconsin
      15-214
      ROBERTS
      [, , , And those would\n\nI thought your\n\n\n...
      []
      ORAL ARGUMENT OF JOHN M. GROEN\n\n\n\nON BEHAL...
      ORAL ARGUMENT OF JOHN M. GROEN\n\n\n\nON BEHAL...
      REBUTTAL ARGUMENT OF JOHN M. GROEN\n\n\n\nON B...
      REBUTTAL ARGUMENT OF JOHN M. GROEN\n\n\n\nON B...
      {'CHIEF JUSTICE ROBERTS': ['Well, that's -- an...
      ORAL ARGUMENT OF RICHARD J. LAZARUS\nON BEHALF...
      ORAL ARGUMENT OF RICHARD J. LAZARUS\nON BEHALF...
      \n\n\n\n\n\n\n\nCHIEF JUSTICE ROBERTS:\n\nWe w...
    
    
      1
      https://www.supremecourt.gov/oral_arguments/ar...
      /home/theo/.scoap/15-1031/argument.pdf
      2016
      Howell v. Howell
      15-1031
      ROBERTS
      [This -- this is a\n\n\n\npretty basic questio...
      []
      ORAL ARGUMENT OF ADAM G. UNIKOWSKY\n\n\n\nON B...
      ORAL ARGUMENT OF ADAM G. UNIKOWSKY\n\n\n\nON B...
      REBUTTAL ARGUMENT OF ADAM G. UNIKOWSKI\n\n\n\n...
      REBUTTAL ARGUMENT OF ADAM G. UNIKOWSKI\n\n\n\n...
      {'CHIEF JUSTICE ROBERTS': ['', 'I think -What ...
      ORAL ARGUMENT OF CHARLES W. WIRKEN\n\n\n\nON B...
      ORAL ARGUMENT OF CHARLES W. WIRKEN\n\n\n\nON B...
      \n\n\n\n\n\n\n\nCHIEF JUSTICE ROBERTS:\n\nWe'l...
    
    
      2
      https://www.supremecourt.gov/oral_arguments/ar...
      /home/theo/.scoap/15-1189/argument.pdf
      2016
      Impressions Products, Inc. v. Lexmark Int'l, Inc.
      15-1189
      ROBERTS
      [I'm sorry to\n\n\n\ninterrupt you, but that l...
      []
      ORAL ARGUMENT OF ANDREW J. PINCUS\n\n\n\nON BE...
      ORAL ARGUMENT OF ANDREW J. PINCUS\n\n\n\nON BE...
      REBUTTAL ARGUMENT OF ANDREW J. PINCUS\n\n\n\nO...
      REBUTTAL ARGUMENT OF ANDREW J. PINCUS\n\n\n\nO...
      {'CHIEF JUSTICE ROBERTS': ['Why -- why is norm...
      ORAL ARGUMENT OF CONSTANTINE L. TRELA, JR.\n\n...
      ORAL ARGUMENT OF CONSTANTINE L. TRELA, JR.\n\n...
      \n\n\n\n\n\n\n\nCHIEF JUSTICE ROBERTS:\n\nWe'l...



In [64]:

    
# Define function
def create_lookup_series(current_df):
    '''Place text in df for processing. Each justice gets same data.'''
    # Flatten and reindex
    lookup_df = current_df[['DOCKET',
                            'JUSTICE',
                            'PETITIONER_ARGUMENT',
                            #'RESPONDENT_ARGUMENT',
                            'PETITIONER_REBUTTAL']]
    lookup_df = lookup_df.set_index(['DOCKET', 'JUSTICE'])
    # Flatten lists
    lookup_df = lookup_df.applymap(lambda x: ' '.join(x))
    # Make lookup series
    lookup_series = lookup_df.stack()
    # Sort and dedupe (where do dupes come from?)
    lookup_series.sort_index(inplace=True)
    lookup_series.drop_duplicates(inplace=True)
    return lookup_series


# Run
lookup_series = create_lookup_series(current_df)



In [65]:

    
lookup_series.head(3)









    Out[65]:





DOCKET   JUSTICE                      
14-1055  ALITO     PETITIONER_ARGUMENT                                                     
         BREYER    PETITIONER_ARGUMENT    It's tough.\n\nI mean, I find\n\n\n\nthis pret...
         GINSBURG  PETITIONER_ARGUMENT    Does that include --\n\n\n\nyou -- you said su...
dtype: object



In [66]:

    
# Define function
def populate_tabulation_df(row, lookup_series):
    '''Fill in the dataframe. Meant to be applied.'''
    try:
        case, justice, arg_type = row.name
        # Make this not chained indexing
        value = lookup_series[case][justice][arg_type]
        row[['NB', 'SGD', 'RF']] = value, value, value
    except KeyError:
        row[['NB', 'SGD', 'RF']] = np.NaN, np.NaN, np.NaN
    return row


# Run function
tabulation_df = tabulation_df.apply(populate_tabulation_df,
                                    args=(lookup_series,),
                                    axis=1)



In [67]:

    
tabulation_df.head(3)









    Out[67]:






  
    
      
      
      
      NB
      SGD
      RF
    
  
  
    
      15-214
      ROBERTS
      PETITIONER_ARGUMENT
      And those would\n\nI thought your\n\n\n\nar...
      And those would\n\nI thought your\n\n\n\nar...
      And those would\n\nI thought your\n\n\n\nar...
    
    
      RESPONDENT_ARGUMENT
      NaN
      NaN
      NaN
    
    
      PETITIONER_REBUTTAL
      NaN
      NaN
      NaN



In [68]:

    
# Define function
def run_predictions(column, model_dict, tabulation_df):
    '''This applied function adds results to the result series.
    
    It is initially framed in terms of "QUESTIONEE_WINS", which is
    the output of the model.predict(). It is then converted to 
    "PLAINTIFF_WINS", by flipping the respondent argument (e.g.
    if questionee is plaintiff because plaintiff arg or
    plaintiff rebuttal, QUESTIONEE_WINS == PLAINTIFF_WINS ...
    if respondent argument, QUESTOINEE_WINS != PLAINTIFF_WINS).
    
    '''

    # There has to be a better way to vectorize with groupby.
    model_name = column.name
    
    # Need copy so we can iterrate and change "in place"
    column_copy = column.copy()
    
    # iterate through items
    for index, text in column_copy.iteritems():
        case, justice, arg_type = index
        # Get text
        try:
            model = model_dict[justice][model_name]
            # Cannot comapre np.NaN
            if model is None or pd.isnull(text):
                column.loc[index] = np.NaN
                continue
            # If you've already gone over it, it's bool. Therefore skip.
            if type(text) is np.bool_ or type(text) is bool:
                continue
            # Predict
            prediction = model.predict([text])[0]
            # Flip prediction because speaker -> party flip recorrect.
            if arg_type == 'RESPONDENT_ARGUMENT':
                prediction = not prediction
            # Write back to clolumn
            column.loc[index] = prediction
        except KeyError:
            column.loc[index] = np.NaN
    
    return column


# Run function
tabulation_df = tabulation_df.apply(run_predictions,
                                    axis=0,
                                    args=(model_dict, tabulation_df))



In [69]:

    
# Demo for clarity
tabulation_df.head(3)









    Out[69]:






  
    
      
      
      
      NB
      SGD
      RF
    
  
  
    
      15-214
      ROBERTS
      PETITIONER_ARGUMENT
      True
      True
      True
    
    
      RESPONDENT_ARGUMENT
      NaN
      NaN
      NaN
    
    
      PETITIONER_REBUTTAL
      NaN
      NaN
      NaN



In [70]:

    
def modified_sum(row):
    if row['RESPONDENT_VOTES'] < row['PETITIONER_VOTES']:
        return 'Petitioner'
    if row['RESPONDENT_VOTES'] > row['PETITIONER_VOTES']:
        return 'Respondent'
    else:
        return None

#### Define function
def calculate_votes(tabulation_df):
    # Consensus vector ... vectorize this.
    consensus = pd.Series(index=tabulation_df.index
                                             .droplevel(2)
                                             .copy(),
                          dtype='object')
    consensus.name = 'VOTES'
    # Iterate through tabulation
    tdf = tabulation_df.unstack()
    tdf = tdf.apply(lambda row: pd.value_counts(row.values), axis=1)
    tdf.columns = ['RESPONDENT_VOTES', 'PETITIONER_VOTES']
    tdf = tdf.fillna(0)
    tdf['VOTE'] = tdf.apply(modified_sum, axis=1)
    return tdf

# Run function
votes = calculate_votes(tabulation_df)
votes.head(8)









    Out[70]:






  
    
      
      
      RESPONDENT_VOTES
      PETITIONER_VOTES
      VOTE
    
  
  
    
      14-1055
      ALITO
      0.0
      3.0
      Petitioner
    
    
      BREYER
      2.0
      1.0
      Respondent
    
    
      GINSBURG
      0.0
      3.0
      Petitioner
    
    
      KAGAN
      0.0
      3.0
      Petitioner
    
    
      KENNEDY
      0.0
      0.0
      None
    
    
      ROBERTS
      0.0
      3.0
      Petitioner
    
    
      SOTOMAYOR
      0.0
      6.0
      Petitioner
    
    
      THOMAS
      0.0
      0.0
      None



In [71]:

    
def harmonize_empty(votes, VOTING_RELATIONSHIPS):
    '''If null, make this justice copy another similarly-minded justice.'''

    voting_df = pd.DataFrame(VOTING_RELATIONSHIPS)
    # Don't want our inputed picks affecting other imputed picks.
    imputed_probabilities = []
        
    for index, row in votes.iterrows():
        # Parse
        case, justice = index
        if row['VOTE'] is None:
            # Get similarity rankings: ALITO: 7, BREYER: 2, KAGAN: 3
            similarity_ranks = voting_df.loc[justice].argsort()
            # Then rank so we have BREYER: 2, KAGAN: 3, ALITO: 7
            similarity_order = similarity_ranks.sort_values()
            # Similar justice list: [BREYER, KAGAN, ALITO]
            most_similar = similarity_order.index.values

            # Go through justice list to get closest.
            for sim_justice in most_similar:
                if sim_justice == 'SCALIA':
                    continue
                other_justice_prob = votes.loc[(case, sim_justice)]['VOTE']
                if other_justice_prob is None:
                    continue
                else:
                    imputed_probabilities.append({'case': case,
                                                  'justice': justice,
                                                  'prob': other_justice_prob})
                    
    # Now all imputed_probs are complete. Add back in.
    for prob in imputed_probabilities:
        index_tuple = tuple([prob['case'], prob['justice']])
        votes.loc[index_tuple, 'VOTE'] = prob['prob']
    return None


harmonize_empty(votes, VOTING_RELATIONSHIPS)
votes.head(8)









    Out[71]:






  
    
      
      
      RESPONDENT_VOTES
      PETITIONER_VOTES
      VOTE
    
  
  
    
      14-1055
      ALITO
      0.0
      3.0
      Petitioner
    
    
      BREYER
      2.0
      1.0
      Respondent
    
    
      GINSBURG
      0.0
      3.0
      Petitioner
    
    
      KAGAN
      0.0
      3.0
      Petitioner
    
    
      KENNEDY
      0.0
      0.0
      Petitioner
    
    
      ROBERTS
      0.0
      3.0
      Petitioner
    
    
      SOTOMAYOR
      0.0
      6.0
      Petitioner
    
    
      THOMAS
      0.0
      0.0
      Respondent



In [72]:

    
def get_petitioner_votes(row):
    '''Helper function for apply.'''
    vc = row.value_counts()
    try:
        petitioner_count = vc['Petitioner']
    except KeyError:
        petitioner_count = 0
    return petitioner_count


def get_respondent_votes(row):
    '''Helper function for apply.'''
    vc = row.value_counts()
    try:
        respondent_count = vc['Respondent']
    except KeyError:
        respondent_count = 0
    return respondent_count


def process_votes(votes):
    '''Apply function for results.'''
    # Get rid of superfluous columns
    result = votes[['VOTE']]
    # Turn into dataframe.
    result = result.unstack()
    # Get rid of superfluous multiindex
    result.columns = result.columns.droplevel(0)
    # Add winner and loser counts.
    result['PET_VOTES'] = result.apply(get_petitioner_votes, axis=1)
    result['RES_VOTES'] = result.apply(get_respondent_votes, axis=1)
    # Arbitrary
    result['VICTOR'] = result['PET_VOTES'] > result['RES_VOTES']
    result['VICTOR'] = result['VICTOR'].map({True: 'Petitioner',
                                             False: 'Respondent'})
    return result


result = process_votes(votes)

try:
    result = result.drop('15-1112')
except Exception:
    pass

result









    Out[72]:






  
    
      
      ALITO
      BREYER
      GINSBURG
      KAGAN
      KENNEDY
      ROBERTS
      SOTOMAYOR
      THOMAS
      PET_VOTES
      RES_VOTES
      VICTOR
    
  
  
    
      14-1055
      Petitioner
      Respondent
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Respondent
      6
      2
      Petitioner
    
    
      14-1538
      Respondent
      Petitioner
      Petitioner
      Respondent
      Respondent
      Petitioner
      Respondent
      Petitioner
      4
      4
      Respondent
    
    
      14-9496
      Petitioner
      Petitioner
      Respondent
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      7
      1
      Petitioner
    
    
      15-1031
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Respondent
      Respondent
      Petitioner
      6
      2
      Petitioner
    
    
      15-1111
      Respondent
      Respondent
      Petitioner
      Respondent
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      5
      3
      Petitioner
    
    
      15-118
      Respondent
      Petitioner
      Petitioner
      Respondent
      Petitioner
      Respondent
      Petitioner
      Petitioner
      5
      3
      Petitioner
    
    
      15-1189
      Respondent
      Petitioner
      Respondent
      Respondent
      Petitioner
      Petitioner
      Respondent
      Petitioner
      4
      4
      Respondent
    
    
      15-1191
      Respondent
      Respondent
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Respondent
      5
      3
      Petitioner
    
    
      15-1194
      Petitioner
      Petitioner
      Respondent
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      7
      1
      Petitioner
    
    
      15-1204
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      8
      0
      Petitioner
    
    
      15-1248
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      8
      0
      Petitioner
    
    
      15-1251
      Respondent
      Respondent
      Petitioner
      Petitioner
      Petitioner
      Respondent
      Petitioner
      Respondent
      4
      4
      Respondent
    
    
      15-1262
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Respondent
      Petitioner
      7
      1
      Petitioner
    
    
      15-1293
      Petitioner
      Petitioner
      Petitioner
      Respondent
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      7
      1
      Petitioner
    
    
      15-1358
      Petitioner
      Respondent
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Respondent
      Respondent
      5
      3
      Petitioner
    
    
      15-1391
      Respondent
      Respondent
      Petitioner
      Respondent
      Respondent
      Respondent
      Petitioner
      Respondent
      2
      6
      Respondent
    
    
      15-1406
      Petitioner
      Respondent
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Respondent
      6
      2
      Petitioner
    
    
      15-1498
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Respondent
      Petitioner
      Respondent
      Petitioner
      6
      2
      Petitioner
    
    
      15-1500
      Petitioner
      Petitioner
      Respondent
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      7
      1
      Petitioner
    
    
      15-214
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      8
      0
      Petitioner
    
    
      15-423
      Petitioner
      Petitioner
      Respondent
      Petitioner
      Respondent
      Petitioner
      Petitioner
      Petitioner
      6
      2
      Petitioner
    
    
      15-497
      Petitioner
      Petitioner
      Respondent
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      7
      1
      Petitioner
    
    
      15-513
      Respondent
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Respondent
      Respondent
      Petitioner
      5
      3
      Petitioner
    
    
      15-537
      Petitioner
      Petitioner
      Respondent
      Petitioner
      Petitioner
      Petitioner
      Respondent
      Respondent
      5
      3
      Petitioner
    
    
      15-5991
      Petitioner
      Respondent
      Respondent
      Respondent
      Petitioner
      Petitioner
      Respondent
      Respondent
      3
      5
      Respondent
    
    
      15-606
      Petitioner
      Petitioner
      Respondent
      Petitioner
      Petitioner
      Petitioner
      Respondent
      Petitioner
      6
      2
      Petitioner
    
    
      15-628
      Respondent
      Petitioner
      Respondent
      Respondent
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      5
      3
      Petitioner
    
    
      15-649
      Respondent
      Respondent
      Respondent
      Respondent
      Petitioner
      Petitioner
      Petitioner
      Respondent
      3
      5
      Respondent
    
    
      15-680
      Petitioner
      Petitioner
      Respondent
      Petitioner
      Petitioner
      Petitioner
      Respondent
      Petitioner
      6
      2
      Petitioner
    
    
      15-7250
      Respondent
      Petitioner
      Petitioner
      Respondent
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      6
      2
      Petitioner
    
    
      15-777
      Petitioner
      Respondent
      Petitioner
      Respondent
      Petitioner
      Petitioner
      Respondent
      Respondent
      4
      4
      Respondent
    
    
      15-797
      Respondent
      Respondent
      Petitioner
      Respondent
      Petitioner
      Petitioner
      Petitioner
      Respondent
      4
      4
      Respondent
    
    
      15-8049
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Respondent
      Respondent
      6
      2
      Petitioner
    
    
      15-827
      Respondent
      Petitioner
      Respondent
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      6
      2
      Petitioner
    
    
      15-8544
      Respondent
      Respondent
      Petitioner
      Respondent
      Petitioner
      Petitioner
      Petitioner
      Respondent
      4
      4
      Respondent
    
    
      15-866
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      8
      0
      Petitioner
    
    
      15-9260
      Petitioner
      Respondent
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Respondent
      6
      2
      Petitioner
    
    
      15-927
      Respondent
      Respondent
      Petitioner
      Respondent
      Respondent
      Petitioner
      Respondent
      Respondent
      2
      6
      Respondent
    
    
      16-1256
      None
      None
      None
      None
      None
      None
      None
      None
      0
      0
      Respondent
    
    
      16-149
      Petitioner
      Respondent
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Respondent
      Respondent
      5
      3
      Petitioner
    
    
      16-254
      Respondent
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      7
      1
      Petitioner
    
    
      16-32
      Respondent
      Petitioner
      Petitioner
      Petitioner
      Respondent
      Respondent
      Petitioner
      Petitioner
      5
      3
      Petitioner
    
    
      16-348
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      8
      0
      Petitioner
    
    
      16-369
      Petitioner
      Respondent
      Respondent
      Petitioner
      Petitioner
      Respondent
      Respondent
      Respondent
      3
      5
      Respondent
    
    
      16-54
      Respondent
      Petitioner
      Petitioner
      Petitioner
      Petitioner
      Respondent
      Petitioner
      Petitioner
      6
      2
      Petitioner



In [74]:

    
# Write results to file.
result_csv_path = os.path.join(DATA_FOLDER, 'case_results.csv')
result.to_csv(result_csv_path, encoding='utf-8')



In [75]:

    
result['VICTOR'].value_counts()









    Out[75]:





Petitioner    33
Respondent    12
Name: VICTOR, dtype: int64

	CASE	DOCKET	ARGUMENT_YEAR	ARGUMENT_LINK	ARGUMENT_PATH
0	Hudson v. Michigan (Reargued)	04-1360	2005	https://www.supremecourt.gov/oral_arguments/ar...	/home/theo/.scoap/04-1360/argument.pdf
1	Washington v. Recuenco	05-83	2005	https://www.supremecourt.gov/oral_arguments/ar...	/home/theo/.scoap/05-83/argument.pdf
2	Burlington N. & S. F. R. Co. v. White	05-259	2005	https://www.supremecourt.gov/oral_arguments/ar...	/home/theo/.scoap/05-259/argument.pdf

	caseId	docketId	caseIssuesId	voteId	dateDecision	decisionType	usCite	sctCite	ledCite	lexisCite	...	majVotes	minVotes	justice	justiceName	vote	opinion	direction	majority	firstAgreement	secondAgreement
0	1946-001	1946-001-01	1946-001-01-01	1946-001-01-01-01-01	11/18/1946	1	329 U.S. 1	67 S. Ct. 6	91 L. Ed. 3	1946 U.S. LEXIS 1724	...	8	1	86	HHBurton	2.0	1.0	1.0	1.0	NaN	NaN
1	1946-001	1946-001-01	1946-001-01-01	1946-001-01-01-01-02	11/18/1946	1	329 U.S. 1	67 S. Ct. 6	91 L. Ed. 3	1946 U.S. LEXIS 1724	...	8	1	84	RHJackson	1.0	1.0	2.0	2.0	NaN	NaN
2	1946-001	1946-001-01	1946-001-01-01	1946-001-01-01-01-03	11/18/1946	1	329 U.S. 1	67 S. Ct. 6	91 L. Ed. 3	1946 U.S. LEXIS 1724	...	8	1	81	WODouglas	1.0	1.0	2.0	2.0	NaN	NaN

	DOCKET	CASE	PETITIONER_ARGUMENT	PETITIONER_REBUTTAL
0	04-1360	Hudson v. Michigan (Reargued)	{'JUSTICE SCALIA': ['Counsel, what -- what do ...	{'CHIEF JUSTICE ROBERTS': ['You think there --...
1	05-83	Washington v. Recuenco	{'JUSTICE SCALIA': ['Was deadly weapon still a...	{'JUSTICE SCALIA': ['Thank you, counsel. Mr. ...
2	05-259	Burlington N. & S. F. R. Co. v. White	{'JUSTICE SCALIA': ['But has the language and...	{'MR PHILLIPS': ['Thank you, Mr. Chief Justice...

	Justice Votes Againt Majority	Justice Votes With Majority
Petitioner Wins Majority	Petitioner Loses Justice	Petitioner Wins Justice
Petitioner Loses Majority	Petitioner Wins Justice	Petitioner Loses Justice

	CASE	JUSTICE	VOTED_FOR_PETITIONER
9	Washington v. Recuenco	JGRoberts	True
10	Washington v. Recuenco	JPStevens	False
11	Washington v. Recuenco	AScalia	True
12	Washington v. Recuenco	AMKennedy	True
13	Washington v. Recuenco	DHSouter	True
14	Washington v. Recuenco	CThomas	True
15	Washington v. Recuenco	RBGinsburg	False
16	Washington v. Recuenco	SGBreyer	True
17	Washington v. Recuenco	SAAlito	True

	DOCKET	CASE	JUSTICE	PETITIONER_ARGUMENT	PETITIONER_REBUTTAL	VOTED_FOR_PETITIONER
9	05-83	Washington v. Recuenco	ROBERTS	[Is the jury given a\n\ncopy of the information?]	[Although under\n\nAnd those are the\n\n\n\nso...	True
10	05-83	Washington v. Recuenco	STEVENS	[Could you -- could you\n\n\n\nclarify one thi...	[But then we'd have Justice\n\n\n\nScalia's ca...	False
11	05-83	Washington v. Recuenco	SCALIA	[Was deadly weapon still an\n\n\n\nenhancement...	[Thank you, counsel.\n\nMr. Whisman, can I -- ...	True

	JUSTICE	DOCKET	VOTED_FOR_PETITIONER	ARG_TYPE	TEXT	QUESTIONEE_WON
4984	SOTOMAYOR	11-1351	True	PETITIONER_ARGUMENT	Well one of the\n\n\n\nstrongest arguments by ...	True
4969	SCALIA	11-9953	False	PETITIONER_ARGUMENT	Mr Bourke was was it\n\nwithin the control of...	False
7278	ROBERTS	04-10566	False	PETITIONER_REBUTTAL	Where you have a treaty that becomes part of\n...	False

	JUSTICE	DOCKET	VOTED_FOR_PETITIONER	ARG_TYPE	TEXT	QUESTIONEE_WON	NB_PREDICTION	SGD_PREDICTION	RF_PREDICTION
8900	STEVENS	06-666	True	PETITIONER_REBUTTAL	We are\n\nWe are talking about not\n\njust a d...	True	False	False	False
8551	KENNEDY	06-11429	False	PETITIONER_REBUTTAL	It is simply our\n\nWell but there has to be\n...	False	True	True	False
4644	ROBERTS	11-798	True	PETITIONER_ARGUMENT	That matters that\n\n\n\nmatters in your view...	True	True	True	True

	Bottom RF Feature Imp
would have to	0.000326
to make the	0.000343
are talking about	0.000351

		AT_WINNER_COUNT	AT_LOSER_COUNT	AT_WINNER_PERCENT	AT_LOSER_PERCENT
JUSTICE	PHRASE
SOTOMAYOR	why should we	2	10	0.166667	0.833333
	could you tell me what	3	5	0.375000	0.625000
	that there are	2	6	0.250000	0.750000
	are you suggesting	2	7	0.222222	0.777778
	between the two	2	5	0.285714	0.714286

			NB	SGD	RF
15-214	ROBERTS	PETITIONER_ARGUMENT	NaN	NaN	NaN
		RESPONDENT_ARGUMENT	NaN	NaN	NaN
		PETITIONER_REBUTTAL	NaN	NaN	NaN

	ARGUMENT_LINK	ARGUMENT_PATH	ARGUMENT_YEAR	CASE	DOCKET	JUSTICE	PETITIONER_ARGUMENT	PETITIONER_REBUTTAL	PET_ARG_HEADING	PET_ARG_REGEX	PET_REB_HEADING	PET_REB_REGEX	RESPONDENT_ARGUMENT	RES_ARG_HEADING	RES_ARG_REGEX	TEXT
0	https://www.supremecourt.gov/oral_arguments/ar...	/home/theo/.scoap/15-214/argument.pdf	2016	Murr v. Wisconsin	15-214	ROBERTS	[, , , And those would\n\nI thought your\n\n\n...	[]	ORAL ARGUMENT OF JOHN M. GROEN\n\n\n\nON BEHAL...	ORAL ARGUMENT OF JOHN M. GROEN\n\n\n\nON BEHAL...	REBUTTAL ARGUMENT OF JOHN M. GROEN\n\n\n\nON B...	REBUTTAL ARGUMENT OF JOHN M. GROEN\n\n\n\nON B...	{'CHIEF JUSTICE ROBERTS': ['Well, that's -- an...	ORAL ARGUMENT OF RICHARD J. LAZARUS\nON BEHALF...	ORAL ARGUMENT OF RICHARD J. LAZARUS\nON BEHALF...	\n\n\n\n\n\n\n\nCHIEF JUSTICE ROBERTS:\n\nWe w...
1	https://www.supremecourt.gov/oral_arguments/ar...	/home/theo/.scoap/15-1031/argument.pdf	2016	Howell v. Howell	15-1031	ROBERTS	[This -- this is a\n\n\n\npretty basic questio...	[]	ORAL ARGUMENT OF ADAM G. UNIKOWSKY\n\n\n\nON B...	ORAL ARGUMENT OF ADAM G. UNIKOWSKY\n\n\n\nON B...	REBUTTAL ARGUMENT OF ADAM G. UNIKOWSKI\n\n\n\n...	REBUTTAL ARGUMENT OF ADAM G. UNIKOWSKI\n\n\n\n...	{'CHIEF JUSTICE ROBERTS': ['', 'I think -What ...	ORAL ARGUMENT OF CHARLES W. WIRKEN\n\n\n\nON B...	ORAL ARGUMENT OF CHARLES W. WIRKEN\n\n\n\nON B...	\n\n\n\n\n\n\n\nCHIEF JUSTICE ROBERTS:\n\nWe'l...
2	https://www.supremecourt.gov/oral_arguments/ar...	/home/theo/.scoap/15-1189/argument.pdf	2016	Impressions Products, Inc. v. Lexmark Int'l, Inc.	15-1189	ROBERTS	[I'm sorry to\n\n\n\ninterrupt you, but that l...	[]	ORAL ARGUMENT OF ANDREW J. PINCUS\n\n\n\nON BE...	ORAL ARGUMENT OF ANDREW J. PINCUS\n\n\n\nON BE...	REBUTTAL ARGUMENT OF ANDREW J. PINCUS\n\n\n\nO...	REBUTTAL ARGUMENT OF ANDREW J. PINCUS\n\n\n\nO...	{'CHIEF JUSTICE ROBERTS': ['Why -- why is norm...	ORAL ARGUMENT OF CONSTANTINE L. TRELA, JR.\n\n...	ORAL ARGUMENT OF CONSTANTINE L. TRELA, JR.\n\n...	\n\n\n\n\n\n\n\nCHIEF JUSTICE ROBERTS:\n\nWe'l...

		RESPONDENT_VOTES	PETITIONER_VOTES	VOTE
14-1055	ALITO	0.0	3.0	Petitioner
	BREYER	2.0	1.0	Respondent
	GINSBURG	0.0	3.0	Petitioner
	KAGAN	0.0	3.0	Petitioner
	KENNEDY	0.0	0.0	None
	ROBERTS	0.0	3.0	Petitioner
	SOTOMAYOR	0.0	6.0	Petitioner
	THOMAS	0.0	0.0	None