Getting the Judges


In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib

plt.style.use('ggplot')
import dateutil.parser
import re
import time

In [2]:
# Listing page whose judge entries end up in `judges_IV`.
url = "http://www.bvger.ch/gericht/richter/00563/00580/index.html?lang=de"
# A timeout keeps a full re-run from hanging forever on an unresponsive server.
response = requests.get(url, timeout=30)
# Stop on a 4xx/5xx answer instead of silently parsing an error page.
response.raise_for_status()
response.encoding = 'utf-8'  # Fix encoding: decode the body as UTF-8
judges_IV_soup = BeautifulSoup(response.text, 'html.parser')
# Collect every <div class="contentFlex flexUnterseite">; extract_judges() reads
# the first <a> of each of these elements.
judges_IV = judges_IV_soup.find_all('div', {'class': 'contentFlex flexUnterseite'})

In [3]:
# Ordered (needle, party) substitutions applied to the last three characters of
# a judge's profile text. Order matters: every replacement runs on the result of
# the previous one.
_PARTEI_REPLACEMENTS = (
    ('los', 'parteilos'),
    ('ux.', 'FDP'),
    ('PLR', 'FDP'),
    ('que', 'parteilos'),
    ('üne', 'Grüne'),
    ('UDC', 'SVP'),
    # NOTE(review): 'DC.' would also match the tail of 'PDC.' — confirm that
    # mapping it to SVP is intended.
    ('DC.', 'SVP'),
    ('rts', 'Grüne'),
    ('07.', 'parteilos'),
    ('ale', 'GLP'),
    ('PS', 'SP'),
    ('VP.', 'SVP'),
)


def _normalize_partei(tail):
    """Translate the trailing characters of a profile text into a party label."""
    for needle, label in _PARTEI_REPLACEMENTS:
        tail = tail.replace(needle, label)
    return tail.strip()


def extract_judges(html_elements, base_url='http://www.bvger.ch', timeout=30):
    """Build one record per judge from a list of listing-page HTML elements.

    For every element the first ``<a>`` supplies the judge's name (any
    parenthesised part is removed) and the relative link to the profile page.
    The profile page is downloaded and the last three characters of the text
    of its ``<div class="webText flexTinymceDiv">`` are mapped to a party label.

    Parameters
    ----------
    html_elements : iterable
        Parsed elements (e.g. the result of ``find_all``), each containing an
        ``<a>`` with the judge's name and an ``href`` to the profile page.
    base_url : str, optional
        Prefix put in front of each ``href`` to form the profile URL.
    timeout : float, optional
        Seconds to wait for each profile request before giving up.

    Returns
    -------
    list of dict
        One ``{'Name': ..., 'Partei': ...}`` dict per element, in input order.

    Raises
    ------
    requests.HTTPError
        If a profile page answers with a 4xx/5xx status.
    AttributeError
        If an element has no ``<a>`` or a profile page lacks the expected div.
    """
    judges = []
    for element in html_elements:
        link = element.find('a')

        # Name: drop a parenthesised suffix, then surrounding whitespace.
        name = re.sub(r'\(.+\)', '', link.text).strip()

        # Profile page: the href is relative to the site root.
        profile_url = base_url + link.get('href')
        response = requests.get(profile_url, timeout=timeout)
        response.raise_for_status()
        response.encoding = 'utf-8'
        profile = BeautifulSoup(response.text, 'html.parser')

        # Only the last three characters of the profile text are inspected.
        profile_text = profile.find('div', {'class': 'webText flexTinymceDiv'}).text

        judges.append({'Name': name,
                       'Partei': _normalize_partei(profile_text[-3:])})
    return judges

In [4]:
# Build the list of {'Name', 'Partei'} records for the elements in `judges_IV`;
# this performs one HTTP request per element.
judges_IV_list = extract_judges(judges_IV)

In [5]:
# Listing page whose judge entries end up in `judges_V`.
url = "http://www.bvger.ch/gericht/richter/00563/00581/index.html?lang=de"
# A timeout keeps a full re-run from hanging forever on an unresponsive server.
response = requests.get(url, timeout=30)
# Stop on a 4xx/5xx answer instead of silently parsing an error page.
response.raise_for_status()
response.encoding = 'utf-8'  # decode the body as UTF-8
judges_V_soup = BeautifulSoup(response.text, 'html.parser')
# Collect every <div class="contentFlex flexUnterseite">; extract_judges() reads
# the first <a> of each of these elements.
judges_V = judges_V_soup.find_all('div', {'class': 'contentFlex flexUnterseite'})

In [6]:
# Build the list of {'Name', 'Partei'} records for the elements in `judges_V`;
# this performs one HTTP request per element.
judges_V_list = extract_judges(judges_V)

In [7]:
# Single list holding the records of both pages, `judges_IV_list` first.
judges_list = [*judges_IV_list, *judges_V_list]

In [8]:
# Turn the records into a table. The columns are named explicitly so that their
# order is fixed (a later cell renames columns by position) and so that an empty
# scrape still yields a frame with 'Name' and 'Partei' columns.
judges_list = pd.DataFrame(judges_list, columns=['Name', 'Partei'])

Splitting the names into last name and first name so the table can be merged later on


In [9]:
def _split_name(full_name):
    """Split a '<last name> <first name>' string into (first_name, last_name)."""
    tokens = full_name.split(' ')
    # Special case for David R. Wenger: exactly three tokens ending in 'R.'
    # keep the initial together with the first name.
    if len(tokens) == 3 and tokens[2] == 'R.':
        return ' '.join(tokens[1:]), tokens[0]
    # Normal case: the final token is the first name, everything before it is
    # the last name.
    # NOTE(review): a name with two first names, e.g. 'Thurnheer Simon Mathias',
    # is split as last name 'Thurnheer Simon' / first name 'Mathias' — confirm
    # that this is what the later merge expects.
    return tokens[-1], ' '.join(tokens[:-1])


_name_pairs = [_split_name(entry) for entry in judges_list['Name']]
first_names = [pair[0] for pair in _name_pairs]
last_names = [pair[1] for pair in _name_pairs]

In [10]:
# Wrap each list in its own single-column DataFrame.
vorname, nachname = (pd.DataFrame(values) for values in (first_names, last_names))

In [11]:
# Put the scraped table and the two name columns side by side.
df = pd.concat([judges_list, nachname, vorname], axis=1)
# Flat list of labels: a nested list ([[...]]) would be interpreted as a
# MultiIndex, after which df['Vorname'] etc. no longer return plain Series.
df.columns = ['Name', 'Partei', 'Nachname', 'Vorname']

In [12]:
# Full name built as '<Vorname> <Nachname>' (first name first).
# NOTE(review): the column label says 'Nachname Vorname' but the values are in
# the opposite order — confirm which form the later merge expects before
# changing either the label or the expression.
df['Nachname Vorname'] = df['Vorname'] + ' ' + df['Nachname']

In [13]:
# Write the table to richter_partei.csv in the working directory;
# index=False leaves the row index out of the file.
df.to_csv('richter_partei.csv', index=False)

In [14]:
# Show the complete judges table as the cell output.
df


Out[14]:
Name Partei Nachname Vorname Nachname Vorname
0 Spälti Giannakitsas Nina SP Spälti Giannakitsas Nina Nina Spälti Giannakitsas
1 Bovier Gérald SVP Bovier Gérald Gérald Bovier
2 Brüschweiler Daniela BDP Brüschweiler Daniela Daniela Brüschweiler
3 Cattaneo Daniele FDP Cattaneo Daniele Daniele Cattaneo
4 Cotting-Schalch Claudia FDP Cotting-Schalch Claudia Claudia Cotting-Schalch
5 Felley Yanick SVP Felley Yanick Yanick Felley
6 Scherrer Gérard parteilos Scherrer Gérard Gérard Scherrer
7 Lang Walter parteilos Lang Walter Walter Lang
8 Schürch Hans FDP Schürch Hans Hans Schürch
9 Tellenbach Bendicht SP Tellenbach Bendicht Bendicht Tellenbach
10 Theis Contessina Grüne Theis Contessina Contessina Theis
11 Thurnheer Simon Mathias SVP Thurnheer Simon Mathias Mathias Thurnheer Simon
12 Wespi Thomas CVP Wespi Thomas Thomas Wespi
13 Luterbacher Christa SP Luterbacher Christa Christa Luterbacher
14 Antonioni Luftensteiner Emilia Grüne Antonioni Luftensteiner Emilia Emilia Antonioni Luftensteiner
15 Badoud François parteilos Badoud François François Badoud
16 Balmelli-Mühlematter Barbara GLP Balmelli-Mühlematter Barbara Barbara Balmelli-Mühlematter
17 Beck Kadima Muriel Grüne Beck Kadima Muriel Muriel Beck Kadima
18 Cossy Sylvie Grüne Cossy Sylvie Sylvie Cossy
19 Freihofer Gabriela SVP Freihofer Gabriela Gabriela Freihofer
20 Marti Esther GLP Marti Esther Esther Marti
21 König Markus SP König Markus Markus König
22 Monnet Jean-Pierre parteilos Monnet Jean-Pierre Jean-Pierre Monnet
23 Schenker Senn Regula SP Schenker Senn Regula Regula Schenker Senn
24 Waeber William SP Waeber William William Waeber
25 Wenger David R. SVP Wenger David R. David R. Wenger
26 Willisegger Daniel SVP Willisegger Daniel Daniel Willisegger

In [15]:
# Number of judges per party. The count column is named 'Partei' and the index
# axis 'index' explicitly: pandas >= 2.0 would otherwise call them 'count' and
# 'Partei', and the following cells expect the counts in a 'Partei' column.
df_partei_count = (
    df['Partei']
    .value_counts()
    .rename('Partei')
    .rename_axis('index')
    .to_frame()
)

In [16]:
# Move the index into a regular column and replace it with a fresh 0..n-1 index.
# NOTE(review): this cell reassigns its own input, so running it a second time
# calls reset_index() on the already-reset frame — run it exactly once.
df_partei_count = df_partei_count.reset_index()

In [17]:
# Sum of the 'Partei' column (the per-party counts); int() keeps `total` a
# plain Python integer.
total = int(df_partei_count['Partei'].sum())

In [18]:
def per(x):
    """Return `x` expressed as a percentage of the module-level `total`."""
    return x / total * 100

In [19]:
# Percentage of `total` for each count in 'Partei', rounded to whole numbers.
df_partei_count['percentage'] = df_partei_count['Partei'].apply(per).round()

In [20]:
# Show the per-party counts and percentages as the cell output.
df_partei_count


Out[20]:
index Partei percentage
0 SP 6 22.0
1 SVP 6 22.0
2 Grüne 4 15.0
3 parteilos 4 15.0
4 FDP 3 11.0
5 GLP 2 7.0
6 CVP 1 4.0
7 BDP 1 4.0

In [ ]: