The Polish entry for Eurovision this year apparently divided the jury voters and the televoters. In case you've forgotten the entry, it's available on YouTube:
In [1]:
# Embed the YouTube video of the entry (video id 'VJ920cN2HmA') in the cell output.
from IPython.display import YouTubeVideo
YouTubeVideo('VJ920cN2HmA')
Out[1]:
But which other entries would have fared better or worse had only jury voting or only televoting been allowed? Let's use some Python to find out.
In [2]:
from urllib.request import urlopen
import re
import json
from bs4 import BeautifulSoup
import pandas
import numpy as np
In [3]:
# Base URL for the Eurovision grand final voting page. One needs to append '&voter=<CC>'
# to the end, where <CC> is the two-letter country code associated with the voting country.
BASE_URL='http://www.eurovision.tv/page/results?event=1893'
In [4]:
# Download the results landing page and parse it. The HTTP response is read inside
# a `with` block so the connection is closed as soon as the body has been fetched.
# NOTE(review): no parser is named, so Beautiful Soup picks one from those
# installed -- consider pinning it for reproducible parsing.
with urlopen(BASE_URL) as response:
    soup = BeautifulSoup(response.read())
In [5]:
# We know that the URL has 'voter=...' appended and so the name of the voting country
# <select> must be 'voter'.
voter_select = soup.find('select', attrs={'name': 'voter'})
# Check we found it: the country-code lookup built next reads its <option> elements,
# so fail here rather than later if the element is missing.
assert voter_select is not None
In [6]:
# Build a dictionary of country name -> voter code. Valid options have a "value"
# attribute which is exactly two capital letters. Beautiful Soup applies a compiled
# pattern with search(), so the pattern is anchored at both ends; otherwise any
# value that merely contains two adjacent capitals would be accepted as well.
country_codes = {
    opt.text: opt.attrs['value']
    for opt in voter_select.find_all('option', attrs={'value': re.compile(r'^[A-Z]{2}$')})
}
In [7]:
# Build a dictionary mapping country codes to the soup representing that country's
# voting page. Each page is fetched inside a `with` block so that its HTTP response
# is closed once read, instead of leaving one open connection per country.
# NOTE(review): no parser is named, so Beautiful Soup picks one from those
# installed -- consider pinning it for reproducible parsing.
country_vote_page_soups = {}
for code in country_codes.values():
    with urlopen(BASE_URL + '&voter=' + code) as response:
        country_vote_page_soups[code] = BeautifulSoup(response.read())
In [8]:
# How many <table> elements are in the soup? The UK ('GB') voting page is used as a sample.
len(country_vote_page_soups['GB'].find_all('table'))
Out[8]:
In [9]:
# This must be the one we want. For every country keep only the first <table>
# element found in its voting page.
country_tables = {
    code: page_soup.find('table')
    for code, page_soup in country_vote_page_soups.items()
}
In [10]:
# Let's take a look at what some rows in this table look like. Skip the first 40
# lines of the UK table's source and show the 20 lines that follow.
str(country_tables['GB']).split('\n')[40:60]
Out[10]:
In [11]:
# That's convenient. Each row which has a <td> titled "Jury aggr" or "Tele aggr" is one of interest
def interesting_rows(table_soup):
    """Return the result rows of a table's soup as a list.

    A <tr> element is kept when it contains at least one <td> whose title
    attribute matches "(Jury|Tele) aggr". Rows are returned in the order in
    which table_soup.find_all('tr') yields them.
    """
    aggr_title = re.compile('(Jury|Tele) aggr')
    selected = []
    for candidate in table_soup.find_all('tr'):
        # A non-empty result set means the row has an aggregate-rank cell.
        if candidate.find_all('td', attrs={'title': aggr_title}):
            selected.append(candidate)
    return selected
# Pick out the result rows of every country's table, keyed by country code.
country_result_rows = {
    code: interesting_rows(table)
    for code, table in country_tables.items()
}
In [12]:
# Let's look at the first two of those rows (from the UK page) in more detail:
country_result_rows['GB'][:2]
Out[12]:
In [13]:
# For a particular country, we'll create a Pandas DataFrame object with the country code as
# the index and the jury, televoting and combined ranks as columns
def result_rows_to_dataframe(rows):
    """Combine a list of result <tr> elements into a single Pandas DataFrame.

    Parameters:
        rows: the <tr> elements to read. Each must contain a <td> with CSS class
            "country"; cells titled "Jury aggr", "Tele aggr" and "Combined rnk"
            are optional.

    Returns:
        A DataFrame indexed by country code with columns 'jury', 'tele' and
        'combined'. A cell which is absent from a row yields NaN. When the
        televote column is entirely missing the jury ranks are used as the
        combined ranks, and vice versa.

    Raises:
        KeyError: if a row's country name is not a key of the module-level
            ``country_codes`` dictionary.
    """
    # The row indices are the country codes. We get these from the <td> elements with CSS class
    # "country". The text is stripped of leading and trailing spaces as there is a space between
    # the flag and the country name.
    indices = [
        country_codes[row.find('td', class_='country').text.strip()]
        for row in rows
    ]

    # We can use much the same method to generate pandas series for the data columns. Some rows
    # won't have a particular entry since some countries only use jury voting.
    def make_series(title):
        """Return the integer values of the cells titled `title`, NaN where a row lacks one."""
        data = []
        for row in rows:
            result_td = row.find('td', title=title)
            data.append(np.nan if result_td is None else int(result_td.text))
        return pandas.Series(data=data, index=indices)

    jury_aggr = make_series('Jury aggr')
    tele_aggr = make_series('Tele aggr')
    combined = make_series('Combined rnk')

    # Series.isnull() is used rather than np.isnan(series.data): the Series.data
    # attribute no longer exists in current pandas, and isnull() also copes with
    # an empty list of rows.
    if tele_aggr.isnull().all():
        # If the tele votes are all NaNs, use the jury vote alone for combined
        combined = jury_aggr
    elif jury_aggr.isnull().all():
        # If the jury votes are all NaNs, use the tele vote alone for combined
        combined = tele_aggr

    return pandas.DataFrame({'jury': jury_aggr, 'tele': tele_aggr, 'combined': combined})
In [14]:
# Sanity-check the function on the UK voting: show its first 10 rows
result_rows_to_dataframe(country_result_rows['GB']).head(10)
Out[14]:
In [15]:
# Sanity-check the function on the Albanian (no-televote) voting: show its first 10 rows
result_rows_to_dataframe(country_result_rows['AL']).head(10)
Out[15]:
In [16]:
# Sanity-check the function on the Georgian (no-jury) voting: show its first 10 rows
result_rows_to_dataframe(country_result_rows['GE']).head(10)
Out[16]:
In [17]:
# Turn every country's result rows into a raw rank table, keyed by country code
country_rankings = {
    code: result_rows_to_dataframe(result_rows)
    for code, result_rows in country_result_rows.items()
}
In [18]:
# We can now write a function to convert ranking into points. The top 10 ranked countries
# are awarded 12, 10, 8, 7, 6, 5, 4, 3, 2, 1 points in descending order of rank
def rank_frame_to_points_frame(frame, points=(12, 10, 8, 7, 6, 5, 4, 3, 2, 1)):
    """Convert a frame of ranks into a frame of awarded points.

    Parameters:
        frame: DataFrame whose columns each hold numeric ranks (1 is best);
            NaN marks an entry which was not ranked.
        points: the points handed to the best-ranked entries, best first.
            Defaults to the Eurovision scale 12, 10, 8, 7, 6, 5, 4, 3, 2, 1.

    Returns:
        A DataFrame with the same index and column names as `frame`. In each
        column the best-ranked entries receive `points` in order and every
        other entry receives 0. Entries whose rank is NaN never receive
        points, so a column which is entirely NaN yields all zeros. A column
        with fewer ranked entries than there are points uses only the leading
        points.
    """
    new_frame_data = {}
    # DataFrame.items() replaces iteritems(), which current pandas no longer has.
    for col_name, series in frame.items():
        # Work on the underlying array; the Series.data attribute this used to
        # read no longer exists in current pandas.
        ranks = series.values
        series_points = np.zeros_like(ranks)

        # argsort places NaN after every number, so the first `n_ranked`
        # positions are exactly the ranked entries in ascending rank order.
        n_ranked = int(np.count_nonzero(~pandas.isnull(ranks)))
        best = np.argsort(ranks)[:min(n_ranked, len(points))]

        # Assign points, trimming the scale when fewer entries are ranked
        if len(best) > 0:
            series_points[best] = points[:len(best)]

        # Record new series
        new_frame_data[col_name] = pandas.Series(data=series_points, index=series.index)
    return pandas.DataFrame(new_frame_data)
In [19]:
# Again, try it on the UK: points for its first 10 rows
rank_frame_to_points_frame(country_rankings['GB']).head(10)
Out[19]:
In [20]:
# Again, try it on Albania: points for its first 10 rows
rank_frame_to_points_frame(country_rankings['AL']).head(10)
Out[20]:
In [21]:
# Again, try it on Georgia: points for its first 10 rows
rank_frame_to_points_frame(country_rankings['GE']).head(10)
Out[21]:
In [22]:
# Convert every country's rank table into the points that country awards
country_awarded_points = {
    code: rank_frame_to_points_frame(rank_frame)
    for code, rank_frame in country_rankings.items()
}
In [23]:
# We're now in a position to compute a total points table.
# Begin with a table of zeros holding one row per country code.
n_countries = len(country_codes)
total_points = pandas.DataFrame({
    'jury': np.zeros(n_countries),
    'tele': np.zeros(n_countries),
    'combined': np.zeros(n_countries),
}, index=country_codes.values())

# Accumulate the points awarded by each voting country in turn.
for code, awarded_points in country_awarded_points.items():
    # A country does not vote for itself, so its own row is absent from the
    # table of points it awards. Re-index that table to the shape of the
    # totals and count the missing values as 0 before adding.
    aligned_points = awarded_points.reindex_like(total_points).fillna(0)
    total_points = total_points + aligned_points

# Finally put the country name in the first column so that we don't have to
# read Eurovision country codes. The series is indexed by code so that each
# name lines up with the right row.
names = list(country_codes)
names_series = pandas.Series(names, index=[country_codes[name] for name in names])
total_points.insert(0, 'name', names_series)
In [24]:
# So now let's check that we match the official winner if we look at combined scores.
# sort_values(by=...) is used because DataFrame.sort(columns=...) no longer exists in pandas.
total_points.sort_values(by='combined', ascending=False)[:10]
Out[24]:
In [25]:
# Cool, that matches. How about jury alone?
# sort_values(by=...) is used because DataFrame.sort(columns=...) no longer exists in pandas.
total_points.sort_values(by='jury', ascending=False)
Out[25]:
In [26]:
# How about televoting alone?
# sort_values(by=...) is used because DataFrame.sort(columns=...) no longer exists in pandas.
total_points.sort_values(by='tele', ascending=False)
Out[26]:
In [27]:
# Save the result to an HTML file. The file is written as UTF-8 and the page declares
# that charset, so the output does not depend on the platform's default encoding.
with open('dist/index.html', 'w', encoding='utf-8') as html:
    # Page header and introduction
    html.write('''
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Eurovision 2014 Jury- and Tele-voting</title>
<link rel="stylesheet" href="//netdna.bootstrapcdn.com/bootstrap/3.1.1/css/bootstrap.min.css">
<link rel="stylesheet" href="//netdna.bootstrapcdn.com/bootstrap/3.1.1/css/bootstrap-theme.min.css">
<link rel="stylesheet" href="vendor/css/bootstrap-sortable.css">
</head>
<body>
<div class="container">
<h1 class="page-header">
Eurovision 2014: Does tactical voting help?
</h1>
<p class="lead">
Does the imposition of jury voting affect the Eurovision result? Below is a table of scores for
Eurovision 2014 entries assuming points had been awarded for combined jury/televote score,
jury score alone and televote score alone.
</p>
<p>
Click on table column names to sort by that column.
</p>
''')
    # The points table, sorted by country name in descending order. sort_values(by=...)
    # is used because DataFrame.sort(columns=...) no longer exists in pandas. The
    # border attribute emitted by to_html() is dropped from the markup.
    table_html = (
        total_points
        .sort_values(by='name', ascending=False)
        .to_html(index=False, classes=['table', 'sortable'])
        .replace('border="1" ', '')
    )
    html.write(table_html)
    # Credits and the scripts which make the table sortable
    html.write('''
<p>
Assembled by <a href="https://richwareham.com/gplus">Rich Wareham</a>. Data scraped from
<a href="http://www.eurovision.tv/">eurovision.tv</a> via
<a href="https://github.com/rjw57/eurovision-2014/">an IPython notebook</a>.
</p>
</div>
<script src="//code.jquery.com/jquery-1.11.0.min.js"></script>
<script src="//netdna.bootstrapcdn.com/bootstrap/3.1.1/js/bootstrap.min.js"></script>
<script src="vendor/js/bootstrap-sortable.js"></script>
</body>
</html>
''')
In [27]: