Does televoting make a difference in Eurovision?

The Polish entry for Eurovision this year apparently divided the jury voters and the televoters. In case you've forgotten the entry, it's available on YouTube:


In [1]:
# Display the YouTube video with ID 'VJ920cN2HmA' inline in the notebook.
from IPython.display import YouTubeVideo
YouTubeVideo('VJ920cN2HmA')


Out[1]:

But which other entries would have fared better or worse had only jury voting or only televoting been allowed? Let's use some Python to find out.


In [2]:
from urllib.request import urlopen
import re
import json

from bs4 import BeautifulSoup
import pandas
import numpy as np

In [3]:
# Results page for the Eurovision grand final. To fetch the votes cast by a
# single country, append '&voter=<CC>' where <CC> is the two letter country
# code associated with the voting country.
BASE_URL = 'http://www.eurovision.tv/page/results?event=1893'

In [4]:
# Download and parse the results landing page. The HTTP response is closed as
# soon as it has been read, and the parser is named explicitly so that the
# resulting tree does not depend on which optional parsers are installed.
with urlopen(BASE_URL) as response:
    soup = BeautifulSoup(response.read(), 'html.parser')

In [5]:
# We know that the URL has 'voter=...' appended and so the name of the voting country
# <select> must be 'voter'.
voter_select = soup.find('select', attrs={'name': 'voter'})

# Check we found it, and say what went wrong if the page layout is not as expected.
assert voter_select is not None, "no <select name='voter'> element found on the results page"

In [6]:
# Build a dictionary of country name -> voter code. Valid options have a "value"
# attribute which is exactly two capital letters. The pattern is anchored because
# an unanchored one would also accept any longer value that merely contains two
# consecutive capitals. Names are stripped of surrounding whitespace so that they
# agree with the stripped cell text used to look them up later.
country_codes = {
    opt.text.strip(): opt.attrs['value']
    for opt in voter_select.find_all('option', attrs={'value': re.compile(r'^[A-Z]{2}$')})
}

In [7]:
# Build a dictionary mapping country codes to the soup representing that country's
# voting page. Each response is read and closed before moving on to the next
# country, and the parser is named explicitly so that every page is parsed the
# same way regardless of which optional parsers are installed.
country_vote_page_soups = {}
for code in country_codes.values():
    with urlopen(BASE_URL + '&voter=' + code) as response:
        country_vote_page_soups[code] = BeautifulSoup(response.read(), 'html.parser')

In [8]:
# How many <table> elements are in the soup of the UK ('GB') voting page?
len(country_vote_page_soups['GB'].find_all('table'))


Out[8]:
1

In [9]:
# The UK page has exactly one <table>, so that must be the one we want. Keep
# the first <table> element found in each country's soup.
country_tables = {
    code: page_soup.find('table')
    for code, page_soup in country_vote_page_soups.items()
}

In [10]:
# Let's take a look at what some rows in this table look like. Split the UK
# table's markup into lines and show the ones at (0-based) positions 40 to 59.
str(country_tables['GB']).split('\n')[40:60]


Out[10]:
['<tbody>',
 '<tr>',
 '<td class="country"><img alt="Flag" src="/img/upload/flags/medium/Ukraine.png"/> Ukraine</td>',
 '<td class="result">22</td><td class="result">16</td><td class="result">20</td><td class="result">18</td><td class="result">7</td>',
 '<td class="result" title="Jury aggr">18</td> <td class="space">\xa0</td>',
 '<td class="result" title="Tele aggr">12</td> <td class="space">\xa0</td>',
 '<td class="result" title="Combined rnk">16</td> <td class="result" title="Combined points"></td>',
 '</tr><tr>',
 '<td class="country"><img alt="Flag" src="/img/upload/flags/medium/Belarus.png"/> Belarus</td>',
 '<td class="result">17</td><td class="result">7</td><td class="result">17</td><td class="result">25</td><td class="result">19</td>',
 '<td class="result" title="Jury aggr">19</td> <td class="space">\xa0</td>',
 '<td class="result" title="Tele aggr">19</td> <td class="space">\xa0</td>',
 '<td class="result" title="Combined rnk">21</td> <td class="result" title="Combined points"></td>',
 '</tr><tr>',
 '<td class="country"><img alt="Flag" src="/img/upload/flags/medium/Azerbaijan.png"/> Azerbaijan</td>',
 '<td class="result">5</td><td class="result">4</td><td class="result">25</td><td class="result">13</td><td class="result">6</td>',
 '<td class="result" title="Jury aggr">8</td> <td class="space">\xa0</td>',
 '<td class="result" title="Tele aggr">24</td> <td class="space">\xa0</td>',
 '<td class="result" title="Combined rnk">18</td> <td class="result" title="Combined points"></td>',
 '</tr><tr>']

In [11]:
# That's convenient. Each row which has a <td> with title "Jury aggr" or "Tele aggr" is one of interest
def interesting_rows(table_soup):
    """Return the result rows of a voting table as a list.

    A <tr> element of the table's soup is kept when at least one of its <td>
    cells has a title attribute matching "(Jury|Tele) aggr".

    """
    aggregate_title = re.compile('(Jury|Tele) aggr')

    selected = []
    for candidate in table_soup.find_all('tr'):
        aggregate_cells = candidate.find_all('td', attrs={'title': aggregate_title})
        if aggregate_cells:
            selected.append(candidate)
    return selected

# For every voting country, keep only the result rows of its table.
country_result_rows = {
    code: interesting_rows(table)
    for code, table in country_tables.items()
}

In [12]:
# Let's look at the first two of those rows (from the UK page) in more detail:
country_result_rows['GB'][:2]


Out[12]:
[<tr>
 <td class="country"><img alt="Flag" src="/img/upload/flags/medium/Ukraine.png"/> Ukraine</td>
 <td class="result">22</td><td class="result">16</td><td class="result">20</td><td class="result">18</td><td class="result">7</td>
 <td class="result" title="Jury aggr">18</td> <td class="space"> </td>
 <td class="result" title="Tele aggr">12</td> <td class="space"> </td>
 <td class="result" title="Combined rnk">16</td> <td class="result" title="Combined points"></td>
 </tr>, <tr>
 <td class="country"><img alt="Flag" src="/img/upload/flags/medium/Belarus.png"/> Belarus</td>
 <td class="result">17</td><td class="result">7</td><td class="result">17</td><td class="result">25</td><td class="result">19</td>
 <td class="result" title="Jury aggr">19</td> <td class="space"> </td>
 <td class="result" title="Tele aggr">19</td> <td class="space"> </td>
 <td class="result" title="Combined rnk">21</td> <td class="result" title="Combined points"></td>
 </tr>]

In [13]:
# For a particular country, we'll create a Pandas DataFrame object with the country code as
# the index and the jury, televoting and combined ranks as columns
def result_rows_to_dataframe(rows, name_to_code=None):
    """Take a list of <tr> elements containing results and combine them into a single
    Pandas DataFrame object.

    Parameters:
        rows: the <tr> elements. Each must contain a <td> with CSS class "country"
            and may contain <td> elements titled "Jury aggr", "Tele aggr" and
            "Combined rnk" whose text is an integer rank.
        name_to_code: optional mapping of country name -> country code used to
            build the index. When omitted, the module-level ``country_codes``
            dictionary is used.

    Returns:
        A DataFrame indexed by country code with columns 'jury', 'tele' and
        'combined'. Ranks which are absent (or empty) in a row are NaN. If every
        tele rank is missing the combined column is the jury ranking, and if
        every jury rank is missing it is the tele ranking.

    Raises:
        KeyError: if a row names a country which is not in the mapping.

    """
    if name_to_code is None:
        name_to_code = country_codes

    # The row indices are the country codes. We get these from the <td> elements with CSS class "country".
    # We need to strip leading and trailing spaces from the textual content as there is a space between
    # the flag and the country name.
    indices = [name_to_code[row.find('td', class_='country').text.strip()] for row in rows]

    # We can use much the same method to generate pandas series for the data columns. Some rows won't have
    # a particular entry since some countries only use jury voting. In those, we set missing data to NaN.
    def make_series(title):
        data = []
        for row in rows:
            result_td = row.find('td', title=title)
            # A missing cell and a cell with no text both mean "no rank"
            text = '' if result_td is None else result_td.text.strip()
            data.append(int(text) if text else np.nan)
        return pandas.Series(data=data, index=indices)

    jury_aggr = make_series('Jury aggr')
    tele_aggr = make_series('Tele aggr')
    combined = make_series('Combined rnk')

    # isnull() is used rather than the raw Series.data buffer, which current
    # pandas releases no longer provide.
    if tele_aggr.isnull().all():
        # If the tele votes are all NaNs, use the jury vote alone for combined
        combined = jury_aggr
    elif jury_aggr.isnull().all():
        # If the jury votes are all NaNs, use the tele vote alone for combined
        combined = tele_aggr

    return pandas.DataFrame({'jury': jury_aggr, 'tele': tele_aggr, 'combined': combined})

In [14]:
# Let's check the function on the UK voting, showing the first 10 rows
result_rows_to_dataframe(country_result_rows['GB']).head(10)


Out[14]:
combined jury tele
UA 16 18 12
BY 21 19 19
AZ 18 8 24
IS 7 15 4
NO 13 11 17
RO 17 22 9
AM 23 24 16
ME 25 21 25
PL 11 25 1
GR 9 14 7

10 rows × 3 columns


In [15]:
# Let's check the function on the Albanian (no-televote) voting, showing the first 10 rows
result_rows_to_dataframe(country_result_rows['AL']).head(10)


Out[15]:
combined jury tele
UA 19 19 NaN
BY 23 23 NaN
AZ 15 15 NaN
IS 24 24 NaN
NO 20 20 NaN
RO 16 16 NaN
AM 13 13 NaN
ME 5 5 NaN
PL 22 22 NaN
GR 9 9 NaN

10 rows × 3 columns


In [16]:
# Let's check the function on the Georgian (no-jury) voting, showing the first 10 rows
result_rows_to_dataframe(country_result_rows['GE']).head(10)


Out[16]:
combined jury tele
UA 5 NaN 5
BY 11 NaN 11
AZ 4 NaN 4
IS 19 NaN 19
NO 20 NaN 20
RO 15 NaN 15
AM 1 NaN 1
ME 25 NaN 25
PL 12 NaN 12
GR 7 NaN 7

10 rows × 3 columns


In [17]:
# Let's generate a raw rank table for each voting country, keyed by country code
country_rankings = {
    code: result_rows_to_dataframe(result_rows)
    for code, result_rows in country_result_rows.items()
}

In [18]:
# We can now write a function to convert ranking into points. The top 10 ranked countries
# are awarded 12, 10, 8, 7, 6, 5, 4, 3, 2, 1 points in descending order of rank

def rank_frame_to_points_frame(frame, points=(12, 10, 8, 7, 6, 5, 4, 3, 2, 1)):
    """Convert a frame of rankings into a frame of awarded points.

    Each column is treated independently: the best ranked (lowest valued) entry
    receives the first value in ``points``, the next best the second value and
    so on. Entries ranked below the end of the scale, and entries whose rank is
    NaN, receive 0 points.

    Parameters:
        frame: DataFrame whose columns hold numeric ranks (NaN where missing).
        points: sequence of points to award, best rank first. Defaults to the
            12, 10, 8, 7, 6, 5, 4, 3, 2, 1 scale.

    Returns:
        A DataFrame with the same index and columns as ``frame`` holding the
        points awarded to each entry. Each column keeps the dtype of the
        corresponding rank column.

    """
    scale = list(points)
    new_frame_data = {}

    # items() is used because current pandas releases no longer provide iteritems()
    for col_name, series in frame.items():
        ranks = series.values.astype(float)
        series_points = np.zeros_like(series.values)

        # Only entries which actually have a rank can be awarded points, and there
        # may be fewer of them than there are values in the scale.
        n_ranked = int(np.count_nonzero(~np.isnan(ranks)))
        n_awarded = min(n_ranked, len(scale))

        if n_awarded > 0:
            # Positions of the ranks in ascending order. NaNs sort to the end, so
            # the first n_awarded positions always refer to real ranks. A stable
            # sort keeps tied ranks in their original row order.
            best_first = np.argsort(ranks, kind='mergesort')
            series_points[best_first[:n_awarded]] = scale[:n_awarded]

        # Record new series
        new_frame_data[col_name] = pandas.Series(data=series_points, index=series.index)

    return pandas.DataFrame(new_frame_data)

In [19]:
# Again, let's test with the UK, showing the first 10 rows
rank_frame_to_points_frame(country_rankings['GB']).head(10)


Out[19]:
combined jury tele
UA 0 0 0
BY 0 0 0
AZ 0 3 0
IS 4 0 7
NO 0 0 0
RO 0 0 2
AM 0 0 0
ME 0 0 0
PL 0 0 12
GR 2 0 4

10 rows × 3 columns


In [20]:
# Again, let's test with Albania, showing the first 10 rows
rank_frame_to_points_frame(country_rankings['AL']).head(10)


Out[20]:
combined jury tele
UA 0 0 0
BY 0 0 0
AZ 0 0 0
IS 0 0 0
NO 0 0 0
RO 0 0 0
AM 0 0 0
ME 6 6 0
PL 0 0 0
GR 2 2 0

10 rows × 3 columns


In [21]:
# Again, let's test with Georgia, showing the first 10 rows
rank_frame_to_points_frame(country_rankings['GE']).head(10)


Out[21]:
combined jury tele
UA 6 0 6
BY 0 0 0
AZ 7 0 7
IS 0 0 0
NO 0 0 0
RO 0 0 0
AM 12 0 12
ME 0 0 0
PL 0 0 0
GR 4 0 4

10 rows × 3 columns


In [22]:
# Let's find the points each voting country awards, keyed by country code
country_awarded_points = {
    code: rank_frame_to_points_frame(rank_frame)
    for code, rank_frame in country_rankings.items()
}

In [23]:
# We're now in a position to compute a total points table

# Start with a points table full of zeros, with one row per country code. The
# column order is spelled out so that the table is laid out the same way no
# matter how the installed pandas orders dictionary keys, and the index is
# given as a real list rather than a dictionary view.
total_points = pandas.DataFrame(
    0.0,
    index=list(country_codes.values()),
    columns=['combined', 'jury', 'tele'],
)

# Add each country's awarded points
for awarded_points in country_awarded_points.values():
    # Here we need to re-index the awarded points table to match the
    # total points table and, since a country does not vote for itself,
    # we fill missing values with 0
    total_points += awarded_points.reindex_like(total_points).fillna(0)

# Now add a column with the country name so that we don't have to read
# Eurovision country codes. The series is indexed by country code so that
# each name lands on the matching row.
names = list(country_codes.keys())
names_series = pandas.Series(names, index=[country_codes[name] for name in names])
total_points.insert(0, 'name', names_series)

In [24]:
# So now let's check that we match the official winner if we look at combined scores.
# sort_values() replaces DataFrame.sort(), which current pandas releases no longer provide.
total_points.sort_values(by='combined', ascending=False).head(10)


Out[24]:
name combined jury tele
AT Austria 290 214 306
NL The Netherlands 238 200 220
SE Sweden 218 199 173
AM Armenia 174 113 187
HU Hungary 143 138 83
UA Ukraine 113 72 112
RU Russia 89 62 132
NO Norway 88 102 39
ES Spain 74 83 29
DK Denmark 74 85 42

10 rows × 4 columns


In [25]:
# Cool, that matches. How about jury alone?
# sort_values() replaces DataFrame.sort(), which current pandas releases no longer provide.
total_points.sort_values(by='jury', ascending=False)


Out[25]:
name combined jury tele
AT Austria 290 214 306
NL The Netherlands 238 200 220
SE Sweden 218 199 173
HU Hungary 143 138 83
MT Malta 32 119 12
FI Finland 72 114 36
AM Armenia 174 113 187
NO Norway 88 102 39
AZ Azerbaijan 33 101 14
DK Denmark 74 85 42
ES Spain 74 83 29
UA Ukraine 113 72 112
RU Russia 89 62 132
IS Iceland 58 59 38
DE Germany 39 56 27
RO Romania 72 51 103
BY Belarus 43 50 56
GB United Kingdom 40 49 24
ME Montenegro 37 48 27
GR Greece 35 45 41
IT Italy 33 37 22
CH Switzerland 64 26 114
PL Poland 62 23 162
SI Slovenia 9 21 15
SM San Marino 14 16 15
FR France 2 5 1
MK F.Y.R. Macedonia 0 0 0
PT Portugal 0 0 0
GE Georgia 0 0 0
IL Israel 0 0 0
IE Ireland 0 0 0
LV Latvia 0 0 0
LT Lithuania 0 0 0
MD Moldova 0 0 0
BE Belgium 0 0 0
AL Albania 0 0 0
EE Estonia 0 0 0

37 rows × 4 columns


In [26]:
# How about televoting alone?
# sort_values() replaces DataFrame.sort(), which current pandas releases no longer provide.
total_points.sort_values(by='tele', ascending=False)


Out[26]:
name combined jury tele
AT Austria 290 214 306
NL The Netherlands 238 200 220
AM Armenia 174 113 187
SE Sweden 218 199 173
PL Poland 62 23 162
RU Russia 89 62 132
CH Switzerland 64 26 114
UA Ukraine 113 72 112
RO Romania 72 51 103
HU Hungary 143 138 83
BY Belarus 43 50 56
DK Denmark 74 85 42
GR Greece 35 45 41
NO Norway 88 102 39
IS Iceland 58 59 38
FI Finland 72 114 36
ES Spain 74 83 29
DE Germany 39 56 27
ME Montenegro 37 48 27
GB United Kingdom 40 49 24
IT Italy 33 37 22
SM San Marino 14 16 15
SI Slovenia 9 21 15
AZ Azerbaijan 33 101 14
MT Malta 32 119 12
FR France 2 5 1
PT Portugal 0 0 0
MK F.Y.R. Macedonia 0 0 0
GE Georgia 0 0 0
IE Ireland 0 0 0
LT Lithuania 0 0 0
IL Israel 0 0 0
LV Latvia 0 0 0
MD Moldova 0 0 0
BE Belgium 0 0 0
AL Albania 0 0 0
EE Estonia 0 0 0

37 rows × 4 columns


In [27]:
# Save the result to an HTML file. The file is written as UTF-8 and declares
# that charset, so the page does not depend on the platform's default encoding.
# The 'dist' directory must already exist.
with open('dist/index.html', 'w', encoding='utf-8') as html:
    html.write('''
    <!DOCTYPE html>
    <html>
    <head>
        <meta charset="utf-8">
        <title>Eurovision 2014 Jury- and Tele-voting</title>
        <link rel="stylesheet" href="//netdna.bootstrapcdn.com/bootstrap/3.1.1/css/bootstrap.min.css">
        <link rel="stylesheet" href="//netdna.bootstrapcdn.com/bootstrap/3.1.1/css/bootstrap-theme.min.css">
        <link rel="stylesheet" href="vendor/css/bootstrap-sortable.css">
    </head>
    <body>
        <div class="container">
        <h1 class="page-header">
        Eurovision 2014: Does tactical voting help?
        </h1>
        <p class="lead">
        Does the imposition of jury voting affect the Eurovision result? Below is a table of scores for
        Eurovision 2014 entries assuming points had been awarded for combined jury/televote score,
        jury score alone and televote score alone.
        </p>
        <p>
        Click on table column names to sort by that column.
        </p>
    ''')
    # The table itself: rows ordered by country name (descending), without the
    # country-code index. sort_values() replaces DataFrame.sort(), which current
    # pandas releases no longer provide. The border attribute which pandas adds
    # to the <table> tag is dropped from the generated markup.
    html.write(total_points.sort_values(by='name', ascending=False).
                    to_html(index=False, classes=['table', 'sortable']).replace('border="1" ', ''))
    html.write('''
        <p>
        Assembled by <a href="https://richwareham.com/gplus">Rich Wareham</a>. Data scraped from
        <a href="http://www.eurovision.tv/">eurovision.tv</a> via
        <a href="https://github.com/rjw57/eurovision-2014/">an IPython notebook</a>.
        </p>
        </div>
        <script src="//code.jquery.com/jquery-1.11.0.min.js"></script>
        <script src="//netdna.bootstrapcdn.com/bootstrap/3.1.1/js/bootstrap.min.js"></script>
        <script src="vendor/js/bootstrap-sortable.js"></script>
    </body>
    </html>
    ''')

In [27]: