Does televoting make a difference in Eurovision?

The Polish entry for Eurovision this year apparently divided the jury voters and the televoters. In case you've forgotten the entry, it's available on YouTube:



In [1]:

    
# Embed the YouTube video with id 'VJ920cN2HmA' so it renders inline as this cell's output.
from IPython.display import YouTubeVideo
YouTubeVideo('VJ920cN2HmA')









    Out[1]:

But which other entries would have fared better or worse had only jury voting or only televoting been allowed? Let's use some Python to find out.



In [2]:

    
from urllib.request import urlopen
import re
import json

from bs4 import BeautifulSoup
import pandas
import numpy as np



In [3]:

    
# Base URL for the Eurovision grand final voting page. One needs to append '&voter=<CC>'
# to the end, where <CC> is the two-letter country code associated with the voting country.
# NOTE(review): event=1893 is presumably the id of the 2014 grand final -- confirm on the site.
BASE_URL='http://www.eurovision.tv/page/results?event=1893'



In [4]:

    
# Fetch and parse the results landing page. The HTTP response is closed as soon as its
# body has been read, and the parser is named explicitly so that the resulting tree does
# not depend on which optional parsers (lxml, html5lib) happen to be installed.
with urlopen(BASE_URL) as response:
    soup = BeautifulSoup(response.read(), 'html.parser')



In [5]:

    
# Per-country pages are selected with a 'voter=...' query parameter, so the drop-down
# listing the voting countries has to be the <select> whose name attribute is 'voter'.
# (The attribute filter is passed as a dict because 'name' is already find()'s first argument.)
voter_select = soup.find('select', {'name': 'voter'})

# Fail early if the page layout is not what we expect.
assert voter_select is not None



In [6]:

    
# Build a dictionary of country name -> voter code. Valid options have a "value"
# attribute which is exactly two capital letters. The pattern is anchored because
# BeautifulSoup applies attribute regexes with search(), so an unanchored '[A-Z]{2}'
# would also accept any value that merely *contains* two consecutive capitals.
# Names are stripped of surrounding whitespace so that they compare equal to the
# stripped country names used when looking codes up later on.
country_codes = {
    opt.text.strip(): opt.attrs['value']
    for opt in voter_select.find_all('option', attrs={'value': re.compile(r'^[A-Z]{2}$')})
}



In [7]:

    
# Build a dictionary mapping country codes to the soup representing that country's voting page.
# Each response is read inside a `with` block so its connection is released before the next
# request is made, and the parser is named explicitly so every page is parsed the same way
# regardless of which optional parsers are installed.
country_vote_page_soups = {}
for code in country_codes.values():
    with urlopen(BASE_URL + '&voter=' + code) as response:
        country_vote_page_soups[code] = BeautifulSoup(response.read(), 'html.parser')



In [8]:

    
# How many <table> elements are in the soup? Only the 'GB' page is inspected here.
len(country_vote_page_soups['GB'].find_all('table'))









    Out[8]:





1



In [9]:

    
# The only <table> on the page must be the results table, so keep the first
# <table> element found in each country's soup, keyed by country code.
country_tables = {
    code: page_soup.find('table')
    for code, page_soup in country_vote_page_soups.items()
}



In [10]:

    
# Let's take a look at what some rows in this table look like. Look a few lines into
# the source: lines 40-59 of the serialised 'GB' table.
str(country_tables['GB']).split('\n')[40:60]









    Out[10]:





['<tbody>',
 '<tr>',
 '<td class="country"><img alt="Flag" src="/img/upload/flags/medium/Ukraine.png"/> Ukraine</td>',
 '<td class="result">22</td><td class="result">16</td><td class="result">20</td><td class="result">18</td><td class="result">7</td>',
 '<td class="result" title="Jury aggr">18</td> <td class="space">\xa0</td>',
 '<td class="result" title="Tele aggr">12</td> <td class="space">\xa0</td>',
 '<td class="result" title="Combined rnk">16</td> <td class="result" title="Combined points"></td>',
 '</tr><tr>',
 '<td class="country"><img alt="Flag" src="/img/upload/flags/medium/Belarus.png"/> Belarus</td>',
 '<td class="result">17</td><td class="result">7</td><td class="result">17</td><td class="result">25</td><td class="result">19</td>',
 '<td class="result" title="Jury aggr">19</td> <td class="space">\xa0</td>',
 '<td class="result" title="Tele aggr">19</td> <td class="space">\xa0</td>',
 '<td class="result" title="Combined rnk">21</td> <td class="result" title="Combined points"></td>',
 '</tr><tr>',
 '<td class="country"><img alt="Flag" src="/img/upload/flags/medium/Azerbaijan.png"/> Azerbaijan</td>',
 '<td class="result">5</td><td class="result">4</td><td class="result">25</td><td class="result">13</td><td class="result">6</td>',
 '<td class="result" title="Jury aggr">8</td> <td class="space">\xa0</td>',
 '<td class="result" title="Tele aggr">24</td> <td class="space">\xa0</td>',
 '<td class="result" title="Combined rnk">18</td> <td class="result" title="Combined points"></td>',
 '</tr><tr>']



In [11]:

    
# That's convenient. Each row which has a <td> with title "Tele aggr" is one of interest
def interesting_rows(table_soup):
    """Return the result rows of a voting table as a list.

    A <tr> element is kept when at least one of its <td> cells has a title
    attribute matching "(Jury|Tele) aggr". Rows are returned in the order in
    which ``table_soup.find_all('tr')`` yields them.

    """
    aggregate_title = re.compile('(Jury|Tele) aggr')

    selected = []
    for candidate in table_soup.find_all('tr'):
        # A non-empty result means the row has a jury or televote aggregate cell
        if candidate.find_all('td', attrs={'title': aggregate_title}):
            selected.append(candidate)
    return selected

# Result rows of every voting country's table, keyed by country code.
country_result_rows = {
    code: interesting_rows(vote_table)
    for code, vote_table in country_tables.items()
}



In [12]:

    
# Let's look at a couple of those rows in more detail (the first two rows kept for 'GB'):
country_result_rows['GB'][:2]









    Out[12]:





[<tr>
 <td class="country"><img alt="Flag" src="/img/upload/flags/medium/Ukraine.png"/> Ukraine</td>
 <td class="result">22</td><td class="result">16</td><td class="result">20</td><td class="result">18</td><td class="result">7</td>
 <td class="result" title="Jury aggr">18</td> <td class="space"> </td>
 <td class="result" title="Tele aggr">12</td> <td class="space"> </td>
 <td class="result" title="Combined rnk">16</td> <td class="result" title="Combined points"></td>
 </tr>, <tr>
 <td class="country"><img alt="Flag" src="/img/upload/flags/medium/Belarus.png"/> Belarus</td>
 <td class="result">17</td><td class="result">7</td><td class="result">17</td><td class="result">25</td><td class="result">19</td>
 <td class="result" title="Jury aggr">19</td> <td class="space"> </td>
 <td class="result" title="Tele aggr">19</td> <td class="space"> </td>
 <td class="result" title="Combined rnk">21</td> <td class="result" title="Combined points"></td>
 </tr>]



In [13]:

    
# For a particular country, we'll create a Pandas DataFrame object with the country code as
# the index and the jury, televoting and combined ranks as columns
def result_rows_to_dataframe(rows):
    """Take a list of <tr> elements containing results and combine them into a single
    Pandas DataFrame object.

    The frame is indexed by the country code of each ranked entry and has three columns,
    'jury', 'tele' and 'combined', read from the <td> cells titled "Jury aggr",
    "Tele aggr" and "Combined rnk" respectively. A cell which is missing or empty is
    recorded as NaN. If one of the jury/tele columns consists only of NaNs, the other
    one is used as the combined ranking.

    Raises KeyError if a row names a country which is not a key of ``country_codes`` and
    ValueError if a non-empty rank cell does not hold an integer.

    """
    # The row indices are the country codes. We get these from the <td> elements with CSS class "country".
    # We need to strip leading and trailing spaces from the textual content as there is a space between
    # the flag and the country name.
    indices = [
        country_codes[row.find('td', class_='country').text.strip()]
        for row in rows
    ]

    def parse_rank(cell):
        """Return the integer rank held by *cell*, or NaN if the cell is absent or blank."""
        if cell is None:
            return np.nan
        text = cell.text.strip()
        # Blank cells (including ones holding only a non-breaking space) carry no rank
        if not text:
            return np.nan
        return int(text)

    # We can use much the same method to generate pandas series for the data columns. Some rows won't have
    # a particular entry since some countries only use jury voting. In those, we set missing data to NaN
    def make_series(title):
        data = [parse_rank(row.find('td', title=title)) for row in rows]
        return pandas.Series(data=data, index=indices)

    jury_aggr = make_series('Jury aggr')
    tele_aggr = make_series('Tele aggr')
    combined = make_series('Combined rnk')

    # Series.data has been removed from pandas; isnull() works on the Series directly
    if tele_aggr.isnull().all():
        # If the tele votes are all NaNs, use the jury vote alone for combined
        combined = jury_aggr
    elif jury_aggr.isnull().all():
        # If the jury votes are all NaNs, use the tele vote alone for combined
        combined = tele_aggr

    return pandas.DataFrame({'jury': jury_aggr, 'tele': tele_aggr, 'combined': combined})



In [14]:

    
# Let's check the function for the first 10 rows of the UK ('GB') voting
result_rows_to_dataframe(country_result_rows['GB'])[:10]









    Out[14]:






  
    
      
      combined
      jury
      tele
    
  
  
    
      UA
       16
       18
       12
    
    
      BY
       21
       19
       19
    
    
      AZ
       18
        8
       24
    
    
      IS
        7
       15
        4
    
    
      NO
       13
       11
       17
    
    
      RO
       17
       22
        9
    
    
      AM
       23
       24
       16
    
    
      ME
       25
       21
       25
    
    
      PL
       11
       25
        1
    
    
      GR
        9
       14
        7
    
  

10 rows × 3 columns



In [15]:

    
# Let's check the function for the first 10 rows of the Albanian (no-televote) voting
result_rows_to_dataframe(country_result_rows['AL'])[:10]









    Out[15]:






  
    
      
      combined
      jury
      tele
    
  
  
    
      UA
       19
       19
      NaN
    
    
      BY
       23
       23
      NaN
    
    
      AZ
       15
       15
      NaN
    
    
      IS
       24
       24
      NaN
    
    
      NO
       20
       20
      NaN
    
    
      RO
       16
       16
      NaN
    
    
      AM
       13
       13
      NaN
    
    
      ME
        5
        5
      NaN
    
    
      PL
       22
       22
      NaN
    
    
      GR
        9
        9
      NaN
    
  

10 rows × 3 columns



In [16]:

    
# Let's check the function for the first 10 rows of the Georgian (no-jury) voting
result_rows_to_dataframe(country_result_rows['GE'])[:10]









    Out[16]:






  
    
      
      combined
      jury
      tele
    
  
  
    
      UA
        5
      NaN
        5
    
    
      BY
       11
      NaN
       11
    
    
      AZ
        4
      NaN
        4
    
    
      IS
       19
      NaN
       19
    
    
      NO
       20
      NaN
       20
    
    
      RO
       15
      NaN
       15
    
    
      AM
        1
      NaN
        1
    
    
      ME
       25
      NaN
       25
    
    
      PL
       12
      NaN
       12
    
    
      GR
        7
      NaN
        7
    
  

10 rows × 3 columns



In [17]:

    
# Raw rank table (jury / tele / combined) for every voting country, keyed by country code.
country_rankings = {
    code: result_rows_to_dataframe(result_rows)
    for code, result_rows in country_result_rows.items()
}



In [18]:

    
# We can now write a function to convert ranking into points. The top 10 ranked countries
# are awarded 12, 10, 8, 7, 6, 5, 4, 3, 2, 1 points in descending order of rank

def rank_frame_to_points_frame(frame, points=(12, 10, 8, 7, 6, 5, 4, 3, 2, 1)):
    """Convert a frame of rankings into a frame of awarded points.

    Each column of *frame* is treated as an independent ranking in which a lower
    value is better. Within a column the best-ranked entry receives ``points[0]``,
    the next one ``points[1]`` and so on; every remaining entry receives 0.

    Entries whose rank is NaN never receive points, so a column made up only of
    NaNs yields all zeros. If a column ranks fewer entries than there are point
    values, the surplus point values are simply not awarded.

    Returns a new DataFrame with the same index and column names as *frame*.

    """
    points = np.asarray(points)

    new_frame_data = {}
    # DataFrame.iteritems() has been removed from pandas; items() is the equivalent
    for col_name, series in frame.items():
        # Series.data has been removed from pandas; .values gives the underlying array
        ranks = series.values
        series_points = np.zeros_like(ranks)

        # Positions of the entries which actually hold a rank, ordered best rank first.
        # A stable sort keeps the original order should two entries share a rank.
        ranked_idxs = np.flatnonzero(~pandas.isnull(ranks))
        best_first = ranked_idxs[np.argsort(ranks[ranked_idxs], kind='stable')]

        # Assign points to as many entries as there are point values (or ranked entries)
        winners = best_first[:len(points)]
        series_points[winners] = points[:len(winners)]

        # Record new series
        new_frame_data[col_name] = pandas.Series(data=series_points, index=series.index)

    return pandas.DataFrame(new_frame_data)



In [19]:

    
# Again, let's test with the UK: the first 10 rows of the points 'GB' awards under each scheme
rank_frame_to_points_frame(country_rankings['GB'])[:10]









    Out[19]:






  
    
      
      combined
      jury
      tele
    
  
  
    
      UA
       0
       0
        0
    
    
      BY
       0
       0
        0
    
    
      AZ
       0
       3
        0
    
    
      IS
       4
       0
        7
    
    
      NO
       0
       0
        0
    
    
      RO
       0
       0
        2
    
    
      AM
       0
       0
        0
    
    
      ME
       0
       0
        0
    
    
      PL
       0
       0
       12
    
    
      GR
       2
       0
        4
    
  

10 rows × 3 columns



In [20]:

    
# Again, let's test with Albania: the first 10 rows of the points 'AL' awards under each scheme
rank_frame_to_points_frame(country_rankings['AL'])[:10]









    Out[20]:






  
    
      
      combined
      jury
      tele
    
  
  
    
      UA
       0
       0
       0
    
    
      BY
       0
       0
       0
    
    
      AZ
       0
       0
       0
    
    
      IS
       0
       0
       0
    
    
      NO
       0
       0
       0
    
    
      RO
       0
       0
       0
    
    
      AM
       0
       0
       0
    
    
      ME
       6
       6
       0
    
    
      PL
       0
       0
       0
    
    
      GR
       2
       2
       0
    
  

10 rows × 3 columns



In [21]:

    
# Again, let's test with Georgia: the first 10 rows of the points 'GE' awards under each scheme
rank_frame_to_points_frame(country_rankings['GE'])[:10]









    Out[21]:






  
    
      
      combined
      jury
      tele
    
  
  
    
      UA
        6
       0
        6
    
    
      BY
        0
       0
        0
    
    
      AZ
        7
       0
        7
    
    
      IS
        0
       0
        0
    
    
      NO
        0
       0
        0
    
    
      RO
        0
       0
        0
    
    
      AM
       12
       0
       12
    
    
      ME
        0
       0
        0
    
    
      PL
        0
       0
        0
    
    
      GR
        4
       0
        4
    
  

10 rows × 3 columns



In [22]:

    
# Points awarded by every voting country (jury / tele / combined), keyed by country code.
country_awarded_points = {
    code: rank_frame_to_points_frame(rank_frame)
    for code, rank_frame in country_rankings.items()
}



In [23]:

    
# We're now in a position to compute a total points table

# Start with a points table full of zeros, one row per country code
n_countries = len(country_codes)
total_points = pandas.DataFrame(
    {scheme: np.zeros(n_countries) for scheme in ('jury', 'tele', 'combined')},
    index=list(country_codes.values()),
)

# Accumulate the points awarded by every voting country. Each awarded points
# table is first aligned with the totals table; a country does not vote for
# itself, so the row that is missing after alignment is filled with 0.
for awarded_points in country_awarded_points.values():
    total_points += awarded_points.reindex_like(total_points).fillna(0)

# Finally put the human-readable country name in the first column so that we
# don't have to read Eurovision country codes
name_by_code = pandas.Series(
    list(country_codes.keys()),
    index=list(country_codes.values()),
)
total_points.insert(0, 'name', name_by_code)



In [24]:

    
# So now let's check that we match the official winner if we look at combined scores.
# DataFrame.sort(columns=...) has been removed from pandas; sort_values(by=...) replaces it.
total_points.sort_values(by='combined', ascending=False)[:10]









    Out[24]:






  
    
      
      name
      combined
      jury
      tele
    
  
  
    
      AT
               Austria
       290
       214
       306
    
    
      NL
       The Netherlands
       238
       200
       220
    
    
      SE
                Sweden
       218
       199
       173
    
    
      AM
               Armenia
       174
       113
       187
    
    
      HU
               Hungary
       143
       138
        83
    
    
      UA
               Ukraine
       113
        72
       112
    
    
      RU
                Russia
        89
        62
       132
    
    
      NO
                Norway
        88
       102
        39
    
    
      ES
                 Spain
        74
        83
        29
    
    
      DK
               Denmark
        74
        85
        42
    
  

10 rows × 4 columns



In [25]:

    
# Cool, that matches. How about jury alone?
# DataFrame.sort(columns=...) has been removed from pandas; sort_values(by=...) replaces it.
total_points.sort_values(by='jury', ascending=False)









    Out[25]:






  
    
      
      name
      combined
      jury
      tele
    
  
  
    
      AT
                Austria
       290
       214
       306
    
    
      NL
        The Netherlands
       238
       200
       220
    
    
      SE
                 Sweden
       218
       199
       173
    
    
      HU
                Hungary
       143
       138
        83
    
    
      MT
                  Malta
        32
       119
        12
    
    
      FI
                Finland
        72
       114
        36
    
    
      AM
                Armenia
       174
       113
       187
    
    
      NO
                 Norway
        88
       102
        39
    
    
      AZ
             Azerbaijan
        33
       101
        14
    
    
      DK
                Denmark
        74
        85
        42
    
    
      ES
                  Spain
        74
        83
        29
    
    
      UA
                Ukraine
       113
        72
       112
    
    
      RU
                 Russia
        89
        62
       132
    
    
      IS
                Iceland
        58
        59
        38
    
    
      DE
                Germany
        39
        56
        27
    
    
      RO
                Romania
        72
        51
       103
    
    
      BY
                Belarus
        43
        50
        56
    
    
      GB
         United Kingdom
        40
        49
        24
    
    
      ME
             Montenegro
        37
        48
        27
    
    
      GR
                 Greece
        35
        45
        41
    
    
      IT
                  Italy
        33
        37
        22
    
    
      CH
            Switzerland
        64
        26
       114
    
    
      PL
                 Poland
        62
        23
       162
    
    
      SI
               Slovenia
         9
        21
        15
    
    
      SM
             San Marino
        14
        16
        15
    
    
      FR
                 France
         2
         5
         1
    
    
      MK
       F.Y.R. Macedonia
         0
         0
         0
    
    
      PT
               Portugal
         0
         0
         0
    
    
      GE
                Georgia
         0
         0
         0
    
    
      IL
                 Israel
         0
         0
         0
    
    
      IE
                Ireland
         0
         0
         0
    
    
      LV
                 Latvia
         0
         0
         0
    
    
      LT
              Lithuania
         0
         0
         0
    
    
      MD
                Moldova
         0
         0
         0
    
    
      BE
                Belgium
         0
         0
         0
    
    
      AL
                Albania
         0
         0
         0
    
    
      EE
                Estonia
         0
         0
         0
    
  

37 rows × 4 columns



In [26]:

    
# How about televoting alone?
# DataFrame.sort(columns=...) has been removed from pandas; sort_values(by=...) replaces it.
total_points.sort_values(by='tele', ascending=False)









    Out[26]:






  
    
      
      name
      combined
      jury
      tele
    
  
  
    
      AT
                Austria
       290
       214
       306
    
    
      NL
        The Netherlands
       238
       200
       220
    
    
      AM
                Armenia
       174
       113
       187
    
    
      SE
                 Sweden
       218
       199
       173
    
    
      PL
                 Poland
        62
        23
       162
    
    
      RU
                 Russia
        89
        62
       132
    
    
      CH
            Switzerland
        64
        26
       114
    
    
      UA
                Ukraine
       113
        72
       112
    
    
      RO
                Romania
        72
        51
       103
    
    
      HU
                Hungary
       143
       138
        83
    
    
      BY
                Belarus
        43
        50
        56
    
    
      DK
                Denmark
        74
        85
        42
    
    
      GR
                 Greece
        35
        45
        41
    
    
      NO
                 Norway
        88
       102
        39
    
    
      IS
                Iceland
        58
        59
        38
    
    
      FI
                Finland
        72
       114
        36
    
    
      ES
                  Spain
        74
        83
        29
    
    
      DE
                Germany
        39
        56
        27
    
    
      ME
             Montenegro
        37
        48
        27
    
    
      GB
         United Kingdom
        40
        49
        24
    
    
      IT
                  Italy
        33
        37
        22
    
    
      SM
             San Marino
        14
        16
        15
    
    
      SI
               Slovenia
         9
        21
        15
    
    
      AZ
             Azerbaijan
        33
       101
        14
    
    
      MT
                  Malta
        32
       119
        12
    
    
      FR
                 France
         2
         5
         1
    
    
      PT
               Portugal
         0
         0
         0
    
    
      MK
       F.Y.R. Macedonia
         0
         0
         0
    
    
      GE
                Georgia
         0
         0
         0
    
    
      IE
                Ireland
         0
         0
         0
    
    
      LT
              Lithuania
         0
         0
         0
    
    
      IL
                 Israel
         0
         0
         0
    
    
      LV
                 Latvia
         0
         0
         0
    
    
      MD
                Moldova
         0
         0
         0
    
    
      BE
                Belgium
         0
         0
         0
    
    
      AL
                Albania
         0
         0
         0
    
    
      EE
                Estonia
         0
         0
         0
    
  

37 rows × 4 columns



In [27]:

    
# Save the result to an HTML file: a static page header, the totals table rendered by
# pandas, then a static footer. The 'dist' directory must already exist.
with open('dist/index.html', 'w') as html:
    html.write('''
    <!DOCTYPE html>
    <html>
    <head>
        <title>Eurovision 2014 Jury- and Tele-voting</title>
        <link rel="stylesheet" href="//netdna.bootstrapcdn.com/bootstrap/3.1.1/css/bootstrap.min.css">
        <link rel="stylesheet" href="//netdna.bootstrapcdn.com/bootstrap/3.1.1/css/bootstrap-theme.min.css">
        <link rel="stylesheet" href="vendor/css/bootstrap-sortable.css">
    </head>
    <body>
        <div class="container">
        <h1 class="page-header">
        Eurovision 2014: Does tactical voting help?
        </h1>
        <p class="lead">
        Does the imposition of jury voting affect the Eurovision result? Below is a table of scores for
        Eurovision 2014 entries assuming points had been awarded for combined jury/televote score,
        jury score alone and televote score alone.
        </p>
        <p>
        Click on table column names to sort by that column.
        </p>
    ''')
    # DataFrame.sort(columns=...) has been removed from pandas; sort_values(by=...) replaces it.
    # The hard-coded border attribute emitted by to_html() is dropped so that the
    # stylesheet classes alone control the table's appearance.
    table_html = (
        total_points.sort_values(by='name', ascending=False)
        .to_html(index=False, classes=['table', 'sortable'])
        .replace('border="1" ', '')
    )
    html.write(table_html)
    html.write('''
        <p>
        Assembled by <a href="https://richwareham.com/gplus">Rich Wareham</a>. Data scraped from
        <a href="http://www.eurovision.tv/">eurovision.tv</a> via
        <a href="https://github.com/rjw57/eurovision-2014/">an IPython notebook</a>.
        </p>
        </div>
        <script src="//code.jquery.com/jquery-1.11.0.min.js"></script>
        <script src="//netdna.bootstrapcdn.com/bootstrap/3.1.1/js/bootstrap.min.js"></script>
        <script src="vendor/js/bootstrap-sortable.js"></script>
    </body>
    </html>
    ''')



In [27]:

	combined	jury	tele
UA	16	18	12
BY	21	19	19
AZ	18	8	24
IS	7	15	4
NO	13	11	17
RO	17	22	9
AM	23	24	16
ME	25	21	25
PL	11	25	1
GR	9	14	7

	combined	jury	tele
UA	19	19	NaN
BY	23	23	NaN
AZ	15	15	NaN
IS	24	24	NaN
NO	20	20	NaN
RO	16	16	NaN
AM	13	13	NaN
ME	5	5	NaN
PL	22	22	NaN
GR	9	9	NaN

	combined	jury	tele
UA	5	NaN	5
BY	11	NaN	11
AZ	4	NaN	4
IS	19	NaN	19
NO	20	NaN	20
RO	15	NaN	15
AM	1	NaN	1
ME	25	NaN	25
PL	12	NaN	12
GR	7	NaN	7

	name	combined	jury	tele
AT	Austria	290	214	306
NL	The Netherlands	238	200	220
SE	Sweden	218	199	173
AM	Armenia	174	113	187
HU	Hungary	143	138	83
UA	Ukraine	113	72	112
RU	Russia	89	62	132
NO	Norway	88	102	39
ES	Spain	74	83	29
DK	Denmark	74	85	42

	combined	jury	tele
UA	16	18	12
BY	21	19	19
AZ	18	8	24
IS	7	15	4
NO	13	11	17
RO	17	22	9
AM	23	24	16
ME	25	21	25
PL	11	25	1
GR	9	14	7

	combined	jury	tele
UA	19	19	NaN
BY	23	23	NaN
AZ	15	15	NaN
IS	24	24	NaN
NO	20	20	NaN
RO	16	16	NaN
AM	13	13	NaN
ME	5	5	NaN
PL	22	22	NaN
GR	9	9	NaN

	combined	jury	tele
UA	5	NaN	5
BY	11	NaN	11
AZ	4	NaN	4
IS	19	NaN	19
NO	20	NaN	20
RO	15	NaN	15
AM	1	NaN	1
ME	25	NaN	25
PL	12	NaN	12
GR	7	NaN	7

	combined	jury	tele
UA	16	18	12
BY	21	19	19
AZ	18	8	24
IS	7	15	4
NO	13	11	17
RO	17	22	9
AM	23	24	16
ME	25	21	25
PL	11	25	1
GR	9	14	7

	combined	jury	tele
UA	19	19	NaN
BY	23	23	NaN
AZ	15	15	NaN
IS	24	24	NaN
NO	20	20	NaN
RO	16	16	NaN
AM	13	13	NaN
ME	5	5	NaN
PL	22	22	NaN
GR	9	9	NaN

	combined	jury	tele
UA	5	NaN	5
BY	11	NaN	11
AZ	4	NaN	4
IS	19	NaN	19
NO	20	NaN	20
RO	15	NaN	15
AM	1	NaN	1
ME	25	NaN	25
PL	12	NaN	12
GR	7	NaN	7