In [1]:
# Dependencies

install.packages(c('readxl', 'qgraph', 'reshape2', 'RColorBrewer'))


Installing packages into ‘/home/sharedvi/R/x86_64-pc-linux-gnu-library/3.4’
(as ‘lib’ is unspecified)

In [2]:
# We'll need this for XLSX files:
library(readxl)
rm(list=ls())

Load and additionally preprocess the data:


In [3]:
# Open file
eurovision = data.frame(read_xlsx('Eurovision-Processed.xlsx', ))

# Load everything as factors
eurovision$Year = factor(eurovision$Year, ordered=T)
eurovision$Round = factor(eurovision$Round, ordered=T, levels=c('sf','sf1','sf2','f'))
eurovision$Voters = factor(eurovision$Voters)

#For simplicity replace S&M with Serbia (sorry Montenegro)
eurovision$From_country[eurovision$From_country=='Serbia & Montenegro'] = 'Serbia'
eurovision$To_country[eurovision$To_country=='Serbia & Montenegro'] = 'Serbia'

# Shorten long names so that they fit on diagrams
eurovision$From_country[eurovision$From_country=='Bosnia & Herzegovina'] = 'Bosnia&H.'
eurovision$To_country[eurovision$To_country=='Bosnia & Herzegovina'] = 'Bosnia&H.'

eurovision$From_country[eurovision$From_country=='F.Y.R. Macedonia'] = 'Macedonia'
eurovision$To_country[eurovision$To_country=='F.Y.R. Macedonia'] = 'Macedonia'

eurovision$From_country[eurovision$From_country=='Czech Republic'] = 'Czech R.'
eurovision$To_country[eurovision$To_country=='Czech Republic'] = 'Czech R.'

eurovision$From_country[eurovision$From_country=='United Kingdom'] = 'UK'
eurovision$To_country[eurovision$To_country=='United Kingdom'] = 'UK'

# Factor country names
eurovision$From_country = factor(eurovision$From_country)
eurovision$To_country = factor(eurovision$To_country)

# Discard votes from countries for themselves, they're always 0

eurovision = eurovision[eurovision$From_country != eurovision$To_country, ]

# Discard 2004 semifinal for countries that didn't vote in that:
eurovision = eurovision[eurovision$Year != '2004' | eurovision$Round != 'sf' | !(eurovision$From_country %in% c("France","Poland","Russia")),]

# Look what we got
head(eurovision)


YearRoundVotersFrom_countryTo_countryPoints
21975 f J BelgiumFinland 0
31975 f J BelgiumFrance 2
41975 f J BelgiumGermany 0
51975 f J BelgiumIreland12
61975 f J BelgiumIsrael 1
71975 f J BelgiumItaly 6

Do some sanity checks


In [4]:
# Sanity check 1: sets of countries in both fields are identical
all(levels(eurovision$From_country) == levels(eurovision$To_country))


TRUE

In [5]:
# Sanity check 2: every country gave away 11 distinct scores
table(aggregate(Points~From_country+Year+Round, data=eurovision, FUN=function(x){length(unique(x))})$Points)


  11 
1670 

In [6]:
# Sanity check 2.5: countries that didn't give away 11 distinct scores
ver_points = aggregate(Points~From_country+Year+Round+Voters, data=eurovision, FUN=function(c){length(unique(c))})
ver_points[ver_points$Points!=11,]


From_countryYearRoundVotersPoints

In [7]:
# Discard countries that participated in 3 contests or less: 

# List of participations:
participations = aggregate(Year ~ From_country, eurovision, FUN=function(x){length(unique(x))})
participations = participations[order(participations$Year),]
participations

# Excluded list:
exclude = participations$From_country[participations$Year <= 3]
eurovision = eurovision[!(eurovision$From_country %in% exclude | eurovision$To_country %in% exclude),]


From_countryYear
34Morocco 1
4Australia 3
2Andorra 6
13Czech R. 6
43Slovakia 7
32Monaco 8
41San Marino 8
33Montenegro 9
6Azerbaijan 10
18Georgia 10
3Armenia 11
10Bulgaria 11
31Moldova 13
42Serbia 13
51Yugoslavia 13
1Albania 14
7Belarus 14
50Ukraine 14
21Hungary 15
29Macedonia 17
26Latvia 18
27Lithuania 18
39Romania 18
9Bosnia&H. 19
28Luxembourg 19
37Poland 20
40Russia 20
11Croatia 23
15Estonia 23
44Slovenia 23
25Italy 24
30Malta 28
22Iceland 30
12Cyprus 34
48Turkey 34
5Austria 36
14Denmark 36
20Greece 37
16Finland 38
24Israel 38
35Netherlands39
38Portugal 39
47Switzerland39
8Belgium 40
23Ireland 41
17France 42
19Germany 42
36Norway 42
46Sweden 42
45Spain 43
49UK 43

In [8]:
# Re-factor the list after exclusion
eurovision$From_country = factor(eurovision$From_country)
eurovision$To_country = factor(eurovision$To_country)

In [9]:
# Calculate average score for each country in each round:
avg_scores = aggregate(Points ~ To_country+Year+Round+Voters, data=eurovision, mean)

In [10]:
# Calculate biases by substracting average score from votes
biases = eurovision

# This function (signed square of difference) is used to calculate bias:
sigsq <- function(x,y){(x-y)^2*sign(x-y)}

# Calculate bias:
for(i in 1:nrow(biases)) {
    biases$Points[i] = sigsq(biases$Points[i], avg_scores$Points[avg_scores$Year == biases[i, 'Year'] & avg_scores$Round == biases[i, 'Round'] & avg_scores$To_country == biases[i, 'To_country']][1])
}

In [11]:
# Aggregate bias scores over all years and tours:

aggr.bias = aggregate(Points~From_country+To_country, data=biases, FUN=mean)
aggr.bias = aggr.bias[order(aggr.bias$From_country),]

In [12]:
# Look at the most and least biased country pairs

aggr.bias[order(aggr.bias$Points, decreasing = T),]


From_countryTo_countryPoints
85Spain Andorra 100.36205
1928Andorra Spain 98.99650
1460Serbia Montenegro 87.94754
26Macedonia Albania 81.24176
1370Romania Moldova 80.87022
1686Moldova Romania 79.59022
481Greece Cyprus 76.70039
849Cyprus Greece 73.12261
1611Andorra Portugal 71.43950
1279Serbia Macedonia 70.79639
2075AzerbaijanTurkey 66.71288
1242Albania Macedonia 61.38359
1824MontenegroSerbia 60.75346
1077Albania Italy 56.11229
1424Albania Montenegro 55.95970
1271MontenegroMacedonia 55.84327
1148Lithuania Latvia 55.08256
227Turkey Azerbaijan 53.75507
275Ukraine Belarus 53.72198
627Latvia Estonia 53.02138
354Montenegro Bosnia&H. 52.60407
106Georgia Armenia 51.52404
1709Belarus Russia 50.96996
617Finland Estonia 50.58039
748Armenia Georgia 50.31625
30MontenegroAlbania 50.29516
1801Bosnia&H. Serbia 47.95811
265Russia Belarus 47.32202
424Bosnia&H. Croatia 46.75370
18Greece Albania 46.71958
862Latvia Greece -11.15534
1497Moldova Netherlands-11.24309
2110Serbia Turkey -11.27535
2073Armenia Turkey -11.42813
758Denmark Georgia -11.48964
111Ireland Armenia -11.52371
2182France Ukraine -11.86288
2167Albania Ukraine -11.97886
1472Azerbaijan Netherlands-12.30594
1979Azerbaijan Sweden -12.72011
1823Monaco Serbia -12.72884
915Monaco Hungary -12.91482
2095Latvia Turkey -13.27765
1746SwitzerlandRussia -13.60521
2082Czech R. Turkey -14.14808
1819Lithuania Serbia -14.17520
119Monaco Armenia -14.29547
1527Czech R. Norway -14.84361
2210SwitzerlandUkraine -14.88330
1364Monaco Moldova -15.06545
1499Montenegro Netherlands-15.52775
92Andorra Armenia -15.73666
1641Montenegro Portugal -15.98554
187Armenia Azerbaijan -17.18304
94Azerbaijan Armenia -17.82632
325Andorra Bosnia&H. -18.17740
1837Turkey Serbia -21.24264
1733Monaco Russia -21.30934
1807Estonia Serbia -21.50463
1795Andorra Serbia -34.26385

In [13]:
# Load this for visualisation

library(qgraph)

In [14]:
# Plot biases on a weighted directional graph: (uncolored for now)

qgraph(aggr.bias, minimum=25, cut=40)



In [15]:
# This one is needed to convert between long and wide matrices forms

library('reshape2')

In [16]:
# Cast long form to wide form, turn country names into rownames, replace NA's with 0

aggr.distance = dcast(aggr.bias, From_country~To_country)
aggr.distance.matrix = aggr.distance[,-1]
aggr.distance.matrix = as.matrix(aggr.distance.matrix)
rownames(aggr.distance.matrix) = aggr.distance[,1]

aggr.distance.matrix[is.na(aggr.distance.matrix)] = 0


Using Points as value column: use value.var to override.

In [17]:
# To make matrix symmetric, we take the larger (by absolute value) number out of (m,n) and (n,m)

indeces = as.numeric(abs(aggr.distance.matrix)>=abs(t(aggr.distance.matrix)))
aggr.distance.matrix.sym = aggr.distance.matrix*indeces + t(aggr.distance.matrix) * (1-indeces)

In [18]:
# Rescale this matrix so that Largest bias is smallest distance

aggr.distance.matrix.sym = max(aggr.distance.matrix.sym)-aggr.distance.matrix.sym

In [19]:
# Convert to distance matrix

aggr.distance.matrix.sym = as.dist(aggr.distance.matrix.sym)

In [20]:
# Run hierarchical clustering:

clust = hclust(aggr.distance.matrix.sym)

In [21]:
# Show the dendrogram

plot(clust)



In [22]:
# Split into 25 clusters 

cl_groups=cutree(clust, k=25)
cl_groups=cl_groups[clust$order]
cl_groups

# Get grouplist - list of character vectors; each vector represent 1 cluster
grlist = list()
i=1
for(j in unique(cl_groups)) {
    countries = names(cl_groups)[cl_groups==j]
    countries = factor(countries, levels = levels(eurovision$From_country))
    grlist[[i]] = countries
    i=i+1
}

# Remove groups with 1 item

grlist2 = grlist[lapply(grlist,length) > 1]
grlist2


San Marino
22
Azerbaijan
5
Turkey
5
Israel
17
Yugoslavia
17
Belgium
7
Netherlands
7
Denmark
11
Iceland
11
Norway
11
Sweden
11
Andorra
2
Spain
2
France
13
Portugal
13
Estonia
12
Finland
12
Italy
18
Poland
18
Ireland
16
Latvia
16
Lithuania
16
Germany
14
Monaco
21
Austria
4
UK
25
Bosnia&H.
8
Montenegro
8
Serbia
8
Croatia
8
Slovenia
8
Switzerland
24
Albania
1
Macedonia
1
Slovakia
23
Czech R.
10
Hungary
15
Luxembourg
19
Malta
19
Armenia
3
Georgia
3
Russia
6
Belarus
6
Ukraine
6
Moldova
20
Romania
20
Bulgaria
9
Cyprus
9
Greece
9
    1. Azerbaijan
    2. Turkey
    1. Israel
    2. Yugoslavia
    1. Belgium
    2. Netherlands
    1. Denmark
    2. Iceland
    3. Norway
    4. Sweden
    1. Andorra
    2. Spain
    1. France
    2. Portugal
    1. Estonia
    2. Finland
    1. Italy
    2. Poland
    1. Ireland
    2. Latvia
    3. Lithuania
    1. Bosnia&H.
    2. Montenegro
    3. Serbia
    4. Croatia
    5. Slovenia
    1. Albania
    2. Macedonia
    1. Luxembourg
    2. Malta
    1. Armenia
    2. Georgia
    1. Russia
    2. Belarus
    3. Ukraine
    1. Moldova
    2. Romania
    1. Bulgaria
    2. Cyprus
    3. Greece

In [23]:
# Remove countries which are alone in a cluster

loners = unlist(grlist[lapply(grlist,length) == 1])
aggr.bias.clust = aggr.bias[!(aggr.bias$From_country %in% loners | aggr.bias$To_country %in% loners),]
aggr.bias.clust$From_country = factor(aggr.bias.clust$From_country)
aggr.bias.clust$To_country = factor(aggr.bias.clust$To_country)

# Re-factorize the list
grlist2 = lapply(grlist2, function(c) {c = factor(c,levels = levels(aggr.bias.clust$From_country))})

Now, visualizations!


In [24]:
# Need this for the color palette
library(RColorBrewer)

In [25]:
# A palette of background colors for graph circles

my_pal=c(brewer.pal(9,"Pastel1"), brewer.pal(8,"Pastel2"), brewer.pal(12, "Paired")[c(1,3,5,7,9,11)], brewer.pal(6,"Set2")[c(5,6)])

In [26]:
# A pallette of foreground colors for the dendrogram

my_pal2=c(brewer.pal(12,"Paired")[c(2,4,6,8,10,12)], brewer.pal(8,"Dark2"), brewer.pal(5,"Set1"), brewer.pal(8,"Accent")[5:8],brewer.pal(10,"RdGy")[c(2,10)])

In [27]:
# Re-draw our unclustered graph with colors

qgraph(aggr.bias, minimum=25, cut=40, color=my_pal[cl_groups][order(names(cl_groups))])

postscript("unclustered.eps", width = 6, height = 6, horizontal = FALSE, onefile = FALSE)
qgraph(aggr.bias, minimum=25, cut=40, color=my_pal[cl_groups][order(names(cl_groups))])
dev.off()


png: 2

In [28]:
# Unclustered graph with colors for negative items only

qgraph(aggr.bias[aggr.bias$Points<0,], minimum=15, color=my_pal[cl_groups][order(names(cl_groups))])

postscript("unclusteredNeg.eps", width = 6, height = 6, horizontal = FALSE, onefile = FALSE)
qgraph(aggr.bias[aggr.bias$Points<0,], minimum=15, color=my_pal[cl_groups][order(names(cl_groups))])
dev.off()


png: 2

In [29]:
# Draw a clustered graphs (customly rearranged palette)

qgraph(aggr.bias.clust, layout='groups', groups=grlist2, minimum=35, cut=50, color=my_pal[c(5,17,7,11,2,13,12,18,16,8,1,19,3,6,20,9)])

postscript("clustered.eps", width = 6, height = 6, horizontal = FALSE, onefile = FALSE)
qgraph(aggr.bias.clust, layout='groups', groups=grlist2, minimum=35, cut=50, color=my_pal[c(5,17,7,11,2,13,12,18,16,8,1,19,3,6,20,9)])
dev.off()


png: 2

In [30]:
# Export clusters into CSV file for map visualisation
cl_groups2  = cl_groups[!names(cl_groups) %in% loners]
ccc = sapply(cl_groups2, function(x){which.max(unique(cl_groups2)==x)})
write.csv(cbind(names(ccc),unname(ccc)), 'cl.csv')

In [31]:
# Count number of wins

a1 = aggregate(Points~To_country+Year+Voters, eurovision, subset = Round=="f", sum)
a2 = aggregate(Points~To_country+Year, a1, mean)
a3 = aggregate(Points~Year, a1, which.max)

winners = c()
for (y in a3$Year) {
    aa4 = a2[a2$Year == y,]
    winners = c(winners, as.character(aa4$To_country[which.max(aa4$Points)]))
}
winners = table(winners)
winners


winners
    Austria  Azerbaijan     Belgium     Denmark     Estonia     Finland 
          1           1           1           2           1           1 
     France     Germany      Greece     Ireland      Israel       Italy 
          2           2           1           6           3           1 
     Latvia  Luxembourg Netherlands      Norway    Portugal      Russia 
          1           1           1           3           1           1 
     Serbia      Sweden Switzerland      Turkey          UK     Ukraine 
          1           4           1           1           3           2 
 Yugoslavia 
          1 

In [32]:
# Extend wins with 0

winners[levels(eurovision$To_country)[!levels(eurovision$To_country) %in% names(winners)]]=0
winners=winners[order(names(winners))]

In [33]:
# For dendrogram fan layout

suppressPackageStartupMessages(library(ape))

In [34]:
# For circle

library(plotrix)

In [35]:
# Plot dendrogram with circular layout, draw the level circle

plot(as.phylo(clust), type = "radial",  tip.color =  my_pal2[cl_groups][order(names(cl_groups))], label.offset=0.04, cex=0.6)
draw.circle(0,0,0.94, border = 'gray', lty='dashed')

postscript("printsPerfectly.eps", width = 6, height = 6, horizontal = FALSE, onefile = FALSE)
plot(as.phylo(clust), type = "radial",  tip.color =  my_pal2[cl_groups][order(names(cl_groups))], label.offset=0.04, cex=0.6)
draw.circle(0,0,0.94, border = 'gray', lty='dashed')
dev.off()


png: 2

In [36]:
# Re-sort bias to analyse how mutual the arrows are

aggr.bias.2 = aggr.bias[order(aggr.bias$To_country),]
head(aggr.bias.2,20)


From_countryTo_countryPoints
Andorra Albania -2.4968906
Armenia Albania -1.9974329
Austria Albania 7.6983172
AzerbaijanAlbania 0.5321833
Belarus Albania -5.3390520
Belgium Albania 5.4738469
Bosnia&H. Albania 5.9029563
Bulgaria Albania -1.8414504
Croatia Albania 19.0443762
Cyprus Albania -4.1637952
Czech R. Albania -2.0225044
Denmark Albania -0.6275144
Estonia Albania -6.1742963
Finland Albania -1.3782228
France Albania -2.1389018
Georgia Albania -2.5838171
Germany Albania 0.7955888
Greece Albania 46.7195833
Hungary Albania 0.1842059
Iceland Albania 0.5571413

In [37]:
# Here, two mutual bias scores between the countries are geometrically averaged:

ab3 = aggr.bias
ab3$Points = sign(aggr.bias$Points * aggr.bias.2$Points) * sqrt(abs(aggr.bias$Points * aggr.bias.2$Points))
ab3 = ab3[order(ab3$Points, decreasing = T),]
head(ab3)
tail(ab3)


From_countryTo_countryPoints
1928Andorra Spain 99.67693
85Spain Andorra 99.67693
1242Albania Macedonia 70.61806
227Turkey Azerbaijan59.88452
1911MontenegroSlovenia 54.75479
1686Moldova Romania 54.58501
From_countryTo_countryPoints
119Monaco Armenia -22.78478
2075AzerbaijanTurkey -23.81112
124Portugal Armenia -25.72876
529Greece Czech R. -26.24580
1279Serbia Macedonia -31.67890
1460Serbia Montenegro-33.45848

In [38]:
# Plot graph of the bias symmetricity:

qgraph(ab3, minimum=15)

# Something didn't work as expected...



In [39]:
# Who is Estonia biased towards:

estb = aggr.bias[aggr.bias$From_country=="Estonia",]
estb[order(estb$Points, decreasing = T),]


From_countryTo_countryPoints
1137Estonia Latvia 32.5545744
663Estonia Finland 29.1778962
1717Estonia Russia 23.2028222
1988Estonia Sweden 16.8565226
2036Estonia Switzerland 11.5616235
1529Estonia Norway 8.6534081
759Estonia Georgia 7.9781957
900Estonia Hungary 6.4901383
946Estonia Iceland 6.4338352
568Estonia Denmark 5.7420918
288Estonia Belgium 5.3858450
1183Estonia Lithuania 5.2452928
149Estonia Austria 4.6877539
2180Estonia Ukraine 3.7877037
994Estonia Ireland 3.7741506
1481Estonia Netherlands 3.6981671
711Estonia France 3.5743829
59Estonia Andorra 2.4425681
1850Estonia Slovakia 2.2567448
476Estonia Cyprus 1.9627130
804Estonia Germany 1.8035730
1763Estonia San Marino 1.7582407
2132Estonia UK 0.8761830
1301Estonia Malta 0.6784959
1393Estonia Monaco 0.6748106
242Estonia Belarus 0.2349362
1577Estonia Poland -0.2844809
1893Estonia Slovenia -1.4848296
1940Estonia Spain -1.9728128
383Estonia Bulgaria -2.3657965
1623Estonia Portugal -2.6223165
429Estonia Croatia -2.9622247
1349Estonia Moldova -3.1700515
1255Estonia Macedonia -3.3573282
1042Estonia Israel -3.8256419
1437Estonia Montenegro -4.0482060
1671Estonia Romania -4.7003816
1089Estonia Italy -4.8205481
197Estonia Azerbaijan -5.3100408
13Estonia Albania -6.1742963
852Estonia Greece -7.1975083
524Estonia Czech R. -7.4298871
336Estonia Bosnia&H. -9.3170767
2084Estonia Turkey -10.0270763
103Estonia Armenia -10.5126774
1807Estonia Serbia -21.5046298

In [40]:
# By years: 1975-1984

aggr.bias.84 = aggregate(Points~From_country+To_country, data=biases, FUN=mean, subset = (Year >= 1975 & Year < 1985))
aggr.bias.84 = aggr.bias.84[order(aggr.bias.84$From_country),]

In [41]:
# By years: 1985-1994

aggr.bias.94 = aggregate(Points~From_country+To_country, data=biases, FUN=mean, subset = (Year >= 1985 & Year < 1995))
aggr.bias.94 = aggr.bias.94[order(aggr.bias.94$From_country),]

In [42]:
# By years: 1995-2004

aggr.bias.04 = aggregate(Points~From_country+To_country, data=biases, FUN=mean, subset = (Year >= 1995 & Year < 2005))
aggr.bias.04 = aggr.bias.04[order(aggr.bias.04$From_country),]

In [43]:
# By years: 2005-2017

aggr.bias.17 = aggregate(Points~From_country+To_country, data=biases, FUN=mean, subset = (Year >= 2005))
aggr.bias.17 = aggr.bias.17[order(aggr.bias.17$From_country),]

In [44]:
# Plot all of these:

In [45]:
qgraph(aggr.bias.84, minimum=10, cut=20)

postscript("84.eps", width = 6, height = 6, horizontal = FALSE, onefile = FALSE)
qgraph(aggr.bias.84, minimum=10, cut=20)
dev.off()


png: 2

In [46]:
qgraph(aggr.bias.94, minimum=20, cut=40)

postscript("94.eps", width = 6, height = 6, horizontal = FALSE, onefile = FALSE)
qgraph(aggr.bias.94, minimum=20, cut=40)
dev.off()


png: 2

In [47]:
qgraph(aggr.bias.04, minimum=30, cut=40)

postscript("04.eps", width = 6, height = 6, horizontal = FALSE, onefile = FALSE)
qgraph(aggr.bias.04, minimum=30, cut=40)
dev.off()


png: 2

In [48]:
qgraph(aggr.bias.17, minimum=30, cut=40)

postscript("17.eps", width = 6, height = 6, horizontal = FALSE, onefile = FALSE)
qgraph(aggr.bias.17, minimum=30, cut=40)
dev.off()


png: 2