In [1918]:
import numpy as np
import pandas as pd
import random as rd
from collections import defaultdict
Structure of the overall application.
Get the data ready and split it into two groups. Group 1 is prop on the impromptu motion on Day 1 and Group 2 is opp on the prepped motion of Day 1. On Day 2, Group 1 is opp on the impromptu and Group 2 is prop on the impromptu. The alternation continues on Day 3 and Day 4.
Then the same process is applied for each day. First, pairs are formed between countries within groups. The pairs from both groups get paired into 2x2 blocks.
For Days 1 to 4, with the sides that the groups take alternating each day: form pairs (for both groups), then form blocks (and update the penalty scores).
In [1919]:
# Load the rankings table and attach the two running penalty columns.
N_TEAMS = 60  # only the first 60 rows of the file take part in the draw

# .copy() makes the slice an independent frame, so the in-place penalty updates
# done later in the notebook do not write into a view of the full CSV table.
rankings = pd.read_csv('WSDCrankings.csv', sep=';')[:N_TEAMS].copy()
# Broadcasting a scalar creates the penalty columns for however many rows the
# file has; the previous np.repeat(0, 62) only worked for exactly 62 rows.
rankings['penaltyStrength'] = 0
rankings['penaltyPowerdif'] = 0
rankings
Out[1919]:
In [1920]:
# Sanity check: number of non-null entries in every column of the rankings table.
rankings.count()
Out[1920]:
In [1921]:
# Split the first 60 nations into two groups by alternating rows:
# rows 0, 2, 4, ... form group 1 and rows 1, 3, 5, ... form group 2.
# `.values` replaces `Series.as_matrix()`, which was removed in pandas 1.0.
group1 = rankings['nation'].values[0:60:2]
group2 = rankings['nation'].values[1:60:2]
len(group1)
Out[1921]:
In [1922]:
# Set up a dict that remembers which nations have already faced one another.
# `.values` replaces `Series.as_matrix()`, which was removed in pandas 1.0.
nations = rankings['nation'].values
matchUps = defaultdict(list)
for nation in nations:
    # 'land' is a placeholder first entry. Real entries appended later are
    # two-element lists of opponents; generateBlocks flattens the entries, so
    # the placeholder never matches an actual nation name.
    matchUps[nation].append('land')
In [1923]:
# Mean of the 'strength' column over all nations in the draw; generatePairs and
# generateBlocks use it as the target strength for a balanced pair / block.
averageStrength = rankings['strength'].mean()
averageStrength
Out[1923]:
In [1924]:
def generatePairs(group):
    """Pair up the nations of one group.

    Nations are visited in the order given by ``group``. Each nation that is
    still unpaired gets the remaining nation of the same group with the lowest
    total error, where the error is the sum of

    * the distance between the partner's strength and the strength that would
      bring the pair's mean strength to ``averageStrength``, and
    * the absolute differences between the two nations' ``penaltyStrength``
      and ``penaltyPowerdif`` values.

    Ties are resolved in favour of the candidate that comes first in
    ``rankings``.

    Reads the module-level ``rankings``, ``averageStrength``, ``group1`` and
    ``group2``.

    Parameters
    ----------
    group : array-like of nation names
        Must be the ``group1`` or ``group2`` object itself: group 2 is
        recognised by identity (``group is group2``); anything else is
        treated as group 1.

    Returns
    -------
    pandas.DataFrame
        Two rows per pair with columns ``nation``, ``pairno``, ``groupno`` and
        ``powerdif``. Pair numbers start at 1 for group 1 and at 101 for
        group 2, so pairs of group 2 are recognisable by an id > 100.

    Raises
    ------
    ValueError
        If a nation is left without any partner (odd number of nations).
    """
    nationColumn = rankings['nation']
    # `.values` replaces `Series.as_matrix()`, which was removed in pandas 1.0.
    allNations = nationColumn.values.tolist()

    # Only nations of the group being paired are candidates, so drop the
    # nations that belong to the other group.
    if group is group2:
        pairCount = 100
        groupno = 2
        countrieslist = [x for x in allNations if x not in group1]
    else:
        pairCount = 0
        groupno = 1
        countrieslist = [x for x in allNations if x not in group2]

    pairFrames = []  # one two-row frame per pair, concatenated once at the end
    for countryOne in group:
        if countryOne not in countrieslist:
            continue  # Already paired earlier in this loop.
        # Remove now, so the nation is neither matched with itself nor again.
        countrieslist.remove(countryOne)
        pairCount += 1

        # Nations of this group that still need a partner (in rankings order).
        matchableCountries = rankings.loc[nationColumn.isin(countrieslist)]
        if len(matchableCountries.index) < 1:
            raise ValueError(
                "No partner left for %r: the group has an odd number of nations."
                % (countryOne,))

        ownRow = nationColumn == countryOne
        countryOneStrength = rankings['strength'].loc[ownRow].values[0]
        # The mean strength of a pair is the sum of both strengths divided by
        # two, so the ideal partner has strength 2 * average - own strength.
        bestMatchStrength = 2 * averageStrength - countryOneStrength
        strengthError = np.abs(bestMatchStrength - matchableCountries['strength'].values)
        ownPenaltyPowerdif = rankings['penaltyPowerdif'].loc[ownRow].values[0]
        penaltyPowerdifError = np.abs(ownPenaltyPowerdif - matchableCountries['penaltyPowerdif'].values)
        ownPenaltyStrength = rankings['penaltyStrength'].loc[ownRow].values[0]
        penaltyStrengthError = np.abs(ownPenaltyStrength - matchableCountries['penaltyStrength'].values)
        penaltyTotal = strengthError + penaltyPowerdifError + penaltyStrengthError

        # np.argmin returns the first minimum. The earlier code additionally
        # drew a "random" element from a list that only ever held this single
        # candidate, which never changed the outcome.
        indexOfBestMatch = np.argmin(penaltyTotal)
        bestMatchedCountry = matchableCountries['nation'].iloc[indexOfBestMatch]
        countrieslist.remove(bestMatchedCountry)

        # Power differential: difference between the strengths of the two
        # paired nations. It is deliberately left as a length-1 array (no
        # [0] on the partner's strength) because generateBlocks calls
        # .item(0) on values derived from this column.
        bestMatchedCountryStrength = rankings['strength'].loc[nationColumn == bestMatchedCountry].values
        powerdif = np.abs(bestMatchedCountryStrength - countryOneStrength)

        pairFrames.append(pd.DataFrame({'nation': [countryOne, bestMatchedCountry],
                                        'pairno': [pairCount, pairCount],
                                        'groupno': [groupno, groupno],
                                        'powerdif': [powerdif, powerdif]},
                                       columns=['nation', 'pairno', 'groupno', 'powerdif']))

    if not pairFrames:
        # Nothing to pair: same empty table the function has always returned.
        return pd.DataFrame(columns=('nation', 'pairno', 'groupno'))
    # A single concat instead of growing the table inside the loop.
    pairTable = pd.concat(pairFrames)
    return pairTable
In [1925]:
def generateBlocks(pairTable, shuffle=None):
    """Combine pairs from opposite groups into 2x2 blocks.

    For every pair the mean strength, mean penalties and power differential
    are collected. Pairs are then visited in ascending order of their mean
    ``penaltyStrength`` (or in random order when ``shuffle`` is true). Each
    pair that is still free is matched with the free pair of the *other*
    group that has the lowest penalty score, skipping pairs that contain a
    nation one of its members has already met according to ``matchUps``.

    The penalty score of a candidate is
    ``1.5 * |strength error| + |powerdif error|`` seen from the visiting pair
    plus the same two terms seen from the candidate, where the "wanted" value
    of a pair is the overall average plus that pair's own penalty.

    Reads the module-level ``rankings``, ``averageStrength`` and ``matchUps``
    (looking up a nation in ``matchUps`` may create an empty entry for it).

    Parameters
    ----------
    pairTable : pandas.DataFrame
        Output of generatePairs for both groups: columns ``nation``,
        ``pairno``, ``groupno`` and ``powerdif`` (scalars or length-1 arrays).
    shuffle : bool, optional
        Visit the pairs in random order. When omitted, the module-level flag
        ``secondTry`` is used if it exists (generateDayDraw sets it), else
        False.

    Returns
    -------
    pandas.DataFrame or str
        A table with columns ``pairno``, ``blockno``, ``penaltyStrength`` and
        ``penaltyPowerdif`` (two rows per block), or the string
        ``"matchablePairs empty!"`` when some pair has no legal opponent left.
        Callers detect the failure by checking for a string result.
    """
    if shuffle is None:
        # Backward compatible: the caller used to communicate through a global.
        shuffle = bool(globals().get('secondTry', False))

    pairNos = pairTable['pairno'].values
    pairNations = pairTable['nation'].values
    pairGroups = pairTable['groupno'].values
    # generatePairs stores powerdif as length-1 arrays; normalise to plain
    # floats so the arithmetic below does not depend on that representation.
    pairPowerdifs = np.array([float(np.ravel(value)[0]) for value in pairTable['powerdif'].values],
                             dtype=float)
    averagePowerdif = pairPowerdifs.mean() if len(pairPowerdifs) else float('nan')

    # Collect the attributes of every pair.
    pairNumbers = np.unique(pairNos)
    attributeColumns = ['pairno', 'strength', 'powerdif', 'groupno',
                        'penaltyStrength', 'penaltyPowerdif']
    attributeRecords = []
    for pairNumber in pairNumbers:
        inPair = pairNos == pairNumber
        members = rankings.loc[rankings['nation'].isin(pairNations[inPair])]
        attributeRecords.append({'pairno': pairNumber,
                                 'strength': members['strength'].mean(),
                                 'powerdif': pairPowerdifs[inPair].mean(),
                                 'groupno': pairGroups[inPair][0],
                                 'penaltyStrength': members['penaltyStrength'].mean(),
                                 'penaltyPowerdif': members['penaltyPowerdif'].mean()})
    attributesTable = pd.DataFrame(attributeRecords, columns=attributeColumns)

    # Pairs with the lowest strength penalty choose first.
    attributesTable = attributesTable.sort_values(by="penaltyStrength")
    pairAttributes = attributesTable.set_index('pairno')  # scalar lookups by pair id
    orderedPairs = attributesTable['pairno'].tolist()
    if shuffle:
        # If it didn't work sorted by priority, scramble the order. A list is
        # required here: random.sample rejects numpy arrays on Python 3.
        orderedPairs = rd.sample(orderedPairs, len(orderedPairs))

    unmatchedPairs = set(pairNumbers.tolist())  # every pair still has to be matched
    blockRecords = []
    blockCount = 0
    for pairNumber in orderedPairs:
        if pairNumber not in unmatchedPairs:
            continue  # Already placed in a block as somebody's best match.
        unmatchedPairs.discard(pairNumber)  # Remove so the pair doesn't match itself.
        blockCount += 1  # Id of the new block.

        # Candidates: pairs of the other group that are still free.
        groupNo = pairAttributes.at[pairNumber, 'groupno']
        matchablePairs = attributesTable.loc[(attributesTable['groupno'] != groupNo) &
                                             (attributesTable['pairno'].isin(list(unmatchedPairs)))]
        if len(matchablePairs.index) < 1:
            print("matchablePairs is empty when created. Here is attributesTable.")
            print(attributesTable)

        # Also filter out pairs containing a nation that was already met.
        # matchUps entries are lists of opponents; plain strings (the 'land'
        # placeholder) are kept as-is and never equal a nation name.
        countriesAlreadyMet = []
        for country in pairNations[pairNos == pairNumber]:
            for entry in matchUps[country]:
                if isinstance(entry, str):
                    countriesAlreadyMet.append(entry)
                else:
                    countriesAlreadyMet.extend(entry)
        clashPairs = pairTable['pairno'].loc[pairTable['nation'].isin(countriesAlreadyMet)].values
        matchablePairs = matchablePairs.loc[~matchablePairs['pairno'].isin(clashPairs)]
        if len(matchablePairs.index) < 1:
            print("matchablePairs goes empty after filtering clashPairs.")
            return "matchablePairs empty!"

        # Error of every candidate against what this pair would ideally meet.
        pairStrength = pairAttributes.at[pairNumber, 'strength']
        pairPowerdif = pairAttributes.at[pairNumber, 'powerdif']
        bestMatchStrength = averageStrength + pairAttributes.at[pairNumber, 'penaltyStrength']
        bestMatchPowerdif = averagePowerdif + pairAttributes.at[pairNumber, 'penaltyPowerdif']
        strengthError = bestMatchStrength - matchablePairs['strength'].values
        powerdifError = bestMatchPowerdif - matchablePairs['powerdif'].values

        # Quality of the match 'the other way around': how far this pair is
        # from what each candidate would ideally meet.
        otherStrengthError = (averageStrength + matchablePairs['penaltyStrength'].values) - pairStrength
        otherPowerdifError = (averagePowerdif + matchablePairs['penaltyPowerdif'].values) - pairPowerdif

        # Overall penalty score; strength errors weigh 1.5 times as much.
        penaltyScore = (np.abs(strengthError) * 1.5 + np.abs(powerdifError)
                        + np.abs(otherStrengthError) * 1.5 + np.abs(otherPowerdifError))
        indexOfBestMatch = int(np.argmin(penaltyScore))  # first minimum wins ties

        bestMatchedPair = matchablePairs['pairno'].iloc[indexOfBestMatch]
        bestMatchedStrength = matchablePairs['strength'].iloc[indexOfBestMatch]
        bestMatchedPowerdif = matchablePairs['powerdif'].iloc[indexOfBestMatch]
        unmatchedPairs.discard(bestMatchedPair)

        # New penalties: how far the opponent each pair actually received is
        # from the overall average (strength and powerdif).
        blockRecords.append({'pairno': pairNumber,
                             'blockno': blockCount,
                             'penaltyStrength': float(averageStrength - bestMatchedStrength),
                             'penaltyPowerdif': float(averagePowerdif - bestMatchedPowerdif)})
        blockRecords.append({'pairno': bestMatchedPair,
                             'blockno': blockCount,
                             'penaltyStrength': float(averageStrength - pairStrength),
                             'penaltyPowerdif': float(averagePowerdif - pairPowerdif)})

    return pd.DataFrame(blockRecords,
                        columns=['pairno', 'blockno', 'penaltyStrength', 'penaltyPowerdif'])
In [1926]:
# Scratch cell: evaluates the built-in sum on a small list.
# NOTE(review): presumably a check that `sum` still behaves as expected inside
# generateBlocks — confirm, and delete the cell if it is no longer needed.
sum([1,2,3])
Out[1926]:
In [1927]:
def updatePenalties(blockTable, pairTable):
    """Add the penalties earned in today's blocks to every nation's totals.

    For each pair in ``pairTable`` the ``penaltyStrength`` and
    ``penaltyPowerdif`` of its row in ``blockTable`` are added to the
    corresponding columns of both member nations in the module-level
    ``rankings`` table, which is modified in place.

    Parameters
    ----------
    blockTable : pandas.DataFrame
        Output of generateBlocks: columns ``pairno``, ``penaltyStrength`` and
        ``penaltyPowerdif``, one row per pair.
    pairTable : pandas.DataFrame
        Pair table with columns ``nation`` and ``pairno``, two rows per pair.

    Raises
    ------
    ValueError
        If a pair does not have exactly one row in ``blockTable``.
    """
    # The penalty columns start out as integers; convert them once so the
    # fractional updates are stored without an implicit dtype change.
    for column in ('penaltyStrength', 'penaltyPowerdif'):
        if rankings[column].dtype.kind in 'iu':
            rankings[column] = rankings[column].astype(float)

    for pairNumber in pairTable['pairno'].unique():
        # Get the countries in the pair.
        pairCountries = pairTable['nation'].loc[pairTable['pairno'] == pairNumber].values
        countryOne = pairCountries[0]
        countryTwo = pairCountries[1]

        # Get the pair's penalty updates.
        inBlock = blockTable['pairno'] == pairNumber
        strengthUpdate = blockTable['penaltyStrength'].loc[inBlock].values
        powerdifUpdate = blockTable['penaltyPowerdif'].loc[inBlock].values
        if len(strengthUpdate) != 1 or len(powerdifUpdate) != 1:
            raise ValueError(
                "Pair %r must appear exactly once in blockTable (found %d rows)."
                % (pairNumber, len(strengthUpdate)))

        for country in (countryOne, countryTwo):
            # .loc replaces DataFrame.set_value, which was removed in pandas 1.0.
            isCountry = rankings['nation'] == country
            rankings.loc[isCountry, 'penaltyStrength'] += float(strengthUpdate[0])
            rankings.loc[isCountry, 'penaltyPowerdif'] += float(powerdifUpdate[0])
    # NOTE: rankings is not re-sorted by penalty here; the original author left
    # a reminder to do that at the end.
In [ ]:
In [1928]:
def generateDayDraw(dayno, maxRetries=None):
    """Return the table with the match-ups of one day (two rounds).

    Pairs are generated within each group, the pairs are combined into blocks,
    the nations' penalties are updated from those blocks, and every block is
    turned into four debates: two in the day's first round and two in its
    second round. On days 1 and 3 the nations of group 1 are prop in the first
    round and opp in the second round; on days 2 and 4 it is the other way
    around. Finally the opponents of each nation are appended to ``matchUps``.

    Uses the module-level ``group1``, ``group2`` and ``matchUps`` and sets the
    global flag ``secondTry`` that generateBlocks reads.

    Parameters
    ----------
    dayno : int
        Day of the tournament, 1 to 4. Day ``d`` holds rounds ``2d - 1`` and
        ``2d``.
    maxRetries : int, optional
        Maximum number of shuffled retries when generateBlocks fails.
        ``None`` (default) retries until it succeeds, as before.

    Returns
    -------
    pandas.DataFrame
        Columns ``round``, ``prop`` and ``opp``, sorted by round.

    Raises
    ------
    ValueError
        If ``dayno`` is not 1, 2, 3 or 4. This is checked before any penalty
        or match-up state is modified.
    RuntimeError
        If ``maxRetries`` is given and no valid block table was found.
    """
    if dayno not in (1, 2, 3, 4):
        # Previously this was only printed, after which the function crashed
        # with a NameError once the penalties had already been updated.
        raise ValueError("No correct day specified. Please try again!")
    dayFirstRound = 2 * dayno - 1
    daySecondRound = 2 * dayno

    pairs1 = generatePairs(group1)  # Generate pairs within each group.
    pairs2 = generatePairs(group2)
    pairTable = pd.concat([pairs1, pairs2])  # Concat into one pairTable.

    global secondTry
    secondTry = False  # First try to create blockTable in priority order.
    blockTable = generateBlocks(pairTable)
    retries = 0
    while isinstance(blockTable, str):  # A string result signals failure.
        if retries == 0:
            print("blockTable is string, so we are going to try this again!")
        if maxRetries is not None and retries >= maxRetries:
            raise RuntimeError("No valid block table found after %d retries." % retries)
        secondTry = True  # From now on generateBlocks scrambles the pair order.
        retries += 1
        blockTable = generateBlocks(pairTable)

    updatePenalties(blockTable, pairTable)  # Update penalties based on blocks.
    # NOTE: a `rankings.sort_values(by='penaltyPowerdif', ascending=False)` call
    # used to follow here, but its result was discarded, so it had no effect.

    # Create the match-ups from the blocks.
    groupOneStartsProp = dayno in (1, 3)
    blockDraws = []
    for blockNo in np.unique(blockTable['blockno'].values):
        pairs = blockTable['pairno'].loc[blockTable['blockno'] == blockNo].values
        group1Pair = pairs[pairs < 100]  # Pair ids above 100 belong to group 2.
        group2Pair = pairs[pairs > 100]
        group1Countries = pairTable['nation'].loc[pairTable['pairno'] == group1Pair[0]].values
        group2Countries = pairTable['nation'].loc[pairTable['pairno'] == group2Pair[0]].values
        group1CountryOne = group1Countries[0]
        group1CountryTwo = group1Countries[1]
        group2CountryOne = group2Countries[0]
        group2CountryTwo = group2Countries[1]

        rounds = [dayFirstRound, dayFirstRound, daySecondRound, daySecondRound]
        if groupOneStartsProp:
            props = [group1CountryOne, group1CountryTwo, group2CountryTwo, group2CountryOne]
            opps = [group2CountryOne, group2CountryTwo, group1CountryOne, group1CountryTwo]
        else:
            props = [group2CountryOne, group2CountryTwo, group1CountryTwo, group1CountryOne]
            opps = [group1CountryOne, group1CountryTwo, group2CountryOne, group2CountryTwo]
        blockDraws.append(pd.DataFrame({'round': rounds, 'prop': props, 'opp': opps},
                                       columns=['round', 'prop', 'opp']))

        # Add countries to the list of 'seen' for the next rounds.
        for country in [group1CountryOne, group1CountryTwo]:
            matchUps[country].append([group2CountryOne, group2CountryTwo])
        for country in [group2CountryOne, group2CountryTwo]:
            matchUps[country].append([group1CountryOne, group1CountryTwo])

    if not blockDraws:
        return pd.DataFrame(columns=('round', 'prop', 'opp'))
    dayDraw = pd.concat(blockDraws)  # One concat instead of growing in the loop.
    return dayDraw.sort_values(by='round')
In [1929]:
def generateDraw():
    """Generate the draw for all four days of the tournament.

    Calls generateDayDraw for days 1 to 4, in that order (each day updates the
    penalty and match-up state the next day builds on), and stacks the four
    day tables.

    Returns
    -------
    pandas.DataFrame
        All match-ups, with the rows of day 1 first and day 4 last.
    """
    dayDraws = [generateDayDraw(day) for day in range(1, 5)]
    # A single concat instead of growing a table seeded with an empty frame.
    return pd.concat(dayDraws)
In [1930]:
# Run the full four-day draw. This also updates the penalty columns of
# `rankings` and the `matchUps` history, so re-running the cell without
# re-initialising those continues from the mutated state.
draw = generateDraw()
In [1944]:
# All debates of the Netherlands (as prop and as opp), ordered by round.
pd.concat([draw.loc[draw['prop'] == 'Netherlands'],
draw.loc[draw['opp'] == 'Netherlands']]).sort_values('round')
Out[1944]:
In [1945]:
# Strength of every nation recorded in matchUps['Netherlands'] (entries flattened one level).
rankings['strength'].loc[rankings['nation'].isin([item for sublist in matchUps['Netherlands'] for item in sublist])]
Out[1945]:
In [1946]:
# Mean strength of the nations recorded in matchUps['Netherlands'].
rankings['strength'].loc[rankings['nation'].isin([item for sublist in matchUps['Netherlands'] for item in sublist])].mean()
Out[1946]:
In [1947]:
# Raw match-up history of the Netherlands: the 'land' placeholder followed by one
# two-nation list per block the Netherlands was part of.
matchUps['Netherlands']
Out[1947]:
In [1935]:
# Display the rankings ordered by accumulated powerdif penalty, highest first
# (the sorted copy is only shown; `rankings` itself is not reassigned).
rankings.sort_values(by = "penaltyPowerdif", ascending = False)
Out[1935]:
In [1936]:
# Mean accumulated strength penalty over all nations.
rankings['penaltyStrength'].mean()
Out[1936]:
In [1937]:
# Mean accumulated powerdif penalty over all nations.
rankings['penaltyPowerdif'].mean()
Out[1937]: