In [1]:
class ComputeResourceInformation:
    """Descriptive record of one compute resource (hardware and software).

    Every field is a free-form string and defaults to the empty string.
    """

    def __init__(self, processor="", memory="", disk="", operating_system="", kernel="", compiler="", file_system="", system_layer="", motherboard="", chipset="", graphics="", network_card="", monitor=""):
        # Hardware description
        self.processor = processor
        self.memory = memory
        self.disk = disk
        # Software stack
        self.operating_system = operating_system
        self.kernel = kernel
        self.compiler = compiler
        self.file_system = file_system
        self.system_layer = system_layer
        # Remaining components
        self.motherboard = motherboard
        self.chipset = chipset
        self.graphics = graphics
        self.network_card = network_card
        self.monitor = monitor

    def as_map(self):
        """Return the fields as a new dict keyed by human-readable labels."""
        return {
            'Processor': self.processor,
            'Memory': self.memory,
            'Disk': self.disk,
            'OS': self.operating_system,
            'Kernel': self.kernel,
            'Compiler': self.compiler,
            'File System': self.file_system,
            'System Layer': self.system_layer,
            'Motherboard': self.motherboard,
            'Chipset': self.chipset,
            'Graphics': self.graphics,
            'Network': self.network_card,
            'Monitor': self.monitor,
        }

    def __str__(self):
        """Return the string form of the label -> value dict."""
        labelled_fields = self.as_map()
        return str(labelled_fields)
In [2]:
class TestNotFound(Exception):
    """Raised when results are requested for a test name that is not stored.

    Attributes:
        value: the message supplied at construction ("Test Not Found" by default).
    """

    def __init__(self, value="Test Not Found"):
        # Forward the message to Exception so that ``args`` is populated;
        # without this the message is invisible to generic exception handling
        # (logging, copying, re-raising by ``args``).
        super().__init__(value)
        self.value = value

    def __str__(self):
        """Return the ``repr`` of the message (kept for backward compatibility)."""
        return repr(self.value)
In [3]:
import csv
class ComputeResourceTestResults:
    """Benchmark results and system description of one compute resource.

    Results are kept as ``{test_name: [float, ...]}`` in ``_results``; the
    system description is a ``ComputeResourceInformation`` in ``_info``.
    """

    # First-column label in the results CSV -> attribute of the info record.
    _INFO_FIELDS = {
        'Processor': 'processor',
        'Memory': 'memory',
        'Disk': 'disk',
        'OS': 'operating_system',
        'Kernel': 'kernel',
        'Compiler': 'compiler',
        'File-System': 'file_system',
        'System Layer': 'system_layer',
        'Motherboard': 'motherboard',
        'Chipset': 'chipset',
        'Graphics': 'graphics',
        'Network': 'network_card',
        'Monitor': 'monitor',
    }
    # Descriptive rows that are neither stored nor parsed as numeric results.
    _IGNORED_LABELS = ('Screen Resolution',)

    def __init__(self, file = None):
        """Create an empty result set, optionally loading it from a CSV file.

        Args:
            file: path of a results CSV, or None to start empty.
        """
        self._results = {}
        self._info = ComputeResourceInformation()
        if file is not None:
            self.load_from_file(file)

    def update_resource_info(self, processor, memory, disk, operating_system, kernel, compiler, file_system,
                             system_layer='', motherboard='', chipset='', graphics='', network_card='',
                             monitor=None):
        """Overwrite the system description fields.

        Args:
            monitor: new monitor description; None (the default) leaves the
                current value untouched, so existing callers are unaffected.
        """
        self._info.processor = processor
        self._info.memory = memory
        self._info.disk = disk
        self._info.operating_system = operating_system
        self._info.kernel = kernel
        self._info.compiler = compiler
        self._info.file_system = file_system
        self._info.system_layer = system_layer
        self._info.motherboard = motherboard
        self._info.chipset = chipset
        self._info.graphics = graphics
        self._info.network_card = network_card
        if monitor is not None:
            self._info.monitor = monitor

    def add_test_results(self, test_name, test_results_values):
        """Append several values to the results stored for ``test_name``.

        The values are copied, so later additions never modify the list the
        caller passed in.
        """
        self._results.setdefault(test_name, []).extend(test_results_values)

    def add_test_result(self, test_name, test_result_value):
        """Append a single value to the results stored for ``test_name``."""
        self.add_test_results(test_name, [test_result_value])

    def get_test_results(self, test_name):
        """Return the list of values stored for ``test_name``.

        Raises:
            TestNotFound: if no results are stored under that name.
        """
        try:
            return self._results[test_name]
        except KeyError:
            raise TestNotFound("Test Not Found") from None

    def delete_test_results(self, test_name):
        """Remove every value stored for ``test_name``.

        Raises:
            TestNotFound: if no results are stored under that name.
        """
        try:
            del self._results[test_name]
        except KeyError:
            raise TestNotFound("Test Not Found") from None

    def load_from_file(self, file):
        """Load the system description and test results from a CSV file.

        Each row is ``label,value[,value...]``. Rows with fewer than two
        columns, or whose label is a single space, are skipped. Labels listed
        in ``_INFO_FIELDS`` fill the system description; 'Screen Resolution'
        is ignored; every other row is a test whose remaining columns are
        parsed as floats (blank cells are skipped).

        Raises:
            ValueError: if a result cell is neither blank nor a number.
        """
        # Fields absent from the file are reset to '', except the monitor,
        # which is only touched when the file actually has a Monitor row.
        info = {field: '' for field in self._INFO_FIELDS.values()}
        info['monitor'] = None

        with open(file, newline='') as csvfile:
            for row in csv.reader(csvfile, delimiter=','):
                if len(row) < 2 or row[0] == ' ':
                    continue
                label = row[0]
                if label in self._INFO_FIELDS:
                    info[self._INFO_FIELDS[label]] = row[1]
                elif label in self._IGNORED_LABELS:
                    continue
                else:
                    values = [float(cell) for cell in row[1:] if cell.strip()]
                    if values:
                        self.add_test_results(label, values)

        self.update_resource_info(**info)
In [4]:
# Load the merged benchmark CSV of each AWS instance into its own
# ComputeResourceTestResults object (the constructor reads the file immediately).
# NOTE(review): one path contains "m44xlarge" while the variable is named
# m4xlarge, and another directory is spelled "sdd" — confirm both match the
# directory names on disk.
aws_m1large_variable = ComputeResourceTestResults('./benchmarks/aws/m1large-aws-variable-ubuntu/merge-2092.csv')
aws_m4xlarge_variable = ComputeResourceTestResults('./benchmarks/aws/m44xlarge-aws-variable-ubuntu/merge-7274.csv')
aws_m4large_magnetic = ComputeResourceTestResults('./benchmarks/aws/m4large-aws-magnetic-ubuntu/merge-7288.csv')
aws_m4large_sdd = ComputeResourceTestResults('./benchmarks/aws/m4large-aws-sdd-ubuntu/merge-6914.csv')
aws_t2medium_magnetic = ComputeResourceTestResults('./benchmarks/aws/t2medium-aws-magnetic-ubuntu/merge-9347.csv')
aws_t2medium_variable = ComputeResourceTestResults('./benchmarks/aws/t2medium-aws-variable-ubuntu/merge-9691.csv')
aws_t2small_magnetic = ComputeResourceTestResults('./benchmarks/aws/t2small-aws-magnetic-ubuntu/merge-8438.csv')
In [5]:
# Load the merged benchmark CSV of each CECAD resource into its own
# ComputeResourceTestResults object, one variable per resource.
cecad_2xlarge_gp = ComputeResourceTestResults('./benchmarks/cecad/2xlarge-gp-ubuntu/merge-5274.csv')
cecad_2xlarge_hpc = ComputeResourceTestResults('./benchmarks/cecad/2xlarge-hpc-ubuntu/merge-4929.csv')
cecad_3xlarge_gp = ComputeResourceTestResults('./benchmarks/cecad/3xlarge-gp-ubuntu/merge-6495.csv')
cecad_3xlarge_hpc = ComputeResourceTestResults('./benchmarks/cecad/3xlarge-hpc-ubuntu/merge-6359.csv')
cecad_4xlarge_hpc = ComputeResourceTestResults('./benchmarks/cecad/4xlarge-hpc-ubuntu/merge-7671.csv')
cecad_5xlarge_hpc = ComputeResourceTestResults('./benchmarks/cecad/5xlarge-hpc-ubuntu/merge-5104.csv')
cecad_6xlarge_hpc = ComputeResourceTestResults('./benchmarks/cecad/6xlarge-hpc-ubuntu/merge-4492.csv')
cecad_7xlarge_hpc = ComputeResourceTestResults('./benchmarks/cecad/7xlarge-hpc-ubuntu/merge-1826.csv')
cecad_8xlarge_hpc = ComputeResourceTestResults('./benchmarks/cecad/8xlarge-hpc-ubuntu/merge-3615.csv')
cecad_dl1606g = ComputeResourceTestResults('./benchmarks/cecad/dl1606g-ubuntu/merge-6157.csv')
cecad_large_gp = ComputeResourceTestResults('./benchmarks/cecad/large-gp-ubuntu/merge-9893.csv')
cecad_large_hpc = ComputeResourceTestResults('./benchmarks/cecad/large-hpc-ubuntu/merge-7675.csv')
cecad_large_nova = ComputeResourceTestResults('./benchmarks/cecad/large-ubuntu/merge-9724.csv')
cecad_medium_gp = ComputeResourceTestResults('./benchmarks/cecad/medium-gp-ubuntu/merge-5312.csv')
cecad_medium_hpc = ComputeResourceTestResults('./benchmarks/cecad/medium-hpc-ubuntu/merge-8901.csv')
cecad_medium_nova = ComputeResourceTestResults('./benchmarks/cecad/medium-ubuntu/merge-2260.csv')
cecad_r610 = ComputeResourceTestResults('./benchmarks/cecad/r610-ubuntu/merge-8348.csv')
cecad_r900 = ComputeResourceTestResults('./benchmarks/cecad/r900-debian/merge-4836.csv')
cecad_small_gp = ComputeResourceTestResults('./benchmarks/cecad/small-gp-ubuntu/merge-3500.csv')
cecad_small_hpc = ComputeResourceTestResults('./benchmarks/cecad/small-hpc-ubuntu/merge-1569.csv')
cecad_small_test = ComputeResourceTestResults('./benchmarks/cecad/small-test-ubuntu/merge-1472.csv')
cecad_small_nova = ComputeResourceTestResults('./benchmarks/cecad/small-ubuntu/merge-4467.csv')
cecad_xlarge_gp = ComputeResourceTestResults('./benchmarks/cecad/xlarge-gp-ubuntu/merge-1562.csv')
cecad_xlarge_hpc = ComputeResourceTestResults('./benchmarks/cecad/xlarge-hpc-ubuntu/merge-3341.csv')
cecad_xsmall_gp = ComputeResourceTestResults('./benchmarks/cecad/xsmall-gp-ubuntu/merge-3733.csv')
cecad_xsmall_hpc = ComputeResourceTestResults('./benchmarks/cecad/xsmall-hpc-ubuntu/merge-2556.csv')
cecad_xsmall_nova = ComputeResourceTestResults('./benchmarks/cecad/xsmall-ubuntu/merge-1877.csv')
In [6]:
# Sanity check: print the system description parsed from one of the loaded files.
print(aws_m4large_sdd._info )
In [7]:
class TestInformation:
    """Metadata of one benchmark test: name, units, category and direction.

    ``category`` is an index into ``categories`` (use the ``*_TEST``
    constants). ``more_is_better`` tells whether larger values are better.
    """

    CPU_TEST = 0
    MEMORY_TEST = 1
    DISK_TEST = 2
    categories = ("CPU","MEMORY","DISK")

    def __init__(self, test_name, units, category = MEMORY_TEST, more_is_better=True):
        self.test_name = test_name
        self.units = units
        self.category = category
        self.more_is_better = more_is_better

    def as_map(self):
        """Return the metadata as a dict with human-readable keys.

        The category index is translated to its name from ``categories``.
        """
        return {
            'Test Name': self.test_name,
            'Units': self.units,
            'Category': self.categories[self.category],
            'More Is Better': self.more_is_better,
        }

    def __eq__(self,other):
        """Two tests are equal when their name and units match."""
        # Let Python fall back to its default handling for foreign types
        # instead of failing with AttributeError on the missing attributes.
        if not isinstance(other, TestInformation):
            return NotImplemented
        return self.test_name == other.test_name and self.units == other.units

    def __hash__(self):
        # Defining __eq__ alone makes a class unhashable; hash the same
        # fields that __eq__ compares so equal objects hash equally.
        return hash((self.test_name, self.units))

    def __str__(self):
        """Return the string form of ``as_map()``."""
        return str( self.as_map() )
In [8]:
# Quick check of TestInformation: build one entry and display its dict form.
test_info = TestInformation("SQLite", "MB/s", TestInformation.CPU_TEST, True)
test_info.as_map()
Out[8]:
In [9]:
class TestSuite:
    """Collection of ``TestInformation`` entries indexed by test name."""

    def __init__(self, file = None):
        """Create an empty suite, optionally loading it from a CSV file.

        Args:
            file: path of a suite CSV, or None to start empty.
        """
        self._tests = {}
        if file is not None:
            self.load_from_file(file)

    def add_test(self, test_name, units, category=TestInformation.MEMORY_TEST, more_is_better=True ):
        """Register a test; an existing entry with the same name is replaced."""
        test_info = TestInformation(test_name, units, category, more_is_better)
        self._tests[test_info.test_name] = test_info

    def load_from_file(self, file ):
        """Add every test described in a CSV file.

        Each row is ``test_name,units,category,more_is_better`` where
        ``category`` is an integer and ``more_is_better`` is the literal text
        ``True`` for larger-is-better tests (anything else means False).
        Blank lines are skipped.

        Raises:
            IndexError: if a non-blank row has fewer than four columns.
            ValueError: if the category column is not an integer.
        """
        with open(file, newline='') as csvfile:
            for row in csv.reader(csvfile, delimiter=','):
                # csv.reader yields [] for an empty line; skip those (and rows
                # made only of blank cells) instead of failing on row[0].
                if not any(cell.strip() for cell in row):
                    continue
                test_name = row[0]
                units = row[1]
                category = int(row[2])
                more_is_better = row[3].strip() == "True"
                self.add_test(test_name, units, category, more_is_better)

    def get_test_info(self, test_name ):
        """Return the ``TestInformation`` registered under ``test_name``.

        Raises:
            KeyError: if no test with that name is registered.
        """
        return self._tests[test_name]
In [10]:
# Load the test catalogue from CSV and print one entry as a sanity check.
test_suite = TestSuite("./benchmarks/tests.csv")
print( test_suite._tests['7-Zip Compression - Compress Speed Test'] )
In [11]:
import numpy as np
import scipy as sp
from sklearn.cluster import KMeans
class ComputeResourceTestTournament:
    """Round-robin tournament between compute resources for one benchmark test.

    Each player contributes the first value it recorded for the test. The
    values are grouped with K-Means; players in the same cluster tie,
    otherwise the player whose cluster centre is better wins. Players without
    a value are stored as NaN: they lose every match they play and every
    opponent that has a value beats them.

    Typical use: ``add_player`` for every resource, then
    ``clusterize_results``, ``play_tournament`` and
    ``calculate_tournament_results``.
    """

    POINTS_PER_WIN = 3
    POINTS_PER_TIE = 1
    POINTS_PER_LOSE = 0
    # Marker stored for players that have no usable result for this test.
    MISSING_RESULT = float("nan")

    def __init__(self, test_name, more_is_better=True):
        self._test_name = test_name
        self._more_is_better = more_is_better
        self._players = {}          # player name -> first result value or NaN
        self._km = None             # fitted KMeans model, None until clustered
        self._cluster_results = {}  # player name -> cluster label or NaN
        self._tournament = {}       # player name -> [(opponent, points), ...]
        self._results = {}          # player name -> [total points, position]

    @staticmethod
    def _is_missing(value):
        """Return True when ``value`` is NaN (the 'no result' marker)."""
        # NaN is the only value that compares unequal to itself. Testing by
        # value rather than by identity also catches NaN objects that come
        # from elsewhere, e.g. float("nan") parsed from a CSV cell.
        return bool(value != value)

    def add_player(self, player_name, player):
        """Register a player using its first result for this test.

        Args:
            player_name: key used for the player in every result dict.
            player: object exposing ``get_test_results(test_name)``.

        A player whose results are missing (``TestNotFound``) or empty is
        stored as NaN.
        """
        try:
            test_result = player.get_test_results( self._test_name )
        except TestNotFound:
            self._players[ player_name ] = self.MISSING_RESULT
        else:
            self._players[ player_name ] = test_result[0] if test_result else self.MISSING_RESULT

    def clusterize_results( self, number_clusters=10, init_mode='random', number_init=10, max_iterations=300 ):
        """Group the players' values with K-Means.

        Args:
            number_clusters: requested number of clusters; reduced to the
                number of players that have a value when there are fewer.
            init_mode, number_init, max_iterations: forwarded to ``KMeans``
                as ``init``, ``n_init`` and ``max_iter``.

        Afterwards ``_cluster_results`` maps every player to its cluster
        label, or to NaN for players without a value. When no player has a
        value, no model is fitted and ``_km`` stays None.
        """
        self._km = None
        self._cluster_results = {}

        # Players without a value take no part in the clustering.
        for player_name, value in self._players.items():
            if self._is_missing(value):
                self._cluster_results[player_name] = self.MISSING_RESULT

        # Sort (value, name) pairs with NaN already removed, so the ordering is
        # well defined and row i of the data always belongs to ranked[i].
        ranked = sorted(
            (value, player_name)
            for player_name, value in self._players.items()
            if not self._is_missing(value)
        )
        if not ranked:
            return

        self._km = KMeans(n_clusters=min(number_clusters, len(ranked)), init=init_mode,
                          n_init=number_init, max_iter=max_iterations, random_state=0)
        # The value is duplicated to give K-Means the 2-D input it expects.
        labels = self._km.fit_predict([[value, value] for value, _ in ranked])
        for (_, player_name), label in zip(ranked, labels):
            self._cluster_results[player_name] = label

    def _match_points(self, cluster_player_1, cluster_player_2):
        """Return the points player 1 earns against player 2."""
        if self._is_missing(cluster_player_1):
            # Player 1 does not compete: automatic loss.
            return self.POINTS_PER_LOSE
        if self._is_missing(cluster_player_2):
            # Opponent does not compete: automatic win.
            return self.POINTS_PER_WIN
        if cluster_player_1 == cluster_player_2:
            return self.POINTS_PER_TIE
        centers = self._km.cluster_centers_
        first_is_larger = centers[cluster_player_1][0] > centers[cluster_player_2][0]
        # A larger centre wins when more is better and loses otherwise.
        if first_is_larger == bool(self._more_is_better):
            return self.POINTS_PER_WIN
        return self.POINTS_PER_LOSE

    def play_tournament(self):
        """Play every ordered pair of distinct players.

        Rebuilds ``_tournament`` from scratch, so running it again does not
        accumulate duplicate matches. Requires ``clusterize_results`` first.
        """
        self._tournament = {player_name: [] for player_name in self._players}
        for key1 in self._players:
            cluster_player_1 = self._cluster_results[ key1 ]
            for key2 in self._players:
                if key1 == key2:
                    continue
                points = self._match_points(cluster_player_1, self._cluster_results[ key2 ])
                self._tournament[key1].append( (key2, points) )

    def calculate_tournament_results(self ):
        """Fill ``_results`` with ``[points, position]`` for every player.

        Points are the sum of the player's match points. Position is the
        0-based rank of the player's cluster, best cluster centre first;
        players without a value get position ``number of clusters + 1``.
        """
        # First, total points per player.
        self._results = {
            player: [sum(points for _, points in matches), 0]
            for player, matches in self._tournament.items()
        }

        # Second, positions: rank cluster indices by centre, best first.
        # Ranking the indices (instead of looking centre values up again with
        # list.index) stays correct when two clusters share the same centre.
        centers = [] if self._km is None else [center[0] for center in self._km.cluster_centers_]
        ranked_clusters = sorted(range(len(centers)), key=lambda index: centers[index],
                                 reverse=bool(self._more_is_better))
        position_of = {cluster: position for position, cluster in enumerate(ranked_clusters)}

        for player, cluster in self._cluster_results.items():
            if self._is_missing(cluster):
                self._results[player][1] = len(centers) + 1
            else:
                self._results[player][1] = position_of[int(cluster)]

    def get_points(self, player ):
        """Return the total points of ``player`` (KeyError if unknown)."""
        return self._results[player][0]

    def get_position(self, player):
        """Return the position of ``player`` (KeyError if unknown)."""
        return self._results[player][1]
In [12]:
# Build a tournament for the 'Tachyon - Total Time' test with
# more_is_better=False, register every loaded resource as a player,
# then display the value each player contributes.
test = ComputeResourceTestTournament('Tachyon - Total Time',False)
test.add_player( 'aws_m1large_variable', aws_m1large_variable )
test.add_player( 'aws_m4xlarge_variable', aws_m4xlarge_variable )
test.add_player( 'aws_m4large_magnetic', aws_m4large_magnetic )
test.add_player( 'aws_m4large_sdd', aws_m4large_sdd )
test.add_player( 'aws_t2medium_magnetic', aws_t2medium_magnetic )
test.add_player( 'aws_t2medium_variable', aws_t2medium_variable )
test.add_player( 'aws_t2small_magnetic', aws_t2small_magnetic )
test.add_player( 'cecad_2xlarge_gp', cecad_2xlarge_gp )
test.add_player( 'cecad_2xlarge_hpc', cecad_2xlarge_hpc )
test.add_player( 'cecad_3xlarge_gp', cecad_3xlarge_gp )
test.add_player( 'cecad_3xlarge_hpc', cecad_3xlarge_hpc )
test.add_player( 'cecad_4xlarge_hpc', cecad_4xlarge_hpc )
test.add_player( 'cecad_5xlarge_hpc', cecad_5xlarge_hpc )
test.add_player( 'cecad_6xlarge_hpc', cecad_6xlarge_hpc )
test.add_player( 'cecad_7xlarge_hpc', cecad_7xlarge_hpc )
test.add_player( 'cecad_8xlarge_hpc', cecad_8xlarge_hpc )
test.add_player( 'cecad_dl1606g', cecad_dl1606g )
test.add_player( 'cecad_large_gp', cecad_large_gp )
test.add_player( 'cecad_large_hpc', cecad_large_hpc )
test.add_player( 'cecad_large_nova', cecad_large_nova )
test.add_player( 'cecad_medium_gp', cecad_medium_gp )
test.add_player( 'cecad_medium_hpc', cecad_medium_hpc )
test.add_player( 'cecad_medium_nova', cecad_medium_nova )
test.add_player( 'cecad_r610', cecad_r610 )
test.add_player( 'cecad_r900', cecad_r900 )
test.add_player( 'cecad_small_gp', cecad_small_gp )
test.add_player( 'cecad_small_hpc', cecad_small_hpc )
test.add_player( 'cecad_small_test', cecad_small_test )
test.add_player( 'cecad_small_nova', cecad_small_nova )
test.add_player( 'cecad_xlarge_gp', cecad_xlarge_gp )
test.add_player( 'cecad_xlarge_hpc', cecad_xlarge_hpc )
test.add_player( 'cecad_xsmall_gp', cecad_xsmall_gp )
test.add_player( 'cecad_xsmall_hpc', cecad_xsmall_hpc )
test.add_player( 'cecad_xsmall_nova', cecad_xsmall_nova )
test._players
Out[12]:
In [13]:
# Cluster the players' values: 10 clusters, 'random' initialisation,
# 10 initialisations, at most 300 iterations.
test.clusterize_results( 10, 'random', 10, 300 )
In [14]:
# Inspect the cluster label assigned to each player.
test._cluster_results
Out[14]:
In [15]:
# Inspect the centres of the fitted K-Means clusters.
test._km.cluster_centers_
Out[15]:
In [16]:
# Play every pair of players against each other using the cluster assignment.
test.play_tournament()
In [17]:
# Inspect the (opponent, points) list recorded for one player.
test._tournament["aws_m1large_variable"]
Out[17]:
In [18]:
# Same cluster assignment as (player, label) pairs.
test._cluster_results.items()
Out[18]:
In [19]:
# Compute total points and positions, then look up one player's points.
test.calculate_tournament_results()
points = test.get_points("aws_m1large_variable")
points
Out[19]:
In [20]:
# Total points of another player, for comparison.
points = test.get_points("cecad_dl1606g")
points
Out[20]:
In [21]:
# Total points of a third player, for comparison.
points = test.get_points("cecad_r610")
points
Out[21]:
In [22]:
# Full result table: player -> [points, position].
test._results
Out[22]:
In [23]:
# (player, value) pairs ordered by value, ties broken by player name;
# built by sorting (value, player) tuples and swapping them back.
sorted_players = [ (k,v) for v,k in sorted([(v,k) for k,v in test._players.items() ]) ]
sorted_players
Out[23]:
In [24]:
import scipy as sp
# Sorted player values as a NumPy array, used for plotting below.
# np.array replaces scipy.array, a deprecated alias of the same function.
test_results = np.array( sorted(list(test._players.values()) ) )
test_results
Out[24]:
In [25]:
import matplotlib.pyplot as plt
# Scatter the sorted values against themselves, so every point lies on the diagonal.
# NOTE(review): both axis labels are hard-coded to "MB/s" while the title is
# taken from test._test_name — confirm the units match the test being plotted.
plt.scatter(test_results, test_results,s=20)
plt.title(test._test_name)
plt.xlabel("MB/s")
plt.ylabel("MB/s")
plt.autoscale(tight=True)
plt.grid(True, linestyle='-', color='0.75')
plt.show()
In [26]:
from sklearn.cluster import KMeans
# Stand-alone K-Means model with the same settings that were passed to
# test.clusterize_results above; random_state is fixed for repeatable runs.
km = KMeans(n_clusters=10, init='random', n_init=10, max_iter=300, random_state=0)
In [27]:
# Duplicate each sorted value into a [w, w] pair to obtain 2-D samples.
results = [[w,w] for w in sorted(list(test._players.values()))]
results
Out[27]:
In [28]:
# Convert the [w, w] pairs to a NumPy array for K-Means.
# np.array replaces scipy.array, a deprecated alias of the same function.
data_to_cluster = np.array(results)
data_to_cluster
Out[28]:
In [29]:
# Fit the model and get the cluster label of every sample, in input order.
y_km = km.fit_predict(data_to_cluster)
y_km
In [30]:
# Labels stored on the fitted model.
km.labels_
In [31]:
# Centres of the fitted clusters.
km.cluster_centers_
In [32]:
# Number of labelled samples.
len(y_km)
In [33]:
# Map every player to its cluster label. Label i belongs to sorted_players[i]
# because the samples were clustered in the same value-sorted order.
clustering_results = {}
for i, item in enumerate(y_km):
    player_name = sorted_players[i][0]
    clustering_results[player_name] = item
clustering_results
In [34]:
# Names of all registered players.
test._players.keys()
Out[34]:
In [124]:
import csv
class EquivalenceTournament:
    """Tournament over several benchmark tests between compute resources.

    One ``ComputeResourceTestTournament`` is played per competition (test);
    a player's overall score is the sum of its points across all of them.
    """

    def __init__(self):
        self._competitions = []  # TestInformation objects, one per test
        self._players = {}       # player name -> ComputeResourceTestResults
        self._tournament = {}    # test name -> played ComputeResourceTestTournament
        self._results = {}       # player name -> [total points, position]

    def add_player(self, player_name, new_player):
        """Register a player; an existing player with the same name is replaced."""
        self._players[player_name] = new_player

    def add_competition(self, competition):
        """Append one competition (an object with ``test_name`` and ``more_is_better``)."""
        self._competitions.append( competition )

    def add_competitions(self, competitions):
        """Append several competitions at once."""
        self._competitions.extend( competitions )

    def add_test_suite(self, file):
        """Add every test of the suite CSV at ``file`` as a competition."""
        test_suite = TestSuite(file)
        for test_info in test_suite._tests.values():
            self.add_competition(test_info)

    def load_players_from_file(self, file):
        """Register the players listed in a CSV file.

        Each row is ``player_name,results_csv_path``; the results file is
        loaded immediately. Blank lines are skipped.
        """
        with open(file, newline='') as csvfile:
            for row in csv.reader(csvfile, delimiter=','):
                # csv.reader yields [] for an empty line; skip instead of
                # failing on row[0].
                if not any(cell.strip() for cell in row):
                    continue
                player_name = row[0]
                player_info_path = row[1]
                self.add_player(player_name, ComputeResourceTestResults(player_info_path))

    def play_tournament_at_competition(self, players, competition):
        """Not implemented yet; does nothing and returns None."""
        pass

    def play_tournament_at_competitions(self, players, competitions):
        """Not implemented yet; does nothing and returns None."""
        pass

    def play_tournament_at_all_competitions(self, players ):
        """Not implemented yet; does nothing and returns None."""
        pass

    def play_complete_tournament(self, number_clusters=10, init_mode='random', number_init=10, max_iterations=300):
        """Play one tournament per competition with all registered players.

        The arguments are forwarded to ``clusterize_results`` of each
        per-test tournament; the defaults are the values that were
        previously hard-coded. Played tournaments are stored in
        ``_tournament`` under their test name.
        """
        for test_info in self._competitions:
            test = ComputeResourceTestTournament(test_info.test_name, test_info.more_is_better )
            for player_name, player in self._players.items():
                test.add_player(player_name, player)
            test.clusterize_results( number_clusters, init_mode, number_init, max_iterations )
            test.play_tournament()
            test.calculate_tournament_results()
            self._tournament[test_info.test_name] = test

    def calculate_tournament_results(self):
        """Fill ``_results`` with ``[total points, position]`` for every player.

        Position is the 0-based rank by total points, highest first. Tied
        players share the best position of their group (totals 6, 4, 4 give
        positions 0, 1, 1).
        """
        # First, total points of every player over all played tournaments.
        for player in self._players:
            total = sum(result.get_points(player) for result in self._tournament.values())
            self._results[player] = [total, 0]

        # Second, positions: remember the first index of each distinct total in
        # the descending list, so a tie maps to its best position rather than
        # being overwritten by the last duplicate.
        ranked_points = sorted((entry[0] for entry in self._results.values()), reverse=True)
        best_position = {}
        for index, total in enumerate(ranked_points):
            best_position.setdefault(total, index)
        for entry in self._results.values():
            entry[1] = best_position[entry[0]]

    def get_tournament_positions(self):
        """Not implemented yet; does nothing and returns None."""
        pass

    def get_tournament_results(self):
        """Not implemented yet; does nothing and returns None."""
        pass

    def export_tournament_to_csv_file(self, file):
        """Write the first result of every player for every competition to a CSV.

        The header row lists the test names after an empty first cell; each
        following row is a player name followed by its first value per test,
        or an empty cell when the player has no results for that test.
        """
        test_names = [ competition.test_name for competition in self._competitions ]
        with open(file,'w', newline='') as csvfile:
            writer = csv.writer(csvfile,delimiter=',')
            # Header row
            writer.writerow( [""] + test_names )
            for player_name, test_results in self._players.items():
                row_info = [player_name]
                for test_name in test_names:
                    try:
                        result = test_results.get_test_results(test_name)[0]
                    except TestNotFound:
                        result = ""
                    row_info.append(result)
                writer.writerow( row_info )
In [125]:
# Build the overall tournament: competitions come from the test catalogue,
# players from a CSV that names each resource and its results file.
equivalence_tournament = EquivalenceTournament()
equivalence_tournament.add_test_suite("./benchmarks/tests.csv")
equivalence_tournament.load_players_from_file("./benchmarks/players.csv")
In [126]:
# Inspect the loaded players.
equivalence_tournament._players
Out[126]:
In [127]:
# Number of players loaded.
len( equivalence_tournament._players )
Out[127]:
In [128]:
# Number of competitions (tests) loaded.
len( equivalence_tournament._competitions )
Out[128]:
In [129]:
# Cluster and play one per-test tournament for every competition.
equivalence_tournament.play_complete_tournament()
In [130]:
# Played tournaments, keyed by test name.
equivalence_tournament._tournament
Out[130]:
In [131]:
# Per-player [points, position] for one specific test.
equivalence_tournament._tournament["7-Zip Compression - Compress Speed Test"]._results
Out[131]:
In [132]:
# Sum every player's points over all tests, rank them, and show the table.
equivalence_tournament.calculate_tournament_results()
equivalence_tournament._results
Out[132]:
In [133]:
# Export each player's first result per test to tournament.csv in the working directory.
equivalence_tournament.export_tournament_to_csv_file( "tournament.csv")
In [ ]:
In [ ]: