In [1]:
%pylab inline
First, using the webpage pdftables the PDF tables are translate to Excel format.
In [2]:
import pandas as pd
In [3]:
table_alg =pd.ExcelFile("results_cec2015.pdf.xlsx")
Then, we read the Sheet right (the 2 in the example)
In [4]:
print(table_alg.sheet_names)
df=table_alg.parse(table_alg.sheet_names[1])
In [5]:
print(df)
I need a function that get the right position of the Data Frame, considering the function number and the accuracy level.
In [6]:
def get_best_pos(function, accuracy_level=0):
"""
This function get the final position from the function and the accurary_level required (0 = 1.2e5, 1=6e5, 2=3e6)
Keyword arguments:
function -- function.
accuracy_level -- level of accuracy (0 to 2)
"""
f = function - 1
r = (f % 5)
c = (f / 5)*16+accuracy_level*5
return r+1, c
In [7]:
for f in range(1, 16):
print(f, get_best_pos(f))
In [8]:
def parse_table_orig(df):
accuracies = ['1.2e5', '6e5', '3e6']
best = pd.DataFrame(columns=accuracies)
for acc_index, acc in enumerate(accuracies):
val = []
for f in range(1,16):
r, c = get_best_pos(f, acc_index)
val.append(df['f {}'.format(r)][c])
best[acc] = val
best.index = ['f{:02d}'.format(i+1) for i in range(15)]
return best
In [9]:
df=table_alg.parse(table_alg.sheet_names[1])
In [10]:
df = {}
for alg in table_alg.sheet_names:
df[alg]=table_alg.parse(alg)
# if "_orig" in alg:
# df[alg] = parse_table_orig(df[alg])
In [11]:
def calculate_points(fun, dfs, algs, acc=0):
"""
Returns the ranking in positions for the function and algorithm desired.
Keyword parameters:
- fun -- function to compare.
- dfs -- hash with the dataframes for algorithm.
- algs -- algorithms to compare (must be into df).
- acc -- accuracy level (from 0 to 2).
"""
values = pd.DataFrame(columns=algs)
for alg in algs:
df = dfs[alg]
values[alg] = [df[acc][fun-1]]
ranks = values.rank(1, method='min')
return np.array(ranks, dtype=np.int).reshape(len(algs))
The following function return the scores for each position following the Formula 1 criterion.
In [12]:
def get_f1_score(num_algs):
"""
Return a np.array with the scoring criterio by position from the Formula 1, in which
the first 10 items have scores.
The array have num_algs positions.
- If num_algs is lower than 10, it is shorten.
- If num_algs is greater than 10, it is increased with 0s.
"""
f1 = np.array([25, 18, 15, 12, 10, 8, 6, 4, 2, 1])
if len(algs) < len(f1):
f1 = f1[:len(algs)]
else:
f1 += np.zeros(len(algs))
return f1
Finally, a function get_scores combine the previous two functions.
In [13]:
def get_scores(df, algs, funs, accuracies):
"""
This function returns the scores for the algs 'algs', functions 'funs' and accuracies 'accuracies'.
Keyword parameters:
df -- dataframe with the data.
algs -- algorithms to compare (must be included in df).
funs -- functions to compare.
accuracies -- accuracy levels to compare (in string).
"""
size = len(algs)
f1 = get_f1_score(size)
result = np.zeros(size)
for acc in accuracies:
for i in funs:
result += f1[calculate_points(i, df, algs, acc)-1]
results_alg = {alg: res for alg, res in zip(algs, result)}
return results_alg
Init the initial parameters.
In [14]:
algs = filter(lambda x: not "_orig" in x, df.keys())
algs = filter(lambda x: not "_old" in x, algs)
algs = [alg for alg in algs]
print(algs)
#algs = ['IHDELS', 'MOS']
accuracies = ['1.2e5', '6e5', '3e6']
funs_group = [range(1, 4), range(4, 8), range(8, 12), range(12, 15), [15]]
funs_group_names = ['Fully Separable', 'Partially Separable I', 'Partially Separable II', 'Overlapping', 'Non-separable']
Now, we prepare the library.
In [15]:
from matplotlib import pyplot as plt
import seaborn as sns
# Increase font
sns.set(font_scale=1.5)
# Put white grid
sns.set_style("whitegrid")
In [16]:
for fid, funs in enumerate(funs_group):
title = funs_group_names[fid]
results = get_scores(df, algs, funs, accuracies)
results_df = pd.Series(results)
plt.figure()
results_df.plot(kind='bar', title=title)
locs, labels = plt.xticks()
plt.setp(labels, rotation=90)
Now, we are going to show for group for the Excel file.
In [17]:
funs_categories = dict(zip(funs_group_names, funs_group))
excel = pd.ExcelWriter("results.xls")
In [18]:
def print_results(df, algs, title, funs, accuracies, style='b'):
results = get_scores(df, algs, funs, accuracies)
results_df = pd.Series(results)
plt.figure()
pd.DataFrame(results_df, columns=['Results']).to_excel(excel, title)
results_df.plot(kind='bar', title=title, color=style)
locs, labels = plt.xticks()
plt.setp(labels, rotation=90)
In [19]:
fig_names = ['cat1', 'cat2', 'cat3', 'cat4', 'cat5']
styles = ['blue', 'orange', 'yellow', 'green', 'brown']
for id, title in enumerate(funs_group_names):
fig_name = fig_names[id]
print_results(df, algs, title, funs_categories[title], accuracies, styles[id])
plt.savefig(fig_name, bbox_inches='tight')
In [20]:
def results_by_accuracy(acc):
styles = ['blue', 'orange', 'yellow', 'green', 'brown']
funs = range(15)
results_cat = pd.DataFrame(columns=funs_group_names)
for id, fun in enumerate(funs_group_names):
fig_name = 'fe{}'.format(acc)
results = get_scores(df, algs, funs_categories[fun], [acc])
results_cat[fun] = pd.Series(results)
title = 'Results after {} Fitness Evaluations'.format(acc)
results_cat.plot(kind='bar', title=title, stacked=True)
results_cat.to_excel(excel, acc)
In [21]:
for acc in accuracies:
plt.figure()
results_by_accuracy(acc)
fname = '{}.png'.format(acc.replace('.',''))
lgd = plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
locs, labels = plt.xticks()
plt.setp(labels, rotation=90)
plt.savefig(fname, bbox_extra_artists=(lgd,), bbox_inches='tight')
In [22]:
def results_by_all():
styles = ['blue', 'orange', 'yellow', 'green', 'brown']
funs = range(15)
results_cat = pd.DataFrame(columns=funs_group_names)
for id, fun in enumerate(funs_group_names):
fig_name = 'fall'
results = get_scores(df, algs, funs_categories[fun], accuracies)
results_cat[fun] = pd.Series(results)
title = 'Overall score'
results_cat.plot(kind='bar', title=title, stacked=True)
results_cat.to_excel(excel, 'all')
In [23]:
plt.figure()
results_by_all()
fname = 'all.png'
lgd = plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
locs, labels = plt.xticks()
plt.setp(labels, rotation=90)
plt.savefig(fname, bbox_extra_artists=(lgd,), bbox_inches='tight')
In [24]:
excel.save()