In [1]:
%pylab inline


Populating the interactive namespace from numpy and matplotlib

Updating final reports for CEC'2015

First, using the webpage pdftables the PDF tables are translate to Excel format.

  • First, we have put all results in a Excel file.
  • Then, we are going to use the pandas library to read the data.

In [2]:
import pandas as pd

In [3]:
table_alg =pd.ExcelFile("results_cec2015.pdf.xlsx")

Then, we read the Sheet right (the 2 in the example)


In [4]:
print(table_alg.sheet_names)
df=table_alg.parse(table_alg.sheet_names[1])


['DECC-G', 'DEEPSO', 'DEEPSO_orig', 'SACC', 'MOS', 'VMODE', 'IHDELS', 'IHDELS_orig', 'CC-CMA-ES', 'CC-CMA-ES_orig']

In [5]:
print(df)


            1.2e5           6e5           3e6
f01  2.990000e+10  4.610000e+09  1.440000e+08
f02  3.450000e+04  2.610000e+04  1.490000e+04
f03  2.120000e+01  2.090000e+01  2.040000e+01
f04  4.270000e+11  4.950000e+10  4.770000e+09
f05  1.560000e+07  1.460000e+07  1.450000e+07
f06  1.050000e+06  1.040000e+06  1.020000e+06
f07  3.500000e+09  2.400000e+08  1.540000e+07
f08  8.330000e+14  1.680000e+14  5.420000e+12
f09  1.030000e+09  9.290000e+09  9.170000e+08
f10  9.410000e+07  9.220000e+07  9.070000e+07
f11  7.520000e+11  8.030000e+09  5.600000e+08
f12  7.800000e+11  1.550000e+11  1.540000e+10
f13  1.820000e+11  1.290000e+10  8.750000e+08
f14  2.180000e+12  1.630000e+11  4.330000e+08
f15  6.600000e+12  3.000000e+07  7.040000e+06

I need a function that get the right position of the Data Frame, considering the function number and the accuracy level.


In [6]:
def get_best_pos(function, accuracy_level=0):
    """
    This function get the final position from the function and the accurary_level required (0 = 1.2e5, 1=6e5, 2=3e6)
    
    Keyword arguments:
    function -- function.
    accuracy_level -- level of accuracy (0 to 2)
    """
    f = function - 1
    r = (f % 5)
    c = (f / 5)*16+accuracy_level*5
    return r+1, c

In [7]:
for f in range(1, 16):
    print(f, get_best_pos(f))


1 (1, 0.0)
2 (2, 3.2)
3 (3, 6.4)
4 (4, 9.6)
5 (5, 12.8)
6 (1, 16.0)
7 (2, 19.2)
8 (3, 22.4)
9 (4, 25.6)
10 (5, 28.8)
11 (1, 32.0)
12 (2, 35.2)
13 (3, 38.4)
14 (4, 41.6)
15 (5, 44.8)

In [8]:
def parse_table_orig(df):
    accuracies = ['1.2e5', '6e5', '3e6']
    best = pd.DataFrame(columns=accuracies)

    for acc_index, acc in enumerate(accuracies):
        val = []
        for f in range(1,16):
            r, c = get_best_pos(f, acc_index)
            val.append(df['f {}'.format(r)][c])
        best[acc] = val
    best.index = ['f{:02d}'.format(i+1) for i in range(15)]
    return best

In [9]:
df=table_alg.parse(table_alg.sheet_names[1])

Get all the data for comparisons


In [10]:
df = {}

for alg in table_alg.sheet_names:
    df[alg]=table_alg.parse(alg)
    
#    if "_orig" in alg:
#        df[alg] = parse_table_orig(df[alg])

Get function that calculate the points for function


In [11]:
def calculate_points(fun, dfs, algs, acc=0):
    """
    Returns the ranking in positions for the function and algorithm desired. 
    
    Keyword parameters: 
    - fun -- function to compare.
    - dfs -- hash with the dataframes for algorithm.
    - algs -- algorithms to compare (must be into df). 
    - acc -- accuracy level (from 0 to 2).
    """
    values = pd.DataFrame(columns=algs)
    
    for alg in algs:
        df = dfs[alg]
        values[alg] = [df[acc][fun-1]]

    ranks = values.rank(1, method='min')
    return np.array(ranks, dtype=np.int).reshape(len(algs))

The following function return the scores for each position following the Formula 1 criterion.


In [12]:
def get_f1_score(num_algs):
    """
    Return a np.array with the scoring criterio by position from the Formula 1, in which
    the first 10 items have scores. 
    The array have num_algs positions. 
    
    - If num_algs is lower than 10, it is shorten. 
    - If num_algs is greater than 10, it is increased with 0s. 
    """
    f1 = np.array([25, 18, 15, 12, 10, 8, 6, 4, 2, 1])
    
    if len(algs) < len(f1):
        f1 = f1[:len(algs)]
    else:
        f1 += np.zeros(len(algs))

    return f1

Finally, a function get_scores combine the previous two functions.


In [13]:
def get_scores(df, algs, funs, accuracies):
    """
    This function returns the scores for the algs 'algs', functions 'funs' and accuracies 'accuracies'.
    
    Keyword parameters:
    df -- dataframe with the data.
    algs -- algorithms to compare (must be included in df). 
    funs -- functions to compare.
    accuracies -- accuracy levels to compare (in string).
    """
    size = len(algs)
    f1 = get_f1_score(size)
    result = np.zeros(size)

    for acc in accuracies:
        for i in funs:
            result += f1[calculate_points(i, df, algs, acc)-1]
    
    results_alg = {alg: res for alg, res in zip(algs, result)}
    return results_alg

Putting all together

Init the initial parameters.


In [14]:
algs = filter(lambda x: not "_orig" in x, df.keys())

algs = filter(lambda x: not "_old" in x, algs)
algs = [alg for alg in algs]
print(algs)
#algs = ['IHDELS', 'MOS']
accuracies = ['1.2e5', '6e5', '3e6']
funs_group = [range(1, 4), range(4, 8), range(8, 12), range(12, 15), [15]]
funs_group_names = ['Fully Separable', 'Partially Separable I', 'Partially Separable II', 'Overlapping', 'Non-separable']


['DECC-G', 'MOS', 'VMODE', 'SACC', 'IHDELS', 'CC-CMA-ES', 'DEEPSO']

Now, we prepare the library.


In [15]:
from matplotlib import pyplot as plt
import seaborn as sns
# Increase font
sns.set(font_scale=1.5)
# Put white grid
sns.set_style("whitegrid")

First, Comparing by group of variables

It is very simple


In [16]:
for fid, funs in enumerate(funs_group):
    title = funs_group_names[fid]
    results = get_scores(df, algs, funs, accuracies)
    results_df = pd.Series(results)
    plt.figure()
    results_df.plot(kind='bar', title=title)
    locs, labels = plt.xticks()
    plt.setp(labels, rotation=90)


Now, we are going to show for group for the Excel file.


In [17]:
funs_categories = dict(zip(funs_group_names, funs_group))
excel = pd.ExcelWriter("results.xls")

In [18]:
def print_results(df, algs, title, funs, accuracies, style='b'):
    results = get_scores(df, algs, funs, accuracies)
    results_df = pd.Series(results)
    plt.figure()
    pd.DataFrame(results_df, columns=['Results']).to_excel(excel, title)
    results_df.plot(kind='bar', title=title, color=style)
    locs, labels = plt.xticks()
    plt.setp(labels, rotation=90)

By Groups


In [19]:
fig_names = ['cat1', 'cat2', 'cat3', 'cat4', 'cat5']
styles = ['blue', 'orange', 'yellow', 'green', 'brown']

for id, title in enumerate(funs_group_names):
    fig_name = fig_names[id]
    print_results(df, algs, title, funs_categories[title], accuracies, styles[id])
    plt.savefig(fig_name, bbox_inches='tight')


By Category


In [20]:
def results_by_accuracy(acc):
    styles = ['blue', 'orange', 'yellow', 'green', 'brown']
    funs = range(15)
    results_cat = pd.DataFrame(columns=funs_group_names)

    for id, fun in enumerate(funs_group_names):
        fig_name = 'fe{}'.format(acc)
        results = get_scores(df, algs, funs_categories[fun], [acc])
        results_cat[fun] = pd.Series(results)
 
    title = 'Results after {} Fitness Evaluations'.format(acc)
    results_cat.plot(kind='bar', title=title, stacked=True)
    results_cat.to_excel(excel, acc)

In [21]:
for acc in accuracies:
    plt.figure()
    results_by_accuracy(acc)
    fname = '{}.png'.format(acc.replace('.',''))
    lgd = plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
    locs, labels = plt.xticks()
    plt.setp(labels, rotation=90)
    plt.savefig(fname, bbox_extra_artists=(lgd,), bbox_inches='tight')


<matplotlib.figure.Figure at 0x7f23acdebef0>
<matplotlib.figure.Figure at 0x7f23acf08e80>
<matplotlib.figure.Figure at 0x7f23ad0400b8>

In [22]:
def results_by_all():
    styles = ['blue', 'orange', 'yellow', 'green', 'brown']
    funs = range(15)
    results_cat = pd.DataFrame(columns=funs_group_names)

    for id, fun in enumerate(funs_group_names):
        fig_name = 'fall'
        results = get_scores(df, algs, funs_categories[fun], accuracies)
        results_cat[fun] = pd.Series(results)
 
    title = 'Overall score'
    results_cat.plot(kind='bar', title=title, stacked=True)
    results_cat.to_excel(excel, 'all')

In [23]:
plt.figure()
results_by_all()
fname = 'all.png'
lgd = plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
locs, labels = plt.xticks()
plt.setp(labels, rotation=90)
plt.savefig(fname, bbox_extra_artists=(lgd,), bbox_inches='tight')


<matplotlib.figure.Figure at 0x7f23acb14e48>

In [24]:
excel.save()