In [1]:
    
# Imports and global display configuration for the ATVA'19 experiments.
from ltlcross_runner import LtlcrossRunner
from IPython.display import display
import pandas as pd
import spot
import sys
import re
spot.setup(show_default='.a')  # Spot display option '.a' — presumably shows acceptance info; see Spot docs
pd.options.display.float_format = '{: .0f}'.format  # render numbers without decimal places
pd.options.display.latex.multicolumn_format = 'c'   # center multicolumn headers in LaTeX export
from tables_utils import split_cols, high_min, high_max, highlight_by_level
from tables_utils import fix_latex, fix_type, fix_tool
from tables_utils import cummulative_to_latex
    
With rerun = True, all experiments are executed again (takes several hours). With False, the data are taken from the *.csv files:
In [2]:
    
# Guard for the expensive experiments: False loads cached *.csv results instead.
rerun = False
    
In [3]:
    
%%bash
# Record the version of every compared translator (for reproducibility).
ltl3ba -v
ltl3tela -v
ltl2tgba --version
delag --version
ltl2dgra --version # Rabinizer 4
    
    
The generate function may be used to generate random formulae. Uncomment the function call below to generate different set of formulae.
In [4]:
    
def generate(n=1000,func=(lambda x: True),filename=None,priorities='',ap=['a','b','c','d','e']):
    """Print ``n`` unique random LTL formulae, one per line.

    Formulae are drawn from ``spot.randltl`` over atomic propositions ``ap``,
    simplified, relabelled to a/b/c/..., and filtered: duplicates, ``tt``,
    ``ff``, and formulae rejected by ``func`` are skipped (they do not count
    towards ``n``).

    Parameters
    ----------
    n : int
        Number of formulae to emit.
    func : callable
        Predicate on a Spot formula; only formulae with ``func(form)`` truthy
        are emitted.
    filename : str or None
        Output file path; ``None`` writes to stdout.
    priorities : str
        Passed to ``spot.randltl`` as ``ltl_priorities``.
    ap : list of str
        Atomic propositions to draw from.
    """
    if filename is None:
        file_h = sys.stdout
    else:
        file_h = open(filename,'w')
    try:
        f = spot.randltl(ap,
                         ltl_priorities=priorities,
                         simplify=3,tree_size=15).relabel_bse(spot.Abc)
        i = 0
        printed = set()
        while(i < n):
            form = next(f)
            if form in printed:
                continue
            if func(form) and not form.is_tt() and not form.is_ff():
                print(form,file=file_h)
                printed.add(form)
                i += 1
    finally:
        # Fix: the opened file handle was previously leaked; close it unless
        # we are writing to stdout.
        if filename is not None:
            file_h.close()
    
In [5]:
    
# Input formula sets: 1000 random formulae and patterns/literature formulae.
f_rand = 'formulae/atva19/rand.ltl'
f_patterns = 'formulae/atva19/patterns.ltl'
# generate(1000, filename = f_rand)
    
We compare output of ltl3tela -D1, ltl2tgba -DG (this setting guarantees that the output will be deterministic), Delag and Rabinizer 4.
In [6]:
    
# Translators producing deterministic automata.
# ltlcross placeholders: %f = input formula, %O = output automaton file.
d_tools = {
    "ltl3tela-D1": "ltl3tela -D1 -f %f > %O",
    "ltl2tgba-DG": "ltl2tgba -DG %f > %O",
    "delag": "delag %f > %O",
    "rabinizer4": "ltl2dgra %f > %O"
}
# Row order for the result tables and the metrics to collect.
d_order = ["ltl3tela-D1", "ltl2tgba-DG", "delag", "rabinizer4"]
d_cols = ["states", "edges", "acc"]
    
In [7]:
    
# Deterministic tools on random formulae: run ltlcross (if rerun) or load cache.
d_csv_rand = 'formulae/atva19/det.rand.csv'
d_data_rand = LtlcrossRunner(d_tools, formula_files = [f_rand], res_filename = d_csv_rand, cols = d_cols)
if rerun:
    d_data_rand.run_ltlcross(automata = False, timeout = '60')  # 60 s per translation
d_data_rand.parse_results()
    
The sum of states, edges and acceptance sets over the set of 1000 random formulae:
In [8]:
    
# Cumulative sums per tool, rows ordered by d_order and columns by d_cols.
det_rand = d_data_rand.cummulative(col = d_cols).unstack(level = 0).loc[d_order, d_cols]
det_rand
    
    Out[8]:
In [9]:
    
# Deterministic tools on patterns/literature formulae: run or load cache.
d_csv_patterns = 'formulae/atva19/det.patterns.csv'
d_data_patterns = LtlcrossRunner(d_tools, formula_files = [f_patterns], res_filename = d_csv_patterns, cols = d_cols)
if rerun:
    d_data_patterns.run_ltlcross(automata = False, timeout = '60')
d_data_patterns.parse_results()
    
The sum of states, edges and acceptance sets over the set of patterns and literature formulae, including timeouts (TO) and parse errors (PE). The latter means that ltlcross was unable to parse the produced automaton due to excessive number of acceptance sets.
In [10]:
    
# Cumulative sums on literature formulae, extended with per-tool error counts:
# TO = timeouts (default of get_error_count), PE = parse errors.
det_to = pd.DataFrame(d_data_patterns.get_error_count(),columns=['TO.literature'])
det_err = pd.DataFrame(d_data_patterns.get_error_count('parse error',False),columns=['PE.literature'])
det_lit = d_data_patterns.cummulative(col = d_cols).unstack(level = 0).loc[d_order, d_cols]
det_lit = pd.concat([det_lit,det_to,det_err],axis=1,join='inner',sort=False)
det_lit
    
    Out[10]:
In [11]:
    
# Show rows (formulae) of the random set where some tool's exit status is not "ok".
to = d_data_rand.exit_status
to[to != "ok"].dropna(how='all')
    
    Out[11]:
Comparison of ltl3tela and ltl2tgba on patterns formulae where the produced automata have different numbers of states:
In [12]:
    
# Pattern formulae where ltl3tela -D1 produces fewer states than ltl2tgba -DG.
d_data_patterns.smaller_than('ltl3tela-D1', 'ltl2tgba-DG')
    
    Out[12]:
In [13]:
    
# The opposite direction: ltl2tgba -DG smaller than ltl3tela -D1.
d_data_patterns.smaller_than('ltl2tgba-DG', 'ltl3tela-D1')
    
    Out[13]:
Merge the tables to get the TeX output later:
In [15]:
    
# Join random and literature results side by side; suffixes mark the formula set.
det_tmp = pd.merge(det_rand, det_lit, suffixes=('.random','.literature'),on='tool')
det_tmp
    
    Out[15]:
In [16]:
    
# Split the '.random'/'.literature' suffixes into a column MultiIndex level
# and put the formula-set level on top.
det = split_cols(det_tmp,'.').swaplevel(axis=1)
det
    
    Out[16]:
Since ltl3ba sometimes produces slightly incorrect HOAF, be more tolerant about this (otherwise ltlcross refuses to parse its output):
In [16]:
    
import os
# Make Spot/ltlcross tolerant to minor HOA-format violations in tool output.
os.environ['SPOT_HOA_TOLERANT']='TRUE'
    
The following computations work the same way as for deterministic automata.
In [17]:
    
# Translators for (possibly) nondeterministic automata.
# ltl3ba is wrapped in ltldo with -H2 — presumably to obtain HOA output; confirm.
n_tools = {
    "ltl3tela": "ltl3tela -f %f > %O",
    "ltl2tgba": "ltl2tgba %f > %O",
    "ltl2tgba-G": "ltl2tgba -G %f > %O",
    "ltl3ba": "ltldo 'ltl3ba -H2' -f %f > %O",
}
# Row order for the result tables and the metrics to collect.
n_order = ["ltl3tela", "ltl2tgba-G", "ltl2tgba", "ltl3ba"]
n_cols = ["states", "edges", "acc"]
    
In [18]:
    
# Nondeterministic tools on random formulae: run ltlcross (if rerun) or load cache.
n_csv_rand = 'formulae/atva19/nondet.rand.csv'
n_data_rand = LtlcrossRunner(n_tools, formula_files = [f_rand], res_filename = n_csv_rand, cols = n_cols)
if rerun:
    n_data_rand.run_ltlcross(automata = False, timeout = '60')
n_data_rand.parse_results()
    
In [19]:
    
# Cumulative sums per nondeterministic tool on the random set.
nd_rand = n_data_rand.cummulative(col = n_cols).unstack(level = 0).loc[n_order, n_cols]
nd_rand
    
    Out[19]:
In [20]:
    
# Cache file for nondeterministic tools on patterns/literature formulae.
n_csv_patterns = 'formulae/atva19/nondet.patterns.csv'
    
In [21]:
    
# Nondeterministic tools on patterns/literature formulae: run or load cache.
n_data_patterns = LtlcrossRunner(n_tools, formula_files = [f_patterns], res_filename = n_csv_patterns, cols = n_cols)
if rerun:
    n_data_patterns.run_ltlcross(automata = False, timeout = '60')
n_data_patterns.parse_results()
    
In [22]:
    
# Cumulative sums on literature formulae with per-tool timeout (TO) and
# parse-error (PE) counts attached, mirroring the deterministic table.
nd_to = pd.DataFrame(n_data_patterns.get_error_count(),columns=['TO.literature'])
nd_err = pd.DataFrame(n_data_patterns.get_error_count('parse error',False),columns=['PE.literature'])
nd_lit = n_data_patterns.cummulative(col = n_cols).unstack(level = 0).loc[n_order, n_cols]
nd_lit = pd.concat([nd_lit,nd_to,nd_err],axis=1,join='inner',sort=False)
nd_lit
    
    Out[22]:
In [23]:
    
# Pattern formulae where ltl3tela produces fewer states than ltl2tgba -G.
n_data_patterns.smaller_than('ltl3tela', 'ltl2tgba-G')
    
    Out[23]:
In [24]:
    
# The opposite direction: ltl2tgba -G smaller than ltl3tela.
n_data_patterns.smaller_than('ltl2tgba-G', 'ltl3tela')
    
    Out[24]:
In [25]:
    
# Join random and literature results side by side, as for the deterministic case.
nd_tmp = pd.merge(nd_rand, nd_lit, suffixes=('.random','.literature'),on='tool')
nd_tmp
    
    Out[25]:
In [26]:
    
# Split suffixes into a column MultiIndex level, formula set on top.
nd = split_cols(nd_tmp,'.').swaplevel(axis=1)
nd
    
    Out[26]:
In [27]:
    
# Error counts (default category) for nondeterministic tools on patterns.
n_data_patterns.get_error_count()
    
    Out[27]:
In [28]:
    
# Error counts (default category) for nondeterministic tools on random formulae.
n_data_rand.get_error_count()
    
    Out[28]:
In [29]:
    
# Redisplay the deterministic summary table before merging.
det
    
    Out[29]:
In [30]:
    
# Merge deterministic & nondeterministic summaries into one table,
# keyed by translator category.
merged = pd.concat([det,nd],keys=["deterministic","nondeterministic"],join='outer',sort=False)
merged
    
    Out[30]:
In [31]:
    
# Export the merged table to LaTeX with minima highlighted per level.
filename = 'colored_res.tex'
merged_high = highlight_by_level(merged, high_min)
cummulative_to_latex(merged_high, filename)
fix_latex(merged_high, filename)  # post-process the generated LaTeX file
    
In [32]:
    
# Count pattern formulae without any tool failure (rows with no NaN values).
d_lit_c = len(d_data_patterns.values.dropna())
n_lit_c = len(n_data_patterns.values.dropna())
print('Number of formulas without errors:\n' +
      '   det: {}\nnondet: {}'.format(d_lit_c, n_lit_c))
    
    
In the following tables, the value in (row, col) is the number of cases where tool row delivers a better automaton than tool col. Better means strictly smaller in the lexicographic order on (#states, #acc marks, #edges). The last column in each row sums the number of such victories of the corresponding tool row.
In [55]:
    
# Pairwise victories of deterministic tools on patterns, lexicographic on
# (states, acc, edges); failed translations excluded.
d_data_patterns.cross_compare(include_fails=False,props=['states','acc','edges'])
    
    Out[55]:
In [56]:
    
# Pairwise victories of deterministic tools on random formulae.
d_data_rand.cross_compare(include_fails=False,props=['states','acc','edges'])
    
    Out[56]:
In [57]:
    
# Pairwise victories of nondeterministic tools on patterns.
n_data_patterns.cross_compare(include_fails=False,props=['states','acc','edges'])
    
    Out[57]:
In [58]:
    
# Pairwise victories of nondeterministic tools on random formulae.
n_data_rand.cross_compare(include_fails=False,props=['states','acc','edges'])
    
    Out[58]:
The following tables contain results on formulae excluded from the evaluation due to error (timeout or parse error) in some translation. In case of successful translation, number of states is shown in the table.
The first and second table contain data for deterministic and nondeterministic translators, respectively.
In [33]:
    
# For pattern formulae where some deterministic tool failed: show the state
# count when the translation succeeded, otherwise the exit status
# (e.g. timeout / parse error), via combine_first.
d_fails = d_data_patterns.values[d_data_patterns.values.isnull().any(axis = 1)]['states']\
    .join(d_data_patterns.exit_status, lsuffix = '.states', rsuffix = '.response')
for tool in d_order:
    d_fails[tool] = d_fails[tool + '.states'].combine_first(d_fails[tool + '.response'])
d_fails_out = d_fails[d_order]
d_fails_out
    
    Out[33]:
In [35]:
    
# Same failure table for the nondeterministic tools: state count on success,
# exit status otherwise.
n_fails = n_data_patterns.values[n_data_patterns.values.isnull().any(axis = 1)]['states']\
    .join(n_data_patterns.exit_status, lsuffix = '.states', rsuffix = '.response')
for tool in n_order:
    n_fails[tool] = n_fails[tool + '.states'].combine_first(n_fails[tool + '.response'])
n_fails_out = n_fails[n_order]
n_fails_out
    
    Out[35]:
Finally, we are interested in formulae where the difference between the best and the worst translator (in terms of state count) is larger than some threshold. Both tables compare deterministic translators, over patterns (with threshold 20) and random formulae (threshold 10). Such statistics can be produced by calling large_diff(dataset, list_of_interesting_tools, threshold).
In [46]:
    
def large_diff(res, tools, threshold):
    """Return rows where the tools' state counts differ by more than ``threshold``.

    Parameters
    ----------
    res : LtlcrossRunner-like
        Object whose ``values`` DataFrame has a top-level 'states' column group.
    tools : list-like
        Tools (columns under 'states') to compare.
    threshold : number
        Minimal strictly-exceeded difference between best and worst tool.

    Returns
    -------
    pandas.DataFrame
        The 'states' columns restricted to ``tools`` for the selected formulae.
    """
    states = res.values.dropna()['states']
    # Compute the spread as a standalone Series instead of assigning a 'diff'
    # column into a slice of res.values (which triggered
    # SettingWithCopyWarning and could fail to write on a view).
    spread = states.loc[:, tools].max(axis = 1) - states.loc[:, tools].min(axis = 1)
    return states.loc[spread > threshold, tools]
    
In [52]:
    
# Deterministic tools on patterns: formulae with state-count spread over 20.
large_diff(d_data_patterns, d_tools, 20)
    
    Out[52]:
In [54]:
    
# Deterministic tools on random formulae: formulae with state-count spread over 10.
large_diff(d_data_rand, d_tools, 10)
    
    Out[54]: