In [1]:
# Imports and global display configuration for the whole notebook.
from ltlcross_runner import LtlcrossRunner
from IPython.display import display
import pandas as pd
import spot
import sys
import re
spot.setup(show_default='.a')  # configure Spot's default automaton rendering
pd.options.display.float_format = '{: .0f}'.format  # show counts without decimals
pd.options.display.latex.multicolumn_format = 'c'  # center multicolumn headers in LaTeX output
from tables_utils import split_cols, high_min, high_max, highlight_by_level
from tables_utils import fix_latex, fix_type, fix_tool
from tables_utils import cummulative_to_latex
With `rerun = True`, all experiments are executed again (takes several hours). With `rerun = False`, the data are taken from the `*.csv` files:
In [2]:
rerun = False
In [3]:
%%bash
# Print the version of every translator used in the evaluation.
ltl3ba -v
ltl3tela -v
ltl2tgba --version
delag --version
ltl2dgra --version # Rabinizer 4
The `generate` function may be used to generate random formulae. Uncomment the function call below to generate a different set of formulae.
In [4]:
def generate(n=1000,func=(lambda x: True),filename=None,priorities='',ap=['a','b','c','d','e']):
    """Print `n` unique random LTL formulae accepted by `func`.

    Parameters
    ----------
    n : int
        number of formulae to produce
    func : callable
        predicate on a Spot formula; only formulae for which it returns
        True are kept
    filename : str or None
        output file path; with None, formulae are printed to stdout
    priorities : str
        `ltl_priorities` string forwarded to spot.randltl
    ap : list of str
        atomic propositions to draw from

    Formulae are simplified, relabelled to a,b,c,... (relabel_bse),
    deduplicated, and trivially true/false formulae are skipped.
    """
    if filename is None:
        file_h = sys.stdout
    else:
        file_h = open(filename,'w')
    try:
        f = spot.randltl(ap,
                         ltl_priorities=priorities,
                         simplify=3,tree_size=15).relabel_bse(spot.Abc)
        i = 0
        printed = set()
        while i < n:
            form = next(f)
            if form in printed:
                # already emitted — draw another formula
                continue
            if func(form) and not form.is_tt() and not form.is_ff():
                print(form,file=file_h)
                printed.add(form)
                i += 1
    finally:
        # close only handles we opened ourselves, never sys.stdout
        if filename is not None:
            file_h.close()
In [5]:
# Input formula sets: 1000 random formulae and patterns/literature formulae.
f_rand = 'formulae/atva19/rand.ltl'
f_patterns = 'formulae/atva19/patterns.ltl'
# generate(1000, filename = f_rand)
We compare the output of `ltl3tela -D1`, `ltl2tgba -DG` (this setting guarantees that the output will be deterministic), Delag, and Rabinizer 4.
In [6]:
# Translator commands for the deterministic comparison.
# ltlcross placeholders: %f = input formula, %O = output automaton file.
d_tools = {
    "ltl3tela-D1": "ltl3tela -D1 -f %f > %O",
    "ltl2tgba-DG": "ltl2tgba -DG %f > %O",
    "delag": "delag %f > %O",
    "rabinizer4": "ltl2dgra %f > %O"
}
# Row order for result tables and the metrics to aggregate.
d_order = ["ltl3tela-D1", "ltl2tgba-DG", "delag", "rabinizer4"]
d_cols = ["states", "edges", "acc"]
In [7]:
# Run (or load from CSV, when rerun is False) ltlcross results for the
# deterministic tools on the random formulae.
d_csv_rand = 'formulae/atva19/det.rand.csv'
d_data_rand = LtlcrossRunner(d_tools, formula_files = [f_rand], res_filename = d_csv_rand, cols = d_cols)
if rerun:
    d_data_rand.run_ltlcross(automata = False, timeout = '60')  # 60 s per translation
d_data_rand.parse_results()
The sum of states, edges and acceptance sets over the set of 1000 random formulae:
In [8]:
# Cumulative sums of states/edges/acceptance sets per tool (random formulae),
# rows ordered by d_order.
det_rand = d_data_rand.cummulative(col = d_cols).unstack(level = 0).loc[d_order, d_cols]
det_rand
Out[8]:
In [9]:
# Same experiment for the patterns/literature formulae.
d_csv_patterns = 'formulae/atva19/det.patterns.csv'
d_data_patterns = LtlcrossRunner(d_tools, formula_files = [f_patterns], res_filename = d_csv_patterns, cols = d_cols)
if rerun:
    d_data_patterns.run_ltlcross(automata = False, timeout = '60')  # 60 s per translation
d_data_patterns.parse_results()
The sum of states, edges and acceptance sets over the set of patterns and literature formulae, including timeouts (TO) and parse errors (PE). The latter means that `ltlcross` was unable to parse the produced automaton due to an excessive number of acceptance sets.
In [10]:
# Per-tool timeout (TO) and parse-error (PE) counts, joined to the
# cumulative stats for the literature formulae.
det_to = pd.DataFrame(d_data_patterns.get_error_count(),columns=['TO.literature'])
det_err = pd.DataFrame(d_data_patterns.get_error_count('parse error',False),columns=['PE.literature'])
det_lit = d_data_patterns.cummulative(col = d_cols).unstack(level = 0).loc[d_order, d_cols]
det_lit = pd.concat([det_lit,det_to,det_err],axis=1,join='inner',sort=False)
det_lit
Out[10]:
In [11]:
# Show formula/tool pairs on random formulae whose exit status is not "ok"
# (e.g. timeouts); rows where all tools succeeded are dropped.
to = d_data_rand.exit_status
to[to != "ok"].dropna(how='all')
Out[11]:
Comparison of ltl3tela
and ltl2tgba
on patterns formulae where the produced automata have different numbers of states:
In [12]:
d_data_patterns.smaller_than('ltl3tela-D1', 'ltl2tgba-DG')
Out[12]:
In [13]:
d_data_patterns.smaller_than('ltl2tgba-DG', 'ltl3tela-D1')
Out[13]:
Merge the tables to get the TeX output later:
In [15]:
# Merge random- and literature-formulae tables; suffixes mark the two sets.
det_tmp = pd.merge(det_rand, det_lit, suffixes=('.random','.literature'),on='tool')
det_tmp
Out[15]:
In [16]:
# Split 'metric.set' column names into a two-level header with the
# formula set on top.
det = split_cols(det_tmp,'.').swaplevel(axis=1)
det
Out[16]:
Since `ltl3ba` sometimes produces slightly incorrect HOAF, be more tolerant about this (otherwise `ltlcross` refuses to parse its output):
In [16]:
# Make Spot's HOA parser tolerant of minor format violations (needed for
# ltl3ba output, see the note above this cell).
import os
os.environ['SPOT_HOA_TOLERANT']='TRUE'
The following computations work the same way as for deterministic automata.
In [17]:
# Translator commands for the nondeterministic comparison.
# ltlcross placeholders: %f = input formula, %O = output automaton file.
n_tools = {
    "ltl3tela": "ltl3tela -f %f > %O",
    "ltl2tgba": "ltl2tgba %f > %O",
    "ltl2tgba-G": "ltl2tgba -G %f > %O",
    "ltl3ba": "ltldo 'ltl3ba -H2' -f %f > %O",
}
# Row order for result tables and the metrics to aggregate.
n_order = ["ltl3tela", "ltl2tgba-G", "ltl2tgba", "ltl3ba"]
n_cols = ["states", "edges", "acc"]
In [18]:
# Run (or load) results for the nondeterministic tools on random formulae.
n_csv_rand = 'formulae/atva19/nondet.rand.csv'
n_data_rand = LtlcrossRunner(n_tools, formula_files = [f_rand], res_filename = n_csv_rand, cols = n_cols)
if rerun:
    n_data_rand.run_ltlcross(automata = False, timeout = '60')  # 60 s per translation
n_data_rand.parse_results()
In [19]:
# Cumulative sums per tool for the nondeterministic translators (random set).
nd_rand = n_data_rand.cummulative(col = n_cols).unstack(level = 0).loc[n_order, n_cols]
nd_rand
Out[19]:
In [20]:
# CSV cache for the nondeterministic-tools results on the pattern formulae.
n_csv_patterns = 'formulae/atva19/nondet.patterns.csv'
In [21]:
# Run (or load) results for the nondeterministic tools on pattern formulae.
n_data_patterns = LtlcrossRunner(n_tools, formula_files = [f_patterns], res_filename = n_csv_patterns, cols = n_cols)
if rerun:
    n_data_patterns.run_ltlcross(automata = False, timeout = '60')  # 60 s per translation
n_data_patterns.parse_results()
In [22]:
# Timeout (TO) and parse-error (PE) counts joined to the cumulative stats,
# mirroring the deterministic case above.
nd_to = pd.DataFrame(n_data_patterns.get_error_count(),columns=['TO.literature'])
nd_err = pd.DataFrame(n_data_patterns.get_error_count('parse error',False),columns=['PE.literature'])
nd_lit = n_data_patterns.cummulative(col = n_cols).unstack(level = 0).loc[n_order, n_cols]
nd_lit = pd.concat([nd_lit,nd_to,nd_err],axis=1,join='inner',sort=False)
nd_lit
Out[22]:
In [23]:
n_data_patterns.smaller_than('ltl3tela', 'ltl2tgba-G')
Out[23]:
In [24]:
n_data_patterns.smaller_than('ltl2tgba-G', 'ltl3tela')
Out[24]:
In [25]:
# Merge random- and literature-formulae tables for the nondeterministic tools.
nd_tmp = pd.merge(nd_rand, nd_lit, suffixes=('.random','.literature'),on='tool')
nd_tmp
Out[25]:
In [26]:
# Split 'metric.set' column names into a two-level header (set on top).
nd = split_cols(nd_tmp,'.').swaplevel(axis=1)
nd
Out[26]:
In [27]:
n_data_patterns.get_error_count()
Out[27]:
In [28]:
n_data_rand.get_error_count()
Out[28]:
In [29]:
det
Out[29]:
In [30]:
# Merge the deterministic and nondeterministic tables into one, with an
# extra index level distinguishing the two.
merged = pd.concat([det,nd],keys=["deterministic","nondeterministic"],join='outer',sort=False)
merged
Out[30]:
In [31]:
# Export the merged table to LaTeX with per-level minima highlighted.
filename = 'colored_res.tex'
merged_high = highlight_by_level(merged, high_min)
cummulative_to_latex(merged_high, filename)
fix_latex(merged_high, filename)
In [32]:
# Count pattern formulae on which every tool succeeded (rows with no NaN).
d_lit_c = len(d_data_patterns.values.dropna())
n_lit_c = len(n_data_patterns.values.dropna())
print('Number of formulas without errors:\n' +
      ' det: {}\nnondet: {}'.format(d_lit_c, n_lit_c))
In the following tables, the value in (row, col)
is the number of cases where tool row
delivers a better automaton than tool col
. Better means strictly smaller in the lexicographic order on (#states, #acc marks, #edges)
. The last column in each row sums the number of such victories of the corresponding tool row
.
In [55]:
d_data_patterns.cross_compare(include_fails=False,props=['states','acc','edges'])
Out[55]:
In [56]:
d_data_rand.cross_compare(include_fails=False,props=['states','acc','edges'])
Out[56]:
In [57]:
n_data_patterns.cross_compare(include_fails=False,props=['states','acc','edges'])
Out[57]:
In [58]:
n_data_rand.cross_compare(include_fails=False,props=['states','acc','edges'])
Out[58]:
The following tables contain results on formulae excluded from the evaluation due to error (timeout or parse error) in some translation. In case of successful translation, number of states is shown in the table.
The first and second table contain data for deterministic and nondeterministic translators, respectively.
In [33]:
# Formulae where at least one deterministic tool failed: show the state
# count when the translation succeeded, otherwise the ltlcross exit status.
d_fails = d_data_patterns.values[d_data_patterns.values.isnull().any(axis = 1)]['states']\
    .join(d_data_patterns.exit_status, lsuffix = '.states', rsuffix = '.response')
for tool in d_order:
    # prefer the state count; fall back to the exit status (e.g. timeout)
    d_fails[tool] = d_fails[tool + '.states'].combine_first(d_fails[tool + '.response'])
d_fails_out = d_fails[d_order]
d_fails_out
Out[33]:
In [35]:
# Same failure overview for the nondeterministic tools.
n_fails = n_data_patterns.values[n_data_patterns.values.isnull().any(axis = 1)]['states']\
    .join(n_data_patterns.exit_status, lsuffix = '.states', rsuffix = '.response')
for tool in n_order:
    # prefer the state count; fall back to the exit status (e.g. timeout)
    n_fails[tool] = n_fails[tool + '.states'].combine_first(n_fails[tool + '.response'])
n_fails_out = n_fails[n_order]
n_fails_out
Out[35]:
Finally, we are interested in formulae where the difference between the best and the worst translator (in terms of the number of states) is larger than some threshold. The tables below compare deterministic translators over patterns (with threshold 20) and random formulae (threshold 10). Such statistics can be produced by calling `large_diff(dataset, list_of_interesting_tools, threshold)`.
In [46]:
def large_diff(res, tools, threshold):
    """Return state counts for formulae with a large best/worst gap.

    Parameters
    ----------
    res : LtlcrossRunner
        results object; `res.values['states']` holds per-tool state counts
    tools : iterable of str
        tool names (columns) to compare
    threshold : int
        keep formulae whose max-min state-count difference exceeds this

    Returns a DataFrame restricted to `tools`, one row per formula whose
    spread of state counts over `tools` is strictly above `threshold`.
    """
    # .copy(): the chained selection would otherwise be a view and the
    # 'diff' assignment would raise SettingWithCopyWarning / be lost.
    df = res.values.dropna()['states'].copy()
    selected = df.loc[:, tools]
    df['diff'] = selected.max(axis = 1) - selected.min(axis = 1)
    return df[df['diff'] > threshold][tools]
In [52]:
large_diff(d_data_patterns, d_tools, 20)
Out[52]:
In [54]:
large_diff(d_data_rand, d_tools, 10)
Out[54]: