In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(color_codes=True)

In [0]:
# Install analysis dependencies and fetch the FuzzBench source so we can
# reuse its report-generation helpers.
!pip install scikit_posthocs orange3
!git clone https://github.com/google/fuzzbench.git

import sys
sys.path.append("fuzzbench")
from analysis import data_utils

In [0]:
#@title Report data source
report_directory = "2020-05-11"  #@param ["2020-05-11", "2020-04-21", "2020-04-14", "2020-05-20-aflplusplus-2"] {allow-input: true}
data_url = f"https://www.fuzzbench.com/reports/{report_directory}/data.csv.gz"
df = pd.read_csv(data_url)

In [0]:
# Reduce the raw measurements to per-trial snapshot rows, then pivot into a
# benchmark x fuzzer table of median edge coverage.
exp_snapshot_df = data_utils.get_experiment_snapshots(df)
exp_pivot_df = data_utils.experiment_pivot_table(
    exp_snapshot_df, data_utils.benchmark_rank_by_median)
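
For intuition, the snapshot step can be approximated in plain pandas. This is a rough, hypothetical stand-in for get_experiment_snapshots, not its actual logic: it assumes the CSV has trial_id and time columns and simply keeps each trial's latest measurement (the real helper may choose a common snapshot time across trials instead).

In [0]:
# Hypothetical approximation: keep the latest measurement of every trial.
latest_rows = df.groupby(['benchmark', 'fuzzer', 'trial_id'])['time'].idxmax()
rough_snapshot_df = df.loc[latest_rows]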

Median Edge Coverage


In [16]:
exp_snapshot_df.pivot_table(index='benchmark', columns='fuzzer', values='edges_covered', aggfunc='median')


Out[16]:
fuzzer                         afl  aflfast  aflplusplus  aflsmart  eclipser  entropic  fairfuzz  fastcgs_lm  honggfuzz  lafintel  libfuzzer   mopt
benchmark
bloaty_fuzz_target            5486     5177         5205      5426      4220      4807      5023        5667       5511      4971       4466   5648
curl_curl_fuzzer_http         5421     5320         5370      5430      4424      5001      4846        5405       5409      5320       4777   5386
freetype2-2017                5377     5225         5148      5303      4510      5591      5348        5335       7167      5138       4395   5372
harfbuzz-1.3.2                4282     4182         4111      4262      3383      4235      3621        4268       4368      4099       4107   4254
jsoncpp_jsoncpp_fuzzer         634      634          632       634       588       635       634         634        635       630        635    634
lcms-2017-03-21               1109      910         1131      1167       492      1309      1159         851       1008      1119       1211    896
libjpeg-turbo-07-2017         1444     1437         1436      1442      1048      1453      1109        1438       1434      1419       1365   1439
libpcap_fuzz_both               21       21           21        18       880      1736        21          19       1953      1721       1592     21
libpng-1.2.56                  631      629          630       674       512       647       633         525        677       644        630    525
libxml2-v2.9.2                4665     4381         4169      4673      1680      4522      3445        3839       4564      4288       4287   3602
mbedtls_fuzz_dtlsclient       1674     1601         1657      1689      1413      1632      1663        1687       1680      1593       1605   1673
openssl_x509                  4076     4073         4077      4076      4056      4077      4054        4077       4070      4070       4069   4075
openthread-2019-12-23         1727     1690         1710      1734      1672      1539      1139        1715       1729      1524       1536   1713
php_php-fuzz-parser          11209    11118        11143     11229      9888     11069     10961       11332      11495     10979      10231  11207
proj4-2017-08-14              2153     2036         2149      2149       177      2355      2048        1964       3279      2102       2255   1997
re2-2014-12-09                2272     2269         2262      2272      1956      2296      2268        2249       2296      2257       2303   2252
sqlite3_ossfuzz              17246    16656        16652     17297      5620     11199      8805       17714      12617     11898       8629  17248
systemd_fuzz-link-parser       990      986          982       989       936       982       904         989       1002       981        968    989
vorbis-2017-12-11             1010      999          988      1014       892      1007      1001        1014        998       979        790   1014
woff2-2016-05-06              1046      968         1046      1044       835      1046       950        1034       1118      1022       1007   1077
zlib_zlib_uncompress_fuzzer    330      326          329       330       315       338       331         329        334       329        335    329

Current FuzzBench default report ranking

  • rank fuzzers within each benchmark by their mean edge coverage
  • rank each fuzzer across the experiment by its average rank over all benchmarks (a plain-pandas sketch follows)
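
To make these two steps concrete, here is a minimal plain-pandas sketch of the same scheme (data_utils.experiment_level_ranking below is the authoritative implementation; tie handling may differ):

In [0]:
# Step 1: per-benchmark mean edge coverage, one column per fuzzer.
mean_cov = exp_snapshot_df.pivot_table(
    index='benchmark', columns='fuzzer', values='edges_covered', aggfunc='mean')
# Step 2: rank fuzzers within each benchmark (1 = best), then average those
# ranks across benchmarks; lower is better.
sketch_rank = mean_cov.rank(axis=1, ascending=False).mean(axis=0).sort_values()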

In [5]:
default_report_rank = data_utils.experiment_level_ranking(
    exp_snapshot_df, 
    data_utils.benchmark_rank_by_mean, 
    data_utils.experiment_rank_by_average_rank)
default_report_rank


Out[5]:
fuzzer
honggfuzz       3.309524
afl             3.952381
aflsmart        4.119048
entropic        4.761905
fastcgs_lm      6.023810
mopt            6.047619
aflplusplus     6.833333
aflfast         7.452381
lafintel        7.904762
libfuzzer       7.928571
fairfuzz        8.476190
eclipser       11.190476
Name: average rank, dtype: float64

Other ranking measures

exp_pivot_df is the benchmark-by-fuzzer table of median edge coverage, i.e. the result of using median coverage as the benchmark-level scoring function. Alternative experiment-level measures (see the sketch after this list):

  • Number of firsts (times a fuzzer achieves the best median coverage on a benchmark)
  • Percent coverage (median coverage divided by the per-benchmark maximum median)
  • Average rank (simple mean of the per-benchmark ranks)
  • Statistical test wins (count only the cases where the coverage improvement is statistically significant, p-value < 0.05)
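
Rough plain-pandas equivalents of the first three measures, assuming exp_pivot_df is the median-coverage table shown earlier (data_utils may handle ties differently):

In [0]:
# Number of firsts: how often each fuzzer attains the best median coverage.
firsts_sketch = exp_pivot_df.eq(exp_pivot_df.max(axis=1), axis=0).sum()
# Percent coverage: median coverage relative to the per-benchmark maximum,
# averaged across benchmarks.
percent_sketch = exp_pivot_df.div(exp_pivot_df.max(axis=1), axis=0).mean().mul(100)
# Average rank: mean of the per-benchmark ranks (1 = best); lower is better.
avg_rank_sketch = exp_pivot_df.rank(axis=1, ascending=False).mean()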

In [6]:
firsts_ranked = data_utils.experiment_rank_by_num_firsts(exp_pivot_df)
firsts_ranked


Out[6]:
fuzzer
honggfuzz      8.0
aflsmart       4.0
entropic       3.0
fastcgs_lm     2.0
libfuzzer      1.0
mopt           0.0
lafintel       0.0
fairfuzz       0.0
eclipser       0.0
aflplusplus    0.0
aflfast        0.0
afl            0.0
Name: number of wins, dtype: float64

In [7]:
percent_coverage = data_utils.experiment_rank_by_average_normalized_score(exp_pivot_df)
percent_coverage


Out[7]:
fuzzer
honggfuzz      97.018742
entropic       92.400031
aflsmart       90.725944
lafintel       90.410083
afl            90.248494
aflplusplus    88.693044
fastcgs_lm     87.601417
mopt           87.521381
aflfast        87.497333
libfuzzer      87.071347
fairfuzz       81.420302
eclipser       71.055928
Name: average normalized score, dtype: float64

In [8]:
average_rank = data_utils.experiment_rank_by_average_rank(exp_pivot_df)
average_rank


Out[8]:
fuzzer
honggfuzz       3.285714
aflsmart        3.976190
afl             4.095238
entropic        4.666667
fastcgs_lm      5.785714
mopt            5.976190
aflplusplus     6.857143
aflfast         7.523810
libfuzzer       7.785714
lafintel        8.309524
fairfuzz        8.452381
eclipser       11.285714
Name: average rank, dtype: float64

In [9]:
stats_wins = data_utils.experiment_level_ranking(
    exp_snapshot_df,
    data_utils.benchmark_rank_by_stat_test_wins,
    data_utils.experiment_rank_by_average_rank
)
stats_wins


Out[9]:
fuzzer
honggfuzz       3.166667
afl             3.904762
aflsmart        4.357143
entropic        4.761905
fastcgs_lm      5.738095
mopt            5.904762
aflplusplus     6.904762
aflfast         7.642857
libfuzzer       7.738095
lafintel        8.166667
fairfuzz        8.452381
eclipser       11.261905
Name: average rank, dtype: float64
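
FuzzBench's statistical comparisons are based on Mann-Whitney U tests. As a simplified illustration of the stat-test-wins idea (benchmark_rank_by_stat_test_wins applies its own test configuration; the function name and alpha threshold below are ours), this counts pairwise significant wins on a single benchmark:

In [0]:
from itertools import combinations
from scipy.stats import mannwhitneyu

def stat_wins_one_benchmark(bench_df, alpha=0.05):
  # Per-trial snapshot coverage samples for one benchmark, keyed by fuzzer.
  samples = bench_df.groupby('fuzzer')['edges_covered'].apply(list)
  wins = pd.Series(0, index=samples.index)
  for a, b in combinations(samples.index, 2):
    # One-sided test in each direction; count a win only when significant.
    if mannwhitneyu(samples[a], samples[b], alternative='greater').pvalue < alpha:
      wins[a] += 1
    elif mannwhitneyu(samples[b], samples[a], alternative='greater').pvalue < alpha:
      wins[b] += 1
  return wins.sort_values(ascending=False)

stat_wins_one_benchmark(exp_snapshot_df[exp_snapshot_df.benchmark == 'zlib_zlib_uncompress_fuzzer'])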

Ranking comparison chart


In [17]:
rankings = {
    "Default Ranking (mean coverage)": default_report_rank,
    "Stats Wins (p_value wins)": stats_wins,
    "% Coverage Ranking": percent_coverage,
    "Average Rank (median coverage)": average_rank
}

# One panel per ranking method; note that lower is better in the rank-based
# panels, while higher is better for % coverage.
fig, axes = plt.subplots(1, len(rankings), figsize=(30, 7))
for i, (title, ranking_series) in enumerate(rankings.items()):
  ax = sns.barplot(x=ranking_series.values, y=ranking_series.index, ax=axes[i])
  ax.set_title(title)
  ax.set_ylabel("")
fig.suptitle("Comparison of Ranking Methods")
plt.show()


