sched-evaluation-full


WLTests Results

Analyses and visualises results generated by a wltest


In [ ]:
# Configure LISA's logging so the cells below can report progress via `logging`.
from lisa.utils import setup_logging
setup_logging()

In [ ]:
import logging
import pandas as pd

from IPython.display import display

from lisa.wa_results_collector import WaResultsCollector

%pylab inline

Results analysis and metrics collection


In [ ]:
# Collect all WLTests results found under `base_dir` into a single collector.
collector = WaResultsCollector(
    
    # WLTests results folder:
    base_dir='../../results/wltests/', # Base path of your results folders
    #wa_dirs='(substring_to_match)',   # Parse only folders matching this regexp
    
    # Results to collect:
    parse_traces=False,                # Enable trace parsing only to get more metrics
                                       # NOTE: results generation will take more time
    
    # Kernel tree used for the tests
    kernel_repo_path='/path/to/your/linux/sources/tree'
)

Collected metrics


In [ ]:
# Dump the list of metric names that can be fed to report()/plot_*() below.
df = collector.results_df
logging.info("Metrics available for plots and analysis:")
available_metrics = df.metric.unique().tolist()
for available_metric in available_metrics:
    logging.info("   %s", available_metric)

Jankbench

Total Frame Duration


In [ ]:
# One report per jankbench test, ranked on the 99th percentile of the total
# frame duration (lower is better).
for jb_test in collector.tests(workload='jankbench'):
    logging.info("Results for: %s", jb_test)
    # Anchor the name so a test whose name is a substring of another
    # does not match both.
    test_regexp = '^' + jb_test + '$'
    collector.report(workload='jankbench', metric='frame_total_duration',
                     test=test_regexp, sort_on='99%', ascending=True)

Energy


In [ ]:
# One report per jankbench test, ranked on mean device energy (lower is better).
for jb_test in collector.tests(workload='jankbench'):
    logging.info("Results for: %s", jb_test)
    # Anchored regexp: match exactly this test name.
    test_regexp = '^' + jb_test + '$'
    collector.report(workload='jankbench', metric='device_total_energy',
                     test=test_regexp, sort_on='mean', ascending=True)

Frame Duration CDF


In [ ]:
# CDF of frame durations per jankbench test; the 16(ms) threshold marks the
# 60 FPS frame budget line on the plot.
for jb_test in collector.tests(workload='jankbench'):
    logging.info("Results for: %s", jb_test)
    test_regexp = '^' + jb_test + '$'
    collector.plot_cdf(workload='jankbench', metric='frame_total_duration',
                       test=test_regexp, threshold=16)

Exoplayer

Dropped Frames


In [ ]:
# One report per exoplayer test, ranked on the 99th percentile of dropped
# frames (lower is better).
for test in collector.tests(workload='exoplayer'):
    logging.info("Results for: %s", test)
    # Anchor the test name like the jankbench cells do, so a test whose name
    # is a substring of another test's name does not match both.
    collector.report(workload='exoplayer', metric='exoplayer_dropped_frames',
                     test="^{}$".format(test), sort_on='99%', ascending=True)

Energy


In [ ]:
# One report per exoplayer test, ranked on mean device energy (lower is better).
for test in collector.tests(workload='exoplayer'):
    logging.info("Results for: %s", test)
    # Anchor the test name like the jankbench cells do, so a test whose name
    # is a substring of another test's name does not match both.
    collector.report(workload='exoplayer', metric='device_total_energy',
                     test="^{}$".format(test), sort_on='mean', ascending=True)

Homescreen


In [ ]:
# One report per homescreen test, ranked on mean device energy (lower is better).
for test in collector.tests(workload='homescreen'):
    logging.info("Results for: %s", test)
    # Anchor the test name like the jankbench cells do, so a test whose name
    # is a substring of another test's name does not match both.
    collector.report(workload='homescreen', metric='device_total_energy',
                     test="^{}$".format(test), sort_on='mean', ascending=True)

Geekbench

Overall scores


In [ ]:
# Overall Geekbench scores: higher is better, hence descending sort.
overall_metrics = ['Single-Core_score', 'Multi-Core_score']
for gb_metric in overall_metrics:
    collector.report(workload='geekbench', metric=gb_metric,
                     sort_on='99%', ascending=False)

Detailed scores


In [ ]:
# Get Geekbench scores
# NOTE(review): this filters on the `test` column, while the PCMark cell below
# filters on `workload` — confirm which column actually holds 'geekbench'.
df = collector.results_df
gb_scores_db = df[df.test == 'geekbench']

# Group scores per test/kernel/metric
grouped_df = gb_scores_db.groupby(['test', 'kernel', 'metric'])

# Summary statistics (count/mean/std/percentiles) for each group of scores
stats_df = pd.DataFrame(grouped_df.value.describe(percentiles=[.95, .99])).reset_index()

Single Core Scores


In [ ]:
# Keep only the single-core metrics; .copy() makes this an independent frame,
# so the .loc assignment below cannot raise SettingWithCopyWarning or silently
# fail to write through to a view of stats_df.
single_score_df = stats_df[stats_df.metric.str.match("Single.*")].copy()
# Strip the common prefix/suffix so the index shows just the sub-test name.
single_score_df.loc[:, "metric"] = single_score_df.metric.apply(
    lambda s: s.replace('Single-Core_', '').replace('_score', ''))
single_score_df = single_score_df.set_index(['kernel', 'test', 'metric'])
logging.info("Detailed SINGLE core scores:")
single_score_df

Multi Core Scores


In [ ]:
# Keep only the multi-core metrics; .copy() makes this an independent frame,
# so the .loc assignment below cannot raise SettingWithCopyWarning or silently
# fail to write through to a view of stats_df.
multi_score_df = stats_df[stats_df.metric.str.match("Multi.*")].copy()
# Strip the common prefix/suffix so the index shows just the sub-test name.
multi_score_df.loc[:, "metric"] = multi_score_df.metric.apply(
    lambda s: s.replace('Multi-Core_', '').replace('_score', ''))
multi_score_df = multi_score_df.set_index(['kernel', 'test', 'metric'])
logging.info("Detailed MULTI core scores:")
multi_score_df

PCMark Scores

Overall Scores


In [ ]:
# Report every 'pcmark_*' score metric; higher is better, hence descending sort.
pm_df = df[df.workload == 'pcmark']
pm_scores = [metric_name for metric_name in pm_df.metric.unique().tolist()
             if metric_name.startswith('pcmark_')]
for pm_metric in pm_scores:
    collector.report(workload='pcmark', metric=pm_metric,
                     sort_on='99%', ascending=False)

Detailed Scores


In [ ]:
# Get PCMark scores
df = collector.results_df
pm_scores_db = df[df.workload == 'pcmark']

# Group scores
grouped_df = pm_scores_db.groupby(['test', 'tag', 'kernel', 'metric'])

# Get stats for grouped scores
stats_df = pd.DataFrame(grouped_df.value.describe(percentiles=[.95, .99])).reset_index()

In [ ]:
# Keep only the 'pcmark_*' metrics; .copy() makes this an independent frame,
# so the .loc assignment below cannot raise SettingWithCopyWarning or silently
# fail to write through to a view of stats_df.
pm_score_df = stats_df[stats_df.metric.str.match('pcmark_.*')].copy()
# Strip the common prefix so the table shows just the sub-test name.
pm_score_df.loc[:, 'metric'] = pm_score_df.metric.apply(
    lambda s: s.replace('pcmark_', ''))
pm_score_df = pm_score_df.set_index(['kernel', 'test'])
logging.info("Detailed scores:")
pm_score_df

Generic comparison plots

plot_comparisons can be used to automatically discover metrics that changed between different kernel versions or tags.


In [ ]:
# List the kernel identifiers found in the results, to pick a baseline from.
logging.info("Here is the list of kernels available:")
kernel_names = df['kernel'].unique().tolist()
logging.info("  %s", ', '.join(kernel_names))

In [ ]:
# Select the baseline kernel for comparisons:
# by default we use the first available:
kernel_baseline = df['kernel'].iloc[0]
# Or define here below one of the above reported kernels as baseline for comparisons
# kernel_baseline = "PutHereYourKernelName"

logging.info("Comparing against baseline kernel: %s", kernel_baseline)
collector.plot_comparisons(base_id=kernel_baseline, by='kernel')