Kernel Functions Profiling



In [1]:
import logging
reload(logging)
logging.basicConfig(
    format='%(asctime)-9s %(levelname)-8s: %(message)s',
    datefmt='%I:%M:%S')

# Enable logging at INFO level
logging.getLogger().setLevel(logging.INFO)

In [2]:
# Generate plots inline
%pylab inline

import json
import os

import re
import collections
import pandas

# Support to tests execution
from executor import Executor


Populating the interactive namespace from numpy and matplotlib

Tests configuration


In [3]:
# Setup a target configuration
target_conf = {

    # Platform and board to target
    "platform"    : "linux",
    "board"       : "juno",

    # Login credentials
    "host"        : "192.168.0.1",
    "username"    : "root",
    "password"    : "",

    # Local installation path
    "tftp"  : {
        "folder"    : "/var/lib/tftpboot",
        "kernel"    : "kern.bin",
        "dtb"       : "dtb.bin",
    },

    # RTApp calibration values (comment to let LISA do a calibration run)
    "rtapp-calib" :  {
        "0": 358, "1": 138, "2": 138, "3": 357, "4": 359, "5": 355
    },

}

tests_conf = {
    
    # Kernel functions to profile for all the test
    # configurations which have the "ftrace" flag enabled
    "ftrace"  : {
         "functions" : [
            "select_task_rq_fair",
            "enqueue_task_fair",
            "dequeue_task_fair",
         ],
         "buffsize" : 80 * 1024,
    },
    
    # Platform configurations to test
    "confs" : [
        {
            "tag"            : "base",
            "flags"          : "ftrace",
            "sched_features" : "NO_ENERGY_AWARE",
            "cpufreq"        : {
                "governor" : "performance",
            },
        },
        {
            "tag"            : "eas",
            "flags"          : "ftrace",
            "sched_features" : "ENERGY_AWARE",
            "cpufreq"        : {
                "governor" : "performance",
            },
        },
    ],
    
    # Workloads to run (on each platform configuration)
    "wloads" : {
        "rta" : {
            "type" : "rt-app",
            "conf" : {
                "class"  : "profile",
                "params"  : {
                    "p20" : {
                        "kind"   : "periodic",
                        "params" : {
                            "duty_cycle_pct" : 20,
                         },
                        "tasks" : "cpus",
                    },
                },
            },
        },
    },
    
    # Number of iterations for each configuration/workload pair
    "iterations" : 3,
    
    # Tools to deploy
    "tools" : [ "rt-app", 'trace-cmd' ],
    
    # Where results are collected
    # NOTE: this folder will be wiped before running the experiments
    "results_dir" : "KernelFunctionsProfilingExample",

    # Modules required by these experiments
    "exclude_modules" : [ "hwmon" ],

}

In [4]:
# Setup tests executions based on our configuration
executor = Executor(target_conf, tests_conf)


03:43:22  INFO    :         Target - Loading custom (inline) test configuration
03:43:22  INFO    :         Target - Using base path: /home/derkling/Code/lisa
03:43:22  INFO    :         Target - Loading custom (inline) target configuration
03:43:22  INFO    :         Target - Loading custom (inline) test configuration
03:43:22  INFO    :         Target - Devlib modules to load: ['bl', 'cpufreq']
03:43:22  INFO    :         Target - Connecting linux target:
03:43:22  INFO    :         Target -   username : root
03:43:22  INFO    :         Target -       host : 192.168.0.1
03:43:22  INFO    :         Target -   password : 
03:43:26  INFO    :         Target - Initializing target workdir:
03:43:26  INFO    :         Target -    /root/devlib-target
03:43:34  INFO    :         Target - Topology:
03:43:34  INFO    :         Target -    [[0, 3, 4, 5], [1, 2]]
03:43:36  INFO    :       Platform - Loading default EM:
03:43:36  INFO    :       Platform -    /home/derkling/Code/lisa/libs/utils/platforms/juno.json
03:43:38  INFO    :         FTrace - Enabled tracepoints:
03:43:38  INFO    :         FTrace -   sched:*
03:43:38  INFO    :         FTrace - Kernel functions profiled:
03:43:38  INFO    :         FTrace -   select_task_rq_fair
03:43:38  INFO    :         FTrace -   enqueue_task_fair
03:43:38  INFO    :         FTrace -   dequeue_task_fair
03:43:38  INFO    :    EnergyMeter - HWMON module not enabled
03:43:38  WARNING :    EnergyMeter - Energy sampling disabled by configuration
03:43:38  WARNING :         Target - Using configuration provided RTApp calibration
03:43:38  INFO    :         Target - Using RT-App calibration values:
03:43:38  INFO    :         Target -    {"0": 358, "1": 138, "2": 138, "3": 357, "4": 359, "5": 355}
03:43:38  WARNING :        TestEnv - Wipe previous contents of the results folder:
03:43:38  WARNING :        TestEnv -    /home/derkling/Code/lisa/results/KernelFunctionsProfilingExample
03:43:38  INFO    :        TestEnv - Set results folder to:
03:43:38  INFO    :        TestEnv -    /home/derkling/Code/lisa/results/KernelFunctionsProfilingExample
03:43:38  INFO    :        TestEnv - Experiment results available also in:
03:43:38  INFO    :        TestEnv -    /home/derkling/Code/lisa/results_latest
03:43:38  INFO    : 
03:43:38  INFO    : ################################################################################
03:43:38  INFO    :       Executor - Experiments configuration
03:43:38  INFO    : ################################################################################
03:43:38  INFO    :       Executor - Configured to run:
03:43:38  INFO    :       Executor -     2 targt configurations:
03:43:38  INFO    :       Executor -       base, eas
03:43:38  INFO    :       Executor -     1 workloads (3 iterations each)
03:43:38  INFO    :       Executor -       rta
03:43:38  INFO    :       Executor - Total: 6 experiments
03:43:38  INFO    :       Executor - Results will be collected under:
03:43:38  INFO    :       Executor -       /home/derkling/Code/lisa/results/KernelFunctionsProfilingExample

Tests execution


In [5]:
# Execute all the configured test
executor.run()


03:43:38  INFO    : 
03:43:38  INFO    : ################################################################################
03:43:38  INFO    :       Executor - Experiments execution
03:43:38  INFO    : ################################################################################
03:43:38  INFO    : 
03:43:38  INFO    : ================================================================================
03:43:38  INFO    :   TargetConfig - configuring target for [base] experiments
03:43:39  INFO    :  SchedFeatures - Set scheduler feature: NO_ENERGY_AWARE
03:43:39  INFO    :        CPUFreq - Configuring all CPUs to use [performance] governor
03:43:40  INFO    :          WlGen - Setup new workload rta
03:43:40  INFO    :          RTApp - Workload duration defined by longest task
03:43:40  INFO    :          RTApp - Default policy: SCHED_OTHER
03:43:40  INFO    :          RTApp - ------------------------
03:43:40  INFO    :          RTApp - task [task_p20], sched: using default policy
03:43:40  INFO    :          RTApp -  | calibration CPU: 1
03:43:40  INFO    :          RTApp -  | loops count: 1
03:43:40  INFO    :          RTApp - + phase_000001: duration 1.000000 [s] (10 loops)
03:43:40  INFO    :          RTApp - |  period   100000 [us], duty_cycle  20 %
03:43:40  INFO    :          RTApp - |  run_time  20000 [us], sleep_time  80000 [us]
03:43:41  INFO    : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
03:43:41  INFO    :       Executor - Experiment 1/6, [base:rta] 1/3
03:43:41  WARNING :       Executor - FTrace events collection enabled
03:43:46  INFO    :          WlGen - Workload execution START:
03:43:46  INFO    :          WlGen -    /root/devlib-target/bin/rt-app /root/devlib-target/run_dir/rta_00.json
03:43:51  INFO    :       Executor - Collected FTrace binary trace:
03:43:51  INFO    :       Executor -    <res_dir>/rtapp:base:rta/1/trace.dat
03:43:52  INFO    :       Executor - Collected FTrace function profiling:
03:43:52  INFO    :       Executor -    <res_dir>/rtapp:base:rta/1/trace_stat.json
03:43:52  INFO    : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
03:43:52  INFO    :       Executor - Experiment 2/6, [base:rta] 2/3
03:43:52  WARNING :       Executor - FTrace events collection enabled
03:43:58  INFO    :          WlGen - Workload execution START:
03:43:58  INFO    :          WlGen -    /root/devlib-target/bin/rt-app /root/devlib-target/run_dir/rta_00.json
03:44:02  INFO    :       Executor - Collected FTrace binary trace:
03:44:02  INFO    :       Executor -    <res_dir>/rtapp:base:rta/2/trace.dat
03:44:03  INFO    :       Executor - Collected FTrace function profiling:
03:44:03  INFO    :       Executor -    <res_dir>/rtapp:base:rta/2/trace_stat.json
03:44:03  INFO    : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
03:44:03  INFO    :       Executor - Experiment 3/6, [base:rta] 3/3
03:44:03  WARNING :       Executor - FTrace events collection enabled
03:44:09  INFO    :          WlGen - Workload execution START:
03:44:09  INFO    :          WlGen -    /root/devlib-target/bin/rt-app /root/devlib-target/run_dir/rta_00.json
03:44:14  INFO    :       Executor - Collected FTrace binary trace:
03:44:14  INFO    :       Executor -    <res_dir>/rtapp:base:rta/3/trace.dat
03:44:14  INFO    :       Executor - Collected FTrace function profiling:
03:44:14  INFO    :       Executor -    <res_dir>/rtapp:base:rta/3/trace_stat.json
03:44:14  INFO    : 
03:44:14  INFO    : ================================================================================
03:44:14  INFO    :   TargetConfig - configuring target for [eas] experiments
03:44:15  INFO    :  SchedFeatures - Set scheduler feature: ENERGY_AWARE
03:44:15  INFO    :        CPUFreq - Configuring all CPUs to use [performance] governor
03:44:16  INFO    :          WlGen - Setup new workload rta
03:44:16  INFO    :          RTApp - Workload duration defined by longest task
03:44:16  INFO    :          RTApp - Default policy: SCHED_OTHER
03:44:16  INFO    :          RTApp - ------------------------
03:44:16  INFO    :          RTApp - task [task_p20], sched: using default policy
03:44:16  INFO    :          RTApp -  | calibration CPU: 1
03:44:16  INFO    :          RTApp -  | loops count: 1
03:44:16  INFO    :          RTApp - + phase_000001: duration 1.000000 [s] (10 loops)
03:44:16  INFO    :          RTApp - |  period   100000 [us], duty_cycle  20 %
03:44:16  INFO    :          RTApp - |  run_time  20000 [us], sleep_time  80000 [us]
03:44:16  INFO    : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
03:44:16  INFO    :       Executor - Experiment 4/6, [eas:rta] 1/3
03:44:16  WARNING :       Executor - FTrace events collection enabled
03:44:22  INFO    :          WlGen - Workload execution START:
03:44:22  INFO    :          WlGen -    /root/devlib-target/bin/rt-app /root/devlib-target/run_dir/rta_00.json
03:44:28  INFO    :       Executor - Collected FTrace binary trace:
03:44:28  INFO    :       Executor -    <res_dir>/rtapp:eas:rta/1/trace.dat
03:44:28  INFO    :       Executor - Collected FTrace function profiling:
03:44:28  INFO    :       Executor -    <res_dir>/rtapp:eas:rta/1/trace_stat.json
03:44:28  INFO    : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
03:44:28  INFO    :       Executor - Experiment 5/6, [eas:rta] 2/3
03:44:28  WARNING :       Executor - FTrace events collection enabled
03:44:34  INFO    :          WlGen - Workload execution START:
03:44:34  INFO    :          WlGen -    /root/devlib-target/bin/rt-app /root/devlib-target/run_dir/rta_00.json
03:44:40  INFO    :       Executor - Collected FTrace binary trace:
03:44:40  INFO    :       Executor -    <res_dir>/rtapp:eas:rta/2/trace.dat
03:44:40  INFO    :       Executor - Collected FTrace function profiling:
03:44:40  INFO    :       Executor -    <res_dir>/rtapp:eas:rta/2/trace_stat.json
03:44:40  INFO    : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
03:44:40  INFO    :       Executor - Experiment 6/6, [eas:rta] 3/3
03:44:40  WARNING :       Executor - FTrace events collection enabled
03:44:46  INFO    :          WlGen - Workload execution START:
03:44:46  INFO    :          WlGen -    /root/devlib-target/bin/rt-app /root/devlib-target/run_dir/rta_00.json
03:44:52  INFO    :       Executor - Collected FTrace binary trace:
03:44:52  INFO    :       Executor -    <res_dir>/rtapp:eas:rta/3/trace.dat
03:44:52  INFO    :       Executor - Collected FTrace function profiling:
03:44:52  INFO    :       Executor -    <res_dir>/rtapp:eas:rta/3/trace_stat.json
03:44:52  INFO    : 
03:44:52  INFO    : ################################################################################
03:44:52  INFO    :       Executor - Experiments execution completed
03:44:52  INFO    : ################################################################################
03:44:52  INFO    :       Executor - Results available in:
03:44:52  INFO    :       Executor -       /home/derkling/Code/lisa/results/KernelFunctionsProfilingExample

In [6]:
res_dir = "/home/derkling/Code/lisa/results/KernelFunctionsProfilingExample"
out_dir = "/home/derkling/Code/lisa/results/KernelFunctionsProfilingExample/rtapp:eas:rta/2/trace.dat"
out_dir.replace(res_dir, "<res_dir>")
print executor.te.res_dir


/home/derkling/Code/lisa/results/KernelFunctionsProfilingExample

In [7]:
# Check content of the output folder
res_dir = executor.te.res_dir
logging.info('Content of the output folder %s', res_dir)
!tree {res_dir}


03:44:52  INFO    : Content of the output folder /home/derkling/Code/lisa/results/KernelFunctionsProfilingExample
/home/derkling/Code/lisa/results/KernelFunctionsProfilingExample
├── rtapp:base:rta
│   ├── 1
│   │   ├── output.log
│   │   ├── rta_00.json
│   │   ├── rt-app-task_p20-0.log
│   │   ├── trace.dat
│   │   └── trace_stat.json
│   ├── 2
│   │   ├── output.log
│   │   ├── rta_00.json
│   │   ├── rt-app-task_p20-0.log
│   │   ├── trace.dat
│   │   └── trace_stat.json
│   ├── 3
│   │   ├── output.log
│   │   ├── rta_00.json
│   │   ├── rt-app-task_p20-0.log
│   │   ├── trace.dat
│   │   └── trace_stat.json
│   ├── kernel.config
│   ├── kernel.version
│   └── platform.json
└── rtapp:eas:rta
    ├── 1
    │   ├── output.log
    │   ├── rta_00.json
    │   ├── rt-app-task_p20-0.log
    │   ├── trace.dat
    │   └── trace_stat.json
    ├── 2
    │   ├── output.log
    │   ├── rta_00.json
    │   ├── rt-app-task_p20-0.log
    │   ├── trace.dat
    │   └── trace_stat.json
    ├── 3
    │   ├── output.log
    │   ├── rta_00.json
    │   ├── rt-app-task_p20-0.log
    │   ├── trace.dat
    │   └── trace_stat.json
    ├── kernel.config
    ├── kernel.version
    └── platform.json

8 directories, 36 files

Load function profiling data


In [8]:
def autodict():
    return collections.defaultdict(autodict)

def parse_perf_stat(res_dir):
    TEST_DIR_RE = re.compile(r'.*/([^:]*):([^:]*):([^:]*)')
    profiling_data = autodict()

    for test_idx in sorted(os.listdir(res_dir)):
        test_dir = os.path.join(res_dir, test_idx)
        if not os.path.isdir(test_dir):
            continue
        match = TEST_DIR_RE.search(test_dir)
        if not match:
            continue
        wtype = match.group(1)
        tconf = match.group(2)
        wload = match.group(3)

        #logging.info('Processing %s:%s:%s', wtype, tconf, wload)
        trace_stat_file = os.path.join(test_dir, '1', 'trace_stat.json')
        if not os.path.isfile(trace_stat_file):
            continue
        with open(trace_stat_file, 'r') as fh:
            data = json.load(fh)
        for cpu_id, cpu_stats in sorted(data.items()):
            for fname in cpu_stats:
                profiling_data[cpu_id][tconf][fname] = cpu_stats[fname]

    return profiling_data
  
profiling_data = parse_perf_stat(res_dir)
#logging.info("Profiling data:\n%s", json.dumps(profiling_data, indent=4))
#profiling_data

Build Pandas DataFrame from profiling data


In [9]:
def get_df(profiling_data):
    cpu_ids = []
    cpu_frames = []
    for cpu_id, cpu_data in sorted(profiling_data.items()):
        cpu_ids.append(cpu_id)
        conf_ids = []
        conf_frames = []
        for conf_id, conf_data in cpu_data.iteritems():
            conf_ids.append(conf_id)
            function_data = pandas.DataFrame.from_dict(conf_data, orient='index')
            conf_frames.append(function_data)
        df = pandas.concat(conf_frames, keys=conf_ids)
        cpu_frames.append(df)
    df = pandas.concat(cpu_frames, keys=cpu_ids)
    #df.head()
    return df

stats_df = get_df(profiling_data)
#stats_df

Plot profiling data per function and CPU


In [10]:
def plot_stats(df, fname, axes=None):
    func_data = df.xs(fname, level=2)
    func_stats = func_data.xs(['avg', 's_2'], axis=1)
    #func_stats
    func_avg = func_stats.unstack(level=1)['avg']
    func_std = func_stats.unstack(level=1)['s_2'].apply(numpy.sqrt)
    func_avg.plot(kind='bar', title=fname, yerr=func_std, ax=axes);

#plot_stats(stats_df, 'select_task_rq_fair')

In [11]:
def plot_all_functions(df):
    functions = df.index.get_level_values(2).unique()
    fcount = len(functions)

    fig, pltaxes = plt.subplots(fcount, 1, figsize=(16, 8*fcount))

    fig_id = 0
    for fname in functions:
        logging.info("Plotting stats for [%s] function", fname)
        if fcount > 1:
            axes = pltaxes[fig_id]
        else:
            axes = pltaxes
        plot_stats(df, fname, axes)
        fig_id = fig_id + 1
        
plot_all_functions(stats_df)


03:44:53  INFO    : Plotting stats for [dequeue_task_fair] function
03:44:53  INFO    : Plotting stats for [enqueue_task_fair] function
03:44:53  INFO    : Plotting stats for [select_task_rq_fair] function