This notebook shows how to build an energy model of a HiKey960 platform running an Android Linux kernel.
It can be used as a reference implementation of an energy model building flow for platforms
where it's possible to measure the energy consumption at system level, that is either at battery
level or as close as possible to the clusters.
In this case, it is not enough to isolate the CPUs of the target cluster, but we also have to make
sure that all tasks (except the essential ones) are frozen to avoid affecting battery power
measurements. This will be achieved by exploiting the cgroup freezer
controller.
In [1]:
import logging
from conf import LisaLogging
# Initialize LISA's logging facilities for the whole notebook
LisaLogging.setup()
In [2]:
%matplotlib inline
import devlib
import json
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import re
import trappy
from collections import OrderedDict
from csv import DictWriter
from env import TestEnv
from matplotlib.ticker import FormatStrFormatter, MaxNLocator
from scipy.stats import linregress
from scipy import polyfit
from time import sleep
from trappy.plotter.ColorMap import ColorMap
# Support for trace events analysis
from trace import Trace
# Import support for Android devices
from android import Screen, Workload, System
In [3]:
# Setup a target configuration
my_conf = {
    # Target platform and board
    "platform"    : 'android',
    "board"       : 'hikey960',
    # ADB device ID of the target
    "device"      : "HEATERPRO",
    # Output directory (under LISA's results root) for this experiment
    "results_dir" : "EnergyModel_SystemEnergy",

    # Energy Meters Configuration for BayLibre's ACME Cape
    "emeter" : {
        "instrument" : "acme",
        "conf" : {
            'ip_address' : '192.168.0.1'
        },
        # Logical channel name -> ACME probe number
        "channel_map" : {
            "Shunt" : "1"
        }
    },

    # Tools required by the experiments
    "tools"   : ['trace-cmd', 'sysbench'],
    # Devlib modules needed below: frequency/idle control, hotplug,
    # cgroups (freezer) and thermal zones
    "modules" : ['cpufreq', 'cpuidle', 'hotplug', 'cgroups', 'thermal'],

    # FTrace events to collect for all the tests configuration which have
    # the "ftrace" flag enabled
    "ftrace"  : {
        "events" : [
            "cpu_frequency",
            "cpu_idle",
            "sched_switch"
        ],
        "buffsize" : 200 * 1024,
    },
}
In [4]:
# Restart adbd with root permissions, required by the target setup below
!adb root
In [5]:
# Initialize a test environment using:
# the provided target configuration (my_conf)
# NOTE(review): force_new presumably creates a fresh results directory
# instead of reusing a previous one — see TestEnv docs to confirm
te = TestEnv(target_conf=my_conf, force_new=True)
target = te.target
Energy measured of a cluster at system-level is affected by noise due to the other clusters running in the system. To limit the effect of this noise we exploit the cgroup freezer
controller to freeze the tasks already running in the system. However, we need to be careful not to freeze those tasks that allow us to communicate with the target.
Hence, we define here-below a list of tasks called CRITICAL that must not be frozen.
In [6]:
# Lists of critical tasks for each platform.
# These tasks must NOT be frozen, otherwise we would lose the ability to
# communicate with the target (shell, adb).
CRITICAL_TASKS = {
    'linux': ["init", "sh"],
    'android': ["/system/bin/sh", "adbd", "/init"]
}
In [7]:
# Build a per-cluster description: member CPUs, available frequencies
# and the list of idle state indexes.
clusters = {}
for cpu_id, cluster_id in enumerate(target.core_clusters):
    cluster_desc = clusters.setdefault(cluster_id, {"cpus": []})
    cluster_desc["cpus"].append(cpu_id)

for cluster_desc in clusters.values():
    # Frequencies and idle states are per-cluster properties, so query
    # them once using the first CPU of each cluster
    lead_cpu = cluster_desc["cpus"][0]
    cluster_desc["freqs"] = target.cpufreq.list_frequencies(lead_cpu)
    cluster_desc["idles"] = range(len(target.cpuidle.get_states(lead_cpu)))
In [8]:
# Summarize the detected topology, one group of lines per cluster
for cluster_id, desc in clusters.items():
    print("===== cluster {} =====".format(cluster_id))
    print("CPUs {}".format(desc["cpus"]))
    print("freqs {}".format(desc["freqs"]))
    print("idle states {}".format(desc["idles"]))
In [9]:
# Select the energy meter channel: unambiguous only when the channel map
# contains a single entry
emeter_channels = my_conf["emeter"]["channel_map"]
if len(emeter_channels) == 1:
    em_channel = next(iter(emeter_channels))
    logging.info("Will use channel '{}' for energy measurements".format(em_channel))
else:
    logging.info("More than one emeter channel, please specify which one to use")
In [10]:
# Disable all thermal zones so the kernel's thermal management does not
# alter frequencies behind our back during the measurements
target.thermal.disable_all_zones()
In [11]:
# Sensor used to monitor the target temperature between measurements
temperature_sensor = target.thermal.zones[0]
In [ ]:
In [12]:
class EM_workload(object):
    """
    Energy Model workload class

    :param target: Devlib target to run the workload on
    :type target: devlib.target

    :param binary: Path to the workload binary on the target
    :type binary: str

    :param duration: Execution duration of the workload
    :type duration: int

    :param args: Additional arguments to be given to the binary call
    :type args: str or list(str)
    """
    def __init__(self, target, binary, duration, args=None):
        self.target = target
        self.binary = binary
        self.duration = duration
        self.args = args

    def run(self, cpus):
        """
        Launch len('cpus') executions of 'binary', one per cpu

        :param cpus: CPU list of CPUs that will run this workload
        :type cpus: list(int)

        :returns: The cumulative score of each workload as a dict(cpu : score)
        """
        # Start one background instance per CPU, each pinned to its own
        # CPU, so all instances run concurrently
        self.procs = {}
        for cpu in cpus:
            self.procs[cpu] = self.target.background_invoke(
                binary=self.binary, args=self.args, on_cpus=[cpu])

        # Wait for completion and collect the outputs.
        # NOTE(review): Popen.communicate() returns (stdout, stderr); the
        # unpacking order below looks swapped, but the _parse_score()
        # implementations of the subclasses appear to compensate for it —
        # confirm before "fixing" it here.
        stderr = {}
        stdout = {}
        for cpu in cpus:
            stderr[cpu], stdout[cpu] = self.procs[cpu].communicate()

        score = {}
        for cpu in cpus:
            score[cpu] = self._parse_score(stderr[cpu], stdout[cpu])
        return score

    def _parse_score(self, stderr, stdout):
        # Fix: the original raised NotImplemented (a constant, not
        # callable), which itself raises a TypeError at call time;
        # NotImplementedError is the intended exception for an abstract
        # method.
        raise NotImplementedError("_parse_score must be implemented")
In [13]:
class EM_dhrystone(EM_workload):
    """Dhrystone-based Energy Model workload.

    The run duration is passed as the sole command line argument of the
    dhrystone binary; the score is the reported number of loops.
    """
    def __init__(self, target, binary, duration, args=None):
        super(EM_dhrystone, self).__init__(target, binary, duration)
        # Dhrystone takes the run duration as its argument
        self.args = duration

    def _parse_score(self, stderr, stdout):
        # The score is the "<N> loops" figure printed by the benchmark
        loops = re.search('(?P<score>[0-9]+) loops', stdout)
        return float(loops.group('score'))
In [14]:
class EM_sysbench(EM_workload):
    """Sysbench-based Energy Model workload.

    Runs the sysbench "cpu" test for the requested duration; the score is
    the total number of events reported on completion.
    """
    def __init__(self, target, binary, duration, args=None):
        EM_workload.__init__(self, target, binary, duration)
        self.args = '--test=cpu --max-time={} run'.format(duration)

    def _parse_score(self, stderr, stdout):
        # Fix: use a raw string so '\s' is an explicit regex escape rather
        # than relying on Python leaving unknown string escapes untouched
        # (a DeprecationWarning/SyntaxWarning in newer Python versions).
        # The resulting pattern is unchanged.
        # NOTE(review): the score is searched in 'stderr', which — given
        # the swapped unpacking in EM_workload.run() — seems to actually
        # hold the process stdout; confirm before changing.
        match = re.search(r'total number of events:\s*(?P<events>[0-9]+)', stderr)
        return float(match.group('events'))
In [15]:
class power_perf_recorder(object):
    """
    Data collector for C-states analysis

    NOTE(review): this class also relies on the notebook globals 'target',
    'te', 'temperature_sensor', 'clusters' and 'CRITICAL_TASKS'.

    :param clusters: Dictionary describing the clusters to analyze
                     Format: {
                         cluster_name : {
                             "cpus" : [0, 1],
                             "freqs" : [100, 200]
                         }
                     }
    :type clusters: dict

    :param benchmark: instance of an EM_workload subclass
    :type benchmark: EM_workload

    :param channel: Name of the energy meter channel to use for energy collection
    :type channel: str
    """
    def __init__(self, clusters, benchmark, channel):
        self.clusters = clusters
        self.benchmark = benchmark
        self.channel = channel

    @property
    def online_cpus(self):
        # CPUs currently hotplugged in on the target
        return target.list_online_cpus()

    def _read_energy(self):
        """
        Run the benchmark 'loop_cnt' times on all online CPUs and return
        ({cpu : average score}, average energy) over the iterations.
        """
        energy = 0.0
        perf = { cpu : 0.0 for cpu in self.online_cpus }
        for i in xrange(self.loop_cnt):
            # Reset the energy meter so each iteration is measured alone
            te.emeter.reset()
            for cpu, value in self.benchmark.run(self.online_cpus).iteritems():
                perf[cpu] += value
            nrg = te.emeter.report(te.res_dir).channels
            energy += float(nrg[self.channel])
            # Wait for temperature to drop down under a given threshold,
            # to keep thermal conditions comparable between measurements
            while temperature_sensor.get_temperature() > self.test_temperature:
                sleep(1)
        # Average performance and energy over the iterations
        for cpu in perf.iterkeys():
            perf[cpu] = perf[cpu] / self.loop_cnt
        energy = energy / self.loop_cnt
        return (perf, energy)

    def _set_and_check_freq(self, cpu, freq):
        """Set 'cpu' to 'freq' and verify the frequency was applied.

        :raises RuntimeError: if the target reports a different frequency
        """
        target.cpufreq.set_frequency(cpu, freq)
        rfreq = target.cpufreq.get_frequency(cpu)
        if rfreq != freq:
            raise RuntimeError("Requested freq {}, got {}".format(freq, rfreq))

    def _analysis_setup(self):
        """Prepare the target (governor, freezer) and the backup file."""
        # Make sure all CPUs are online
        target.hotplug.online_all()
        # Set cpufreq governor to userpace to allow manual frequency scaling
        target.cpufreq.set_all_governors('userspace')
        # Freeze all userspace tasks
        # NOTE(review): the 'android' task list is hardcoded here rather
        # than selected from the target platform — adjust if reusing this
        # flow on a plain Linux target.
        target.cgroups.freeze(exclude=CRITICAL_TASKS['android'])
        self.data = []
        # Prepare backup file writer
        # NOTE(review): the file is truncated ('w') and no CSV header is
        # written, although run_analysis() documents the file as
        # appendable for resuming — confirm the intended behavior.
        self._fh = open(self.bkp_file, 'w')
        self._writer = DictWriter(
            self._fh,
            fieldnames=['cluster', 'cpus', 'freq', 'perf', 'energy', 'power']
        )
        # Read current temperature, averaged over 10 samples: used as the
        # cool-down threshold between measurements in _read_energy()
        self.test_temperature = 0
        for i in range(10):
            self.test_temperature += temperature_sensor.get_temperature()
        self.test_temperature /= 10
        logging.info("Test ambient temperature determined as: {}".format(self.test_temperature))

    def _analysis_teardown(self):
        """Restore the target state and close the backup file."""
        # Restore device status
        target.cgroups.freeze(thaw=True)
        target.hotplug.online_all()
        # Cleanup backup fh
        self._fh.close()

    def _measure_noise(self):
        """
        Measure the energy consumed with only the first CPU of the noise
        cluster online, at its lowest frequency. The result is stored in
        self.noise_nrg and subtracted from later measurements.
        """
        if self.noise_cpu < 0:
            # No other cluster available for noise estimation
            self.noise_nrg = 0
            return
        onlines = self.online_cpus
        # Offline all cpus but noise cpu
        target.hotplug.online(self.noise_cpu)
        for cpu in onlines:
            if cpu != self.noise_cpu:
                target.hotplug.offline(cpu)
        # Measure energy with noise cpu
        self._set_and_check_freq(self.noise_cpu, clusters[self.noise_cluster]["freqs"][0])
        _, self.noise_nrg = self._read_energy()
        # Restore previously online cpus
        for cpu in onlines:
            target.hotplug.online(cpu)

    def _setup_analysis_cluster(self, cluster_idx):
        """
        Leave only the first CPU of cluster 'cluster_idx' online and,
        when another cluster exists, measure its noise contribution.
        """
        self.cur_cluster = cluster_idx
        online_cluster = self.clusters[cluster_idx]
        online_cpu = online_cluster["cpus"][0]
        # Make sure one core of the cluster is online
        target.hotplug.online(online_cpu)
        # Offline all other cores
        for cluster in self.clusters.itervalues():
            for cpu in cluster["cpus"]:
                if cpu != online_cpu:
                    target.hotplug.offline(cpu)
        # See if we can use another cluster for energy comparison
        if len(self.clusters) < 2:
            self.noise_cluster = -1
            self.noise_cpu = -1
        else:
            # Pick the "next" cluster as the noise reference
            self.noise_cluster = (cluster_idx + 1) % len(self.clusters)
            self.noise_cpu = self.clusters[self.noise_cluster]["cpus"][0]
        self._measure_noise()

    def _setup_analysis_cpu(self, cpu):
        # Bring one more CPU of the current cluster online
        target.hotplug.online(cpu)

    def _analyse(self, freq):
        """
        Measure performance and energy at frequency 'freq' with the
        currently online CPUs, and record the resulting data point.
        """
        self._set_and_check_freq(self.clusters[self.cur_cluster]["cpus"][0], freq)
        # Subtract noise energy
        perf, energy = self._read_energy()
        energy -= self.noise_nrg
        # Remove performance result from noise-measurement CPU
        perf.pop(self.noise_cpu, None)
        res = {
            'cluster': self.cur_cluster,
            'cpus': len(perf),
            'freq': freq,
            'perf': sum(perf.values()),
            'energy' : energy,
            'power': energy / self.benchmark.duration
        }
        # Save data to backup file
        self._writer.writerow(res)
        # Save data locally
        self.data.append(res)

    def run_analysis(self, loop_cnt, bkp_file='pstates.csv'):
        """
        This will record energy values for the P-states of the target.
        Energy will be recorded with an increasing amount of active CPUs (from 1 to all of the cluster's CPUs),
        for all available frequencies. This will be done on each cluster.

        :param loop_cnt: Number of iterations for each benchmark execution
        :type loop_cnt: int

        :param bkp_file: File in which the recorded data will be appended
                         Useful to resume the analysis after closing the Notebook
        :type bkp_file: str
        """
        self.loop_cnt = loop_cnt
        self.bkp_file = bkp_file
        try:
            self._analysis_setup()
            # A) For each cluster
            for cluster_idx in self.clusters.iterkeys():
                logging.info("Cluster={}".format(cluster_idx))
                self._setup_analysis_cluster(cluster_idx)
                # B) For each core of that cluster
                # NOTE(review): iterates the notebook-global 'clusters'
                # rather than self.clusters — works only because both are
                # the same object in this notebook.
                for cpu in clusters[cluster_idx]["cpus"]:
                    self._setup_analysis_cpu(cpu)
                    logging.info("Online cpus={}".format(self.online_cpus))
                    # C) For each supported OPP
                    for freq in clusters[cluster_idx]["freqs"]:
                        logging.info("Freq={}".format(freq))
                        self._analyse(freq)
        finally:
            # Always restore the target, even on failure/interrupt
            self._analysis_teardown()
        df = pd.DataFrame(self.data)
        return df.set_index(['cluster', 'freq', 'cpus']).sort_index(level='cluster')
In [16]:
# Use sysbench (5s CPU test per run) as the energy model workload
sysbench = EM_sysbench(target, target.which("sysbench"), 5)
pp_recorder = power_perf_recorder(clusters, sysbench, em_channel)
In [17]:
#dhry = EM_dhrystone(target, target.which("dhry2"), 2)
#pp_recorder = power_perf_recorder(clusters, dhry, em_channel)
In [18]:
# Collect P-state power/performance data: 25 iterations per data point
power_perf = pp_recorder.run_analysis(loop_cnt=25)
In [19]:
power_perf
Out[19]:
In this first part, the energy evolution will be plotted. If at some point an energy curve's gradient is smaller than at a previous point, it means the readings have been impacted.
In [20]:
def plot_bench_nrg(power_perf_df, cluster):
    """Plot, for one cluster, the measured energy versus frequency, with
    one curve per number of active CPUs."""
    fig, ax = plt.subplots(1, 1, figsize=(16, 5))
    cl_df = power_perf_df.loc[cluster]
    for n_cpus, grp in cl_df.groupby(level='cpus'):
        freqs = grp.index.get_level_values("freq").tolist()
        nrg = grp["energy"].tolist()
        line = ax.plot(freqs, nrg)
        # Reuse the line color for the scatter markers
        ax.scatter(freqs, nrg, label="{} active cpus".format(n_cpus),
                   color=line[-1].get_color())
        plt.xticks(freqs)
    ax.set_title("Cluster {} energy consumption evolution".format(cluster), fontsize=15)
    ax.set_ylabel("Energy reading (bogo-joule)", fontsize=15)
    ax.set_xlabel("Frequency (Hz)", fontsize=15)
    ax.legend(loc="upper left")
    ax.grid(True)
In [21]:
# Plot the raw energy evolution of each cluster
for cluster in clusters:
    plot_bench_nrg(power_perf, cluster)
In this second part, we'll remove the lines that have this gradient anomaly.
In [22]:
def filter_bench_nrg(power_perf_df):
    """Drop (cluster, cpus) groups whose energy-vs-frequency curve shows a
    gradient anomaly, i.e. a slope that shrinks by more than 5% between
    two consecutive frequency steps."""
    def slope(p0, p1):
        # Gradient of the energy curve between two (freq, energy) points
        return (p1[1] - p0[1]) / (p1[0] - p0[0])

    bad_groups = []
    for cluster in power_perf_df.index.levels[0].tolist():
        cl_df = power_perf_df.loc[cluster].reset_index()
        for n_cpus, grp in cl_df.groupby('cpus'):
            pts = [[grp.iloc[i]["freq"], grp.iloc[i]["energy"]]
                   for i in range(len(grp))]
            prev_slope = slope(pts[0], pts[1])
            for i in range(2, len(pts)):
                cur_slope = slope(pts[i - 1], pts[i])
                # Anomaly: the gradient decreased by more than 5%
                if prev_slope > cur_slope * 1.05 and \
                   (cluster, n_cpus) not in bad_groups:
                    bad_groups.append((cluster, n_cpus))
                prev_slope = cur_slope

    flat = power_perf_df.reset_index()
    for cl, n_cpus in bad_groups:
        # Remove entire lines where at least one anomaly was found
        flat = flat[~((flat["cpus"] == n_cpus) & (flat["cluster"] == cl))]
    return flat.set_index(['cluster', 'freq', 'cpus']).sort_index(level='cluster')
In [23]:
# Filter out (cluster, cpus) groups showing gradient anomalies
power_perf_new = filter_bench_nrg(power_perf)
In [24]:
# Re-plot the energy evolution after filtering
for cluster in clusters:
    plot_bench_nrg(power_perf_new, cluster)
In [25]:
def power_perf_stats(power_perf_df):
    """
    For each cluster compute per-OPP power and performance statistics.

    :param power_perf_df: dataframe containing power and performance numbers
    :type power_perf_df: :mod:`pandas.DataFrame`
    """
    clusters = power_perf_df.index.get_level_values('cluster')\
                                  .unique().tolist()

    # Statistic name -> (pandas Series method, positional arguments)
    stats = {
        "avg": ("mean", []),
        "std": ("std", []),
        "min": ("min", []),
        "max": ("max", []),
        "c99": ("quantile", [0.99]),
    }

    data = []
    for cl in clusters:
        cl_df = power_perf_df.loc[cl].reset_index()
        for freq, grp in cl_df.groupby('freq'):
            # Normalize the measurements to a single CPU
            per_cpu = {
                'perf': grp['perf'] / grp['cpus'],
                'power': grp['power'] / grp['cpus'],
                'energy': grp['energy'] / grp['cpus'],
            }
            for name, (func, args) in stats.items():
                row = {'cluster': cl, 'freq': freq, 'stats': name}
                for metric, series in per_cpu.items():
                    row[metric] = getattr(series, func)(*args)
                data.append(row)

    stats_df = pd.DataFrame(data).set_index(['cluster', 'freq', 'stats'])\
                                 .sort_index(level='cluster')
    return stats_df.unstack()
In [26]:
# Compute per-OPP statistics on the filtered data
pp_stats = power_perf_stats(power_perf_new)
In [27]:
def plot_power_perf(pp_stats, clusters):
    """Plot the power-vs-performance curve of each cluster, normalized to
    the highest average performance/power, with the min/max bounds drawn
    as dashed lines."""
    cmap = ColorMap(len(clusters) + 1)
    color_map = [cmap.cmap(i) for i in range(len(clusters) + 1)]
    fig, ax = plt.subplots(1, 1, figsize=(16, 10))

    max_perf = pp_stats.perf['avg'].max()
    max_power = pp_stats.power['avg'].max()

    for i, cl in enumerate(clusters):
        cl_df = pp_stats.loc[i]

        def normalized(stat):
            # Percentages relative to the best average figures
            x = (cl_df.perf[stat] * 100.0 / max_perf).values.tolist()
            y = (cl_df.power[stat] * 100.0 / max_power).values.tolist()
            return x, y

        x, y = normalized('avg')
        ax.plot(x, y, color=color_map[i], marker='o', label=i)
        # Plot upper bounds
        x, y = normalized('max')
        ax.plot(x, y, '--', color=color_map[-1])
        # Plot lower bounds
        x, y = normalized('min')
        ax.plot(x, y, '--', color=color_map[-1])

    ax.set_title('Power VS Performance curves', fontsize=16)
    ax.legend()
    ax.set_xlabel('Performance [%]')
    ax.set_ylabel('Power [%]')
    ax.set_xlim(0, 105)
    ax.set_ylim(0, 105)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.yaxis.set_major_locator(MaxNLocator(integer=True))
    ax.grid(True)
In [28]:
# Compare the clusters' power/performance trade-offs
plot_power_perf(pp_stats, clusters)
In [29]:
def linfit(x, y):
    """Least-squares linear fit of y over x; returns (slope, intercept)."""
    fit = linregress(x, y)
    return fit[0], fit[1]
In [30]:
def pstates_model_df(clusters, pp_stats, power_perf_df, metric='avg'):
    """
    Build two data frames containing data to create the energy model for each
    cluster given as input.

    :param clusters: list of clusters to profile
    :type clusters: list(namedtuple(ClusterDescription))

    :param pp_stats: power and performance statistics
    :type pp_stats: :mod:`pandas.DataFrame`

    :param power_perf_df: power and performance data
    :type power_perf_df: :mod:`pandas.DataFrame`

    :param metric: Statistical metric to apply on measured energy
    :type metric: str
    """
    max_score = pp_stats.perf[metric].max()

    core_cap_energy = []
    cluster_cap_energy = []
    for cl in clusters:
        # ACTIVE Energy: one entry per OPP of the cluster
        for freq, opp_df in power_perf_df.loc[cl].groupby(level='freq'):
            # Get <metric> energy at OPP freq for 1 CPU
            energy_freq_1 = pp_stats.loc[cl].loc[freq]['energy'][metric]

            # The intercept of the energy-vs-active-CPUs regression
            # estimates the CPU-independent (cluster) share of the energy
            slope, intercept = linfit(
                opp_df.index.get_level_values('cpus').tolist(),
                opp_df.energy.tolist())
            # Energy can't be negative but the regression line may intercept the
            # y-axis at a negative value. In this case cluster energy can be
            # assumed to be 0.
            cluster_energy = intercept if intercept >= 0.0 else 0.0
            core_energy = energy_freq_1 - cluster_energy

            # Capacity: score at this OPP scaled so the best OPP is 1024
            score_freq = pp_stats.loc[cl].loc[freq]['perf'][metric]
            capacity = int(score_freq * 1024 / max_score)

            core_cap_energy.append({'cluster': cl,
                                    'freq': freq,
                                    'cap': capacity,
                                    'energy': core_energy})
            cluster_cap_energy.append({'cluster': cl,
                                       'freq': freq,
                                       'cap': capacity,
                                       'energy': cluster_energy})

    core_cap_nrg_df = pd.DataFrame(core_cap_energy)
    cluster_cap_nrg_df = pd.DataFrame(cluster_cap_energy)
    return core_cap_nrg_df, cluster_cap_nrg_df
The energy measured while running with $n$ CPUs is not $n \times Energy\_of\_one\_CPU$. For this reason, we will use a different metric, like for instance the 99th percentile.
In [31]:
# Build the core/cluster energy model tables; use the 99th percentile
# since (as noted above) energy with n CPUs is not simply n times the
# single-CPU energy
core_cap_nrg_df, cluster_cap_nrg_df = pstates_model_df(clusters,
                                                       pp_stats,
                                                       power_perf_new,
                                                       metric='c99'
                                                      )
In [32]:
core_cap_nrg_df
Out[32]:
In [33]:
cluster_cap_nrg_df
Out[33]:
There can still be issues in the modelling, and sometimes energy model values are non-monotonic. For this reason, the energy costs will be post-processed and monotonized if needed.
In [34]:
def monotonize_costs(nrg_costs_df):
    """
    Enforce monotonically increasing energy values for each cluster of the
    given energy-costs dataframe.

    Rows whose energy decreases with respect to the previous (valid)
    capacity step are considered erroneous: their energy is re-estimated
    from a 2nd-degree polynomial fitted over the remaining valid points.

    :param nrg_costs_df: energy costs with 'cluster', 'freq', 'cap' and
                         'energy' columns, ordered by increasing capacity
                         within each cluster
    :type nrg_costs_df: :mod:`pandas.DataFrame`

    :returns: a monotonized copy of the input dataframe
    """
    res = nrg_costs_df.copy()
    for cluster in nrg_costs_df["cluster"].unique():
        costs = res[res.cluster == cluster]

        # Identify non-monotonic entries
        erroneous = []
        prev = 0
        for idx, row in costs.iterrows():
            cap = row["cap"]
            energy = row["energy"]
            if energy < prev:
                erroneous.append((idx, cap))
                logging.info("cluster={} capacity={} has non-monotonic energy value, adjusting...".format(cluster, cap))
            else:
                # Only valid values move the monotonicity threshold
                prev = energy

        # All values were monotonic, move to next cluster
        if not erroneous:
            continue

        # Fit a polynomial over the values that are known to be valid
        # (hoisted out of the loop below: it does not depend on the entry
        # being fixed)
        # TODO: polynomial fit may not be monotonic in some weird corner
        # cases, need to improve this
        valid = costs[~costs.cap.isin([grp[1] for grp in erroneous])]
        p = np.polyfit(valid["cap"].tolist(), valid["energy"].tolist(), 2)

        for idx, cap in erroneous:
            # Re-estimate the erroneous value from the fitted curve.
            # Fixes: scipy.polyfit was a deprecated alias of np.polyfit
            # (removed in recent scipy), and DataFrame.set_value() has
            # been removed from pandas — .at is the supported scalar
            # setter.
            res.at[idx, 'energy'] = np.polyval(p, cap)
    return res
In [35]:
# Enforce monotonic energy values for the core costs
core_cap_nrg_df = monotonize_costs(core_cap_nrg_df)
In [36]:
# Enforce monotonic energy values for the cluster costs
cluster_cap_nrg_df = monotonize_costs(cluster_cap_nrg_df)
In [37]:
core_cap_nrg_df
Out[37]:
In [38]:
cluster_cap_nrg_df
Out[38]:
In [39]:
def compute_idle_power(clusters, loop_cnt, sleep_duration, bkp_file='cstates.csv'):
    """
    Perform C-States profiling on each input cluster.

    Data will be saved into a CSV file at each iteration such that if something
    goes wrong the user can restart the experiment considering only idle_states
    that had not been processed.

    :param clusters: dictionary of clusters to profile, as built above
    :type clusters: dict

    :param loop_cnt: number of loops for each experiment
    :type loop_cnt: int

    :param sleep_duration: sleep time in seconds
    :type sleep_duration: int

    :param bkp_file: CSV file name
    :type bkp_file: str
    """
    # Make sure all CPUs are online
    target.hotplug.online_all()

    with open(bkp_file, 'w') as csvfile:
        writer = DictWriter(csvfile, fieldnames=['cluster', 'cpus', 'idle_state',
                                                 'energy', 'power'])
        # Disable frequency scaling by setting cpufreq governor to userspace
        target.cpufreq.set_all_governors('userspace')
        # Freeze all tasks but the ones to communicate with the target
        target.cgroups.freeze(exclude=CRITICAL_TASKS['android'])

        all_cpus = set(range(target.number_of_cpus))
        idle_power = []
        for cl_idx, cl in clusters.items():
            # In current cluster, hotplug OFF all CPUs but the first one
            # At least one CPU must be online
            target.hotplug.online(cl["cpus"][0])
            for cpu in cl["cpus"][1:]:
                target.hotplug.offline(cpu)

            # CPUs in the other clusters will be kept hotplugged OFF
            # to not affect measurements on the current cluster
            other_cpus = list(all_cpus - set(cl["cpus"]))
            for cpu in other_cpus:
                target.hotplug.offline(cpu)

            # B) For each additional cluster's plugged in CPU...
            for cnt, cpu in enumerate(cl["cpus"]):
                # Hotplug ON one more CPU
                target.hotplug.online(cpu)
                cl_cpus = set(target.list_online_cpus()).intersection(set(cl["cpus"]))
                logging.info('Cluster {:8} (Online CPUs : {})'\
                             .format(cl_idx, list(cl_cpus)))

                for idle in cl["idles"]:
                    # Disable all idle states but the current one
                    logging.info("Disabling all idle states other than state {}".format(idle))
                    for c in cl["cpus"]:
                        target.cpuidle.disable_all(cpu=c)
                        target.cpuidle.enable(idle, cpu=c)
                    # Let the target settle in the selected idle state
                    sleep(3)

                    # Sleep for the specified duration each time collecting a sample
                    # of energy consumption and reported performance
                    energy = 0.0
                    for i in range(loop_cnt):
                        te.emeter.reset()
                        sleep(sleep_duration)
                        nrg = te.emeter.report(te.res_dir).channels
                        energy += float(nrg[em_channel])

                    # Compute average energy and performance for the current number of
                    # active CPUs all idle at the current OPP
                    energy = energy / loop_cnt
                    # Fix: derive the power from the 'sleep_duration'
                    # parameter instead of the notebook-global
                    # SLEEP_DURATION, which only happened to hold the same
                    # value at the call site below.
                    power = energy / sleep_duration

                    # Keep track of this new C-State profiling point
                    new_row = {'cluster': cl_idx,
                               'cpus': cnt + 1,
                               'idle_state': idle,
                               'energy': energy,
                               'power': power}
                    idle_power.append(new_row)

                    # Save data in a CSV file
                    writer.writerow(new_row)

                # C) profile next C-State
            # B) add one more CPU (for the current frequency domain)
        # A) profile next cluster (i.e. frequency domain)

        # Thaw all tasks in the freezer cgroup
        target.cgroups.freeze(thaw=True)
        target.hotplug.online_all()

    idle_df = pd.DataFrame(idle_power)
    return idle_df.set_index(['cluster', 'idle_state', 'cpus']).sort_index(level='cluster')
In [40]:
# Idle-state profiling parameters: 10s of sleep per sample,
# 10 samples per (cluster, cpus, idle_state) point
SLEEP_DURATION = 10
loop_cnt = 10
idle_df = compute_idle_power(clusters, loop_cnt, SLEEP_DURATION)
In [41]:
# Idle state indexes used below
WFI = 0       # shallowest state: core clock-gated
CORE_OFF = 1  # core powered off

def idle_power_stats(idle_df):
    """
    For each cluster compute per idle state power statistics.

    NOTE(review): iterates the notebook-global 'clusters' dictionary; only
    the clusters present there are processed.

    :param idle_df: dataframe containing power numbers
    :type idle_df: :mod:`pandas.DataFrame`
    """
    stats = []
    for cl in clusters.iterkeys():
        cl_df = idle_df.loc[cl].reset_index()
        # Start from deepest idle state
        # (this guarantees CORE_OFF is processed before WFI, so the
        # averages used as a baseline below are initialized first)
        cl_df = cl_df.sort_values('idle_state', ascending=False)
        grouped = cl_df.groupby('idle_state', sort=False)
        for state, df in grouped:
            energy = df.energy
            power = df.power
            state_name = "C{}_CLUSTER".format(state)
            if state == CORE_OFF:
                # Remember the CORE_OFF averages: used as the baseline for
                # the per-core WFI deltas below.
                # NOTE(review): these would be unbound if a cluster had no
                # CORE_OFF samples.
                core_off_nrg_avg = energy.mean()
                core_off_pwr_avg = power.mean()
            if state == WFI:
                # Per-core WFI cost: difference between consecutive CPU
                # counts; the first entry is taken relative to the
                # CORE_OFF average
                energy = df.energy.diff()
                energy[0] = df.energy[0] - core_off_nrg_avg
                power = df.power.diff()
                power[0] = df.power[0] - core_off_pwr_avg
                state_name = "C0_CORE"
            avg_row = {'cluster': cl,
                       'idle_state': state_name,
                       'stats': 'avg',
                       'energy': energy.mean(),
                       'power': power.mean()
                      }
            std_row = {'cluster': cl,
                       'idle_state': state_name,
                       'stats': 'std',
                       'energy': energy.std(),
                       'power': power.std()
                      }
            min_row = {'cluster' : cl,
                       'idle_state' : state_name,
                       'stats' : 'min',
                       'energy' : energy.min(),
                       'power' : power.min()
                      }
            max_row = {'cluster' : cl,
                       'idle_state' : state_name,
                       'stats' : 'max',
                       'energy' : energy.max(),
                       'power' : power.max()
                      }
            c99_row = {'cluster' : cl,
                       'idle_state' : state_name,
                       'stats' : 'c99',
                       'energy' : energy.quantile(q=0.99),
                       'power' : power.quantile(q=0.99)
                      }
            stats.append(avg_row)
            stats.append(std_row)
            stats.append(min_row)
            stats.append(max_row)
            stats.append(c99_row)
    stats_df = pd.DataFrame(stats).set_index(
        ['cluster', 'idle_state', 'stats']).sort_index(level='cluster')
    return stats_df.unstack()
In [42]:
# Compute per-idle-state power statistics
idle_stats = idle_power_stats(idle_df)
In [43]:
idle_df
Out[43]:
In [44]:
# NOTE(review): this definition is superseded by the re-definition in the
# next cell (which adds per-cluster axes); it is kept here only for
# reference and still contains leftover debug prints.
def plot_cstates(idle_power_df, clusters):
    """
    Plot C-States profiling for the specified cluster.

    :param idle_power_df: dataframe reporting power values in each idle state
    :type idle_power_df: :mod:`pandas.DataFrame`

    :param cluster: cluster description
    :type cluster: namedtuple(ClusterDescription)
    """
    for cl_idx, cluster in clusters.iteritems():
        n_cpus = len(cluster["cpus"])
        # One color per idle state, repeated once per CPU count
        cmap = ColorMap(len(cluster["idles"]))
        color_map = map(cmap.cmap, cluster["idles"])
        color_map = [c for c in color_map for i in xrange(n_cpus)]
        cl_df = idle_power_df.loc[cl_idx]
        ax = cl_df.power.plot.bar(figsize=(16,8), color=color_map, alpha=0.5,
                                  legend=False, table=True)
        idx = 0
        grouped = cl_df.groupby(level=0)
        for state, df in grouped:
            x = df.index.get_level_values('cpus').tolist()
            y = df.power.tolist()
            # Leftover debug output
            print x
            print y
            # Overlay the linear fit of power vs number of online CPUs
            slope, intercept = linfit(x, y)
            y = [slope * v + intercept for v in x]
            x = range(n_cpus * idx, n_cpus * (idx + 1))
            # Leftover debug output
            print x
            print y
            ax.plot(x, y, color=color_map[idx*n_cpus], linewidth=4)
            idx += 1
        ax.grid(True)
        ax.get_xaxis().set_visible(False)
        ax.set_ylabel("Idle Power [$\mu$W]")
        ax.set_title("{} cluster C-states profiling"\
                     .format(cluster), fontsize=16)
In [45]:
def plot_cstates(idle_power_df, clusters):
    """
    Plot C-States profiling for the specified cluster.

    :param idle_power_df: dataframe reporting power values in each idle state
    :type idle_power_df: :mod:`pandas.DataFrame`

    :param cluster: cluster description
    :type cluster: namedtuple(ClusterDescription)
    """
    n_clusters = len(clusters)
    fig, ax = plt.subplots(nrows=n_clusters, figsize=(16, 8 * n_clusters))
    for cl_idx, cluster in clusters.items():
        n_cpus = len(cluster["cpus"])
        # One color per idle state, repeated once per CPU count
        cmap = ColorMap(len(cluster["idles"]))
        palette = [cmap.cmap(i) for i in cluster["idles"]]
        palette = [c for c in palette for _ in range(n_cpus)]
        # plt.subplots() returns a bare Axes when nrows == 1
        _ax = ax[cl_idx] if n_clusters > 1 else ax
        cl_df = idle_power_df.loc[cl_idx]
        cl_df.power.plot.bar(figsize=(16, 8 * n_clusters), color=palette,
                             alpha=0.5, legend=False, table=True, ax=_ax)
        for idx, (state, grp) in enumerate(cl_df.groupby(level=0)):
            # Overlay the linear fit of power vs number of online CPUs
            xs = grp.index.get_level_values('cpus').tolist()
            ys = grp.power.tolist()
            slope, intercept = linfit(xs, ys)
            fitted = [slope * v + intercept for v in xs]
            span = range(n_cpus * idx, n_cpus * (idx + 1))
            _ax.plot(span, fitted, color=palette[idx * n_cpus], linewidth=4)
        _ax.grid(True)
        _ax.get_xaxis().set_visible(False)
        _ax.set_ylabel("Idle Power [$\mu$W]")
        _ax.set_title("{} cluster C-states profiling"\
                      .format(cluster), fontsize=16)
In [46]:
# Plot the C-state profiles of all clusters
plot_cstates(idle_df, clusters)
In [47]:
core_cap_nrg_df["cap"]
Out[47]:
In [48]:
# Group the core energy model rows by cluster for inspection below
grouped = core_cap_nrg_df.groupby('cluster')
In [49]:
# Quick sanity check: dump each core energy-model row per cluster
for cl, df in grouped:
    for row in df.itertuples():
        print row
In [50]:
def energy_model_dict(clusters, core_cap_nrg_df, cluster_cap_nrg_df, metric='avg'):
    """
    Assemble the per-cluster energy model dictionary: OPPs plus core and
    cluster busy/idle costs.

    NOTE(review): reads the notebook-global 'idle_stats' dataframe, and
    assumes every cluster has as many idle states as clusters[0].

    :param clusters: dictionary of clusters, as built above
    :type clusters: dict

    :param core_cap_nrg_df: per-OPP core capacity/energy data
    :type core_cap_nrg_df: :mod:`pandas.DataFrame`

    :param cluster_cap_nrg_df: per-OPP cluster capacity/energy data
    :type cluster_cap_nrg_df: :mod:`pandas.DataFrame`

    :param metric: statistical metric to read from idle_stats
    :type metric: str
    """
    n_states = len(clusters[0]["idles"])
    nrg_dict = {}
    grouped = core_cap_nrg_df.groupby('cluster')
    for cl, df in grouped:
        nrg_dict[cl] = {
            "opps" : {},
            "core": {
                "busy-cost": OrderedDict(),
                "idle-cost": OrderedDict()
            },
            "cluster": {
                "busy-cost": OrderedDict(),
                "idle-cost": OrderedDict()
            }
        }
        # Core COSTS
        # ACTIVE costs
        for row in df.itertuples():
            nrg_dict[cl]["opps"][row.cap] = row.freq
            nrg_dict[cl]["core"]["busy-cost"][row.cap] = int(row.energy)
        # IDLE costs
        # NOTE(review): integer indexing on this string-labelled series
        # ("C0_CORE", "C1_CLUSTER", ...) relies on old-pandas positional
        # fallback: [0] selects the first row after index sorting, i.e.
        # "C0_CORE" — revisit if moving to a newer pandas.
        wfi_nrg = idle_stats.loc[cl].energy[metric][0]
        # WFI
        nrg_dict[cl]["core"]["idle-cost"][0] = int(wfi_nrg)
        # All remaining states are zeroes
        for i in xrange(1, n_states):
            nrg_dict[cl]["core"]["idle-cost"][i] = 0
        # Cluster COSTS
        cl_data = cluster_cap_nrg_df[cluster_cap_nrg_df.cluster == cl]
        # ACTIVE costs
        for row in cl_data.itertuples():
            nrg_dict[cl]["cluster"]["busy-cost"][row.cap] = int(row.energy)
        # IDLE costs
        # Core OFF is the first valid idle cost for cluster
        idle_data = idle_stats.loc[cl].energy[metric]
        # WFI (same as Core OFF)
        nrg_dict[cl]["cluster"]["idle-cost"][0] = int(idle_data[1])
        # All other idle states (from CORE OFF down)
        for i in xrange(1, n_states):
            nrg_dict[cl]["cluster"]["idle-cost"][i] = int(idle_data[i])
    return nrg_dict
In [51]:
# Assemble the final energy model dictionary
nrg_dict = energy_model_dict(clusters, core_cap_nrg_df, cluster_cap_nrg_df)
In [52]:
def dump_device_tree(nrg_dict, outfile='sched-energy.dtsi'):
    """
    Generate device tree energy model file.

    :param nrg_dict: dictionary describing the energy model
    :type nrg_dict: dict

    :param outfile: output file name
    :type outfile: str
    """
    def write_busy_costs(out, costs):
        # One "<capacity> <energy>" pair per OPP
        out.write("\t\tbusy-cost-data = <\n")
        for cap, nrg in costs.items():
            out.write("\t\t\t{} {}\n".format(cap, nrg))
        out.write("\t\t>;\n")

    def write_idle_costs(out, costs):
        out.write("\t\tidle-cost-data = <\n")
        # arch idle: an extra leading entry duplicating the first idle cost
        out.write("\t\t\t{}\n".format(costs[0]))
        for nrg in costs.values():
            out.write("\t\t\t{}\n".format(nrg))
        out.write("\t\t>;\n")

    with open(os.path.join(te.res_dir, outfile), 'w') as out:
        out.write("energy-costs {\n")
        for idx, cl_name in enumerate(nrg_dict.keys()):
            # Dump Core costs
            core = nrg_dict[cl_name]["core"]
            out.write("\tCPU_COST_{}: core_cost{} {}\n"\
                      .format(idx, idx, '{'))
            write_busy_costs(out, core["busy-cost"])
            write_idle_costs(out, core["idle-cost"])
            out.write("\t};\n")
            # Dump Cluster costs
            cl = nrg_dict[cl_name]["cluster"]
            out.write("\tCLUSTER_COST_{}: cluster_cost{} {}\n"\
                      .format(cl_name, idx, '{'))
            write_busy_costs(out, cl["busy-cost"])
            write_idle_costs(out, cl["idle-cost"])
            out.write("\t};\n")
        out.write("};")
In [53]:
# Write the device tree energy model file into the results directory
dump_device_tree(nrg_dict)
In [54]:
# Show the generated device tree energy model
!cat {te.res_dir}/sched-energy.dtsi