In [ ]:
%matplotlib inline 
import pandas
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cmx
import matplotlib.colors

In [ ]:
from analysis import prepare_df, prepare_usage_df, score_card, running_concurrently, waiting_over_time, point_in_time_analysis

Single simulation analysis


In [ ]:
# Inputs for the single-run analysis: the trace CSV to read and the
# scheduler cycle length (milliseconds) handed to prepare_usage_df.
out_trace_file = "../example-out-trace.csv"
cycle_time_ms = 30000

# Load the raw trace and run it through prepare_df.
df_a = prepare_df(pandas.read_csv(out_trace_file))

# Apply running_concurrently to each user's rows, flatten the group index
# back into columns and order the result by time_ms.
user_running_a = (
    df_a.groupby("user")
    .apply(running_concurrently)
    .reset_index()
    .sort_values("time_ms")
)

# Same shape of pipeline, this time with waiting_over_time.
user_waiting_a = (
    df_a.groupby("user")
    .apply(waiting_over_time)
    .reset_index()
    .sort_values("time_ms")
)

# Combine the running and waiting frames into the usage frame.
usage_df_a = prepare_usage_df(user_running_a, user_waiting_a, cycle_time_ms)

# Preview the first rows of the prepared trace.
df_a.head()

In [ ]:
# Build the score card for run "a" and flip it so each metric is shown on its own row.
score_card(df_a, user_running_a, user_waiting_a, cycle_time_ms).T

Point-in-time analysis


In [ ]:
# Reload the trace so this section does not depend on earlier cells' state.
out_trace_file = "../example-out-trace.csv"
df_a = prepare_df(pandas.read_csv(out_trace_file))

# Inspect the simulation at the median value of start_time_ms.
median_start_ms = df_a["start_time_ms"].median()

# point_in_time_analysis hands back five results; the last one replaces df_a.
per_host, per_user, waiting, running_at, df_a = point_in_time_analysis(df_a, median_start_ms)

In [ ]:
# Summary statistics of the "mem" column in the per-host result.
per_host["mem"].describe()

In [ ]:
# Per-user result ordered by ascending "mem".
per_user.sort_values(by="mem")

In [ ]:
# Display the `waiting` result unpacked from point_in_time_analysis(...) earlier in this section.
waiting

In [ ]:
# Display the `running_at` result unpacked from point_in_time_analysis(...) earlier in this section.
running_at

Comparing simulation runs


In [ ]:
# Scheduler cycle length in milliseconds; forwarded to prepare_usage_df and
# reused by the score-card and overhead cells below.
cycle_time_ms = 30000

# Trace files for the two runs being compared. Both point at the bundled
# example trace; change either path to compare two different simulations.
trace_file_a = "../example-out-trace.csv"
trace_file_b = "../example-out-trace.csv"


def _load_run(trace_file, cycle_time_ms):
    """Load one simulation trace and derive the frames used by the comparison cells.

    Parameters
    ----------
    trace_file : str
        Path of the trace CSV, passed to ``pandas.read_csv``.
    cycle_time_ms : int
        Cycle length in milliseconds, forwarded to ``prepare_usage_df``.

    Returns
    -------
    tuple
        ``(df, user_running, user_waiting, usage_df)`` where ``df`` is the
        output of ``prepare_df``, ``user_running`` / ``user_waiting`` are the
        per-user results of ``running_concurrently`` / ``waiting_over_time``
        sorted by ``time_ms``, and ``usage_df`` is the output of
        ``prepare_usage_df``.
    """
    df = prepare_df(pandas.read_csv(trace_file))
    by_user = df.groupby("user")
    user_running = by_user.apply(running_concurrently).reset_index().sort_values("time_ms")
    user_waiting = by_user.apply(waiting_over_time).reset_index().sort_values("time_ms")
    usage_df = prepare_usage_df(user_running, user_waiting, cycle_time_ms)
    return df, user_running, user_waiting, usage_df


# One call per run keeps the two pipelines identical by construction.
df_a, user_running_a, user_waiting_a, usage_df_a = _load_run(trace_file_a, cycle_time_ms)
df_b, user_running_b, user_waiting_b, usage_df_b = _load_run(trace_file_b, cycle_time_ms)

In [ ]:
# One score card per run, stacked and then flipped so each run becomes a column.
card_a = score_card(df_a, user_running_a, user_waiting_a, cycle_time_ms)
card_b = score_card(df_b, user_running_b, user_waiting_b, cycle_time_ms)
scores = pandas.concat([card_a, card_b]).T
scores.columns = ["a", "b"]

# Relative change of each metric going from run "a" to run "b".
scores["improvement_a_to_b"] = (scores["b"] - scores["a"]) / scores["a"]
scores

In [ ]:
# Shared bin edges for both runs: 20 evenly spaced edges across [0, 1].
bins = np.linspace(0, 1, 20)

# Only strictly positive fair_ratio values are plotted.
fair_a = usage_df_a.loc[usage_df_a.fair_ratio > 0, "fair_ratio"]
fair_b = usage_df_b.loc[usage_df_b.fair_ratio > 0, "fair_ratio"]

# Overlay both runs on a single set of axes.
ax = fair_a.hist(bins=bins, label="a", alpha=0.8)
fair_b.hist(bins=bins, ax=ax, label="b", alpha=0.8)

ax.set_xlim(0.0, 0.99)
ax.legend()
ax.set_xlabel("memory running over fair allocation")
ax.set_ylabel("frequency")
ax.set_title("distribution of memory running over fair allocation")

In [ ]:
# Median of the strictly positive fair_ratio values at each time_ms, per run.
median_fair_a = usage_df_a[usage_df_a.fair_ratio > 0].groupby("time_ms").fair_ratio.median()
ax = median_fair_a.plot(label="a", alpha=0.8)

median_fair_b = usage_df_b[usage_df_b.fair_ratio > 0].groupby("time_ms").fair_ratio.median()
median_fair_b.plot(ax=ax, label="b", alpha=0.8)

ax.legend()
ax.set_xlabel("time from beginning of sim (milliseconds)")
ax.set_ylabel("median memory running over fair allocation")
ax.set_title("memory running over fair allocation over time")

In [ ]:
# Keep only the samples where starved_mem_gb is strictly positive.
starved_a = usage_df_a.loc[usage_df_a.starved_mem_gb > 0, "starved_mem_gb"]
starved_b = usage_df_b.loc[usage_df_b.starved_mem_gb > 0, "starved_mem_gb"]

# With an integer `bins`, each hist() call derives its own edges from its own
# data range, so runs "a" and "b" would be bucketed differently and the
# overlaid bars would not be comparable. Compute one set of 100-bin edges
# spanning both runs and share it between the two histograms.
bins = np.histogram_bin_edges(pandas.concat([starved_a, starved_b]), bins=100)

ax = starved_a.hist(bins=bins, label="a", alpha=0.8)
starved_b.hist(bins=bins, ax=ax, label="b", alpha=0.8)
plt.legend()
plt.xlabel("Starved memory (gb)")
plt.ylabel("frequency")
plt.title("distribution of starvation")

In [ ]:
# Median of the starved_mem_log10 column at each time_ms, per run, restricted
# to rows where starved_mem_gb is strictly positive.
starved_log_a = usage_df_a[usage_df_a.starved_mem_gb > 0].groupby('time_ms').starved_mem_log10.median()
starved_log_b = usage_df_b[usage_df_b.starved_mem_gb > 0].groupby('time_ms').starved_mem_log10.median()

ax = starved_log_a.plot(label="a", alpha=0.8)
# Pass the axes explicitly (as the fair_ratio-over-time cell does) so run "b"
# is drawn on the same axes as run "a" instead of relying on pyplot's
# implicit current axes.
starved_log_b.plot(ax=ax, label="b", alpha=0.8)
plt.legend()
plt.xlabel("time from beginning of sim (milliseconds)")
plt.ylabel("median log starved memory (gb)")
plt.title("log starvation over time")

In [ ]:
# Integer bin edges 0..19, shared by both runs.
bins = range(20)

# Express each run's overhead in units of scheduler cycles and overlay the histograms.
for run_label, run_df in (("a", df_a), ("b", df_b)):
    cycles_waited = run_df.overhead / cycle_time_ms
    plt.hist(cycles_waited, bins=bins, alpha=0.8, label=run_label)

plt.legend()
plt.xlabel("Cycles until scheduled")
plt.ylabel("frequency")
plt.title("Distribution of cycles until scheduled")