In [ ]:
%matplotlib inline
import pandas
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cmx
import matplotlib.colors
In [ ]:
from analysis import prepare_df, prepare_usage_df, score_card, running_concurrently, waiting_over_time, point_in_time_analysis
In [ ]:
# Load one output trace and derive the per-user frames consumed by the cells
# below. Every helper used here is imported from the local `analysis` module.
out_trace_file = "../example-out-trace.csv"
# NOTE(review): presumably the scheduler cycle length in milliseconds -- confirm.
cycle_time_ms = 30000

df_a = prepare_df(pandas.read_csv(out_trace_file))

# One result per user from each helper, flattened back into a frame and
# ordered by the `time_ms` column.
user_running_a = (
    df_a.groupby("user")
    .apply(running_concurrently)
    .reset_index()
    .sort_values("time_ms")
)
user_waiting_a = (
    df_a.groupby("user")
    .apply(waiting_over_time)
    .reset_index()
    .sort_values("time_ms")
)
usage_df_a = prepare_usage_df(user_running_a, user_waiting_a, cycle_time_ms)

# Preview the first rows of the prepared trace.
df_a.head()
In [ ]:
# Score card for run "a", transposed for display.
scorecard_a = score_card(df_a, user_running_a, user_waiting_a, cycle_time_ms)
scorecard_a.transpose()
In [ ]:
# Point-in-time analysis of the trace, taken at the median of its
# `start_time_ms` column. Note that `df_a` is rebound to the frame returned by
# point_in_time_analysis.
out_trace_file = "../example-out-trace.csv"
df_a = prepare_df(pandas.read_csv(out_trace_file))
snapshot_time_ms = df_a.start_time_ms.median()
per_host, per_user, waiting, running_at, df_a = point_in_time_analysis(
    df_a, snapshot_time_ms
)
In [ ]:
# Summary statistics of `mem` across the per-host snapshot.
per_host["mem"].describe()
In [ ]:
# Per-user snapshot ordered by `mem` (ascending, the sort_values default).
per_user.sort_values(by="mem")
In [ ]:
# Display the `waiting` result returned by point_in_time_analysis.
waiting
In [ ]:
# Display the `running_at` result returned by point_in_time_analysis.
running_at
In [ ]:
def _prepare_run(trace_file, cycle_time_ms):
    """Load one trace CSV and derive the frames used by the a/b comparison.

    Args:
        trace_file: Path of the trace CSV to read.
        cycle_time_ms: Cycle time forwarded to ``prepare_usage_df``.

    Returns:
        A tuple ``(df, user_running, user_waiting, usage_df)`` where ``df`` is
        the output of ``prepare_df``, the two ``user_*`` frames are the
        per-user results of ``running_concurrently`` / ``waiting_over_time``
        sorted by ``time_ms``, and ``usage_df`` is the output of
        ``prepare_usage_df``.
    """
    df = prepare_df(pandas.read_csv(trace_file))
    user_running = (
        df.groupby("user")
        .apply(running_concurrently)
        .reset_index()
        .sort_values("time_ms")
    )
    user_waiting = (
        df.groupby("user")
        .apply(waiting_over_time)
        .reset_index()
        .sort_values("time_ms")
    )
    usage_df = prepare_usage_df(user_running, user_waiting, cycle_time_ms)
    return df, user_running, user_waiting, usage_df


cycle_time_ms = 30000

# Both runs currently point at the same bundled example trace; change either
# path to compare two different runs.
trace_file_a = "../example-out-trace.csv"
trace_file_b = "../example-out-trace.csv"

df_a, user_running_a, user_waiting_a, usage_df_a = _prepare_run(trace_file_a, cycle_time_ms)
df_b, user_running_b, user_waiting_b, usage_df_b = _prepare_run(trace_file_b, cycle_time_ms)
In [ ]:
# Score cards of both runs side by side, plus the relative change from a to b.
score_card_a = score_card(df_a, user_running_a, user_waiting_a, cycle_time_ms)
score_card_b = score_card(df_b, user_running_b, user_waiting_b, cycle_time_ms)

scores = pandas.concat([score_card_a, score_card_b]).transpose()
scores.columns = ["a", "b"]

# (b - a) / a for every metric row.
relative_change = (scores["b"] - scores["a"]) / scores["a"]
scores['improvement_a_to_b'] = relative_change
scores
In [ ]:
# Overlaid histograms of `fair_ratio` for both runs; only rows with a
# positive ratio are included.
bins = np.linspace(0,1,20)

positive_ratio_a = usage_df_a.loc[usage_df_a["fair_ratio"] > 0, "fair_ratio"]
positive_ratio_b = usage_df_b.loc[usage_df_b["fair_ratio"] > 0, "fair_ratio"]

ax = positive_ratio_a.hist(bins=bins, label="a", alpha=0.8)
positive_ratio_b.hist(bins=bins, ax=ax, label="b", alpha=0.8)

plt.xlim([0.,0.99])
plt.legend()
plt.xlabel("memory running over fair allocation")
plt.ylabel("frequency")
plt.title("distribution of memory running over fair allocation")
In [ ]:
# Median positive `fair_ratio` at each `time_ms`, one line per run.
positive_rows_a = usage_df_a[usage_df_a["fair_ratio"] > 0]
positive_rows_b = usage_df_b[usage_df_b["fair_ratio"] > 0]

median_ratio_a = positive_rows_a.groupby("time_ms")["fair_ratio"].median()
median_ratio_b = positive_rows_b.groupby("time_ms")["fair_ratio"].median()

ax = median_ratio_a.plot(label="a", alpha=0.8)
median_ratio_b.plot(ax=ax, label="b", alpha=0.8)

plt.legend()
plt.xlabel("time from beginning of sim (milliseconds)")
plt.ylabel("median memory running over fair allocation")
plt.title("memory running over fair allocation over time")
In [ ]:
# Overlaid histograms of `starved_mem_gb` for both runs; rows with no
# starvation (value <= 0) are left out.
bins = 100

starved_a = usage_df_a.loc[usage_df_a["starved_mem_gb"] > 0, "starved_mem_gb"]
starved_b = usage_df_b.loc[usage_df_b["starved_mem_gb"] > 0, "starved_mem_gb"]

ax = starved_a.hist(bins=bins, label="a", alpha=0.8)
starved_b.hist(bins=bins, ax=ax, label="b", alpha=0.8)

plt.legend()
plt.xlabel("Starved memory (gb)")
plt.ylabel("frequency")
plt.title("distribution of starvation")
In [ ]:
# Median `starved_mem_log10` at each `time_ms`, one line per run, restricted
# to rows where `starved_mem_gb` is positive.
ax = usage_df_a[usage_df_a.starved_mem_gb > 0].groupby('time_ms').starved_mem_log10.median().plot(label="a", alpha=0.8)
# Pass ax explicitly so run "b" is drawn on the same axes as run "a" rather
# than relying on pandas implicitly reusing the current axes; this also
# matches the other comparison plots in this notebook.
usage_df_b[usage_df_b.starved_mem_gb > 0].groupby('time_ms').starved_mem_log10.median().plot(ax=ax, label="b", alpha=0.8)
plt.legend()
plt.xlabel("time from beginning of sim (milliseconds)")
plt.ylabel("median log starved memory (gb)")
plt.title("log starvation over time")
In [ ]:
# Overlaid histograms of `overhead` expressed in units of cycle_time_ms,
# using integer bin edges 0..19.
bins = range(20)

cycles_waited_a = df_a["overhead"] / cycle_time_ms
cycles_waited_b = df_b["overhead"] / cycle_time_ms

plt.hist(cycles_waited_a, bins=bins, label="a", alpha=0.8)
plt.hist(cycles_waited_b, bins=bins, label="b", alpha=0.8)

plt.legend()
plt.xlabel("Cycles until scheduled")
plt.ylabel("frequency")
plt.title("Distribution of cycles until scheduled")