In [1]:
import pandas as pd
# visualization libraries
import matplotlib.pyplot as plt
from matplotlib import gridspec
# plot the visuals in ipython
%matplotlib inline
In [2]:
# Benchmark problems included in the analysis.
# Commented-out names are skipped; uncomment a line to include that problem.
probs = [
    # ---- koza ----
    # "koza_01",
    # "koza_02",
    # "koza_03",

    # ---- lipson ----
    # "lipson_01",
    # "lipson_02",
    # "lipson_03",

    # ---- nguyen ----
    # "nguyen_01",
    # "nguyen_02",
    # "nguyen_03",
    "nguyen_04",
    "nguyen_05",
    # "nguyen_06",
    # "nguyen_07",
    # "nguyen_08",
    "nguyen_09",
    # "nguyen_10",
    # "nguyen_11",
    "nguyen_12",

    # ---- korns ----
    # "korns_01",
    # "korns_02",
    "korns_03",
    # "korns_04",
    # "korns_05",
    # "korns_06",
    # "korns_07",
    # "korns_08",
    # "korns_09",
    # "korns_10",
    # "korns_11",
    # "korns_12",
    # "korns_13",
    # "korns_14",
    # "korns_15"
]
In [10]:
# Root folder that holds one ``config_explicit_<setting>.yml`` directory per run.
directory = "/Users/tony/pypge/experiments/output/eval_speedup/"


def process_setting_dir(s):
    """Load the ``pge_errs.log`` table of every problem in ``probs`` for one setting.

    Parameters
    ----------
    s : str
        Setting label; it is spliced into the run directory name
        ``config_explicit_<s>.yml``.

    Returns
    -------
    pandas.Panel
        One item per problem name, each holding that problem's
        whitespace-delimited ``pge_errs.log`` parsed as a DataFrame.
    """
    setting_root = directory + "config_explicit_" + s + ".yml/clean/"
    frames = {}
    for name in probs:
        log_path = setting_root + name + "/out/" + "pge_errs.log"
        frames[name] = pd.read_csv(log_path, delim_whitespace=True)
    # NOTE(review): pd.Panel is only available in older pandas releases.
    return pd.Panel(frames)
In [25]:
# Settings to compare.  Each label is the two-digit suffix of a
# ``config_explicit_<setting>.yml`` run directory; the plots below label
# this axis as "cores".
settings = [
    "01",
    "02",
    "03",
    "04",
    "06",
    "08",
    "12",
    "16",
]

# Row label of each problem's error log that is sampled for the comparison.
# NOTE(review): presumably the last logged iteration of every run -- confirm.
SNAPSHOT_ROW = 24

time_data = pd.DataFrame()
for s in settings:
    sdata = process_setting_dir(s)
    # major_xs() gives (log column x problem); transpose so rows are problems.
    tdata = sdata.major_xs(SNAPSHOT_ROW).T
    time_data[s] = tdata["elapsed_seconds"]

# One row per setting, one column per problem.
time_data = time_data.T
print(time_data)

ax = time_data.plot()
ax.set_xlabel("cores")
ax.set_ylabel("elapsed seconds")
print("done")
In [98]:
def process_timing_files(s):
    """Read the ``pge_main.log`` of every problem in ``probs`` for one setting.

    Parameters
    ----------
    s : str
        Setting label; it is spliced into the run directory name
        ``config_explicit_<s>.yml``.

    Returns
    -------
    dict
        Maps each problem name to the list of raw log lines (line endings
        kept) that do not contain the substring ``"-1"``.
    """
    setting_root = directory + "config_explicit_" + s + ".yml/clean/"
    kept_lines = {}
    for name in probs:
        log_path = setting_root + name + "/out/" + "pge_main.log"
        with open(log_path) as handle:
            # NOTE(review): this is a plain substring test, so a line is
            # dropped whenever "-1" appears anywhere in it.
            kept_lines[name] = [row for row in handle if "-1" not in row]
    return kept_lines
def _sum_timing_field(content, marker):
    """Sum the fifth-from-last whitespace token of every line containing ``marker``."""
    total = 0.0
    for line in content:
        if marker in line:
            # split() with no argument already discards surrounding whitespace.
            total += float(line.split()[-5])
    return total


def cdata_to_df(cdata):
    """Aggregate per-problem log lines into a table of summed timings.

    Parameters
    ----------
    cdata : dict
        Maps a problem name to its list of log lines.  Every line that
        contains one of the markers below must have at least five
        whitespace-separated tokens, the fifth from the end being a number.

    Returns
    -------
    pandas.DataFrame
        One row per key of ``cdata`` with the columns

        * ``loop_sum``     -- lines containing ``"total loop time"``
        * ``expd_sum``     -- lines containing ``"popped"``
        * ``eval_sum``     -- lines containing ``"evaling"``
        * ``heap_sum``     -- lines containing ``"popping"``
        * ``other_sum``    -- ``loop_sum - (expd_sum + eval_sum + heap_sum)``
        * ``non_eval_sum`` -- ``loop_sum - eval_sum``

    Raises
    ------
    IndexError
        If a matching line has fewer than five tokens.
    ValueError
        If the selected token is not a number.
    """
    cols = ["loop_sum", "expd_sum", "eval_sum", "heap_sum", "other_sum", "non_eval_sum"]
    df = pd.DataFrame(index=cols)
    for key in cdata:
        content = cdata[key]
        loop_sum = _sum_timing_field(content, "total loop time")
        expd_sum = _sum_timing_field(content, "popped")
        eval_sum = _sum_timing_field(content, "evaling")
        heap_sum = _sum_timing_field(content, "popping")
        other_sum = loop_sum - (expd_sum + eval_sum + heap_sum)
        non_eval_sum = loop_sum - eval_sum
        df[key] = [loop_sum, expd_sum, eval_sum, heap_sum, other_sum, non_eval_sum]
    # Columns were filled per problem; flip so that each problem is a row.
    return df.T
# Build one timing table per setting and stack them into a Panel whose
# items are the settings.
data = {}
for s in settings:
    data[s] = cdata_to_df(process_timing_files(s))
panel = pd.Panel.from_dict(data, orient='items')


def _linear_reference(setting_labels):
    """Return 16 / <setting> for every setting label.

    plot_speedup() divides the "01" row by every other row, which turns
    this column into the ideal line y = <setting>.  The scale 16 cancels in
    that ratio; it is kept so the stored values equal the list that used to
    be hard-coded here.  Deriving the values from the labels keeps the
    column aligned with the frame even when ``settings`` is edited.
    """
    return [16.0 / int(label) for label in setting_labels]


# Rows are settings, columns are problems, plus the ideal-scaling reference.
total_df = panel.minor_xs("loop_sum").T
total_df["LINEAR (y=x)"] = _linear_reference(total_df.index)
eval_df = panel.minor_xs("eval_sum").T
eval_df["LINEAR (y=x)"] = _linear_reference(eval_df.index)
def plot_speedup(df, title):
    """Plot every column's ratio to the ``"01"`` row and save the figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Indexed by setting label (must include ``"01"``), one column per
        series to draw.
    title : str
        Stem of the output file; the figure is written to
        ``images/<title>.png``.  It is not drawn on the figure.
    """
    by_series = df.T
    baseline = by_series["01"]
    speedup = pd.DataFrame()
    for setting in by_series:
        # Ratio of the "01" value to this setting's value.
        speedup[setting] = baseline / by_series[setting]

    fig = plt.figure()
    fig.set_size_inches(8, 6)
    grid = gridspec.GridSpec(1, 1)
    ax = fig.add_subplot(grid[:, :])
    ax.set_xlabel('cores')
    ax.set_ylabel('ratio')
    speedup.T.plot(ax=ax)

    plt.tight_layout()
    plt.savefig("images/" + title + ".png", dpi=200)
    plt.show()
# Draw the speedup chart for the summed loop time and for the summed eval time.
for frame, stem in ((total_df, "total_speedup"), (eval_df, "eval_speedup")):
    plot_speedup(frame, stem)
In [123]:
def _plot_timing_breakdown(by_metric, core_settings, columns, out_path):
    """Draw a 2x2 grid of stacked bar charts, one per setting, and save it.

    Parameters
    ----------
    by_metric : pandas.Panel
        Panel whose major axis holds the setting labels.
    core_settings : list of str
        Up to four setting labels; they fill the grid row by row.
    columns : list of str
        Timing columns that are stacked in each bar.
    out_path : str
        File the figure is written to.
    """
    fig = plt.figure()
    fig.set_size_inches(12, 8)
    gs = gridspec.GridSpec(2, 2)
    for i, key in enumerate(core_settings):
        ax = fig.add_subplot(gs[i // 2, i % 2])
        ax.set_title(key + " cores")
        by_metric.major_xs(key)[columns].plot(kind='bar', stacked=True, ax=ax, legend=False)
    # A single legend, anchored just outside the current axes, serves the
    # whole grid; it is passed to savefig so the tight bounding box keeps it.
    lgd = plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.tight_layout()
    plt.savefig(out_path, dpi=200, bbox_extra_artists=(lgd,), bbox_inches='tight')
    plt.show()


print(panel)
# Reorder the axes so that major_xs(<setting>) can be taken per subplot.
pT = panel.transpose(2, 0, 1)
print(pT)

cols = ["expd_sum", "eval_sum", "heap_sum", "other_sum"]

_plot_timing_breakdown(pT, ["01", "02", "03", "04"], cols, "images/speedup_timings_low.png")
_plot_timing_breakdown(pT, ["06", "08", "12", "16"], cols, "images/speedup_timings_high.png")
In [130]:
# Reorder the axes so that a single problem can be selected as an item.
pT2 = panel.transpose(1, 0, 2)
print(pT2)

# Problem whose timing breakdown is drawn below.
NAME = "nguyen_12"
df = pT2[NAME]
print(df)

cols = ["expd_sum", "eval_sum", "heap_sum", "other_sum"]

fig = plt.figure()
fig.set_size_inches(8, 6)
gs = gridspec.GridSpec(2, 2)
# A single axes spanning the whole grid.
ax = fig.add_subplot(gs[:, :])
df[cols].plot(kind='bar', stacked=True, ax=ax)
# Label after plotting so the figure is self-explanatory.
ax.set_title(NAME)
ax.set_xlabel("cores")
plt.tight_layout()
plt.savefig("images/speedup_timings_" + NAME + ".png", dpi=200)
plt.show()