In [189]:
# Execute the shared initialisation notebook in this kernel.
# NOTE(review): names used below but never defined in this notebook
# (fnamenew, latex_dir, labels, op, plt, np, melt, to_file, to_latex_file,
# float_to_latex_file) are presumably provided by it — verify.
%run ./msgc_experiments_CMBBEIV_init.ipynb
In [190]:
import numpy as np
import pandas as pd
import seaborn as sns
In [247]:
# Load the experiment results and derive the size-related helper columns.
df = pd.read_csv(fnamenew)
# The voxel count is taken as the cube of "data size".
df["image voxel number"] = df["data size"].pow(3)
df["relative object size"] = df["data object size px"] / df["image voxel number"]
# Reuse the ratio computed above instead of recomputing it, and call np.log
# explicitly: a bare `log` is never imported in this notebook.
df["log(relative object size)"] = np.log(df["relative object size"])

# Previously used, stricter selection (kept for reference):
# dfs = df[(df["data seedsz"]==3) & (df["data offset"] == 3) & (df["data radius"] == 10) & (df["experiment"] == "image size")]

# Rows of the "image size" experiment, and the subset with "data size" above 160.
dfs = df[df["experiment"] == "image size"]
dfs_plus = dfs[dfs["data size"] > 160]

# Global seaborn look used by every figure in this notebook.
sns.set_context("paper")
sns.set_style("white")
In [192]:
# Show the column labels of the loaded frame.
df.columns
Out[192]:
In [193]:
# Distribution of "time" per method on the dfs_plus subset; outliers are hidden.
sns.boxplot(
    data=dfs_plus,
    x="method",
    y="time",
    showfliers=False,
)
Out[193]:
In [194]:
# df
In [195]:
# Long format: one row per (method, time type) with the measurement in "time".
timing_wide = dfs_plus.rename(columns={"gc time": "gc", "time": "total"})
uu = pd.melt(
    timing_wide,
    id_vars=["method"],
    value_vars=["gc", "total"],
    var_name="time type",
    value_name="time",
)
# uu = pd.melt(dfs, value_vars=["gc time", "time"], id_vars=["method"], var_name="type", value_name="time")
# uu
In [196]:
# Side-by-side boxes of the two time types for every method, on a log y axis,
# written to the LaTeX figure directory.
bp = sns.boxplot(
    data=uu,
    x="method",
    y="time",
    hue="time type",
    showfliers=False,
)
bp.set(yscale="log")
boxplot_pdf = op.join(latex_dir, "msgc_time_boxplot.pdf")
plt.savefig(boxplot_pdf, dpi=1000)
In [197]:
# Switch the y axis of the boxplot axes `bp` to a logarithmic scale
# (same call as at the end of the previous cell).
bp.set(yscale="log")
Out[197]:
In [198]:
# sns.boxplot(data=dfs, y="error", x="method")
In [199]:
# Third-order polynomial fit of "time" against "data size", one fit per method.
lm = sns.lmplot(
    data=df,
    x="data size",
    y="time",
    hue="method",
    order=3,
    scatter_kws={"s": 3, "marker": "x", "alpha": 0.5},
)
axes = lm.axes
first_ax = axes[0, 0]
first_ax.set_xlim(30, 200)
first_ax.set_ylim(0, 50)
# Dash the first line drawn on the axes.
lines = lm.ax.get_lines()
line = lines[0]
line.set_linestyle("--")
datasize_pdf = op.join(latex_dir, "msgc_time_datasize_plot.pdf")
plt.savefig(datasize_pdf, dpi=1000)
# axes[0,1].set_ylim(0,)
# lm.ax.get_lines()
In [200]:
# test better melt
# Toy frame for trying the multi-group `melt` helper: two groups of value
# columns are melted at once into two name/value column pairs.
fruit_cols = ['Mango', 'Orange', 'Watermelon']
drink_cols = ['Gin', 'Vodka']
dfm = pd.DataFrame(
    {
        'City': ['Houston', 'Austin', 'Hoover'],
        'State': ['Texas', 'Texas', 'Alabama'],
        'Name': ['Aria', 'Penelope', 'Niko'],
        'Mango': [4, 10, 90],
        'Orange': [10, 8, 14],
        'Watermelon': [40, 99, 43],
        'Gin': [16, 200, 34],
        'Vodka': [20, 33, 18],
    },
    columns=['City', 'State', 'Name'] + fruit_cols + drink_cols,
)
melt(
    dfm,
    id_vars=['City', 'State'],
    value_vars=[list(fruit_cols), list(drink_cols)],
    var_name=['Fruit', 'Drink'],
    value_name=['Pounds', 'Ounces'],
)
Out[200]:
In [201]:
# df.keys()
In [202]:
# df.rename(columns={"gc time": "gc", "time": "total"})[["gc", "total", "data size"]]
In [203]:
# df.rename(columns={"gc time": "gc", "time": "total"})
In [221]:
# Long-format timing table for the "image size" experiment.
dfs = df[df["experiment"] == "image size"]
image_size_wide = dfs.rename(columns={"gc time": "gc", "time": "total"})
uu = melt(
    image_size_wide,
    id_vars=["method", "data size", "image voxel number", "relative object size"],
    value_vars=["gc", "total"],
    var_name=["time type"],
    value_name=["Time [s]"],
)
# Combined "<method> <time type>" label used as the hue in the plots below.
uu["mth"] = uu["method"] + " " + uu["time type"]
# uu
In [205]:
# Fetch the active seaborn colour palette and render it as a strip of swatches.
current_palette = sns.color_palette()
sns.palplot(current_palette)
In [222]:
# Third-order fits of "Time [s]" against "data size", one per "mth" level.
# Everything is first drawn with a single white palette entry; the fitted
# lines are recoloured afterwards.
white_palette = [(1., 1., 1., 0.5)]
lm = sns.lmplot(
    data=uu,
    x="data size",
    y="Time [s]",
    hue="mth",
    order=3,
    scatter_kws={"s": 3, "marker": "x", "alpha": 0.5},
    palette=white_palette,
)
axes = lm.axes
axes[0, 0].set_xlim(30, 200)
axes[0, 0].set_ylim(0, 50)

current_palette = sns.color_palette()
nlines = 3
lines = lm.ax.get_lines()
# Lines n and n + nlines share palette colour n; the first of each pair is dashed.
for n in range(nlines):
    line1, line2 = lines[n], lines[n + nlines]
    line1.set_linestyle("--")
    line1.set_color(current_palette[n])
    line2.set_color(current_palette[n])
# line1.set_color(line2.get_color())
In [224]:
from matplotlib.lines import Line2D

# Second-order fits of "Time [s]" against "data size", one per "mth" level.
# Everything is first drawn with a single white palette entry; the fitted
# lines are recoloured below and the legend is built by hand.
white_palette = [(1., 1., 1., 0.5)]
lm = sns.lmplot(
    data=uu,
    x="data size",
    y="Time [s]",
    hue="mth",
    order=2,
    scatter_kws={"s": 3, "marker": "x", "alpha": 0.5},
    palette=white_palette,
    legend=None,
)
axes = lm.axes
axes[0, 0].set_xlim(30, 200)
axes[0, 0].set_ylim(0, 50)

current_palette = sns.color_palette()
nlines = 3
lines = lm.ax.get_lines()
# Lines n and n + nlines share palette colour n; the first of each pair is
# dashed. (Replaces three copy-pasted stanzas and a trailing recolour that
# re-applied the colour the line already had.)
for n in range(nlines):
    line1, line2 = lines[n], lines[n + nlines]
    line1.set_linestyle("--")
    line1.set_color(current_palette[n])
    line2.set_color(current_palette[n])

# Overlay the "total" measurements on the same axes.
sc = sns.scatterplot(
    data=uu[uu["time type"] == "total"],
    x="data size",
    y="Time [s]",
    hue="mth",
    ax=lm.ax,
    alpha=0.8,
    s=3,
)
# The scatterplot call leaves a legend on the axes; its texts are reused as labels.
texts = lm.ax.legend_.get_texts()

# Replace that legend with three thick colour swatches. Each label is the
# corresponding legend text with its last three characters removed.
custom_lines = [
    Line2D([0], [0], color=current_palette[i], lw=4) for i in range(3)
]
lm.ax.legend(
    custom_lines,
    [tx.get_text()[:-3] for tx in texts[:3]],
)
# lg = lm.ax.legend()
Out[224]:
In [159]:
# Inspect the string held by the first entry of `texts`.
texts[0].get_text()
Out[159]:
In [160]:
# dir(lm.ax.legend_)
In [161]:
# L=plt.legend()
# List the text objects of the legend currently attached to lm.ax.
lm.ax.legend_.get_texts()
# L.get_texts()[0].set_text('make it short')
Out[161]:
In [227]:
from matplotlib.lines import Line2D

# Not referenced anywhere else in this cell; kept so the name stays defined.
rename_time = {
    "time": "Time [s]"
}

current_palette = sns.color_palette()
# Six palette entries: three white ones followed by the first three palette colours.
white_palette = [(1., 1., 1., 0.5)] * 3 + [current_palette[i] for i in range(3)]

# Third-order fits of "Time [s]" against the voxel count, one per "mth" level.
lm = sns.lmplot(
    data=uu,
    x="image voxel number",
    y="Time [s]",
    hue="mth",
    order=3,
    scatter_kws={"s": 3, "marker": "x", "alpha": 0.5},
    palette=white_palette,
    legend=None,
)
axes = lm.axes
axes[0, 0].set_xlim(30, uu["image voxel number"].max())
axes[0, 0].set_ylim(0, 50)

current_palette = sns.color_palette()
nlines = 3
lines = lm.ax.get_lines()
# Lines n and n + nlines share palette colour n; the first of each pair is
# dashed. (Replaces three copy-pasted stanzas and a trailing recolour that
# re-applied the colour the line already had.)
for n in range(nlines):
    line1, line2 = lines[n], lines[n + nlines]
    line1.set_linestyle("--")
    line1.set_color(current_palette[n])
    line2.set_color(current_palette[n])

# Hand-made legend: three thick colour swatches labelled with `labels`.
# NOTE(review): `labels` is not defined in this notebook — presumably it comes
# from the init notebook; confirm it holds the three method names.
custom_lines = [
    Line2D([0], [0], color=current_palette[i], lw=4) for i in range(3)
]
lm.ax.legend(
    custom_lines,
    labels
)
Out[227]:
In [228]:
# Display the current long-format timing frame.
uu
Out[228]:
In [163]:
from matplotlib.lines import Line2D

xx = "relative object size"
# Fifth-order fits against the relative object size, one per "mth" level.
# The y column is "Time [s]": that is the value column of the `uu` frame built
# earlier in this notebook, which has no plain "time" column.
white_palette = [(1., 1., 1., 0.5)]
lm = sns.lmplot(
    data=uu,
    x=xx,
    y="Time [s]",
    hue="mth",
    order=5,
    scatter_kws={"s": 3, "marker": "x", "alpha": 0.5},
    palette=white_palette,
    legend=None,
)
axes = lm.axes

current_palette = sns.color_palette()
nlines = 3
lines = lm.ax.get_lines()
# Lines n and n + nlines share palette colour n; the first of each pair is
# dashed. (Replaces three copy-pasted stanzas and a trailing recolour that
# re-applied the colour the line already had.)
for n in range(nlines):
    line1, line2 = lines[n], lines[n + nlines]
    line1.set_linestyle("--")
    line1.set_color(current_palette[n])
    line2.set_color(current_palette[n])

# Overlay the "total" measurements on the same axes.
sc = sns.scatterplot(
    data=uu[uu["time type"] == "total"],
    x=xx,
    y="Time [s]",
    hue="mth",
    ax=lm.ax,
    alpha=1.0,
    s=5,
)
# The scatterplot call leaves a legend on the axes; its texts are reused as labels.
texts = lm.ax.legend_.get_texts()

# Replace that legend with three thick colour swatches. Each label is the
# corresponding legend text with its last three characters removed.
custom_lines = [
    Line2D([0], [0], color=current_palette[i], lw=4) for i in range(3)
]
lm.ax.legend(
    custom_lines,
    [tx.get_text()[:-3] for tx in texts[:3]],
)
# lg = lm.ax.legend()
Out[163]:
In [164]:
# Stand-alone scatter of the "total" measurements on a logarithmic x axis.
# The y column is "Time [s]": the `uu` frame built earlier in this notebook
# has no plain "time" column.
sc = sns.scatterplot(
    data=uu[uu["time type"] == "total"],
    x=xx,
    y="Time [s]",
    hue="mth",
    alpha=1.0,
    s=5,
)
sc.set(xscale="log")
# grid.set(xscale="log", )
Out[164]:
In [165]:
# Show the axes object exposed by `lm`.
lm.ax
Out[165]:
In [166]:
# Exploratory: list every attribute name available on lm.ax.
dir(lm.ax)
Out[166]:
In [167]:
# Third-order fits of the measured time against "data size", split by time type.
# The y column is "Time [s]": the `uu` frame built earlier in this notebook
# has no plain "time" column.
lm = sns.lmplot(
    data=uu,
    x="data size",
    y="Time [s]",
    hue="time type",
    order=3,
    scatter_kws={"s": 3, "marker": "x", "alpha": 0.5},
)
axes = lm.axes
axes[0, 0].set_xlim(30, 200)
axes[0, 0].set_ylim(0, 50)
# Dash the first line drawn on the axes.
lines = lm.ax.get_lines()
line = lines[0]
line.set_linestyle("--")
# NOTE(review): the same file name is written by other cells of this notebook,
# so each run overwrites the previous figure — confirm which plot is wanted.
plt.savefig(op.join(latex_dir, "msgc_time_datasize_plot.pdf"), dpi=1000)
# axes[0,1].set_ylim(0,)
# lm.ax.get_lines()
In [ ]:
In [168]:
# this works only when there are no duplicate values of data size
# sns.tsplot(data=df, time="data size", value="time", unit="method", condition="method")
# plt.savefig(op.join(latex_dir, "msgc_size_time.pdf"), dpi=1000)
In [169]:
# Use square markers ("s") on `line`, the line dashed in an earlier cell.
line.set_marker("s")
In [170]:
# df
In [171]:
# df["method"]
In [276]:
# uu = df[df["experiment"]=="object size"]
In [277]:
# Long-format timing table for the "object size" experiment.
dfs = df[df["experiment"] == "object size"]
object_size_wide = dfs.rename(columns={"gc time": "gc", "time": "total"})
uu = melt(
    object_size_wide,
    id_vars=[
        "method",
        "data size",
        "image voxel number",
        "data object size px",
        "relative object size",
        "log(relative object size)",
    ],
    value_vars=["gc", "total"],
    var_name=["time type"],
    value_name=["Time [s]"],
)
# Combined "<method> <time type>" label used as the hue in the plots below.
uu["mth"] = uu["method"] + " " + uu["time type"]
# uu
In [278]:
# Show the column labels of the selected experiment rows.
dfs.columns
Out[278]:
In [279]:
from matplotlib.lines import Line2D

# Second-order fits of "Time [s]" against the object size in pixels, one per
# "mth" level. Everything is first drawn with a single white palette entry;
# the fitted lines are recoloured below and the legend is built by hand.
white_palette = [(1., 1., 1., 0.5)]
lm = sns.lmplot(
    data=uu,
    x="data object size px",
    y="Time [s]",
    hue="mth",
    order=2,
    scatter_kws={"s": 3, "marker": "x", "alpha": 0.5},
    palette=white_palette,
    legend=None,
)
axes = lm.axes
axes[0, 0].set_xlim(30, uu["data object size px"].max())
axes[0, 0].set_ylim(0, 50)

current_palette = sns.color_palette()
nlines = 3
lines = lm.ax.get_lines()
# Lines n and n + nlines share palette colour n; the first of each pair is
# dashed. (Replaces three copy-pasted stanzas and a trailing recolour that
# re-applied the colour the line already had.)
for n in range(nlines):
    line1, line2 = lines[n], lines[n + nlines]
    line1.set_linestyle("--")
    line1.set_color(current_palette[n])
    line2.set_color(current_palette[n])

# Overlay the "total" measurements on the same axes.
sc = sns.scatterplot(
    data=uu[uu["time type"] == "total"],
    x="data object size px",
    y="Time [s]",
    hue="mth",
    ax=lm.ax,
    alpha=1.0,
    s=5,
)
# The scatterplot call leaves a legend on the axes; its texts are reused as labels.
texts = lm.ax.legend_.get_texts()

# Replace that legend with three thick colour swatches. Each label is the
# corresponding legend text with its last three characters removed.
custom_lines = [
    Line2D([0], [0], color=current_palette[i], lw=4) for i in range(3)
]
lm.ax.legend(
    custom_lines,
    [tx.get_text()[:-3] for tx in texts[:3]],
)
# NOTE(review): the same file name is written by other cells of this notebook,
# so each run overwrites the previous figure — confirm which plot is wanted.
plt.savefig(op.join(latex_dir, "msgc_time_datasize_plot.pdf"), dpi=1000)
In [280]:
# Display the current long-format timing frame.
uu
Out[280]:
In [281]:
from matplotlib.lines import Line2D

xx = "relative object size"
# Fits of "Time [s]" against the relative object size with logx=True, one per
# "mth" level, shown on a logarithmic x axis. Everything is first drawn with
# a single white palette entry; the fitted lines are recoloured below.
white_palette = [(1., 1., 1., 0.5)]
lm = sns.lmplot(
    data=uu,
    x=xx,
    y="Time [s]",
    hue="mth",
    scatter_kws={"s": 3, "marker": "x", "alpha": 0.5},
    palette=white_palette,
    legend=None,
    logx=True,
)
axes = lm.axes
lm.set(xscale="log")

current_palette = sns.color_palette()
nlines = 3
lines = lm.ax.get_lines()
# Lines n and n + nlines share palette colour n; the first of each pair is
# dashed. (Replaces three copy-pasted stanzas and a trailing recolour that
# re-applied the colour the line already had.)
for n in range(nlines):
    line1, line2 = lines[n], lines[n + nlines]
    line1.set_linestyle("--")
    line1.set_color(current_palette[n])
    line2.set_color(current_palette[n])

# Overlay the "total" measurements on the same axes.
sc = sns.scatterplot(
    data=uu[uu["time type"] == "total"],
    x=xx,
    y="Time [s]",
    hue="mth",
    ax=lm.ax,
    alpha=1.0,
    s=5,
)
# The scatterplot call leaves a legend on the axes; its texts are reused as labels.
texts = lm.ax.legend_.get_texts()

# Replace that legend with three thick colour swatches. Each label is the
# corresponding legend text with its last three characters removed.
custom_lines = [
    Line2D([0], [0], color=current_palette[i], lw=4) for i in range(3)
]
lm.ax.legend(
    custom_lines,
    [tx.get_text()[:-3] for tx in texts[:3]],
)
# NOTE(review): the same file name is written by other cells of this notebook,
# so each run overwrites the previous figure — confirm which plot is wanted.
plt.savefig(op.join(latex_dir, "msgc_time_datasize_plot.pdf"), dpi=1000)
In [282]:
from matplotlib.lines import Line2D

xx = "log(relative object size)"
# Second-order fits of "Time [s]" against the precomputed log column, one per
# "mth" level. Everything is first drawn with a single white palette entry;
# the fitted lines are recoloured below and the legend is built by hand.
white_palette = [(1., 1., 1., 0.5)]
lm = sns.lmplot(
    data=uu,
    x=xx,
    y="Time [s]",
    hue="mth",
    order=2,
    scatter_kws={"s": 3, "marker": "x", "alpha": 0.5},
    palette=white_palette,
    legend=None,
)
axes = lm.axes
# Clip the x axis to the range actually covered by the data.
axes[0, 0].set_xlim(uu[xx].min(), uu[xx].max())
axes[0, 0].set_ylim(0, 50)

current_palette = sns.color_palette()
nlines = 3
lines = lm.ax.get_lines()
# Lines n and n + nlines share palette colour n; the first of each pair is
# dashed. (Replaces three copy-pasted stanzas and a trailing recolour that
# re-applied the colour the line already had.)
for n in range(nlines):
    line1, line2 = lines[n], lines[n + nlines]
    line1.set_linestyle("--")
    line1.set_color(current_palette[n])
    line2.set_color(current_palette[n])

# Overlay the "total" measurements on the same axes.
sc = sns.scatterplot(
    data=uu[uu["time type"] == "total"],
    x=xx,
    y="Time [s]",
    hue="mth",
    ax=lm.ax,
    alpha=1.0,
    s=5,
)
# The scatterplot call leaves a legend on the axes; its texts are reused as labels.
texts = lm.ax.legend_.get_texts()

# Replace that legend with three thick colour swatches. Each label is the
# corresponding legend text with its last three characters removed.
custom_lines = [
    Line2D([0], [0], color=current_palette[i], lw=4) for i in range(3)
]
lm.ax.legend(
    custom_lines,
    [tx.get_text()[:-3] for tx in texts[:3]],
)
# NOTE(review): the same file name is written by other cells of this notebook,
# so each run overwrites the previous figure — confirm which plot is wanted.
plt.savefig(op.join(latex_dir, "msgc_time_datasize_plot.pdf"), dpi=1000)
In [283]:
## Volume ratio
In [284]:
from scipy import stats

# Summary statistics of the dfs_plus subset.
dfs_plus_describe = dfs_plus.describe()
display(dfs_plus_describe)

# Printed hint (Czech): if the p-value is below the chosen significance
# level (0.01 = 1 %), everything is fine.
print("pokud je pvalue mensi nez zvolena hladina vyznamnosti (0.01=1%), je vsechno ok")

# Paired t-test on the "time" column of the two exactly-named methods.
ssgc_time = dfs_plus.loc[dfs_plus["method"] == "ssgc", "time"]
msgc_lo2hi_time = dfs_plus.loc[dfs_plus["method"] == "msgc_lo2hi", "time"]
tt = stats.ttest_rel(ssgc_time, msgc_lo2hi_time)
# tt
In [285]:
# Select the rows of each method by substring match against the three labels.
method_names = dfs_plus["method"]
ssgc_rows = dfs_plus[method_names.str.contains(labels[0])]
ssgc_hi2lo_rows = dfs_plus[method_names.str.contains(labels[1])]
ssgc_lo2hi_rows = dfs_plus[method_names.str.contains(labels[2])]

# Paired t-tests on "time" for every pair of methods.
pp0 = stats.ttest_rel(ssgc_rows["time"], ssgc_hi2lo_rows["time"])
pp1 = stats.ttest_rel(ssgc_rows["time"], ssgc_lo2hi_rows["time"])
pp2 = stats.ttest_rel(ssgc_hi2lo_rows["time"], ssgc_lo2hi_rows["time"])

# Printed hints (Czech): the p-value should be below the chosen significance
# level (0.01 = 1 %) — perhaps even twice that level is enough — and the
# statistic must be greater than 0.
print("pokud je pvalue mensi nez zvolena hladina vyznamnosti (0.01=1%), je vsechno ok")
print("statistic musi byt vetsi nez 0")
for ttest_result in (pp0, pp1, pp2):
    display(ttest_result)

# Export each p-value to its own LaTeX snippet.
pvalue_files = (
    (pp0, "ttest_pvalue_ssgc_msgc_hi2lo.tex"),
    (pp1, "ttest_pvalue_ssgc_msgc_lo2hi.tex"),
    (pp2, "ttest_pvalue_msgc_hi2lo_msgc_lo2hi.tex"),
)
for ttest_result, tex_name in pvalue_files:
    float_to_latex_file(ttest_result.pvalue, op.join(latex_dir, tex_name))
In [286]:
# dfs_plus["method"] == "ssgc "
In [287]:
# Mean of every timing/size column per method.
# aggfunc is given by name: passing the np.mean callable is deprecated in
# current pandas, and the string selects the same aggregation.
table = pd.pivot_table(
    dfs,
    values=['gc time', 'time', "t2", "t3", "t3.1", "t3.2", "t3.3", "t4", "t5", "t6", "t7", "t8", "low level object voxels", "low level image voxels"],
    index=['method'],
    aggfunc="mean",
)
table
Out[287]:
In [288]:
# Mean of the graph-construction timing columns per method.
# aggfunc is given by name: passing the np.mean callable is deprecated in
# current pandas, and the string selects the same aggregation.
table = pd.pivot_table(
    dfs,
    values=['gc time', 'time', "t graph 01", "t graph 10", "t graph 11", "t graph 13", "t graph 14", "low level image voxels", "low level object voxels"],
    index=['method'],
    aggfunc="mean",
)
table
Out[288]:
In [289]:
# Summary statistics of the current selection and of the dfs_plus subset,
# shown one after the other.
dfs_describe = dfs.describe()
dfs_plus_describe = dfs_plus.describe()
display(dfs_describe)
display(dfs_plus_describe)
In [290]:
# Number of dfs_plus rows per label, written out as a LaTeX snippet.
dfs_plus_size = len(dfs_plus) // len(labels)
subset_size_tex = op.join(latex_dir, "msgc_dataset_subset_size.tex")
to_file(str(dfs_plus_size), subset_size_tex)
In [291]:
# df[["GC total time", "MSGC total time", "GC time", "MSGC time"]]
# Show the column labels of the full results frame.
df.columns
Out[291]:
In [292]:
# Per-method summary over all experiments: mean "gc time", and min / max / mean
# of "time". Aggregations are given by name: passing np.mean or the builtin
# min/max callables is deprecated in current pandas, and the strings select
# the same aggregations.
table = pd.pivot_table(
    df,
    values=['gc time', 'time'],
    index=['method'],
    aggfunc={'gc time': "mean", 'time': ["min", "max", "mean"]},
)
table
Out[292]:
In [293]:
# Summary statistics of the four timing columns, for all rows and for the
# current selection, each displayed and exported as a LaTeX table.
summary_columns = ["GC total time", "MSGC total time", "GC time", "MSGC time"]

df_mn = df[summary_columns].describe()
display(df_mn)
to_latex_file(df_mn, "../includes/exp062-all2data_size.tex")

dfs_mn = dfs[summary_columns].describe()
display(dfs_mn)
to_latex_file(dfs_mn, "../includes/exp062-selection2data_size.tex")
In [ ]:
# Show only the "method" column of dfs_plus (as a one-column frame).
dfs_plus[["method"]]