In [3]:
%run ./msgc_experiments_CMBBEIV_init.ipynb


Populating the interactive namespace from numpy and matplotlib

In [4]:
import pandas as pd
import seaborn as sns

Data processing graphs, statistics

Seaborn graphs


In [5]:
df = pd.read_csv(fnamenew)
df["image voxel number"] = df["data size"].pow(3)
df["relative object size"] = df["data object size px"] / df["image voxel number"]
df["log(relative object size)"] = log(df["data object size px"] / df["image voxel number"])
# df.rename(columns={"msgc time": "MSGC time"})
# dfs = df[(df["data seedsz"]==3) & (df["data offset"] == 3) & (df["data radius"] == 10) & (df["experiment"] == "image size")]
dfs = df[df["experiment"] == "image size"]
dfs_plus = dfs[dfs['data size'] > 160]

sns.set_context("paper")
sns.set_style("white")

In [6]:
df.keys()


Out[6]:
Index(['_create_nlinks time', 'block size', 'data image size px',
       'data object size px', 'data offset', 'data radius', 'data seedsz',
       'data size', 'edge number', 'error', 'experiment',
       'experiment iteration start time', 'gc time', 'low level image voxels',
       'low level object voxels', 'machine hostname', 'method',
       'nlinks number', 't graph 01', 't graph 10', 't graph 11', 't graph 13',
       't graph 14', 't graph high', 't graph low', 't split 01', 't split 02',
       't split 03', 't split 04', 't split 05', 't split 06', 't split 07',
       't split 08', 't split 081', 't split 082', 't split 0821',
       't split 09', 't split 10', 't1', 't10', 't2', 't3', 't3.1', 't3.2',
       't3.3', 't4', 't5', 't6', 't7', 't8', 't9', 'time', 'tlinks number',
       'image voxel number', 'relative object size',
       'log(relative object size)'],
      dtype='object')

In [7]:
sns.boxplot(data=dfs_plus, y="time", x="method", showfliers=False)


Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x1e720351dd8>

In [8]:
# df
mfmc_label = "Min-Cut"
rename_bp = {"gc time": mfmc_label, "time": "total"}

In [9]:
uu = pd.melt(dfs_plus.rename(columns=rename_bp), value_vars=[mfmc_label, "total"], id_vars=["method"], var_name="time type", value_name="Time [s]")
# uu = pd.melt(dfs, value_vars=["gc time", "time"], id_vars=["method"], var_name="type", value_name="time")
# uu

Paper


In [10]:
mfmc_label = "Min-Cut"
rename_bp = {"gc time": mfmc_label, "time": "total"}

bp = sns.boxplot(data=uu, hue="time type",y="Time [s]", x="method", 
#                  showfliers=False
                 hue_order=["total", mfmc_label],
                )
# bp.
bp.set(yscale="log")
# ax.set(xscale="log", yscale="log")
plt.savefig(op.join(latex_dir, "msgc_time_boxplot.pdf"), dpi=1000)



In [11]:
bp.set(yscale="log")


Out[11]:
[None]

In [12]:
# sns.boxplot(data=dfs, y="error", x="method")

In [13]:
lm = sns.lmplot(data=df, x="data size", y="time", hue="method", order=3, scatter_kws={"s": 3, "marker": "x", "alpha": 0.5})
axes = lm.axes
axes[0,0].set_xlim(30,200)
axes[0,0].set_ylim(0,50)

lines = lm.ax.get_lines()
line = lines[0]
line.set_linestyle("--")
# line.set_marker("s")

plt.savefig(op.join(latex_dir, "msgc_time_datasize_plot.pdf"), dpi=1000)
# axes[0,1].set_ylim(0,)

# lm.ax.get_lines()


Vykreslení dvou lmplotů do jednoho není tak snadné


In [14]:
# test better melt

dfm = pd.DataFrame({'City': ['Houston', 'Austin', 'Hoover'],
                   'State': ['Texas', 'Texas', 'Alabama'],
                   'Name':['Aria', 'Penelope', 'Niko'],
                   'Mango':[4, 10, 90],
                   'Orange': [10, 8, 14], 
                   'Watermelon':[40, 99, 43],
                   'Gin':[16, 200, 34],
                   'Vodka':[20, 33, 18]},
                 columns=['City', 'State', 'Name', 'Mango', 'Orange', 'Watermelon', 'Gin', 'Vodka'])
melt(dfm, id_vars=['City', 'State'], value_vars=[['Mango', 'Orange', 'Watermelon'], ['Gin', 'Vodka']], 
     var_name=['Fruit', 'Drink'], value_name=['Pounds', 'Ounces'])


Out[14]:
City State Fruit Pounds Drink Ounces
0 Houston Texas Mango 4 Gin 16.0
1 Austin Texas Mango 10 Gin 200.0
2 Hoover Alabama Mango 90 Gin 34.0
3 Houston Texas Orange 10 Vodka 20.0
4 Austin Texas Orange 8 Vodka 33.0
5 Hoover Alabama Orange 14 Vodka 18.0
6 Houston Texas Watermelon 40 nan NaN
7 Austin Texas Watermelon 99 nan NaN
8 Hoover Alabama Watermelon 43 nan NaN

In [15]:
# df.keys()

In [16]:
# df.rename(columns={"gc time": "gc", "time": "total"})[["gc", "total", "data size"]]

In [17]:
# df.rename(columns={"gc time": "gc", "time": "total"})

In [18]:
dfs = df[df["experiment"] == "image size"]
uu = melt(dfs.rename(columns={"gc time": "gc", "time": "total"}), value_vars=["gc", "total"], id_vars=["method", "data size", "image voxel number", "relative object size"], var_name=["time type"], value_name=["Time [s]"])
uu["mth"] = uu["method"] + " " + uu["time type"]
# uu

In [19]:
current_palette = sns.color_palette()
sns.palplot(current_palette)


Artifical data - constant object size


In [20]:
white_palette = [(1., 1., 1., 0.5)]
lm = sns.lmplot(data=uu, x="data size", y="Time [s]", hue="mth", order=3, 
                scatter_kws={"s": 3, "marker": "x", "alpha": 0.5}, 
#                 line_kws={"alpha": 1},
#                 palette=current_palette[:3]
                palette=white_palette
               )
axes = lm.axes
axes[0,0].set_xlim(30,200)
axes[0,0].set_ylim(0,50)

current_palette = sns.color_palette()
nlines = 3
n = 0
lines = lm.ax.get_lines()
line1 = lines[n]
line2 = lines[n + nlines]
line1.set_linestyle("--")
line1.set_color(current_palette[n])
line2.set_color(current_palette[n])
# line1.set_color(line2.get_color())

n = 1
lines = lm.ax.get_lines()
line1 = lines[n]
line2 = lines[n + nlines]
line1.set_linestyle("--")
line1.set_color(current_palette[n])
line2.set_color(current_palette[n])
# line1.set_color(line2.get_color())

n = 2
lines = lm.ax.get_lines()
line1 = lines[n]
line2 = lines[n + nlines]
line1.set_linestyle("--")
line1.set_color(current_palette[n])
line2.set_color(current_palette[n])
# line1.set_color(line2.get_color())



In [21]:
white_palette = [(1., 1., 1., 0.5)]
lm = sns.lmplot(
#     data=uu[uu["time type"] == "total"], 
    data=uu, 
    x="data size", y="Time [s]", hue="mth", order=3, 
                scatter_kws={"s": 3, "marker": "x", "alpha": 0.5}, 
#                 line_kws={"alpha": 1},
#                 palette=current_palette[:3],
                palette=white_palette,
                legend=None,
#     legend="off"
               )
axes = lm.axes
axes[0,0].set_xlim(30,200)
axes[0,0].set_ylim(0,50)

current_palette = sns.color_palette()
nlines = 3
n = 0
lines = lm.ax.get_lines()
line1 = lines[n]
line2 = lines[n + nlines]
line1.set_linestyle("--")
line1.set_color(current_palette[n])
line2.set_color(current_palette[n])
# line1.set_color(line2.get_color())

n = 1
lines = lm.ax.get_lines()
line1 = lines[n]
line2 = lines[n + nlines]
line1.set_linestyle("--")
line1.set_color(current_palette[n])
line2.set_color(current_palette[n])
# line1.set_color(line2.get_color())

n = 2
lines = lm.ax.get_lines()
line1 = lines[n]
line2 = lines[n + nlines]
line1.set_linestyle("--")
line1.set_color(current_palette[n])
line2.set_color(current_palette[n])
line1.set_color(line2.get_color())


# body
sc = sns.scatterplot(data=uu[uu["time type"] == "total"], x="data size", y="Time [s]", hue="mth", ax=lm.ax, alpha=0.8, s=3)
texts = lm.ax.legend_.get_texts()
leglines = lm.ax.legend_.get_lines()

# texts[0].set_text("short")
# texts.pop(0)
# texts.pop(0)

# plt.legend(lm.ax.legend_[3:6])

from matplotlib.lines import Line2D
custom_lines = [Line2D([0], [0], color=current_palette[0], lw=4),
                Line2D([0], [0], color=current_palette[1], lw=4),
                Line2D([0], [0], color=current_palette[2], lw=4)]
lm.ax.legend(
    custom_lines, 
    [tx.get_text()[:-3] for tx in texts[:3]],
#     ['Cold', 'Medium', 'Hot'],
)
# lg = lm.ax.legend()


Out[21]:
<matplotlib.legend.Legend at 0x1e7219cc2b0>

In [22]:
texts[0].get_text()


Out[22]:
'ssgc  gc'

In [23]:
# dir(lm.ax.legend_)

In [24]:
# L=plt.legend()
lm.ax.legend_.get_texts()
# L.get_texts()[0].set_text('make it short')


Out[24]:
<a list of 3 Text objects>

Paper - With voxel number


In [25]:
rename_time = {
    "time": "Time [s]"
}
# uu = 
current_palette = sns.color_palette()
white_palette = [
    (1., 1., 1., 0.5),
    (1., 1., 1., 0.5),
    (1., 1., 1., 0.5),
    current_palette[0],
    current_palette[1],
    current_palette[2],
]
lm = sns.lmplot(
#     data=uu[uu["time type"] == "total"], 
    data=uu, 
    x="image voxel number", y="Time [s]", hue="mth", order=3, 
                scatter_kws={"s": 3, "marker": "x", "alpha": 0.5}, 
#                 line_kws={"alpha": 1},
#                 palette=current_palette[:3],
                palette=white_palette,
                legend=None,
#     legend="off"
               )
axes = lm.axes
axes[0,0].set_xlim(30,uu["image voxel number"].max())
axes[0,0].set_ylim(0,50)

current_palette = sns.color_palette()
nlines = 3
n = 0
lines = lm.ax.get_lines()
line1 = lines[n]
line2 = lines[n + nlines]
line1.set_linestyle("--")
line1.set_color(current_palette[n])
line2.set_color(current_palette[n])
# line1.set_color(line2.get_color())

n = 1
lines = lm.ax.get_lines()
line1 = lines[n]
line2 = lines[n + nlines]
line1.set_linestyle("--")
line1.set_color(current_palette[n])
line2.set_color(current_palette[n])
# line1.set_color(line2.get_color())

n = 2
lines = lm.ax.get_lines()
line1 = lines[n]
line2 = lines[n + nlines]
line1.set_linestyle("--")
line1.set_color(current_palette[n])
line2.set_color(current_palette[n])
line1.set_color(line2.get_color())


# body
# sc = sns.scatterplot(
#     data=uu[uu["time type"] == "total"], x="image voxel number", y="time", hue="mth", 
#     ax=lm.ax, alpha=1.0, s=5
# )
# texts = lm.ax.legend_.get_texts()
# leglines = lm.ax.legend_.get_lines()


# plt.legend(lm.ax.legend_[3:6])

from matplotlib.lines import Line2D
custom_lines = [Line2D([0], [0], color=current_palette[0], lw=4),
                Line2D([0], [0], color=current_palette[1], lw=4),
                Line2D([0], [0], color=current_palette[2], lw=4)]
# lm.ax.legend(
#     custom_lines, 
#     [tx.get_text()[:-3] for tx in texts[:3]],
# )
# lg = lm.ax.legend()
lm.ax.legend(
    custom_lines, 
    labels
#     [tx.get_text()[:-3] for tx in texts[:3]],
)


plt.savefig(op.join(latex_dir, "msgc_time_image_voxel_number_plot.pdf"), dpi=1000)



In [26]:
uu


Out[26]:
method data size image voxel number relative object size time type Time [s] mth
0 ssgc 44 85184 0.093727 gc 0.330146 ssgc gc
1 msgc_hi2lo 44 85184 0.093727 gc 0.371008 msgc_hi2lo gc
2 msgc_lo2hi 44 85184 0.093727 gc 0.548532 msgc_lo2hi gc
3 ssgc 44 85184 0.093727 gc 0.327159 ssgc gc
4 msgc_hi2lo 44 85184 0.093727 gc 0.382976 msgc_hi2lo gc
5 msgc_lo2hi 44 85184 0.093727 gc 0.466750 msgc_lo2hi gc
6 ssgc 47 103823 0.078615 gc 0.387964 ssgc gc
7 msgc_hi2lo 47 103823 0.078615 gc 0.539556 msgc_hi2lo gc
8 msgc_lo2hi 47 103823 0.078615 gc 0.534571 msgc_lo2hi gc
9 ssgc 47 103823 0.078615 gc 0.459755 ssgc gc
10 msgc_hi2lo 47 103823 0.078615 gc 0.540556 msgc_hi2lo gc
11 msgc_lo2hi 47 103823 0.078615 gc 0.477721 msgc_lo2hi gc
12 ssgc 50 125000 0.068992 gc 0.444810 ssgc gc
13 msgc_hi2lo 50 125000 0.068992 gc 0.562531 msgc_hi2lo gc
14 msgc_lo2hi 50 125000 0.068992 gc 0.606413 msgc_lo2hi gc
15 ssgc 50 125000 0.068992 gc 0.444811 ssgc gc
16 msgc_hi2lo 50 125000 0.068992 gc 0.670211 msgc_hi2lo gc
17 msgc_lo2hi 50 125000 0.068992 gc 0.674193 msgc_lo2hi gc
18 ssgc 53 148877 0.060728 gc 0.620343 ssgc gc
19 msgc_hi2lo 53 148877 0.060728 gc 0.822799 msgc_hi2lo gc
20 msgc_lo2hi 53 148877 0.060728 gc 0.726058 msgc_lo2hi gc
21 ssgc 53 148877 0.058988 gc 0.499663 ssgc gc
22 msgc_hi2lo 53 148877 0.058988 gc 0.772935 msgc_hi2lo gc
23 msgc_lo2hi 53 148877 0.058988 gc 0.701123 msgc_lo2hi gc
24 ssgc 56 175616 0.051129 gc 0.563528 ssgc gc
25 msgc_hi2lo 56 175616 0.051129 gc 0.702153 msgc_hi2lo gc
26 msgc_lo2hi 56 175616 0.051129 gc 0.754981 msgc_lo2hi gc
27 ssgc 56 175616 0.051129 gc 0.641288 ssgc gc
28 msgc_hi2lo 56 175616 0.051129 gc 0.854715 msgc_hi2lo gc
29 msgc_lo2hi 56 175616 0.051129 gc 0.979398 msgc_lo2hi gc
... ... ... ... ... ... ... ...
918 ssgc 152 3511808 0.005038 total 13.689396 ssgc total
919 msgc_hi2lo 152 3511808 0.005038 total 15.491609 msgc_hi2lo total
920 msgc_lo2hi 152 3511808 0.005038 total 10.887919 msgc_lo2hi total
921 ssgc 155 3723875 0.004850 total 15.438683 ssgc total
922 msgc_hi2lo 155 3723875 0.004850 total 16.157799 msgc_hi2lo total
923 msgc_lo2hi 155 3723875 0.004850 total 8.189136 msgc_lo2hi total
924 ssgc 158 3944312 0.004744 total 16.157797 ssgc total
925 msgc_hi2lo 158 3944312 0.004744 total 17.363541 msgc_hi2lo total
926 msgc_lo2hi 158 3944312 0.004744 total 12.214330 msgc_lo2hi total
927 ssgc 161 4173281 0.004376 total 17.411446 ssgc total
928 msgc_hi2lo 161 4173281 0.004376 total 18.864563 msgc_hi2lo total
929 msgc_lo2hi 161 4173281 0.004376 total 10.693441 msgc_lo2hi total
930 ssgc 164 4410944 0.004366 total 18.455324 ssgc total
931 msgc_hi2lo 164 4410944 0.004366 total 19.835914 msgc_hi2lo total
932 msgc_lo2hi 164 4410944 0.004366 total 13.916789 msgc_lo2hi total
933 ssgc 167 4657463 0.004270 total 21.011818 ssgc total
934 msgc_hi2lo 167 4657463 0.004270 total 20.820364 msgc_hi2lo total
935 msgc_lo2hi 167 4657463 0.004270 total 15.304080 msgc_lo2hi total
936 ssgc 170 4913000 0.003961 total 22.141764 ssgc total
937 msgc_hi2lo 170 4913000 0.003961 total 21.030734 msgc_hi2lo total
938 msgc_lo2hi 170 4913000 0.003961 total 14.966982 msgc_lo2hi total
939 ssgc 173 5177717 0.003841 total 24.428684 ssgc total
940 msgc_hi2lo 173 5177717 0.003841 total 22.747177 msgc_hi2lo total
941 msgc_lo2hi 173 5177717 0.003841 total 15.659095 msgc_lo2hi total
942 ssgc 176 5451776 0.003760 total 26.212910 ssgc total
943 msgc_hi2lo 176 5451776 0.003760 total 23.712629 msgc_hi2lo total
944 msgc_lo2hi 176 5451776 0.003760 total 16.071030 msgc_lo2hi total
945 ssgc 179 5735339 0.003694 total 30.277045 ssgc total
946 msgc_hi2lo 179 5735339 0.003694 total 24.703947 msgc_hi2lo total
947 msgc_lo2hi 179 5735339 0.003694 total 17.161115 msgc_lo2hi total

948 rows × 7 columns


In [27]:
xx = "relative object size"
white_palette = [(1., 1., 1., 0.5)]
lm = sns.lmplot(
#     data=uu[uu["time type"] == "total"], 
    data=uu, 
    x=xx, y="Time [s]", hue="mth", 
#     order=2, 
                scatter_kws={"s": 3, "marker": "x", "alpha": 0.5}, 
#                 line_kws={"alpha": 1},
#                 palette=current_palette[:3],
                palette=white_palette,
                legend=None,
                logx=True
#     legend="off"
               )
axes = lm.axes
# axes[0,0].set_xlim(30,uu[xx].max())
axes[0,0].set_ylim(0,50)
# lm.set(
#     xscale="log",
# #     yscale="log"
# )

current_palette = sns.color_palette()
nlines = 3
n = 0
lines = lm.ax.get_lines()
line1 = lines[n]
line2 = lines[n + nlines]
line1.set_linestyle("--")
line1.set_color(current_palette[n])
line2.set_color(current_palette[n])
# line1.set_color(line2.get_color())

n = 1
lines = lm.ax.get_lines()
line1 = lines[n]
line2 = lines[n + nlines]
line1.set_linestyle("--")
line1.set_color(current_palette[n])
line2.set_color(current_palette[n])
# line1.set_color(line2.get_color())

n = 2
lines = lm.ax.get_lines()
line1 = lines[n]
line2 = lines[n + nlines]
line1.set_linestyle("--")
line1.set_color(current_palette[n])
line2.set_color(current_palette[n])
line1.set_color(line2.get_color())


# body
sc = sns.scatterplot(
    data=uu[uu["time type"] == "total"], x=xx, y="Time [s]", hue="mth", 
    ax=lm.ax, alpha=1.0, s=5
)
texts = lm.ax.legend_.get_texts()
leglines = lm.ax.legend_.get_lines()

# texts[0].set_text("short")
# texts.pop(0)
# texts.pop(0)

# plt.legend(lm.ax.legend_[3:6])

from matplotlib.lines import Line2D
custom_lines = [Line2D([0], [0], color=current_palette[0], lw=4),
                Line2D([0], [0], color=current_palette[1], lw=4),
                Line2D([0], [0], color=current_palette[2], lw=4)]
lm.ax.legend(
    custom_lines, 
    [tx.get_text()[:-3] for tx in texts[:3]],
#     ['Cold', 'Medium', 'Hot'],
)
# lg = lm.ax.legend()


C:\Users\Jirik\Miniconda3\envs\lisa\lib\site-packages\seaborn\regression.py:279: RuntimeWarning: invalid value encountered in log
  grid = np.c_[np.ones(len(grid)), np.log(grid)]
Out[27]:
<matplotlib.legend.Legend at 0x1e721a401d0>

In [28]:
sc = sns.scatterplot(
    data=uu[uu["time type"] == "total"], x=xx, y="Time [s]", hue="mth", 
#     ax=lm.ax,
    alpha=1.0, s=5
)
sc.set(xscale="log",
      yscale="log"
      )
# grid.set(xscale="log", )


Out[28]:
[None, None]

In [29]:
lm.ax


Out[29]:
<matplotlib.axes._subplots.AxesSubplot at 0x1e721925908>

In [30]:
dir(lm.ax)


Out[30]:
['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_add_text',
 '_adjustable',
 '_agg_filter',
 '_alpha',
 '_anchor',
 '_animated',
 '_aspect',
 '_autoscaleXon',
 '_autoscaleYon',
 '_autotitlepos',
 '_axes',
 '_axes_class',
 '_axes_locator',
 '_axisbelow',
 '_cachedRenderer',
 '_clipon',
 '_clippath',
 '_connected',
 '_contains',
 '_current_image',
 '_facecolor',
 '_frameon',
 '_gci',
 '_gen_axes_patch',
 '_gen_axes_spines',
 '_get_axis_list',
 '_get_lines',
 '_get_patches_for_fill',
 '_get_view',
 '_gid',
 '_gridOn',
 '_hold',
 '_in_layout',
 '_init_axis',
 '_label',
 '_layoutbox',
 '_left_title',
 '_make_twin_axes',
 '_mouseover',
 '_mouseover_set',
 '_navigate',
 '_navigate_mode',
 '_oid',
 '_on_units_changed',
 '_originalPosition',
 '_path_effects',
 '_pcolorargs',
 '_picker',
 '_position',
 '_poslayoutbox',
 '_process_unit_info',
 '_prop_order',
 '_propobservers',
 '_quiver_units',
 '_rasterization_zorder',
 '_rasterized',
 '_remove_legend',
 '_remove_method',
 '_right_title',
 '_sci',
 '_set_artist_props',
 '_set_gc_clip',
 '_set_lim_and_transforms',
 '_set_position',
 '_set_title_offset_trans',
 '_set_view',
 '_set_view_from_bbox',
 '_shared_x_axes',
 '_shared_y_axes',
 '_sharex',
 '_sharey',
 '_sketch',
 '_snap',
 '_stale',
 '_sticky_edges',
 '_subplotspec',
 '_tight',
 '_transform',
 '_transformSet',
 '_twinned_axes',
 '_update_image_limits',
 '_update_line_limits',
 '_update_patch_limits',
 '_update_title_position',
 '_update_transScale',
 '_url',
 '_use_sticky_edges',
 '_validate_converted_limits',
 '_visible',
 '_xaxis_transform',
 '_xcid',
 '_xmargin',
 '_yaxis_transform',
 '_ycid',
 '_ymargin',
 'acorr',
 'add_artist',
 'add_callback',
 'add_child_axes',
 'add_collection',
 'add_container',
 'add_image',
 'add_line',
 'add_patch',
 'add_table',
 'aname',
 'angle_spectrum',
 'annotate',
 'apply_aspect',
 'arrow',
 'artists',
 'autoscale',
 'autoscale_view',
 'axes',
 'axhline',
 'axhspan',
 'axis',
 'axison',
 'axvline',
 'axvspan',
 'bar',
 'barbs',
 'barh',
 'bbox',
 'boxplot',
 'broken_barh',
 'bxp',
 'callbacks',
 'can_pan',
 'can_zoom',
 'change_geometry',
 'child_axes',
 'cla',
 'clabel',
 'clear',
 'clipbox',
 'cohere',
 'colNum',
 'collections',
 'containers',
 'contains',
 'contains_point',
 'contour',
 'contourf',
 'convert_xunits',
 'convert_yunits',
 'csd',
 'dataLim',
 'drag_pan',
 'draw',
 'draw_artist',
 'end_pan',
 'errorbar',
 'eventplot',
 'eventson',
 'figbox',
 'figure',
 'fill',
 'fill_between',
 'fill_betweenx',
 'findobj',
 'fmt_xdata',
 'fmt_ydata',
 'format_coord',
 'format_cursor_data',
 'format_xdata',
 'format_ydata',
 'get_adjustable',
 'get_agg_filter',
 'get_alpha',
 'get_anchor',
 'get_animated',
 'get_aspect',
 'get_autoscale_on',
 'get_autoscalex_on',
 'get_autoscaley_on',
 'get_axes_locator',
 'get_axisbelow',
 'get_children',
 'get_clip_box',
 'get_clip_on',
 'get_clip_path',
 'get_contains',
 'get_cursor_data',
 'get_data_ratio',
 'get_data_ratio_log',
 'get_default_bbox_extra_artists',
 'get_facecolor',
 'get_fc',
 'get_figure',
 'get_frame_on',
 'get_geometry',
 'get_gid',
 'get_gridspec',
 'get_images',
 'get_in_layout',
 'get_label',
 'get_legend',
 'get_legend_handles_labels',
 'get_lines',
 'get_navigate',
 'get_navigate_mode',
 'get_path_effects',
 'get_picker',
 'get_position',
 'get_rasterization_zorder',
 'get_rasterized',
 'get_renderer_cache',
 'get_shared_x_axes',
 'get_shared_y_axes',
 'get_sketch_params',
 'get_snap',
 'get_subplotspec',
 'get_tightbbox',
 'get_title',
 'get_transform',
 'get_transformed_clip_path_and_affine',
 'get_url',
 'get_visible',
 'get_window_extent',
 'get_xaxis',
 'get_xaxis_text1_transform',
 'get_xaxis_text2_transform',
 'get_xaxis_transform',
 'get_xbound',
 'get_xgridlines',
 'get_xlabel',
 'get_xlim',
 'get_xmajorticklabels',
 'get_xminorticklabels',
 'get_xscale',
 'get_xticklabels',
 'get_xticklines',
 'get_xticks',
 'get_yaxis',
 'get_yaxis_text1_transform',
 'get_yaxis_text2_transform',
 'get_yaxis_transform',
 'get_ybound',
 'get_ygridlines',
 'get_ylabel',
 'get_ylim',
 'get_ymajorticklabels',
 'get_yminorticklabels',
 'get_yscale',
 'get_yticklabels',
 'get_yticklines',
 'get_yticks',
 'get_zorder',
 'grid',
 'has_data',
 'have_units',
 'hexbin',
 'hist',
 'hist2d',
 'hitlist',
 'hlines',
 'ignore_existing_data_limits',
 'images',
 'imshow',
 'in_axes',
 'indicate_inset',
 'indicate_inset_zoom',
 'inset_axes',
 'invert_xaxis',
 'invert_yaxis',
 'is_figure_set',
 'is_first_col',
 'is_first_row',
 'is_last_col',
 'is_last_row',
 'is_transform_set',
 'label_outer',
 'legend',
 'legend_',
 'lines',
 'locator_params',
 'loglog',
 'magnitude_spectrum',
 'margins',
 'matshow',
 'minorticks_off',
 'minorticks_on',
 'mouseover',
 'mouseover_set',
 'name',
 'numCols',
 'numRows',
 'patch',
 'patches',
 'pchanged',
 'pcolor',
 'pcolorfast',
 'pcolormesh',
 'phase_spectrum',
 'pick',
 'pickable',
 'pie',
 'plot',
 'plot_date',
 'properties',
 'psd',
 'quiver',
 'quiverkey',
 'redraw_in_frame',
 'relim',
 'remove',
 'remove_callback',
 'reset_position',
 'rowNum',
 'scatter',
 'semilogx',
 'semilogy',
 'set',
 'set_adjustable',
 'set_agg_filter',
 'set_alpha',
 'set_anchor',
 'set_animated',
 'set_aspect',
 'set_autoscale_on',
 'set_autoscalex_on',
 'set_autoscaley_on',
 'set_axes_locator',
 'set_axis_off',
 'set_axis_on',
 'set_axisbelow',
 'set_clip_box',
 'set_clip_on',
 'set_clip_path',
 'set_contains',
 'set_facecolor',
 'set_fc',
 'set_figure',
 'set_frame_on',
 'set_gid',
 'set_in_layout',
 'set_label',
 'set_navigate',
 'set_navigate_mode',
 'set_path_effects',
 'set_picker',
 'set_position',
 'set_prop_cycle',
 'set_rasterization_zorder',
 'set_rasterized',
 'set_sketch_params',
 'set_snap',
 'set_subplotspec',
 'set_title',
 'set_transform',
 'set_url',
 'set_visible',
 'set_xbound',
 'set_xlabel',
 'set_xlim',
 'set_xmargin',
 'set_xscale',
 'set_xticklabels',
 'set_xticks',
 'set_ybound',
 'set_ylabel',
 'set_ylim',
 'set_ymargin',
 'set_yscale',
 'set_yticklabels',
 'set_yticks',
 'set_zorder',
 'specgram',
 'spines',
 'spy',
 'stackplot',
 'stale',
 'stale_callback',
 'start_pan',
 'stem',
 'step',
 'sticky_edges',
 'streamplot',
 'table',
 'tables',
 'text',
 'texts',
 'tick_params',
 'ticklabel_format',
 'title',
 'titleOffsetTrans',
 'transAxes',
 'transData',
 'transLimits',
 'transScale',
 'tricontour',
 'tricontourf',
 'tripcolor',
 'triplot',
 'twinx',
 'twiny',
 'update',
 'update_datalim',
 'update_datalim_bounds',
 'update_from',
 'update_params',
 'use_sticky_edges',
 'viewLim',
 'violin',
 'violinplot',
 'vlines',
 'xaxis',
 'xaxis_date',
 'xaxis_inverted',
 'xcorr',
 'yaxis',
 'yaxis_date',
 'yaxis_inverted',
 'zorder']

In [31]:
lm = sns.lmplot(data=uu, x="data size", y="Time [s]", hue="time type", order=3, scatter_kws={"s": 3, "marker": "x", "alpha": 0.5})
axes = lm.axes
axes[0,0].set_xlim(30,200)
axes[0,0].set_ylim(0,50)

lines = lm.ax.get_lines()
line = lines[0]
line.set_linestyle("--")
# line.set_marker("s")

# plt.savefig(op.join(latex_dir, "msgc_time_datasize_plot.pdf"), dpi=1000)
# axes[0,1].set_ylim(0,)

# lm.ax.get_lines()



In [ ]:


In [32]:
# this work jus for not duplicit values of data siz
# sns.tsplot(data=df, time="data size", value="time", unit="method", condition="method")
# plt.savefig(op.join(latex_dir, "msgc_size_time.pdf"), dpi=1000)

In [33]:
line.set_marker("s")

In [34]:
# df

In [35]:
# df["method"]

Experiment 2: fixed image size


In [36]:
# uu = df[df["experiment"]=="object size"]

In [37]:
dfs2 = df[df["experiment"] == "object size"]
uu2 = melt(dfs2.rename(columns={"gc time": "gc", "time": "total"}), value_vars=["gc", "total"], id_vars=["method", "data size", "image voxel number", "data object size px", "relative object size", "log(relative object size)"], var_name=["time type"], value_name=["Time [s]"])
uu2["mth"] = uu2["method"] + " " + uu2["time type"]
# uu

In [38]:
dfs2.keys()


Out[38]:
Index(['_create_nlinks time', 'block size', 'data image size px',
       'data object size px', 'data offset', 'data radius', 'data seedsz',
       'data size', 'edge number', 'error', 'experiment',
       'experiment iteration start time', 'gc time', 'low level image voxels',
       'low level object voxels', 'machine hostname', 'method',
       'nlinks number', 't graph 01', 't graph 10', 't graph 11', 't graph 13',
       't graph 14', 't graph high', 't graph low', 't split 01', 't split 02',
       't split 03', 't split 04', 't split 05', 't split 06', 't split 07',
       't split 08', 't split 081', 't split 082', 't split 0821',
       't split 09', 't split 10', 't1', 't10', 't2', 't3', 't3.1', 't3.2',
       't3.3', 't4', 't5', 't6', 't7', 't8', 't9', 'time', 'tlinks number',
       'image voxel number', 'relative object size',
       'log(relative object size)'],
      dtype='object')

In [39]:
xx="data object size px"
current_palette = sns.color_palette()
white_palette = [(1., 1., 1., 0.5)]
lm = sns.lmplot(
#     data=uu[uu["time type"] == "total"], 
    data=uu2, 
#     x="image voxel number", 
    x=xx,
    y="Time [s]", hue="mth", order=2, 
                scatter_kws={"s": 3, "marker": "x", "alpha": 0.5}, 
#                 line_kws={"alpha": 1},
                palette=current_palette[:3],
#                 palette=white_palette,
                legend=None,
#     legend="off"
               )
axes = lm.axes
axes[0,0].set_xlim(uu2[xx].min(), uu2[xx].max())
axes[0,0].set_ylim(0, 400)

current_palette = sns.color_palette()
nlines = 3
n = 0
lines = lm.ax.get_lines()
line1 = lines[n]
line2 = lines[n + nlines]
line1.set_linestyle("--")
line1.set_color(current_palette[n])
line2.set_color(current_palette[n])
# line1.set_color(line2.get_color())

n = 1
lines = lm.ax.get_lines()
line1 = lines[n]
line2 = lines[n + nlines]
line1.set_linestyle("--")
line1.set_color(current_palette[n])
line2.set_color(current_palette[n])
# line1.set_color(line2.get_color())

n = 2
lines = lm.ax.get_lines()
line1 = lines[n]
line2 = lines[n + nlines]
line1.set_linestyle("--")
line1.set_color(current_palette[n])
line2.set_color(current_palette[n])
line1.set_color(line2.get_color())


# body
sc = sns.scatterplot(
    data=uu2[uu2["time type"] == "total"], x=xx, y="Time [s]", hue="mth", 
    ax=lm.ax, alpha=1.0, s=5
)
texts = lm.ax.legend_.get_texts()
leglines = lm.ax.legend_.get_lines()

# texts[0].set_text("short")
# texts.pop(0)
# texts.pop(0)

# plt.legend(lm.ax.legend_[3:6])

from matplotlib.lines import Line2D
custom_lines = [Line2D([0], [0], color=current_palette[0], lw=4),
                Line2D([0], [0], color=current_palette[1], lw=4),
                Line2D([0], [0], color=current_palette[2], lw=4)]
lm.ax.legend(
    custom_lines, 
    [tx.get_text()[:-3] for tx in texts[:3]],
#     ['Cold', 'Medium', 'Hot'],
)
# lg = lm.ax.legend()
plt.savefig(op.join(latex_dir, "msgc_time_object_size_plot.pdf"), dpi=1000)



In [40]:
uu2


Out[40]:
method data size image voxel number data object size px relative object size log(relative object size) time type Time [s] mth
0 ssgc 200 8000000 5654 0.000707 -7.254834 gc 31.700240 ssgc gc
1 msgc_hi2lo 200 8000000 5654 0.000707 -7.254834 gc 1.687488 msgc_hi2lo gc
2 msgc_lo2hi 200 8000000 5654 0.000707 -7.254834 gc 1.157905 msgc_lo2hi gc
3 ssgc 200 8000000 14722 0.001840 -6.297854 gc 31.278368 ssgc gc
4 msgc_hi2lo 200 8000000 14722 0.001840 -6.297854 gc 2.115344 msgc_hi2lo gc
5 msgc_lo2hi 200 8000000 14722 0.001840 -6.297854 gc 1.322432 msgc_lo2hi gc
6 ssgc 200 8000000 27598 0.003450 -5.669454 gc 33.441616 ssgc gc
7 msgc_hi2lo 200 8000000 27598 0.003450 -5.669454 gc 2.467438 msgc_hi2lo gc
8 msgc_lo2hi 200 8000000 27598 0.003450 -5.669454 gc 1.549857 msgc_lo2hi gc
9 ssgc 200 8000000 47289 0.005911 -5.130919 gc 33.118446 ssgc gc
10 msgc_hi2lo 200 8000000 47289 0.005911 -5.130919 gc 3.179531 msgc_hi2lo gc
11 msgc_lo2hi 200 8000000 47289 0.005911 -5.130919 gc 1.990675 msgc_lo2hi gc
12 ssgc 200 8000000 72397 0.009050 -4.705032 gc 32.783346 ssgc gc
13 msgc_hi2lo 200 8000000 72397 0.009050 -4.705032 gc 13.494916 msgc_hi2lo gc
14 msgc_lo2hi 200 8000000 72397 0.009050 -4.705032 gc 2.986015 msgc_lo2hi gc
15 ssgc 200 8000000 107141 0.013393 -4.313051 gc 33.363792 ssgc gc
16 msgc_hi2lo 200 8000000 107141 0.013393 -4.313051 gc 4.495979 msgc_hi2lo gc
17 msgc_lo2hi 200 8000000 107141 0.013393 -4.313051 gc 2.995989 msgc_lo2hi gc
18 ssgc 200 8000000 150317 0.018790 -3.974450 gc 34.987450 ssgc gc
19 msgc_hi2lo 200 8000000 150317 0.018790 -3.974450 gc 5.845373 msgc_hi2lo gc
20 msgc_lo2hi 200 8000000 150317 0.018790 -3.974450 gc 3.303167 msgc_lo2hi gc
21 ssgc 200 8000000 202377 0.025297 -3.677065 gc 41.066231 ssgc gc
22 msgc_hi2lo 200 8000000 202377 0.025297 -3.677065 gc 6.505608 msgc_hi2lo gc
23 msgc_lo2hi 200 8000000 202377 0.025297 -3.677065 gc 3.911508 msgc_lo2hi gc
24 ssgc 200 8000000 266438 0.033305 -3.402055 gc 35.604800 ssgc gc
25 msgc_hi2lo 200 8000000 266438 0.033305 -3.402055 gc 7.966698 msgc_hi2lo gc
26 msgc_lo2hi 200 8000000 266438 0.033305 -3.402055 gc 4.460077 msgc_lo2hi gc
27 ssgc 200 8000000 339024 0.042378 -3.161126 gc 33.340855 ssgc gc
28 msgc_hi2lo 200 8000000 339024 0.042378 -3.161126 gc 8.867294 msgc_hi2lo gc
29 msgc_lo2hi 200 8000000 339024 0.042378 -3.161126 gc 5.694742 msgc_lo2hi gc
... ... ... ... ... ... ... ... ... ...
270 ssgc 200 8000000 5654 0.000707 -7.254834 total 48.354741 ssgc total
271 msgc_hi2lo 200 8000000 5654 0.000707 -7.254834 total 31.445954 msgc_hi2lo total
272 msgc_lo2hi 200 8000000 5654 0.000707 -7.254834 total 12.425824 msgc_lo2hi total
273 ssgc 200 8000000 56107 0.007013 -4.959936 total 43.702777 ssgc total
274 msgc_hi2lo 200 8000000 56107 0.007013 -4.959936 total 34.284363 msgc_hi2lo total
275 msgc_lo2hi 200 8000000 56107 0.007013 -4.959936 total 28.776059 msgc_lo2hi total
276 ssgc 200 8000000 182047 0.022756 -3.782932 total 42.243080 ssgc total
277 msgc_hi2lo 200 8000000 182047 0.022756 -3.782932 total 39.491440 msgc_hi2lo total
278 msgc_lo2hi 200 8000000 182047 0.022756 -3.782932 total 79.791653 msgc_lo2hi total
279 ssgc 200 8000000 428533 0.053567 -2.926829 total 43.246367 ssgc total
280 msgc_hi2lo 200 8000000 428533 0.053567 -2.926829 total 43.210464 msgc_hi2lo total
281 msgc_lo2hi 200 8000000 428533 0.053567 -2.926829 total 171.862476 msgc_lo2hi total
282 ssgc 200 8000000 816188 0.102024 -2.282552 total 44.889974 ssgc total
283 msgc_hi2lo 200 8000000 816188 0.102024 -2.282552 total 54.154266 msgc_hi2lo total
284 msgc_lo2hi 200 8000000 816188 0.102024 -2.282552 total 349.147755 msgc_lo2hi total
285 ssgc 200 8000000 5654 0.000707 -7.254834 total 43.001019 ssgc total
286 msgc_hi2lo 200 8000000 5654 0.000707 -7.254834 total 31.801001 msgc_hi2lo total
287 msgc_lo2hi 200 8000000 5654 0.000707 -7.254834 total 9.312102 msgc_lo2hi total
288 ssgc 200 8000000 55085 0.006886 -4.978319 total 40.379034 ssgc total
289 msgc_hi2lo 200 8000000 55085 0.006886 -4.978319 total 34.881733 msgc_hi2lo total
290 msgc_lo2hi 200 8000000 55085 0.006886 -4.978319 total 28.357177 msgc_lo2hi total
291 ssgc 200 8000000 179201 0.022400 -3.798689 total 41.308549 ssgc total
292 msgc_hi2lo 200 8000000 179201 0.022400 -3.798689 total 37.511697 msgc_hi2lo total
293 msgc_lo2hi 200 8000000 179201 0.022400 -3.798689 total 65.575669 msgc_lo2hi total
294 ssgc 200 8000000 423029 0.052879 -2.939756 total 42.425529 ssgc total
295 msgc_hi2lo 200 8000000 423029 0.052879 -2.939756 total 47.291965 msgc_hi2lo total
296 msgc_lo2hi 200 8000000 423029 0.052879 -2.939756 total 160.700321 msgc_lo2hi total
297 ssgc 200 8000000 806947 0.100868 -2.293939 total 44.661588 ssgc total
298 msgc_hi2lo 200 8000000 806947 0.100868 -2.293939 total 51.946110 msgc_hi2lo total
299 msgc_lo2hi 200 8000000 806947 0.100868 -2.293939 total 353.250604 msgc_lo2hi total

300 rows × 9 columns


In [41]:
xx = "relative object size"
white_palette = [(1., 1., 1., 0.5)]
lm = sns.lmplot(
#     data=uu[uu["time type"] == "total"], 
    data=uu2, 
#     x="image voxel number", 
    x=xx,
    y="Time [s]", hue="mth", 
#     order=1, 
                scatter_kws={"s": 3, "marker": "x", "alpha": 0.5}, 
#                 line_kws={"alpha": 1},
#                 palette=current_palette[:3],
                palette=white_palette,
                legend=None,
    logx=True,
#     logy=True
#     legend="off"
               )
axes = lm.axes
# axes[0,0].set_xlim(30,uu2[xx].max())
# axes[0,0].set_ylim(0,50)
# yscale="log"

lm.set(
    xscale="log",
# #     yscale="log"
)

current_palette = sns.color_palette()
nlines = 3
n = 0
lines = lm.ax.get_lines()
line1 = lines[n]
line2 = lines[n + nlines]
line1.set_linestyle("--")
line1.set_color(current_palette[n])
line2.set_color(current_palette[n])
# line1.set_color(line2.get_color())

n = 1
lines = lm.ax.get_lines()
line1 = lines[n]
line2 = lines[n + nlines]
line1.set_linestyle("--")
line1.set_color(current_palette[n])
line2.set_color(current_palette[n])
# line1.set_color(line2.get_color())

n = 2
lines = lm.ax.get_lines()
line1 = lines[n]
line2 = lines[n + nlines]
line1.set_linestyle("--")
line1.set_color(current_palette[n])
line2.set_color(current_palette[n])
line1.set_color(line2.get_color())


# body
sc = sns.scatterplot(
    data=uu2[uu2["time type"] == "total"], x=xx, y="Time [s]", hue="mth", 
    ax=lm.ax, alpha=1.0, s=5
)
texts = lm.ax.legend_.get_texts()
leglines = lm.ax.legend_.get_lines()

# texts[0].set_text("short")
# texts.pop(0)
# texts.pop(0)

# plt.legend(lm.ax.legend_[3:6])

from matplotlib.lines import Line2D
custom_lines = [Line2D([0], [0], color=current_palette[0], lw=4),
                Line2D([0], [0], color=current_palette[1], lw=4),
                Line2D([0], [0], color=current_palette[2], lw=4)]
lm.ax.legend(
    custom_lines, 
    [tx.get_text()[:-3] for tx in texts[:3]],
#     ['Cold', 'Medium', 'Hot'],
)
# lg = lm.ax.legend()
# plt.savefig(op.join(latex_dir, "msgc_time_datasize_plot.pdf"), dpi=1000)


Out[41]:
<matplotlib.legend.Legend at 0x1e721dcce48>

In [42]:
xx = "log(relative object size)"
white_palette = [(1., 1., 1., 0.5)]
lm = sns.lmplot(
#     data=uu[uu["time type"] == "total"], 
    data=uu2, 
#     x="image voxel number", 
    x=xx,
    y="Time [s]", hue="mth", order=2, 
                scatter_kws={"s": 3, "marker": "x", "alpha": 0.5}, 
#                 line_kws={"alpha": 1},
#                 palette=current_palette[:3],
                palette=white_palette,
                legend=None,
#     legend="off"
               )
axes = lm.axes
axes[0,0].set_xlim(uu2[xx].min(),uu2[xx].max())
axes[0,0].set_ylim(0,50)

current_palette = sns.color_palette()
nlines = 3
n = 0
lines = lm.ax.get_lines()
line1 = lines[n]
line2 = lines[n + nlines]
line1.set_linestyle("--")
line1.set_color(current_palette[n])
line2.set_color(current_palette[n])
# line1.set_color(line2.get_color())

n = 1
lines = lm.ax.get_lines()
line1 = lines[n]
line2 = lines[n + nlines]
line1.set_linestyle("--")
line1.set_color(current_palette[n])
line2.set_color(current_palette[n])
# line1.set_color(line2.get_color())

n = 2
lines = lm.ax.get_lines()
line1 = lines[n]
line2 = lines[n + nlines]
line1.set_linestyle("--")
line1.set_color(current_palette[n])
line2.set_color(current_palette[n])
line1.set_color(line2.get_color())


# body
sc = sns.scatterplot(
    data=uu2[uu2["time type"] == "total"], x=xx, y="Time [s]", hue="mth", 
    ax=lm.ax, alpha=1.0, s=5
)
texts = lm.ax.legend_.get_texts()
leglines = lm.ax.legend_.get_lines()

# texts[0].set_text("short")
# texts.pop(0)
# texts.pop(0)

# plt.legend(lm.ax.legend_[3:6])

from matplotlib.lines import Line2D
custom_lines = [Line2D([0], [0], color=current_palette[0], lw=4),
                Line2D([0], [0], color=current_palette[1], lw=4),
                Line2D([0], [0], color=current_palette[2], lw=4)]
lm.ax.legend(
    custom_lines, 
    [tx.get_text()[:-3] for tx in texts[:3]],
#     ['Cold', 'Medium', 'Hot'],
)
# lg = lm.ax.legend()
# plt.savefig(op.join(latex_dir, "msgc_time_datasize_plot.pdf"), dpi=1000)


Out[42]:
<matplotlib.legend.Legend at 0x1e721e6a438>

In [43]:
## Volume ration

Statistics


In [44]:
from scipy import stats



dfs_plus_describe = dfs_plus.describe()
display(dfs_plus_describe)

print("pokud je pvalue mensi nez zvolena hladina vyznamnosti (0.01=1%), je vsechno ok")
tt = stats.ttest_rel(dfs_plus.loc[dfs_plus["method"] == "ssgc"]['time'], dfs_plus.loc[dfs_plus["method"] == "msgc_lo2hi"]['time'])
# tt


_create_nlinks time block size data image size px data object size px data offset data radius data seedsz data size edge number error ... t5 t6 t7 t8 t9 time tlinks number image voxel number relative object size log(relative object size)
count 123.000000 123.0 1.230000e+02 123.000000 123.000000 123.0 123.0 123.000000 1.230000e+02 123.0 ... 82.000000 82.000000 82.000000 41.000000 82.000000 123.000000 1.230000e+02 1.230000e+02 123.000000 123.000000
mean 1.460365 10.0 6.292836e+06 20970.365854 0.040000 10.0 3.0 182.439024 1.006576e+07 0.0 ... 7.812621 10.439942 11.246214 22.396002 17.912613 23.084985 2.521442e+06 6.191968e+06 0.003519 -5.662245
std 1.356393 0.0 1.534372e+06 1790.517286 0.009144 0.0 0.0 14.795406 1.122510e+07 0.0 ... 5.555318 3.722275 3.146358 5.885085 6.381470 9.096514 2.820764e+06 1.517977e+06 0.000560 0.161019
min 0.001994 10.0 4.251366e+06 18263.000000 0.030000 10.0 3.0 161.000000 2.023006e+06 0.0 ... 1.636625 4.761268 6.134597 14.872235 9.103665 10.693441 5.013340e+05 4.173281e+06 0.002604 -5.950865
25% 0.003990 10.0 5.000040e+06 19462.000000 0.030000 10.0 3.0 170.000000 2.474007e+06 0.0 ... 2.511262 7.271526 8.897183 17.652762 13.006748 16.092971 6.133455e+05 4.913000e+06 0.003025 -5.800855
50% 1.253683 10.0 5.831820e+06 20699.000000 0.040000 10.0 3.0 179.000000 2.712393e+06 0.0 ... 6.784342 10.393687 11.245465 20.948951 16.427074 20.940974 6.725010e+05 5.735339e+06 0.003503 -5.654180
75% 2.356696 10.0 7.414680e+06 22437.000000 0.050000 10.0 3.0 194.000000 1.991244e+07 0.0 ... 12.635707 12.989767 13.256064 25.679296 20.968905 26.544525 5.000040e+06 7.301384e+06 0.003961 -5.531176
max 5.320740 10.0 9.260790e+06 24638.000000 0.050000 10.0 3.0 209.000000 3.691086e+07 0.0 ... 19.852917 20.329675 20.329675 36.605654 36.647542 62.877877 9.260790e+06 9.129329e+06 0.004473 -5.409593

8 rows × 53 columns

pokud je pvalue mensi nez zvolena hladina vyznamnosti (0.01=1%), je vsechno ok

In [45]:
ssgc_rows = dfs_plus[dfs_plus["method"].str.contains(labels[0])]
ssgc_hi2lo_rows = dfs_plus[dfs_plus["method"].str.contains(labels[1])]
ssgc_lo2hi_rows = dfs_plus[dfs_plus["method"].str.contains(labels[2])]
pp0 = stats.ttest_rel(ssgc_rows["time"], ssgc_hi2lo_rows["time"])
pp1 = stats.ttest_rel(ssgc_rows["time"], ssgc_lo2hi_rows["time"])
pp2 = stats.ttest_rel(ssgc_hi2lo_rows["time"], ssgc_lo2hi_rows["time"])

print("pokud je pvalue mensi nez zvolena hladina vyznamnosti (0.01=1%), je vsechno ok") 
#mozna staci i dvojnasobek hladiny vyzamnosi
print("statistic musi byt vetsi nez 0")
display(pp0)
display(pp1)
display(pp2)

float_to_latex_file(pp0.pvalue, op.join(latex_dir, "ttest_pvalue_ssgc_msgc_hi2lo.tex"))
float_to_latex_file(pp1.pvalue, op.join(latex_dir, "ttest_pvalue_ssgc_msgc_lo2hi.tex"))
float_to_latex_file(pp2.pvalue, op.join(latex_dir, "ttest_pvalue_msgc_hi2lo_msgc_lo2hi.tex"))


pokud je pvalue mensi nez zvolena hladina vyznamnosti (0.01=1%), je vsechno ok
statistic musi byt vetsi nez 0
Ttest_relResult(statistic=4.197809231906577, pvalue=0.00014586011815140358)
Ttest_relResult(statistic=9.241651101451831, pvalue=1.784740931504529e-11)
Ttest_relResult(statistic=13.712192453675819, pvalue=1.0458893651364037e-16)

In [46]:
# dfs_plus["method"] == "ssgc "

Time evaluation


In [47]:
table = pd.pivot_table(
    dfs, values=['gc time', 'time', "t2", "t3", "t3.1", "t3.2", "t3.3", "t4", "t5", "t6", "t7", "t8", "low level object voxels", "low level image voxels"], index=['method'], 
    aggfunc=np.mean
)
table


Out[47]:
gc time low level image voxels low level object voxels t2 t3 t3.1 t3.2 t3.3 t4 t5 t6 t7 t8 time
method
msgc_hi2lo 1.805255 2916.303797 13.525316 0.066423 0.068504 NaN NaN NaN 0.524395 1.112787 3.228815 4.565952 10.152585 12.210139
msgc_lo2hi 1.244942 2916.303797 11.601266 0.082775 0.084915 0.084915 0.106336 7.377492 7.377492 8.203736 8.340086 8.340092 NaN 9.866378
ssgc 8.874517 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 11.190694

In [48]:
table = pd.pivot_table(
    dfs, values=['gc time', 'time', "t graph 01", "t graph 10", "t graph 11", "t graph 13", "t graph 14", "low level image voxels", "low level object voxels"], index=['method'], 
    aggfunc=np.mean
)
table


Out[48]:
gc time low level image voxels low level object voxels t graph 01 t graph 10 t graph 11 t graph 13 t graph 14 time
method
msgc_hi2lo 1.805255 2916.303797 13.525316 NaN NaN NaN NaN NaN 12.210139
msgc_lo2hi 1.244942 2916.303797 11.601266 0.020815 0.021415 0.021497 7.213204 7.292571 9.866378
ssgc 8.874517 NaN NaN NaN NaN NaN NaN NaN 11.190694

Datasets


In [49]:
dfs_describe = dfs.describe()
display(dfs_describe)
dfs_plus_describe = dfs_plus.describe()
display(dfs_plus_describe)


_create_nlinks time block size data image size px data object size px data offset data radius data seedsz data size edge number error ... t5 t6 t7 t8 t9 time tlinks number image voxel number relative object size log(relative object size)
count 474.000000 474.0 4.740000e+02 474.000000 474.000000 474.0 474.0 474.000000 4.740000e+02 474.0 ... 316.000000 316.000000 316.000000 158.000000 316.000000 474.000000 4.740000e+02 4.740000e+02 474.000000 474.000000
mean 0.628041 10.0 2.677025e+06 14931.170886 0.040000 10.0 3.0 122.132911 4.853243e+06 0.0 ... 4.658261 5.784450 6.453022 10.152585 9.264756 11.089070 1.215826e+06 2.625507e+06 0.017022 -4.599966
std 0.926779 0.0 2.491407e+06 4517.543448 0.008428 0.0 0.0 46.685775 7.073115e+06 0.0 ... 4.614842 4.422958 4.168487 8.524849 6.765083 9.210574 1.776536e+06 2.456507e+06 0.020462 0.975121
min 0.000953 10.0 9.108000e+04 7984.000000 0.030000 10.0 3.0 44.000000 3.582460e+05 0.0 ... 0.032879 0.085778 0.335139 0.475728 0.475730 0.332147 9.108000e+04 8.518400e+04 0.002604 -5.950865
25% 0.003011 10.0 5.926200e+05 11207.000000 0.030000 10.0 3.0 83.000000 1.666590e+06 0.0 ... 0.684423 1.696689 2.785044 3.691631 4.201267 4.400461 4.155102e+05 5.717870e+05 0.004473 -5.409593
50% 0.215392 10.0 1.860744e+06 14455.500000 0.040000 10.0 3.0 122.000000 2.281190e+06 0.0 ... 2.540690 5.323267 5.858352 7.313445 8.370618 9.157033 5.653340e+05 1.815848e+06 0.008163 -4.808105
75% 0.857464 10.0 4.251366e+06 18669.000000 0.050000 10.0 3.0 161.000000 2.887670e+06 0.0 ... 8.456605 9.070142 9.351213 14.938057 12.258962 14.932854 7.170060e+05 4.173281e+06 0.019675 -3.928398
max 5.320740 10.0 9.260790e+06 24638.000000 0.050000 10.0 3.0 209.000000 3.691086e+07 0.0 ... 19.852917 20.329675 20.329675 36.605654 36.647542 62.877877 9.260790e+06 9.129329e+06 0.095816 -2.345324

8 rows × 53 columns

_create_nlinks time block size data image size px data object size px data offset data radius data seedsz data size edge number error ... t5 t6 t7 t8 t9 time tlinks number image voxel number relative object size log(relative object size)
count 123.000000 123.0 1.230000e+02 123.000000 123.000000 123.0 123.0 123.000000 1.230000e+02 123.0 ... 82.000000 82.000000 82.000000 41.000000 82.000000 123.000000 1.230000e+02 1.230000e+02 123.000000 123.000000
mean 1.460365 10.0 6.292836e+06 20970.365854 0.040000 10.0 3.0 182.439024 1.006576e+07 0.0 ... 7.812621 10.439942 11.246214 22.396002 17.912613 23.084985 2.521442e+06 6.191968e+06 0.003519 -5.662245
std 1.356393 0.0 1.534372e+06 1790.517286 0.009144 0.0 0.0 14.795406 1.122510e+07 0.0 ... 5.555318 3.722275 3.146358 5.885085 6.381470 9.096514 2.820764e+06 1.517977e+06 0.000560 0.161019
min 0.001994 10.0 4.251366e+06 18263.000000 0.030000 10.0 3.0 161.000000 2.023006e+06 0.0 ... 1.636625 4.761268 6.134597 14.872235 9.103665 10.693441 5.013340e+05 4.173281e+06 0.002604 -5.950865
25% 0.003990 10.0 5.000040e+06 19462.000000 0.030000 10.0 3.0 170.000000 2.474007e+06 0.0 ... 2.511262 7.271526 8.897183 17.652762 13.006748 16.092971 6.133455e+05 4.913000e+06 0.003025 -5.800855
50% 1.253683 10.0 5.831820e+06 20699.000000 0.040000 10.0 3.0 179.000000 2.712393e+06 0.0 ... 6.784342 10.393687 11.245465 20.948951 16.427074 20.940974 6.725010e+05 5.735339e+06 0.003503 -5.654180
75% 2.356696 10.0 7.414680e+06 22437.000000 0.050000 10.0 3.0 194.000000 1.991244e+07 0.0 ... 12.635707 12.989767 13.256064 25.679296 20.968905 26.544525 5.000040e+06 7.301384e+06 0.003961 -5.531176
max 5.320740 10.0 9.260790e+06 24638.000000 0.050000 10.0 3.0 209.000000 3.691086e+07 0.0 ... 19.852917 20.329675 20.329675 36.605654 36.647542 62.877877 9.260790e+06 9.129329e+06 0.004473 -5.409593

8 rows × 53 columns

Dataset size


In [50]:
dfs_plus_size = int(len(dfs_plus) / len(labels))
to_file(str(dfs_plus_size), op.join(latex_dir, "msgc_dataset_subset_size.tex"))

In [51]:
dfs2_size = len(dfs2[dfs2["method"] == labels[0]])
to_file(str(dfs_plus_size), op.join(latex_dir, "msgc_dataset_subset2_size.tex"))

In [52]:
# df[["GC total time", "MSGC total time", "GC time", "MSGC time"]]
df.keys()


Out[52]:
Index(['_create_nlinks time', 'block size', 'data image size px',
       'data object size px', 'data offset', 'data radius', 'data seedsz',
       'data size', 'edge number', 'error', 'experiment',
       'experiment iteration start time', 'gc time', 'low level image voxels',
       'low level object voxels', 'machine hostname', 'method',
       'nlinks number', 't graph 01', 't graph 10', 't graph 11', 't graph 13',
       't graph 14', 't graph high', 't graph low', 't split 01', 't split 02',
       't split 03', 't split 04', 't split 05', 't split 06', 't split 07',
       't split 08', 't split 081', 't split 082', 't split 0821',
       't split 09', 't split 10', 't1', 't10', 't2', 't3', 't3.1', 't3.2',
       't3.3', 't4', 't5', 't6', 't7', 't8', 't9', 'time', 'tlinks number',
       'image voxel number', 'relative object size',
       'log(relative object size)'],
      dtype='object')

In [57]:
table = pd.pivot_table(
    df, values=['gc time', 'time', "error"],
    index=['method'], 
    aggfunc={'gc time': np.mean, 'time': [min, max, np.mean], "error":[min, max, np.mean]}
)
table


Out[57]:
error gc time time
max mean min mean max mean min
method
msgc_hi2lo 2.0 0.038462 0.0 3.324061 70.414725 19.704389 0.790861
msgc_lo2hi 2.0 0.038462 0.0 2.242018 412.448155 38.454763 0.825794
ssgc 2.0 0.038462 0.0 15.911705 76.873458 19.459985 0.332147

In [54]:
dfs.keys()


Out[54]:
Index(['_create_nlinks time', 'block size', 'data image size px',
       'data object size px', 'data offset', 'data radius', 'data seedsz',
       'data size', 'edge number', 'error', 'experiment',
       'experiment iteration start time', 'gc time', 'low level image voxels',
       'low level object voxels', 'machine hostname', 'method',
       'nlinks number', 't graph 01', 't graph 10', 't graph 11', 't graph 13',
       't graph 14', 't graph high', 't graph low', 't split 01', 't split 02',
       't split 03', 't split 04', 't split 05', 't split 06', 't split 07',
       't split 08', 't split 081', 't split 082', 't split 0821',
       't split 09', 't split 10', 't1', 't10', 't2', 't3', 't3.1', 't3.2',
       't3.3', 't4', 't5', 't6', 't7', 't8', 't9', 'time', 'tlinks number',
       'image voxel number', 'relative object size',
       'log(relative object size)'],
      dtype='object')

In [55]:
df_mn = df[["GC total time", "MSGC total time", "GC time", "MSGC time"]].describe()
display(df_mn)
to_latex_file(df_mn, "../includes/exp062-all2data_size.tex")

dfs_mn = dfs[["GC total time", "MSGC total time", "GC time", "MSGC time"]].describe()
display(dfs_mn)
to_latex_file(dfs_mn, "../includes/exp062-selection2data_size.tex")


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-55-e7e71b79ce86> in <module>
----> 1 df_mn = df[["GC total time", "MSGC total time", "GC time", "MSGC time"]].describe()
      2 display(df_mn)
      3 to_latex_file(df_mn, "../includes/exp062-all2data_size.tex")
      4 
      5 dfs_mn = dfs[["GC total time", "MSGC total time", "GC time", "MSGC time"]].describe()

~\Miniconda3\envs\lisa\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
   2932                 key = list(key)
   2933             indexer = self.loc._convert_to_indexer(key, axis=1,
-> 2934                                                    raise_missing=True)
   2935 
   2936         # take() does not accept boolean indexers

~\Miniconda3\envs\lisa\lib\site-packages\pandas\core\indexing.py in _convert_to_indexer(self, obj, axis, is_setter, raise_missing)
   1352                 kwargs = {'raise_missing': True if is_setter else
   1353                           raise_missing}
-> 1354                 return self._get_listlike_indexer(obj, axis, **kwargs)[1]
   1355         else:
   1356             try:

~\Miniconda3\envs\lisa\lib\site-packages\pandas\core\indexing.py in _get_listlike_indexer(self, key, axis, raise_missing)
   1159         self._validate_read_indexer(keyarr, indexer,
   1160                                     o._get_axis_number(axis),
-> 1161                                     raise_missing=raise_missing)
   1162         return keyarr, indexer
   1163 

~\Miniconda3\envs\lisa\lib\site-packages\pandas\core\indexing.py in _validate_read_indexer(self, key, indexer, axis, raise_missing)
   1244                 raise KeyError(
   1245                     u"None of [{key}] are in the [{axis}]".format(
-> 1246                         key=key, axis=self.obj._get_axis_name(axis)))
   1247 
   1248             # We (temporarily) allow for some missing keys with .loc, except in

KeyError: "None of [Index(['GC total time', 'MSGC total time', 'GC time', 'MSGC time'], dtype='object')] are in the [columns]"

In [ ]:
dfs_plus[["method"]]