In [24]:
%pylab inline
import sys
import os.path as op
import shutil
# sys.path.insert(0, "/home/mjirik/projects/pyseg_base/")
sys.path.insert(0, op.abspath("../"))
import scipy
import time
import pandas as pd
import platform
import itertools
from pathlib import Path
import lisa
from imcut import pycut
import sed3
latex_dir = Path("../../papers/cmbbeiv19/tmp/")
fname = "exp062-multiscale4.csv"
fnamenew = "msgc_experiment4.csv"
rnd_seed = 1
In [25]:
%pwd
Out[25]:
In [26]:
# block size was 10
segparams0 = {
'method':'graphcut',
# 'method':'multiscale_graphcut',
'use_boundary_penalties': True,
'boundary_dilatation_distance': 2,
'boundary_penalties_weight': 1,
'block_size': 10,
'tile_zoom_constant': 1,
"pairwise_alpha_per_square_unit": 45,
'return_only_object_with_seeds': True,
}
segparams1 = {
# 'method':'graphcut',
'method':'multiscale_graphcut_hi2lo',
'use_boundary_penalties': True,
'boundary_dilatation_distance': 2,
'boundary_penalties_weight': 1,
'block_size': 10,
'tile_zoom_constant': 1,
"pairwise_alpha_per_square_unit": 45,
'return_only_object_with_seeds': True,
}
segparams2 = {
# 'method':'graphcut',
'method':'multiscale_graphcut_lo2hi',
'use_boundary_penalties': True,
'boundary_dilatation_distance': 2,
'boundary_penalties_weight': 1,
'block_size': 10,
'tile_zoom_constant': 1,
"pairwise_alpha_per_square_unit": 45,
'return_only_object_with_seeds': True,
}
labels = [
"ssgc ",
"msgc_hi2lo ",
"msgc_lo2hi ",
]
segparamsTri = [segparams0, segparams1, segparams2]
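The three configurations above are identical except for the `method` key, which selects single-scale graph cut versus the two multiscale variants. A quick sanity check (a sketch, not part of the original run):
In [ ]:
# Sketch: confirm the three setups differ only in their 'method' key.
for label, sp in zip(labels, segparamsTri):
    print(label, sp["method"])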
In [27]:
def make_data(sz=32, offset=0, radius=7, seedsz=3):
    """Build a synthetic phantom: noisy uint8 volume, true segmentation and seeds."""
    rradius = radius / sz
seeds = np.zeros([sz, sz+1, sz+2], dtype=np.int8)
center = [
0.3 + offset,
0.3 + offset,
0.4 + offset
]
seeds[
int(center[0] * sz),
int(center[1] * sz):int((center[1] + (0.2 * rradius) ) * sz),
int(center[2] * sz):int((center[2] + (0.15 * rradius) ) * sz),
] = 1
seeds[
int((center[0] + (0.2 * rradius)) * sz),
int((center[1] + (0.1 * rradius)) * sz):int((center[1] + (0.35 * rradius) + (0.03 * seedsz)) * sz),
int((center[2] + (0.02 * rradius)) * sz):int((center[2] + (0.15 * rradius) + (0.03 * seedsz)) * sz),
] = 1
img = np.ones([sz, sz+1, sz+2])
img = img - seeds
seeds[
int((center[0] - (rradius * 0.5)) * sz):int((center[0] + (rradius * 0.5 ))* sz),
# int(3 + (seedsz * rradius * 0.5 * sz)),
2:int(3 + (seedsz * 0.03 * sz)),
2:int(3 + (seedsz * 0.03 * sz))
] = 2
    img = scipy.ndimage.distance_transform_edt(img)  # scipy.ndimage.morphology is deprecated
segm = img < radius
img = (100 * segm + 80 * np.random.random(img.shape)).astype(np.uint8)
return img, segm, seeds
def make_data_old(sz=32, offset=0, radius=7, seedsz=3):
#seedsz= int(sz/10)
space=2
seeds = np.zeros([sz, sz+1, sz+2], dtype=np.int8)
xmin = radius + seedsz + offset + 2
ymin = radius + seedsz + offset + 6
seeds[ offset + 12, xmin + 3:xmin + 7 + seedsz, ymin:ymin+2] = 1
seeds[ offset + 20, xmin + 7:xmin + 12 + seedsz, ymin+5:ymin+7] = 1
img = np.ones([sz, sz+1, sz+2])
img = img - seeds
seeds[
2:10 + seedsz,
2:9+ seedsz,
2:3+ seedsz] = 2
    img = scipy.ndimage.distance_transform_edt(img)  # scipy.ndimage.morphology is deprecated
segm = img < radius
img = (100 * segm + 80 * np.random.random(img.shape)).astype(np.uint8)
return img, segm, seeds
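A minimal usage sketch for `make_data` (not part of the original experiment): it returns a noisy uint8 volume of shape `(sz, sz+1, sz+2)`, the ground-truth segmentation, and a seed array with labels 0 (unlabeled), 1 (object) and 2 (background).
In [ ]:
np.random.seed(rnd_seed)
img, segm, seeds = make_data(sz=32, offset=0, radius=7, seedsz=3)
print(img.shape, img.dtype)     # (32, 33, 34) uint8
print(segm.sum(), "object voxels")
print(np.unique(seeds))         # [0 1 2]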
In [31]:
def to_latex_file(df, fn):
with open(fn, "w") as f:
f.write(df.to_latex())
def latex_float(f, precision=4):
float_str = "{0:." + str(int(precision)) + "g}"
float_str = float_str.format(f)
if "e" in float_str:
base, exponent = float_str.split("e")
return r"{0} \times 10^{{{1}}}".format(base, int(exponent))
else:
return float_str
def float_to_latex_file(fl, fn, precision=4):
string = latex_float(fl, precision=precision)
with open(fn, "w") as f:
f.write(string)
def num2latex(num, filename=None, precision=4):
    """Format a number (or pass a string through) as a LaTeX \\num{} macro."""
    if isinstance(num, str):
        float_str = num
    else:
        float_str = "{0:." + str(int(precision)) + "g}"
        float_str = float_str.format(num)
    if not float_str.startswith(r"\num"):
        float_str = "\\num{" + float_str + "}"
    if filename is not None:
        with open(filename, "w") as f:
            f.write(float_str)
    return float_str
def to_file(text, fn):
with open(fn, "w") as f:
f.write(text)
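Expected behaviour of the helpers above, for illustration:
In [ ]:
print(latex_float(1.23456e-7))   # 1.235 \times 10^{-7}
print(latex_float(0.1234567))    # 0.1235
print(num2latex(3.14159))        # \num{3.142}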
In [32]:
# better melt -- vendored extension of pandas.melt (multi-stub wide-to-long)
from pandas.core.dtypes.common import is_list_like
from pandas.core.frame import DataFrame, _shared_docs
from pandas import MultiIndex  # public path; pandas.core.index was removed in pandas 1.0
from IPython.display import display
import re
from pandas.core.tools.numeric import to_numeric
from pandas.util._decorators import Appender
import numpy as np
import pandas as pd
def _melt(frame, id_vars=None, value_vars=None, var_name=None,
value_name='value', col_level=None, stubnames=False,
suffix=r'\d+', sep='', extra_group=0, var_end=None):
# TODO: what about the existing index?
def check_vars(frame, var, var_string):
for v in var:
if num_col_levels > 1:
if not isinstance(v, tuple):
raise ValueError('{} must be a list of tuples'
' when columns are a MultiIndex'
.format(var_string))
elif len(v) != num_col_levels:
raise ValueError('all tuples in {} must be length {}'
.format(var_string,
frame.columns.nlevels))
else:
if is_list_like(v) and len(v) > 1:
raise ValueError('DataFrame has only a single level of '
'columns. {} is not a column'.format(v))
if len(col_level) == 0:
num_col_levels = frame.columns.nlevels
else:
num_col_levels = len(col_level)
check_vars(frame, id_vars, 'id_vars')
check_vars(frame, value_vars, 'value_vars')
if var_name != [] and len(var_name) != num_col_levels:
raise ValueError('Length of var_name must match effective number of '
'column levels.')
if col_level != []:
droplevels = list(range(frame.columns.nlevels))
for level in col_level:
if isinstance(level, int):
droplevels.remove(level)
else:
droplevels.remove(frame.columns.names.index(level))
if droplevels != []:
frame = frame.copy()
frame.columns = frame.columns.droplevel(droplevels)
if stubnames and isinstance(frame.columns, MultiIndex):
raise ValueError('Stubnames only work with single-index DataFrames')
for iv in id_vars:
if iv not in frame.columns:
raise KeyError('{} not in columns'.format(iv))
if value_vars != []:
for vv in value_vars:
if vv not in frame.columns:
raise KeyError('{} not in columns'.format(vv))
if var_name == []:
names = list(frame.columns.names)
if len(names) == 1:
if names[0] is None:
var_name.append('variable')
else:
var_name.append(names[0])
elif names.count(None) == 1:
names[names.index(None)] = 'variable'
var_name = names
else:
missing_name_count = 0
for name in names:
if name is None:
var_name.append('variable_{}'.format(missing_name_count))
missing_name_count += 1
else:
var_name.append(name)
if var_end is not None:
var_name = [vn + '_' + str(var_end) for vn in var_name]
N = len(frame)
non_id_ilocs = []
if value_vars != []:
for v in value_vars:
for i, v1 in enumerate(frame.columns):
if v == v1:
non_id_ilocs.append(i)
else:
if id_vars == []:
non_id_ilocs = list(range(frame.shape[1]))
else:
for i, v in enumerate(frame.columns):
if v not in id_vars:
non_id_ilocs.append(i)
K = len(non_id_ilocs)
mdata = {}
mcolumns = []
for col in id_vars:
pandas_obj = frame[col]
if isinstance(pandas_obj, DataFrame):
for i in range(pandas_obj.shape[1]):
col_name = col + '_id_' + str(i)
mdata[col_name] = np.tile(pandas_obj.iloc[:, i].values, K + extra_group)
mcolumns.append(col_name)
else:
mdata[col] = np.tile(pandas_obj, K + extra_group)
mcolumns.append(col)
values = np.concatenate([frame.iloc[:, i] for i in non_id_ilocs])
if extra_group > 0:
values = np.concatenate((values, np.full([N * extra_group], np.nan)))
mdata[value_name[0]] = values
for i, col in enumerate(var_name):
values = frame.columns[non_id_ilocs]._get_level_values(i)
if stubnames:
regex = '^{0}{1}'.format(re.escape(value_name[0]), re.escape(sep))
values = to_numeric(values.str.replace(regex, ''), errors='ignore')
if isinstance(values, MultiIndex):
# asanyarray will keep the columns as an Index
values = np.asanyarray(values).repeat(N)
else:
data_list = []
for v in values.tolist():
data_list.extend([v] * N)
values = data_list
if extra_group > 0:
values = np.concatenate((values, np.full([N * extra_group], np.nan)))
mdata[col] = values
mcolumns += var_name + value_name
return mdata, mcolumns
@Appender(_shared_docs['melt'] %
dict(caller='pd.melt(df, ',
versionadded="",
other='DataFrame.melt'))
def melt(frame, id_vars=None, value_vars=None, var_name=None,
value_name='value', col_level=None, stubnames=False,
suffix=r'\d+', sep=''):
def convert_to_list(val):
if val is None:
return []
elif not is_list_like(val):
return [val]
else:
return list(val)
def get_var_names(df, stub, sep, suffix):
regex = '^{0}{1}{2}$'.format(re.escape(stub), re.escape(sep), suffix)
col_return = [col for col in df.columns if re.match(regex, col)]
if col_return == []:
raise ValueError('No stubname {}'.format(stub))
return col_return
id_vars = convert_to_list(id_vars)
value_vars = convert_to_list(value_vars)
var_name = convert_to_list(var_name)
value_name = convert_to_list(value_name)
col_level = convert_to_list(col_level)
if stubnames:
if value_vars == []:
raise ValueError('Must provide stubnames as a list to value_vars')
value_name = value_vars
value_vars = [get_var_names(frame, stub, sep, suffix)
for stub in value_vars]
if var_name == []:
var_name = ['variable_' + v for v in value_name]
if value_vars != [] and isinstance(value_vars[0], list):
if var_name != []:
if len(value_vars) != len(var_name):
raise ValueError('Number of inner lists of value_vars must '
'equal length of var_name '
'{} != {}'.format(len(value_vars),
len(var_name)))
else:
var_name = [[]] * len(value_vars)
if len(value_name) > 1:
if len(value_vars) != len(value_name):
raise ValueError('Number of inner lists of value_vars must '
'equal length of value_name '
'{} != {}'.format(len(value_vars),
len(value_name)))
elif not stubnames:
value_name = [value_name[0] + '_' + str(i) for i in range(len(value_vars))]
value_vars_length = []
for vv in value_vars:
count = 0
for col in frame.columns.values:
if col in vv:
count += 1
value_vars_length.append(count)
max_group_len = max(value_vars_length)
mdata_list = []
mcolumns_list = []
vars_zipped = zip(value_vars, var_name, value_name, value_vars_length)
for i, (val_v, var_n, val_n, vvl) in enumerate(vars_zipped):
var_n = convert_to_list(var_n)
val_n = convert_to_list(val_n)
id_vars_ = [] if i > 0 else id_vars
var_end = i if var_n == [] else None
md, mc = _melt(frame, id_vars=id_vars_, value_vars=val_v,
var_name=var_n, value_name=val_n,
col_level=col_level, stubnames=stubnames,
suffix=suffix, sep=sep,
extra_group=max_group_len - vvl,
var_end=var_end)
mdata_list.append(md)
mcolumns_list.append(mc)
mdata = {}
for d in mdata_list:
mdata.update(d)
mcolumns = [e for lst in mcolumns_list for e in lst]
return DataFrame(mdata, columns=mcolumns)
else:
mdata, mcolumns = _melt(frame, id_vars=id_vars, value_vars=value_vars,
var_name=var_name, value_name=value_name,
col_level=col_level, stubnames=stubnames,
suffix=suffix, sep=sep)
return DataFrame(mdata, columns=mcolumns)
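A toy example of the extended `melt` (a sketch written against the older pandas API this vendored code targets). With `stubnames=True`, the suffix after `sep` is parsed into the variable column:
In [ ]:
toy = pd.DataFrame({"id": [1, 2], "a_1": [10, 20], "a_2": [30, 40]})
# -> columns: id, variable_a (1 or 2, parsed from the suffix), a (values)
melt(toy, id_vars="id", value_vars="a", stubnames=True, sep="_")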
In [33]:
def process_gc_stats(stats1, prefix=None):
    """Prefix stat keys and reduce n-link/t-link shape lists to scalar edge counts."""
    if prefix is None:
        prefix = ""
outstats = {}
for key in stats1:
outstats[prefix + key] = stats1[key]
outstats[prefix + "nlinks number"] = np.sum(np.asarray(outstats[prefix + "nlinks shape"]), axis=0)[0]
outstats[prefix + "tlinks number"] = np.sum(np.asarray(outstats[prefix + "tlinks shape"]), axis=0)[0]
outstats.pop(prefix + "tlinks shape")
outstats.pop(prefix + "nlinks shape")
outstats[prefix + "edge number"] = outstats[prefix + "nlinks number"] + outstats[prefix + "tlinks number"]
return outstats
def merge_stats(stats0, stats1, stats2, labels=None):
if labels is None:
labels = [""] * 3
stats0 = process_gc_stats(stats0, labels[0])
stats1 = process_gc_stats(stats1, labels[1])
stats2 = process_gc_stats(stats2, labels[2])
stats = {}
stats.update(stats0)
stats.update(stats1)
stats.update(stats2)
return stats
def run_gc_with_defined_setup(img, segparams, seeds, true_seg, voxelsize_mm, experiment_label=""):
    """Run one graph-cut segmentation and collect timing, error and volumetry stats."""
    start = time.time()
gc = pycut.ImageGraphCut(img, segparams=segparams)
gc.set_seeds(seeds)
gc.run()
sg1 = gc.segmentation
    sg1 = (sg1 == 0).astype(int)  # np.int was removed in NumPy 1.24
stats1 = gc.stats
elapsed1 = (time.time() - start)
err1 = np.sum(np.abs(true_seg - sg1))
stats1["time"] = elapsed1
stats1["error"] = err1
stats1["experiment"] = experiment_label
stats2 = lisa.volumetry_evaluation.compare_volumes(sg1, true_seg, voxelsize_mm)
stats1.update(stats2)
return stats1
def run_gc_3_times(data_params, segparamsTri, experiment_label, i, df, dfnew):
    """Run ssgc, msgc_hi2lo and msgc_lo2hi on one phantom and checkpoint results to CSV."""
    start = time.time()
voxelsize_mm = [1., 1., 1.]
img, seg, seeds = make_data(data_params[0], data_params[1], data_params[2], data_params[3])
stats0 = run_gc_with_defined_setup(img, segparamsTri[0], seeds, seg, voxelsize_mm, experiment_label=experiment_label)
stats1 = run_gc_with_defined_setup(img, segparamsTri[1], seeds, seg, voxelsize_mm, experiment_label=experiment_label)
stats2 = run_gc_with_defined_setup(img, segparamsTri[2], seeds, seg, voxelsize_mm, experiment_label=experiment_label)
stats = merge_stats(stats0, stats1, stats2, labels)
    stats = add_data_and_algorithm_info(stats, data_params, segparamsTri[0], start, seg)
    dfi = pd.DataFrame(stats, index=[i])
    # display(df)
    df = pd.concat([df, dfi], sort=True)  # DataFrame.append was removed in pandas 2.0
    df.to_csv(fname, index=False)
    dfinew = add_data_seaborn(stats0, data_params, segparamsTri[0], start, seg, i, labels[0])
    dfnew = pd.concat([dfnew, dfinew], sort=True)
    dfinew = add_data_seaborn(stats1, data_params, segparamsTri[1], start, seg, i, labels[1])
    dfnew = pd.concat([dfnew, dfinew], sort=True)
    dfinew = add_data_seaborn(stats2, data_params, segparamsTri[2], start, seg, i, labels[2])
    dfnew = pd.concat([dfnew, dfinew], sort=True)
    dfnew.to_csv(fnamenew, index=False)
return df, dfnew
def add_data_and_algorithm_info(stats, data_params, segparams, start, true_seg):
machine_hostname = platform.node()
# stats['msgc time'] = elapsed1
# stats['normal time'] = elapsed2
stats['data size'] = data_params[0]
stats['data offset'] = data_params[1]
stats['data radius'] = data_params[2]
stats["block size"] = segparams["block_size"]
stats["data seedsz"] = data_params[3]
stats["data image size px"] = np.prod(true_seg.shape)
stats["data object size px"] = np.sum(true_seg > 0)
# stats["GC error"] = err2
# stats["MSGC error"] = err1
stats['machine hostname'] = machine_hostname
stats['experiment iteration start time'] = start
return stats
def add_data_seaborn(stats, data_params, segparams, start, true_seg, i, label):
    """Build a one-row DataFrame (one method per row) for the long-format CSV."""
    stats = process_gc_stats(stats, "")
    stats = add_data_and_algorithm_info(stats, data_params, segparams, start, true_seg)
    stats["method"] = label
    dfinew = pd.DataFrame(stats, index=[i * 3])
    return dfinew
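A sketch of the driver loop that ties the pieces together. The parameter grid below is hypothetical (the actual grid used for exp062 is not shown in this section); each combination runs all three methods, and results are checkpointed to the CSV files inside run_gc_3_times.
In [ ]:
df = pd.DataFrame()
dfnew = pd.DataFrame()
for i, (sz, offset) in enumerate(itertools.product([32, 64], [0, 3])):
    data_params = [sz, offset, 7, 3]   # [size, offset, radius, seedsz]
    experiment_label = "sz{}-off{}".format(sz, offset)
    df, dfnew = run_gc_3_times(data_params, segparamsTri, experiment_label, i, df, dfnew)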