In [24]:
%pylab inline

import sys
import os.path as op
import shutil
sys.path.insert(0, op.abspath("../"))
import scipy
import time
import pandas as pd
import platform
import itertools
from pathlib import Path
import lisa
from imcut import pycut
import sed3

latex_dir = Path("../../papers/cmbbeiv19/tmp/")


fname = "exp062-multiscale4.csv"
fnamenew = "msgc_experiment4.csv"

rnd_seed = 1


Populating the interactive namespace from numpy and matplotlib
C:\Users\Jirik\Miniconda3\envs\lisa\lib\site-packages\IPython\core\magics\pylab.py:160: UserWarning: pylab import has clobbered these variables: ['copy']
`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"

In [25]:
%pwd


Out[25]:
'C:\\Users\\Jirik\\projects\\imcut\\examples'

Methods setup


In [26]:
# block size was 10
segparams0 = {
    'method': 'graphcut',
    'use_boundary_penalties': True,
    'boundary_dilatation_distance': 2,
    'boundary_penalties_weight': 1,
    'block_size': 10,
    'tile_zoom_constant': 1,
    'pairwise_alpha_per_square_unit': 45,
    'return_only_object_with_seeds': True,
}

segparams1 = {
    'method': 'multiscale_graphcut_hi2lo',
    'use_boundary_penalties': True,
    'boundary_dilatation_distance': 2,
    'boundary_penalties_weight': 1,
    'block_size': 10,
    'tile_zoom_constant': 1,
    'pairwise_alpha_per_square_unit': 45,
    'return_only_object_with_seeds': True,
}

segparams2 = {
    'method': 'multiscale_graphcut_lo2hi',
    'use_boundary_penalties': True,
    'boundary_dilatation_distance': 2,
    'boundary_penalties_weight': 1,
    'block_size': 10,
    'tile_zoom_constant': 1,
    'pairwise_alpha_per_square_unit': 45,
    'return_only_object_with_seeds': True,
}


# Trailing spaces are intentional: the labels double as key prefixes in merge_stats().
labels = [
    "ssgc ",
    "msgc_hi2lo ",
    "msgc_lo2hi ",
]

segparamsTri = [segparams0, segparams1, segparams2]
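
The three configurations differ only in the `method` key. A single run with any of them follows the same pattern (a minimal sketch mirroring `run_gc_with_defined_setup` below; `make_data` is defined in the next cell):

img, seg, seeds = make_data(sz=32)
gc = pycut.ImageGraphCut(img, segparams=segparams1)  # multiscale hi2lo
gc.set_seeds(seeds)
gc.run()
binary = (gc.segmentation == 0)  # label 0 marks the object carrying the seeds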

In [27]:
def make_data(sz=32, offset=0, radius=7, seedsz=3):
    """Generate a synthetic volume: an object grown around the foreground
    seeds (distance transform thresholded at `radius`), plus noise.

    Returns the noisy image, the ground-truth segmentation and the seeds.
    """
    rradius = radius / sz
    seeds = np.zeros([sz, sz + 1, sz + 2], dtype=np.int8)
    center = [
        0.3 + offset,
        0.3 + offset,
        0.4 + offset
    ]
    # two foreground seed patches placed relative to the object center
    seeds[
        int(center[0] * sz),
        int(center[1] * sz):int((center[1] + (0.2 * rradius)) * sz),
        int(center[2] * sz):int((center[2] + (0.15 * rradius)) * sz),
    ] = 1
    seeds[
        int((center[0] + (0.2 * rradius)) * sz),
        int((center[1] + (0.1 * rradius)) * sz):int((center[1] + (0.35 * rradius) + (0.03 * seedsz)) * sz),
        int((center[2] + (0.02 * rradius)) * sz):int((center[2] + (0.15 * rradius) + (0.03 * seedsz)) * sz),
    ] = 1
    img = np.ones([sz, sz + 1, sz + 2])
    img = img - seeds

    # background seed block near the volume corner
    seeds[
        int((center[0] - (rradius * 0.5)) * sz):int((center[0] + (rradius * 0.5)) * sz),
        2:int(3 + (seedsz * 0.03 * sz)),
        2:int(3 + (seedsz * 0.03 * sz))
    ] = 2
    # distance from the foreground seeds defines the object; add noise on top
    img = scipy.ndimage.distance_transform_edt(img)
    segm = img < radius
    img = (100 * segm + 80 * np.random.random(img.shape)).astype(np.uint8)
    return img, segm, seeds

def make_data_old(sz=32, offset=0, radius=7, seedsz=3):
    """Earlier variant of make_data() with absolute seed placement."""
    seeds = np.zeros([sz, sz + 1, sz + 2], dtype=np.int8)
    xmin = radius + seedsz + offset + 2
    ymin = radius + seedsz + offset + 6
    seeds[offset + 12, xmin + 3:xmin + 7 + seedsz, ymin:ymin + 2] = 1
    seeds[offset + 20, xmin + 7:xmin + 12 + seedsz, ymin + 5:ymin + 7] = 1
    img = np.ones([sz, sz + 1, sz + 2])
    img = img - seeds
    seeds[
        2:10 + seedsz,
        2:9 + seedsz,
        2:3 + seedsz] = 2
    img = scipy.ndimage.distance_transform_edt(img)
    segm = img < radius
    img = (100 * segm + 80 * np.random.random(img.shape)).astype(np.uint8)
    return img, segm, seeds
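
A quick smoke test of the generator (a sketch; expected values as comments). The volume is deliberately non-cubic so that axis mix-ups surface early:

img, segm, seeds = make_data(sz=32, offset=0, radius=7, seedsz=3)
print(img.shape, img.dtype)   # (32, 33, 34) uint8
print(np.unique(seeds))       # [0 1 2]: unlabeled, object seeds, background seeds
print(segm.sum())             # object size in voxels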

LaTeX export functions


In [31]:
def to_latex_file(df, fn):
    with open(fn, "w") as f:
        f.write(df.to_latex())
        
def latex_float(f, precision=4):
    float_str = "{0:." + str(int(precision)) + "g}"
    float_str = float_str.format(f)
    if "e" in float_str:
        base, exponent = float_str.split("e")
        return r"{0} \times 10^{{{1}}}".format(base, int(exponent))
    else:
        return float_str
    
def float_to_latex_file(fl, fn, precision=4):
    string = latex_float(fl, precision=precision)
    with open(fn, "w") as f:
        f.write(string)

def num2latex(num, filename=None, precision=4):
    """Wrap a number (or a preformatted string) in a siunitx \\num{} macro."""
    if isinstance(num, str):
        float_str = num
    else:
        float_str = "{0:." + str(int(precision)) + "g}"
        float_str = float_str.format(num)

    if not float_str.startswith(r"\num"):
        float_str = "\\num{" + float_str + "}"
    if filename is not None:
        with open(filename, "w") as f:
            f.write(float_str)
    return float_str

def to_file(text, fn):
    with open(fn, "w") as f:
        f.write(text)
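
The helpers can be exercised directly (a sketch; expected outputs as comments):

print(latex_float(1.23e-07))      # 1.23 \times 10^{-7}
print(latex_float(0.000123456))   # 0.0001235
print(num2latex(0.5))             # \num{0.5}
print(num2latex(r"\num{0.5}"))    # \num{0.5}  (already wrapped, returned unchanged)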

Melt


In [32]:
# Extended pandas melt: supports melting several groups of value_vars at once
# and wide-to-long stubnames. It relies on a few pandas internals, so it is
# tied to the pandas version used in this environment.
import re
import numpy as np
import pandas as pd
from pandas import DataFrame, MultiIndex, to_numeric
from pandas.api.types import is_list_like
from pandas.util._decorators import Appender
from pandas.core.frame import _shared_docs  # pandas internal, version-dependent
from IPython.display import display


def _melt(frame, id_vars=None, value_vars=None, var_name=None,
          value_name='value', col_level=None, stubnames=False,
          suffix=r'\d+', sep='', extra_group=0, var_end=None):
    # TODO: what about the existing index?

    def check_vars(frame, var, var_string):
        for v in var:
            if num_col_levels > 1:
                if not isinstance(v, tuple):
                    raise ValueError('{} must be a list of tuples'
                                     ' when columns are a MultiIndex'
                                     .format(var_string))
                elif len(v) != num_col_levels:
                    raise ValueError('all tuples in {} must be length {}'
                                     .format(var_string,
                                             frame.columns.nlevels))
            else:
                if is_list_like(v) and len(v) > 1:
                    raise ValueError('DataFrame has only a single level of '
                                     'columns. {} is not a column'.format(v))

    if len(col_level) == 0:
        num_col_levels = frame.columns.nlevels
    else:
        num_col_levels = len(col_level)

    check_vars(frame, id_vars, 'id_vars')
    check_vars(frame, value_vars, 'value_vars')

    if var_name != [] and len(var_name) != num_col_levels:
        raise ValueError('Length of var_name must match effective number of '
                         'column levels.')
    
    if col_level != []:
        droplevels = list(range(frame.columns.nlevels))
        for level in col_level:
            if isinstance(level, int):
                droplevels.remove(level)
            else:
                droplevels.remove(frame.columns.names.index(level))
        if droplevels != []:
            frame = frame.copy()
            frame.columns = frame.columns.droplevel(droplevels)

    if stubnames and isinstance(frame.columns, MultiIndex):
        raise ValueError('Stubnames only work with single-index DataFrames')
        
    for iv in id_vars:
        if iv not in frame.columns:
            raise KeyError('{} not in columns'.format(iv))

    if value_vars != []:
        for vv in value_vars:
            if vv not in frame.columns:
                raise KeyError('{} not in columns'.format(vv))
                
    if var_name == []:
        names = list(frame.columns.names)
        if len(names) == 1:
            if names[0] is None:
                var_name.append('variable')
            else:
                var_name.append(names[0])
        elif names.count(None) == 1:
            names[names.index(None)] = 'variable'
            var_name = names
        else:
            missing_name_count = 0
            for name in names:
                if name is None:
                    var_name.append('variable_{}'.format(missing_name_count))
                    missing_name_count += 1
                else:
                    var_name.append(name)
    if var_end is not None:
        var_name = [vn + '_' + str(var_end) for vn in var_name]
    
    N = len(frame)
    
    non_id_ilocs = []
    if value_vars != []:
        for v in value_vars:
            for i, v1 in enumerate(frame.columns):
                if v == v1:
                    non_id_ilocs.append(i)
    else:
        if id_vars == []:
            non_id_ilocs = list(range(frame.shape[1]))
        else:
            for i, v in enumerate(frame.columns):
                if v not in id_vars:
                    non_id_ilocs.append(i)
                        
    K = len(non_id_ilocs)

    mdata = {}
    mcolumns = []
    for col in id_vars:
        pandas_obj = frame[col]
        if isinstance(pandas_obj, DataFrame):
            for i in range(pandas_obj.shape[1]):
                col_name = col + '_id_' + str(i)
                mdata[col_name] = np.tile(pandas_obj.iloc[:, i].values, K + extra_group)
                mcolumns.append(col_name)
        else:
            mdata[col] = np.tile(pandas_obj, K + extra_group)
            mcolumns.append(col)

    values = np.concatenate([frame.iloc[:, i] for i in non_id_ilocs])
    if extra_group > 0:
        values = np.concatenate((values, np.full([N * extra_group], np.nan)))
    mdata[value_name[0]] = values
    
    for i, col in enumerate(var_name):
        values = frame.columns[non_id_ilocs]._get_level_values(i)
        if stubnames:
            regex = '^{0}{1}'.format(re.escape(value_name[0]), re.escape(sep))
            values = to_numeric(values.str.replace(regex, ''), errors='ignore')
        if isinstance(values, MultiIndex):
            # asanyarray will keep the columns as an Index
            values = np.asanyarray(values).repeat(N)
        else: 
            data_list = []
            for v in values.tolist():
                data_list.extend([v] * N)
            values = data_list
        if extra_group > 0:
            values = np.concatenate((values, np.full([N * extra_group], np.nan)))
        mdata[col] = values
    mcolumns += var_name + value_name
    
    return mdata, mcolumns


@Appender(_shared_docs['melt'] %
          dict(caller='pd.melt(df, ',
               versionadded="",
               other='DataFrame.melt'))
def melt(frame, id_vars=None, value_vars=None, var_name=None,
         value_name='value', col_level=None, stubnames=False,
         suffix=r'\d+', sep=''):
    def convert_to_list(val):
        if val is None:
            return []
        elif not is_list_like(val):
            return [val]
        else:
            return list(val)

    def get_var_names(df, stub, sep, suffix):
        regex = '^{0}{1}{2}$'.format(re.escape(stub), re.escape(sep), suffix)
        col_return = [col for col in df.columns if re.match(regex, col)]
        if col_return == []:
            raise ValueError('No stubname {}'.format(stub))
        return col_return

    id_vars = convert_to_list(id_vars)
    value_vars = convert_to_list(value_vars)
    var_name = convert_to_list(var_name)
    value_name = convert_to_list(value_name)
    col_level = convert_to_list(col_level)

    if stubnames:
        if value_vars == []:
            raise ValueError('Must provide stubnames as a list to value_vars')
        value_name = value_vars
        value_vars = [get_var_names(frame, stub, sep, suffix)
                      for stub in value_vars]
        if var_name == []:
            var_name = ['variable_' + v for v in value_name]

    if value_vars != [] and isinstance(value_vars[0], list):
        if var_name != []:
            if len(value_vars) != len(var_name):
                raise ValueError('Number of inner lists of value_vars must '
                                 'equal length of var_name '
                                 '{} != {}'.format(len(value_vars),
                                                   len(var_name)))
        else:
            var_name = [[]] * len(value_vars)

        if len(value_name) > 1:
            if len(value_vars) != len(value_name):
                raise ValueError('Number of inner lists of value_vars must '
                                 'equal length of value_name '
                                 '{} != {}'.format(len(value_vars),
                                                   len(value_name)))
        elif not stubnames:
            value_name = [value_name[0] + '_' + str(i) for i in range(len(value_vars))]

        value_vars_length = []
        for vv in value_vars:
            count = 0
            for col in frame.columns.values:
                if col in vv:
                    count += 1
            value_vars_length.append(count)
        max_group_len = max(value_vars_length)  

        mdata_list = []
        mcolumns_list = []
        vars_zipped = zip(value_vars, var_name, value_name, value_vars_length)
        for i, (val_v, var_n, val_n, vvl) in enumerate(vars_zipped):
            var_n = convert_to_list(var_n)
            val_n = convert_to_list(val_n)

            id_vars_ = [] if i > 0 else id_vars
            var_end = i if var_n == [] else None
            
            md, mc = _melt(frame, id_vars=id_vars_, value_vars=val_v,
                       var_name=var_n, value_name=val_n,
                       col_level=col_level, stubnames=stubnames,
                       suffix=suffix, sep=sep, 
                       extra_group=max_group_len - vvl,
                       var_end=var_end)

            mdata_list.append(md)
            mcolumns_list.append(mc)
            
        mdata = {}
        for d in mdata_list:
            mdata.update(d)
            
        mcolumns = [e for lst in mcolumns_list for e in lst]
        return DataFrame(mdata, columns=mcolumns)

    else:
        mdata, mcolumns = _melt(frame, id_vars=id_vars, value_vars=value_vars,
                                var_name=var_name, value_name=value_name,
                                col_level=col_level, stubnames=stubnames,
                                suffix=suffix, sep=sep)
        return DataFrame(mdata, columns=mcolumns)
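
The main extension over plain pd.melt is melting several groups of columns side by side. A minimal sketch with hypothetical column names (assuming the pandas-internal imports above resolve in this environment):

wide = pd.DataFrame({
    "id": [1, 2],
    "t1": [0.1, 0.2], "t2": [0.3, 0.4],
    "e1": [10, 20], "e2": [30, 40],
})
long = melt(wide, id_vars="id",
            value_vars=[["t1", "t2"], ["e1", "e2"]],
            var_name=["time_var", "err_var"],
            value_name=["time", "err"])
# columns: id, time_var, time, err_var, err -- one row per (id, melted column)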

Synthetic data, repeated experiment


In [33]:
def process_gc_stats(stats1, prefix=None):
    """Prefix every stat key and replace n-link/t-link shape lists with counts."""
    if prefix is None:
        prefix = ""

    outstats = {}
    for key in stats1:
        outstats[prefix + key] = stats1[key]

    outstats[prefix + "nlinks number"] = np.sum(np.asarray(outstats[prefix + "nlinks shape"]), axis=0)[0]
    outstats[prefix + "tlinks number"] = np.sum(np.asarray(outstats[prefix + "tlinks shape"]), axis=0)[0]
    outstats.pop(prefix + "tlinks shape")
    outstats.pop(prefix + "nlinks shape")
    outstats[prefix + "edge number"] = outstats[prefix + "nlinks number"] + outstats[prefix + "tlinks number"]

    return outstats

    
def merge_stats(stats0, stats1, stats2, labels=None):
    if labels is None:
        labels = [""] * 3

    stats0 = process_gc_stats(stats0, labels[0])
    stats1 = process_gc_stats(stats1, labels[1])
    stats2 = process_gc_stats(stats2, labels[2])
    stats = {}
    stats.update(stats0)
    stats.update(stats1)
    stats.update(stats2)

    return stats
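
# Example (hypothetical stats dicts): process_gc_stats() collapses the n-link/
# t-link shape lists into edge counts and prefixes every key, so merge_stats()
# can pack all three methods into a single flat record:
#   process_gc_stats({"nlinks shape": [[10, 3]], "tlinks shape": [[5, 2]]}, "ssgc ")
#   -> {"ssgc nlinks number": 10, "ssgc tlinks number": 5, "ssgc edge number": 15}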

def run_gc_with_defined_setup(img, segparams, seeds, true_seg, voxelsize_mm, experiment_label=""):
    start = time.time()
    gc = pycut.ImageGraphCut(img, segparams=segparams)
    gc.set_seeds(seeds)
    gc.run()
    sg1 = gc.segmentation
    # label 0 marks the object with seeds; invert to a binary foreground mask
    sg1 = (sg1 == 0).astype(int)
    stats1 = gc.stats
    elapsed1 = time.time() - start
    err1 = np.sum(np.abs(true_seg - sg1))
    stats1["time"] = elapsed1
    stats1["error"] = err1
    stats1["experiment"] = experiment_label

    stats2 = lisa.volumetry_evaluation.compare_volumes(sg1, true_seg, voxelsize_mm)
    stats1.update(stats2)
    return stats1

def run_gc_3_times(data_params, segparamsTri, experiment_label, i, df, dfnew):
    start = time.time()
    voxelsize_mm = [1., 1., 1.]
    img, seg, seeds = make_data(*data_params)
    stats0 = run_gc_with_defined_setup(img, segparamsTri[0], seeds, seg, voxelsize_mm, experiment_label=experiment_label)
    stats1 = run_gc_with_defined_setup(img, segparamsTri[1], seeds, seg, voxelsize_mm, experiment_label=experiment_label)
    stats2 = run_gc_with_defined_setup(img, segparamsTri[2], seeds, seg, voxelsize_mm, experiment_label=experiment_label)
    stats = merge_stats(stats0, stats1, stats2, labels)

    stats = add_data_and_algorithm_info(stats, data_params, segparams0, start, seg)

    dfi = pd.DataFrame(stats, index=[i])

    # wide table: one row per experiment, all three methods side by side
    df = pd.concat([df, dfi], sort=True)
    df.to_csv(fname, index=False)

    # long table: one row per method, convenient for seaborn
    dfinew = add_data_seaborn(stats0, data_params, segparamsTri[0], start, seg, i, labels[0])
    dfnew = pd.concat([dfnew, dfinew], sort=True)
    dfinew = add_data_seaborn(stats1, data_params, segparamsTri[1], start, seg, i, labels[1])
    dfnew = pd.concat([dfnew, dfinew], sort=True)
    dfinew = add_data_seaborn(stats2, data_params, segparamsTri[2], start, seg, i, labels[2])
    dfnew = pd.concat([dfnew, dfinew], sort=True)

    dfnew.to_csv(fnamenew, index=False)
    return df, dfnew


def add_data_and_algorithm_info(stats, data_params, segparams, start, true_seg):
    """Attach data-generation parameters and machine info to a stats record."""
    machine_hostname = platform.node()
    stats['data size'] = data_params[0]
    stats['data offset'] = data_params[1]
    stats['data radius'] = data_params[2]
    stats["block size"] = segparams["block_size"]
    stats["data seedsz"] = data_params[3]
    stats["data image size px"] = np.prod(true_seg.shape)
    stats["data object size px"] = np.sum(true_seg > 0)
    stats['machine hostname'] = machine_hostname
    stats['experiment iteration start time'] = start

    return stats

def add_data_seaborn(stats, data_params, segparams, start, true_seg, i, label):
    """Build a one-row DataFrame for the long-format (per-method) results table."""
    stats = process_gc_stats(stats, "")
    stats = add_data_and_algorithm_info(stats, data_params, segparams, start, true_seg)
    stats["method"] = label
    dfinew = pd.DataFrame(stats, index=[i * 3])
    return dfinew
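
The pieces above combine into the experiment driver. A minimal sketch of the loop (the parameter grid here is illustrative only; the actual grid used for the experiments follows later in the notebook):

np.random.seed(rnd_seed)
df = pd.DataFrame()
dfnew = pd.DataFrame()
for i, (sz, offset) in enumerate(itertools.product([32, 64], [0, 3])):
    data_params = [sz, offset, 7, 3]  # size, offset, radius, seedsz
    df, dfnew = run_gc_3_times(data_params, segparamsTri, "msgc_experiment", i, df, dfnew)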