In [1]:
import sys
import glob
import re
import fnmatch
import math
import os
from os import listdir
from os.path import join, isfile, basename

import itertools

import numpy as np
from numpy import float32, int32, uint8, dtype, genfromtxt

import scipy
from scipy.stats import ttest_ind

import pandas as pd

import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt

import seaborn as sns

import colorsys

In [2]:
# Label ids that the per-label distribution fits iterate over.
labels = [
    16, 64, 8, 32, 2, 4, 65, 66, 33, 67,
    34, 17, 69, 70, 35, 71, 9, 18, 72, 36,
    73, 74, 37, 75, 19, 76, 38, 77, 39, 78,
    79, 20, 5, 40, 80, 10, 81, 82, 83, 84,
    85, 86, 11, 22, 23, 24, 12, 3, 6, 49,
    50, 25, 51, 13, 52, 26, 53, 27, 54, 55,
    56, 28, 7, 14, 57, 58, 29, 59, 30, 60,
    15, 61, 31, 62, 63,
]

# Tab-separated index table; its first row holds the column names.
label_names_file = '/groups/saalfeld/home/bogovicj/vfb/DrosAdultBRAINdomains/refData/Original_Index.tsv'
label_names = pd.read_csv( label_names_file, sep='\t', header=0 )
# print label_names[ label_names['Stack id'] == 11 ]['JFRCtempate2010.mask130819' ].iloc[0]
# print label_names[ label_names['Stack id'] == 70 ]['JFRCtempate2010.mask130819' ].iloc[0]

def get_label_name( label_id ):
    """Return the name recorded for a label id in the `label_names` table.

    Rows are matched on the 'Stack id' column and the value is read from the
    'JFRCtempate2010.mask130819' column; the first match wins.

    Raises:
        IndexError: if no row has the requested 'Stack id'.
    """
    is_requested = label_names['Stack id'] == label_id
    matching_names = label_names.loc[ is_requested, 'JFRCtempate2010.mask130819' ]
    return matching_names.iloc[0]

In [3]:
# Values of the TEMPLATE column that are evaluated.
templates = [
    'JFRCtemplate2010',
    'JFRC2013_lo',
    'F-antsFlip_lo',
    'F-cmtkFlip_lof',
    'TeforBrain_f',
]

# Values of the ALG column (registration methods) that are evaluated.
reg_methods = [
    'cmtkCow',
    'cmtkCOG',
    'cmtkHideo',
    'antsRegOwl',
    'antsRegDog',
    'antsRegYang',
]

In [4]:
# Number substituted into the data file names.
line = 3

dist_samples_f = '/nrs/saalfeld/john/projects/flyChemStainAtlas/all_evals/label_data_line{}.csv.gz'.format( line )

# The file is read without a header row, so the column names are supplied here.
dist_samples_df = pd.read_csv(
    dist_samples_f,
    header=None,
    names=['TEMPLATE', 'ALG', 'LINE', 'LABEL', 'DISTANCE'],
)

In [5]:
# dist_samples_df.head()
# print( dist_samples_df['ALG'].unique())
# print( dist_samples_df['TEMPLATE'].unique())

def pad_zero(field='DISTANCE', pad=0.01):
    """Build a row function that replaces exact zeros in `field` with `pad`.

    Args:
        field: key of the value to read from each row.
        pad: value returned in place of an exact zero.

    Returns:
        A function taking a row (anything indexable by `field`) and returning
        the value unchanged when it is positive, `pad` when it equals zero,
        and NaN otherwise (negative or NaN input).
    """
    def pfunc(xin):
        value = xin[field]
        if value == 0.:
            return pad
        # Anything that is neither zero nor positive (negative, NaN) maps to NaN.
        return value if value > 0. else float('nan')
    return pfunc

# Vectorised form of applying pad_zero(field='DISTANCE') to every row:
#   positive distances are kept, exact zeros become 0.01 (pad_zero's default
#   pad), and everything else (negative or NaN) becomes NaN.
# A row-wise DataFrame.apply(axis=1) runs one Python call per row, which is
# very slow on a large sample table; np.select evaluates whole columns at once.
_distance = dist_samples_df['DISTANCE']
dist_samples_df['DISTANCEPAD'] = np.select(
    [ _distance > 0., _distance == 0. ],
    [ _distance, 0.01 ],
    default=np.nan )

In [ ]:
# some_dat = dist_samples_df[ (dist_samples_df.TEMPLATE =='F-antsFlip_lo') & (dist_samples_df.ALG == 'antsRegDog') & (dist_samples_df.LABEL == 84) ]
# print( some_dat.size )

# gam_params_fl = scipy.stats.gamma.fit( some_dat.DISTANCEPAD, floc=0. )
# print( gam_params_fl )

# print( len(some_dat[some_dat.DISTANCEPAD <= 0.]))
# print( len(l_dists) )
# l_dists[l_dists <= 0.]

# some_dat.DISTANCEPAD

In [ ]:
def _fit_distance_params( samples, min_samples=10 ):
    """Fit Rayleigh and gamma distributions to one group of distance samples.

    Non-finite samples are discarded first: pad_zero turns negative distances
    into NaN, and scipy's `fit` rejects data containing NaN/inf.

    Args:
        samples: 1-D collection of distances (e.g. a DISTANCEPAD Series).
        min_samples: a fit is attempted only when strictly more than this many
            finite samples remain.

    Returns:
        A 10-tuple in this order:
        (ray_offset, ray_scale, ray_offset_fl, ray_scale_fl,
         gam_a, gam_offset, gam_scale, gam_a_fl, gam_offset_fl, gam_scale_fl)
        where the `_fl` values come from fits with the location fixed at 0.
        All entries are NaN when there are too few samples.
    """
    values = np.asarray( samples, dtype=float )
    values = values[ np.isfinite( values ) ]
    if values.size <= min_samples:
        return ( float('nan'), ) * 10

    ray = scipy.stats.rayleigh.fit( values )
    ray_fl = scipy.stats.rayleigh.fit( values, floc=0. )
    gam = scipy.stats.gamma.fit( values )
    gam_fl = scipy.stats.gamma.fit( values, floc=0. )
    return ( ray[0], ray[1],
             ray_fl[0], ray_fl[1],
             gam[0], gam[1], gam[2],
             gam_fl[0], gam_fl[1], gam_fl[2] )


# One entry per (template, algorithm, label) combination in every list below.
ray_offset = []
ray_scale = []
ray_offset_fl = []
ray_scale_fl = []

gam_a = []
gam_offset = []
gam_scale = []
gam_a_fl = []
gam_offset_fl = []
gam_scale_fl = []

tlist = []
alist = []
llist = []

# Output lists in the same order as the tuple returned by _fit_distance_params.
_param_lists = ( ray_offset, ray_scale, ray_offset_fl, ray_scale_fl,
                 gam_a, gam_offset, gam_scale,
                 gam_a_fl, gam_offset_fl, gam_scale_fl )


def _record_fit( template, alg, label, samples ):
    """Append one row (keys plus fitted parameters) to all output lists."""
    tlist.append( template )
    alist.append( alg )
    llist.append( label )
    for out_list, value in zip( _param_lists, _fit_distance_params( samples ) ):
        out_list.append( value )


for t in templates:
    t_dists = dist_samples_df[ dist_samples_df.TEMPLATE == t ]
    print( 't ', t )

    for a in reg_methods:
        print( 'a ', a )
        a_dists = t_dists[ t_dists.ALG == a ]

        for l in labels:
            l_dists = a_dists[ a_dists.LABEL == l ].DISTANCEPAD
            _record_fit( t, a, l, l_dists )

        # merge all labels (stored with LABEL == -1).
        # The sample count is taken from the DISTANCEPAD column: DataFrame.size
        # is rows * columns, so testing it against the threshold would let a
        # handful of rows through.
        _record_fit( t, a, -1, a_dists.DISTANCEPAD )

In [11]:
# print( len(llist) )
# print( len(tlist) )
# print( len(alist) )
# print( len(ray_offset) )
# print( len(ray_scale) )
# print( len(ray_offset_fl) )
# print( len(ray_scale_fl) )

# print( len(gam_a) )
# print( len(gam_offset) )
# print( len(gam_scale) )
# print( len(gam_a_fl) )
# print( len(gam_offset_fl) )
# print( len(gam_scale_fl) )


# Column name -> list of per-row values collected by the fitting loop.
# Every list must have the same length for the DataFrame constructor to succeed.
dist_columns = [
    ( 'LABEL', llist ),
    ( 'ALG', alist ),
    ( 'TEMPLATE', tlist ),
    ( 'RAY_OFFSET', ray_offset ),
    ( 'RAY_SCALE', ray_scale ),
    ( 'RAY_OFFSET_FL', ray_offset_fl ),
    ( 'RAY_SCALE_FL', ray_scale_fl ),
    ( 'GAM_A', gam_a ),
    ( 'GAM_OFFSET', gam_offset ),
    ( 'GAM_SCALE', gam_scale ),
    ( 'GAM_A_FL', gam_a_fl ),
    ( 'GAM_OFFSET_FL', gam_offset_fl ),
    ( 'GAM_SCALE_FL', gam_scale_fl ),
]
dist_df = pd.DataFrame( dict( dist_columns ) )

# dist_df

# Write the fitted parameters for this line; the row index is written as well.
dist_params_f = '/nrs/saalfeld/john/projects/flyChemStainAtlas/all_evals/stats/line{}_dist_params.csv'.format(line)
dist_df.to_csv( dist_params_f )