In [1]:
import sys
import glob
import re
import fnmatch
import math
import os
from os import listdir
from os.path import join, isfile, basename
import itertools
import numpy as np
from numpy import float32, int32, uint8, dtype, genfromtxt
import scipy
from scipy.stats import ttest_ind
import pandas as pd
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import colorsys
In [2]:
# Integer label ids to analyse. The fitting loop further down iterates over
# this list in the order given, so the order determines the output row order.
labels = [16,64,8,32,2,4,65,66,33,67,34,17,69,70,35,71,9,18,72,36,73,74,37,75,19,76,38,77,39,78,79,20,5,40,80,10,81,82,83,84,85,86,11,22,23,24,12,3,6,49,50,25,51,13,52,26,53,27,54,55,56,28,7,14,57,58,29,59,30,60,15,61,31,62,63]
# Tab-separated index table; its first row is used as the header. get_label_name()
# reads the 'Stack id' and 'JFRCtempate2010.mask130819' columns from it.
label_names_file = '/groups/saalfeld/home/bogovicj/vfb/DrosAdultBRAINdomains/refData/Original_Index.tsv'
label_names = pd.read_csv( label_names_file, delimiter='\t', header=0 )
# Manual spot checks of the lookup (left disabled):
# print label_names[ label_names['Stack id'] == 11 ]['JFRCtempate2010.mask130819' ].iloc[0]
# print label_names[ label_names['Stack id'] == 70 ]['JFRCtempate2010.mask130819' ].iloc[0]
def get_label_name( label_id ):
    """Return the name stored for `label_id` in the `label_names` table.

    Selects the rows whose 'Stack id' equals `label_id` and returns the
    'JFRCtempate2010.mask130819' entry of the first such row. When no row
    matches, the `.iloc[0]` access raises IndexError.
    """
    is_requested = label_names['Stack id'] == label_id
    matching_names = label_names.loc[ is_requested, 'JFRCtempate2010.mask130819' ]
    return matching_names.iloc[0]
In [3]:
# Template names, matched against the TEMPLATE column of the distance samples.
templates = [
    'JFRCtemplate2010',
    'JFRC2013_lo',
    'F-antsFlip_lo',
    'F-cmtkFlip_lof',
    'TeforBrain_f',
]
# Registration method names, matched against the ALG column of the distance samples.
reg_methods = [
    'cmtkCow',
    'cmtkCOG',
    'cmtkHideo',
    'antsRegOwl',
    'antsRegDog',
    'antsRegYang',
]
In [4]:
# Index of the line whose samples are analysed; it is substituted into the
# input file name here and into the output file name at the end of the notebook.
line = 3
dist_samples_f = '/nrs/saalfeld/john/projects/flyChemStainAtlas/all_evals/label_data_line{}.csv.gz'.format( line )
# The file is read without a header row, so the column names are supplied explicitly.
dist_samples_df = pd.read_csv(
    dist_samples_f,
    header=None,
    names=['TEMPLATE','ALG','LINE','LABEL','DISTANCE'],
)
In [5]:
# dist_samples_df.head()
# print( dist_samples_df['ALG'].unique())
# print( dist_samples_df['TEMPLATE'].unique())
def pad_zero(field='DISTANCE', pad=0.01):
    """Create a row function that replaces exact zeros in `field` with `pad`.

    The returned callable takes one row (anything indexable by `field`) and
    returns:
      * the value itself when it is strictly positive,
      * `pad` when it is exactly zero,
      * NaN otherwise (negative values, and NaN, which fails both comparisons).
    """
    def pfunc(xin):
        value = xin[field]
        if value > 0.:
            return value
        # Not positive: only an exact zero is padded, everything else is discarded as NaN.
        return pad if value == 0. else float('nan')
    return pfunc
# Add the padded distance column: positive distances are kept, exact zeros are
# replaced by ZERO_DISTANCE_PAD, and anything else (negative or NaN) becomes NaN.
# This is the vectorised equivalent of applying pad_zero(field='DISTANCE') to
# every row; row-wise DataFrame.apply runs one Python call per sample and is
# very slow on large sample tables.
ZERO_DISTANCE_PAD = 0.01  # same value as pad_zero()'s default `pad`
_raw_distance = dist_samples_df['DISTANCE']
dist_samples_df['DISTANCEPAD'] = np.where(
    _raw_distance > 0.,
    _raw_distance,
    np.where( _raw_distance == 0., ZERO_DISTANCE_PAD, np.nan ) )
In [ ]:
# some_dat = dist_samples_df[ (dist_samples_df.TEMPLATE =='F-antsFlip_lo') & (dist_samples_df.ALG == 'antsRegDog') & (dist_samples_df.LABEL == 84) ]
# print( some_dat.size )
# gam_params_fl = scipy.stats.gamma.fit( some_dat.DISTANCEPAD, floc=0. )
# print( gam_params_fl )
# print( len(some_dat[some_dat.DISTANCEPAD <= 0.]))
# print( len(l_dists) )
# l_dists[l_dists <= 0.]
# some_dat.DISTANCEPAD
In [ ]:
# A group is fitted only when it has MORE than this many finite samples.
MIN_FIT_SAMPLES = 10

def _fit_distance_params( samples, min_samples=MIN_FIT_SAMPLES ):
    """Fit Rayleigh and gamma distributions to one group of padded distances.

    Parameters
    ----------
    samples : array-like of float
        Distances of a single group. Non-finite entries (such as the NaN that
        pad_zero() produces for negative distances) are removed first, because
        scipy's fit() refuses data containing non-finite values.
    min_samples : int
        Fits are attempted only when more than `min_samples` finite samples remain.

    Returns
    -------
    dict
        Keys 'ray_offset', 'ray_scale' (free Rayleigh fit), 'ray_offset_fl',
        'ray_scale_fl' (Rayleigh fit with loc fixed at 0), 'gam_a',
        'gam_offset', 'gam_scale' (free gamma fit) and 'gam_a_fl',
        'gam_offset_fl', 'gam_scale_fl' (gamma fit with loc fixed at 0).
        Every value is NaN when the group is too small to fit.
    """
    values = np.asarray( samples, dtype=float )
    values = values[ np.isfinite( values ) ]

    if values.size <= min_samples:
        nan = float('nan')
        return { 'ray_offset': nan, 'ray_scale': nan,
                 'ray_offset_fl': nan, 'ray_scale_fl': nan,
                 'gam_a': nan, 'gam_offset': nan, 'gam_scale': nan,
                 'gam_a_fl': nan, 'gam_offset_fl': nan, 'gam_scale_fl': nan }

    ray_loc, ray_scl = scipy.stats.rayleigh.fit( values )
    ray_loc_fl, ray_scl_fl = scipy.stats.rayleigh.fit( values, floc=0. )
    gam_shape, gam_loc, gam_scl = scipy.stats.gamma.fit( values )
    gam_shape_fl, gam_loc_fl, gam_scl_fl = scipy.stats.gamma.fit( values, floc=0. )
    return { 'ray_offset': ray_loc, 'ray_scale': ray_scl,
             'ray_offset_fl': ray_loc_fl, 'ray_scale_fl': ray_scl_fl,
             'gam_a': gam_shape, 'gam_offset': gam_loc, 'gam_scale': gam_scl,
             'gam_a_fl': gam_shape_fl, 'gam_offset_fl': gam_loc_fl, 'gam_scale_fl': gam_scl_fl }

# One record per (template, algorithm, label), in loop order. For every
# (template, algorithm) pair the per-label records come first, followed by a
# record with label -1 that is fitted on all labels of that pair merged together.
fit_records = []
for t in templates:
    t_dists = dist_samples_df[ dist_samples_df.TEMPLATE == t ]
    print( 't ', t )
    for a in reg_methods:
        print( 'a ', a )
        a_dists = t_dists[ t_dists.ALG == a ]
        for l in labels:
            l_dists = a_dists[ a_dists.LABEL == l ].DISTANCEPAD
            record = _fit_distance_params( l_dists )
            record.update( TEMPLATE=t, ALG=a, LABEL=l )
            fit_records.append( record )

        # merge all labels
        # The threshold is applied to the number of samples. (DataFrame.size
        # would count rows * columns and let a two-row group through.)
        record = _fit_distance_params( a_dists.DISTANCEPAD )
        record.update( TEMPLATE=t, ALG=a, LABEL=-1 )
        fit_records.append( record )

# Expose the results as the parallel lists that the next cell assembles into dist_df.
tlist = [ rec['TEMPLATE'] for rec in fit_records ]
alist = [ rec['ALG'] for rec in fit_records ]
llist = [ rec['LABEL'] for rec in fit_records ]
ray_offset = [ rec['ray_offset'] for rec in fit_records ]
ray_scale = [ rec['ray_scale'] for rec in fit_records ]
ray_offset_fl = [ rec['ray_offset_fl'] for rec in fit_records ]
ray_scale_fl = [ rec['ray_scale_fl'] for rec in fit_records ]
gam_a = [ rec['gam_a'] for rec in fit_records ]
gam_offset = [ rec['gam_offset'] for rec in fit_records ]
gam_scale = [ rec['gam_scale'] for rec in fit_records ]
gam_a_fl = [ rec['gam_a_fl'] for rec in fit_records ]
gam_offset_fl = [ rec['gam_offset_fl'] for rec in fit_records ]
gam_scale_fl = [ rec['gam_scale_fl'] for rec in fit_records ]
In [11]:
# print( len(llist) )
# print( len(tlist) )
# print( len(alist) )
# print( len(ray_offset) )
# print( len(ray_scale) )
# print( len(ray_offset_fl) )
# print( len(ray_scale_fl) )
# print( len(gam_a) )
# print( len(gam_offset) )
# print( len(gam_scale) )
# print( len(gam_a_fl) )
# print( len(gam_offset_fl) )
# print( len(gam_scale_fl) )
# Gather the parallel result lists into one table: one row per fitted
# (template, algorithm, label) group, one column per fitted parameter.
dist_columns = [
    ('LABEL', llist),
    ('ALG', alist),
    ('TEMPLATE', tlist),
    ('RAY_OFFSET', ray_offset),
    ('RAY_SCALE', ray_scale),
    ('RAY_OFFSET_FL', ray_offset_fl),
    ('RAY_SCALE_FL', ray_scale_fl),
    ('GAM_A', gam_a),
    ('GAM_OFFSET', gam_offset),
    ('GAM_SCALE', gam_scale),
    ('GAM_A_FL', gam_a_fl),
    ('GAM_OFFSET_FL', gam_offset_fl),
    ('GAM_SCALE_FL', gam_scale_fl),
]
dist_df = pd.DataFrame( dict( dist_columns ) )
# dist_df
# Write the table (including its integer index) next to the other per-line statistics.
dist_params_f = '/nrs/saalfeld/john/projects/flyChemStainAtlas/all_evals/stats/line{}_dist_params.csv'.format(line)
dist_df.to_csv( dist_params_f )