In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from os.path import join, expanduser
In [2]:
# Directory holding the per-pair match files, located under the user's home.
src_dir = os.path.join(
    os.path.expanduser('~'),
    '.alembic/neuroglancer/pinky100_v0/father_of_alignment_v5/match',
)
In [114]:
def parse_fn(fn):
    """Extract the two integers encoded in a file name such as ``'(3, 5)'``.

    The name is split on ``', '``; the first character of the first piece
    and the last character of the second piece are dropped before both are
    converted with ``int``.

    Returns a ``(first, second)`` tuple of ints.
    """
    pieces = fn.split(', ')
    first = int(pieces[0][1:])
    second = int(pieces[1][:-1])
    return first, second
def get_depth(fn):
    """Return the second integer parsed from ``fn`` minus the first."""
    first, second = parse_fn(fn)
    return second - first
def get_src_index(fn):
    """Return the first integer parsed from ``fn`` (used as a sort key for file names)."""
    first, _ = parse_fn(fn)
    return first
def sort_df(df, key, column_idx=0):
    '''Return ``df`` with its rows reordered by a custom sort on one column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to sort; it is not modified.
    key : callable
        Called once per row with a two-element list
        ``[column_value, index_label]`` and must return the sort key
        (the same value ``sorted(..., key=key)`` would expect).
    column_idx : scalar, optional
        Column to sort by. Interpreted as a column label when it is one,
        otherwise as an integer position. Defaults to the first column.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` in sorted order (stable for equal keys).
    '''
    # Label lookup first, positional fallback second: this mirrors what the
    # removed ``DataFrame.ix`` indexer used to do.
    if column_idx in df.columns:
        col = df[column_idx]
    else:
        col = df.iloc[:, column_idx]

    values = col.tolist()
    labels = df.index.tolist()

    # Sort row positions rather than index labels so that frames with
    # duplicate index labels are reordered correctly.
    order = sorted(
        range(len(df)),
        key=lambda pos: key([values[pos], labels[pos]]),
    )
    return df.iloc[order]
In [77]:
from random import shuffle
# Scratch data: the integers 0..99 in random order.
x = list(range(100))
shuffle(x)
In [57]:
# Every entry in src_dir, ordered by the first integer encoded in its name.
files = sorted(os.listdir(src_dir), key=get_src_index)
In [14]:
# Column names of interest in the match files, one per line for easy diffing.
cols = [
    'patches_src_normalized_dyn_range',
    'patches_src_kurtosis',
    'patches_dst_kurtosis',
    'xcorr_r_max',
    'xcorr_delta_5',
    'xcorr_delta_10',
    'xcorr_delta_15',
    'xcorr_sigma_0.5',
    'xcorr_sigma_0.75',
    'xcorr_sigma_0.95',
    'vects_norm',
    'filter',
]
In [58]:
# Load the first 100 files, tag each row with the file it came from, and
# combine everything into one frame with a fresh 0..n-1 index.
stacks = []
for fn in files[:100]:
    per_file = pd.read_csv(join(src_dir, fn))
    stacks.append(
        per_file.assign(filter=per_file['filter'].astype(bool), id=fn)
    )
df = pd.concat(stacks, ignore_index=True)
In [59]:
# Split the rows on the boolean 'filter' column.
accepted_df = df.loc[df['filter']]
rejected_df = df.loc[~df['filter']]
In [60]:
# Group the accepted rows by the 'id' column (the file name each row was read from).
accepted_grouped = accepted_df.groupby('id')
In [61]:
# Per-file describe() summary of the accepted rows; columns become (column, statistic) pairs.
accepted_stats = accepted_grouped.describe()
In [115]:
# The index holds file names; derive each file's depth from its name.
accepted_stats['depth'] = list(map(get_depth, accepted_stats.index))
In [117]:
# One summary table per depth value (1 and 2).
accepted_stats_1, accepted_stats_2 = (
    accepted_stats[accepted_stats['depth'] == depth] for depth in (1, 2)
)
In [118]:
def plot_quantiles(df, k):
    """Plot the describe() statistics of column ``k`` on one 16x8 figure.

    ``df`` must have ``(k, statistic)`` columns for max, 75%, 50%, 25%,
    min and std; each is drawn as its own line, followed by a legend.
    """
    plt.figure(figsize=(16, 8))
    for stat in ('max', '75%', '50%', '25%', 'min', 'std'):
        df[(k, stat)].plot()
    plt.legend()
    plt.show()
In [121]:
# Spread of xcorr_r_max across the depth-2 files.
plot_quantiles(df=accepted_stats_2, k='xcorr_r_max')
In [155]:
# Build one wide row per file: each file's table is flattened with stack()
# into a Series keyed by (row label, column label), and the Series are then
# laid side by side and transposed so that files become rows.
#
# NOTE(review): the later cells index d['xcorr_delta_15']['accepted'], so the
# files read here presumably have metric names as row labels and columns such
# as 'accepted' / 'rejects' / 'std' - confirm that src_dir points at them.

# Start from an empty list: `stacks` is also filled with whole DataFrames by
# the loading cell earlier in the notebook, and concatenating those together
# with the stacked Series below would corrupt the result.
stacks = []
for fn in files:
    df = pd.read_csv(join(src_dir, fn), index_col=0)
    src, dst = parse_fn(fn)
    # Scalars broadcast down every row, so after stack() each value appears
    # under every row label, e.g. ('xcorr_r_max', 'name').
    df['name'] = fn
    df['src'] = src
    df['dst'] = dst
    stacks.append(df.stack())

d = (
    pd.concat(stacks, axis=1)
    .transpose()
    # Any row label's copy of 'name' would do; use it as the row index.
    .set_index(('xcorr_r_max', 'name'))
    .rename_axis('name')
)
In [183]:
# Names of the files whose xcorr_delta_15 'accepted' value is below 5.
d.index[d[('xcorr_delta_15', 'accepted')] < 5]
Out[183]:
In [186]:
# Accepted, rejected and combined xcorr_delta_15 values per file, on one figure.
f = plt.figure(figsize=(16, 8))
delta_15 = d['xcorr_delta_15']
for series in (
    delta_15['accepted'],
    delta_15['rejects'],
    delta_15['accepted'] + delta_15['rejects'],
):
    series.plot()
plt.show()
In [184]:
# The 'rejects' line alone, at the same figure size.
f = plt.figure(figsize=(16, 8))
delta_15_rejects = d['xcorr_delta_15']['rejects']
delta_15_rejects.plot()
plt.show()
In [181]:
# The xcorr_delta_15 'std' value per file.
f = plt.figure(figsize=(16, 8))
delta_15_std = d['xcorr_delta_15']['std']
delta_15_std.plot()
# Disabled alternative: restrict the line to files with a positive 'accepted' value.
# delta_15_std[d['xcorr_delta_15']['accepted'] > 0].plot()
plt.show()