In [22]:
import os
from glob import glob
from subprocess import check_output

import pandas as pd

In [23]:
# Glob pattern for the tracking log files (*.nii.gz.out) found in the
# StructConn/cluster folder of every directory under MR_Processed.
path = '/fs0/New_Server/RCV/MR_Processed/*/StructConn/cluster/*.nii.gz.out'
def time_spent_tracking(path):
    """Extract the tracking time reported in a log file.

    Scans the file for the first line containing ``'time spent'`` and parses
    the first whitespace-separated token after the first ``':'`` on that line
    as a float.

    Parameters
    ----------
    path : str
        Path of the log file to read.

    Returns
    -------
    float
        The parsed value, or NaN when the file has no ``'time spent'`` line
        or when that line cannot be parsed (no ``':'``, nothing after it, or
        a non-numeric token).
    """
    with open(path) as f:
        for line in f:
            if 'time spent' not in line:
                continue
            # Only the first matching line is considered.
            try:
                return float(line.split(':')[1].split()[0])
            except (IndexError, ValueError):
                # Malformed line: report "unknown" rather than aborting the
                # whole scan over one bad log.
                return float('nan')
    # No matching line in the file (e.g. the job never finished).
    return float('nan')

def roi_size(roi):
    """Return the second field printed by ``fslstats <roi> -V`` as a float.

    Parameters
    ----------
    roi : str
        Path of the image handed to ``fslstats``.

    Returns
    -------
    float
        Second whitespace-separated token of the command's output.
        NOTE(review): presumably the volume of the non-zero voxels, with the
        voxel count as the first token -- confirm against the FSL docs.
    """
    output = check_output(['fslstats', roi, '-V'])
    fields = output.strip().split()
    return float(fields[1])


data = []
# glob() yields matches in arbitrary, filesystem-dependent order; sort them so
# everything derived from this list is reproducible from run to run.
all_logs = sorted(glob(path))
print("{:d} logs...".format(len(all_logs)))


3244 logs...

In [24]:
# Start from an empty list so that re-running this cell rebuilds the records
# instead of appending a second copy of every row.
data = []
for log in all_logs:
    time_spent = time_spent_tracking(log)
    # A log at <base>/cluster/<name>.out corresponds to the ROI image at
    # <base>/label/<name>. Work on path components rather than str.replace so
    # that 'cluster' or '.out' appearing elsewhere in the path is left alone.
    log_dir, log_name = os.path.split(log)
    roi_name = log_name[:-len('.out')] if log_name.endswith('.out') else log_name
    roi = os.path.join(os.path.dirname(log_dir), 'label', roi_name)
    size = roi_size(roi)
    data.append({'roi': roi_name, 'time': time_spent, 'size': size})

In [25]:
# One row per log, slowest first. Rows with a missing value (e.g. logs whose
# tracking time could not be read) are dropped before saving.
# sort_values replaces the removed DataFrame.sort_index(by=...) spelling.
df = (
    pd.DataFrame(data)
    .sort_values(by='time', ascending=False)
    .dropna()
)
df.to_csv('/home/burnsss1/probtracking_times.csv')

In [28]:
import seaborn as sns
# Scatter plot with a fitted regression line of 'time' against 'size'.
# x and y are passed by keyword: recent seaborn releases take `data` as the
# first positional argument and reject positional x/y.
sns.regplot(x='size', y='time', data=df)