In [1]:
import sys
# Make the local xcorr package importable
sys.path.append("../../xcorr")
In [33]:
# Close the DB handle from the previous run, then reload xcorr_db to pick up edits
DB.close()
import xcorr_db
import importlib
importlib.reload(xcorr_db)
Out[33]:
In [34]:
DB = xcorr_db.setup_db('./initial.db')
In [35]:
hpo_id = DB.insert_hpo_record(2)
In [131]:
import pandas as pd
import csv
import numpy as np
import json
In [209]:
# Load the HPO run log: one row per evaluated parameter set
df = pd.read_csv('~/Documents/results/cp1/non_nci_hpo_log/hpos.txt', sep="|", header=None,
                 names=["i", "hpo_id", "params", "run_dir", "ts", "val_loss"])
In [212]:
df.groupby("hpo_id")['val_loss'].agg([np.min, np.max, np.mean, np.std])
Out[212]:
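Newer pandas versions deprecate passing raw numpy functions to .agg; a minimal equivalent of the summary above using string aggregator names (same df, same grouping):
In [ ]:
# Same per-hpo_id summary of val_loss, with string names
# instead of np.min/np.max/np.mean/np.std
df.groupby("hpo_id")['val_loss'].agg(["min", "max", "mean", "std"])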
In [161]:
n = 10
smallest = df.groupby('hpo_id')['val_loss'].nsmallest(n)
best_n = df.iloc[smallest.index.get_level_values(1), :]
#best_n.to_csv('~/Documents/results/cp1/best_{}_nci.txt'.format(n), sep="|", index=False)
params = '/home/nick/Documents/results/cp1/best_{}_nci_params.txt'.format(n)
# Write out the best n parameters
best_n[['params']].to_csv(params, index=False,
                          header=False, quoting=csv.QUOTE_NONE, sep='|')
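The df.iloc call above relies on df having its default RangeIndex; a label-based sketch that selects the same rows via .loc:
In [ ]:
# nsmallest on a groupby yields a (hpo_id, original_index) MultiIndex;
# level 1 holds the original row labels, which .loc can use directly.
smallest = df.groupby('hpo_id')['val_loss'].nsmallest(n)
best_n = df.loc[smallest.index.get_level_values(1)]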
In [102]:
pd.__version__
Out[102]:
In [141]:
# stats for the best n
best_n.groupby("hpo_id")['val_loss'].agg([np.min, np.max, np.mean, np.std])
Out[141]:
In [165]:
from os import path
# Rewrite the best-n parameter lines as a UPF: point NCI60 runs with a
# feature subset at their cached exported training data, drop save_path,
# and bump epochs to 100.
with open(params) as f_in:
    lines = f_in.readlines()

upf = '/home/nick/Documents/results/cp1/best_{}_nci_params_upf.txt'.format(n)
with open(upf, 'w') as f_out:
    for line in lines:
        j = json.loads(line)
        train_sources = j['train_sources']
        if 'cell_feature_subset_path' in j and train_sources == 'NCI60':
            fsp = path.basename(j['cell_feature_subset_path'])
            train = '{}_train.h5'.format(fsp[:fsp.find('_features')])
            j['use_exported_data'] = '/autofs/nccs-svm1_proj/med106/ncollier/repos/Supervisor/workflows/cp1/cache/{}'.format(train)
        del j['save_path']
        j['epochs'] = 100
        f_out.write('{}\n'.format(json.dumps(j)))
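A quick sanity check (a sketch, assuming the UPF layout written above): every line should be valid JSON with save_path removed and epochs forced to 100.
In [ ]:
# Read the UPF back and verify the rewrites applied above
with open(upf) as f_in:
    for line in f_in:
        j = json.loads(line)
        assert 'save_path' not in j
        assert j['epochs'] == 100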
In [189]:
# UPF template for Uno export runs: train on one study using the
# cross-correlation feature subset for the (s1, s2) pair, and export the
# train/test data to the cache.
js = """{{"batch_size": 6144, "train_sources": "{}", "preprocess_rnaseq": "combat", "gpus": "0 1 2 3 4 5", "cell_feature_subset_path": "/autofs/nccs-svm1_proj/med106/ncollier/repos/Supervisor/workflows/cp1/xcorr_data/{}_{}_2000_1000_features.txt", "export_data": "/autofs/nccs-svm1_proj/med106/ncollier/repos/Supervisor/workflows/cp1/cache/{}_{}_2000_1000_{}.h5", "no_feature_source": true, "no_response_source": true, "cp": true}}"""
studies = ['CCLE', 'CTRP', 'gCSI', 'GDSC']
for s1 in studies:
    for s2 in studies:
        if s1 != s2:
            j1 = js.format(s1, s1, s2, s1, s2, 'train')
            j2 = js.format(s2, s1, s2, s1, s2, 'test')
            #print(j1)
            #print(j2)

for s1 in studies:
    s2 = 'NCI60'
    j = js.format(s2, s1, s2, s1, s2, 'test')
    print(j)
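The loops above only print the NCI60 lines; a sketch that writes all of the generated UPF lines to a file instead (the export_upf path is an assumption, not from this session):
In [ ]:
export_upf = '/home/nick/Documents/repos/Supervisor/workflows/cp1/data/export_upf.txt'
with open(export_upf, 'w') as f_out:
    # pairwise study combinations, train and test exports
    for s1 in studies:
        for s2 in studies:
            if s1 != s2:
                f_out.write(js.format(s1, s1, s2, s1, s2, 'train') + '\n')
                f_out.write(js.format(s2, s1, s2, s1, s2, 'test') + '\n')
    # NCI60 test exports against each study's feature subset
    for s1 in studies:
        f_out.write(js.format('NCI60', s1, 'NCI60', s1, 'NCI60', 'test') + '\n')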
In [208]:
from os import path
# python uno_infer.py --data CTRP_CCLE_2000_1000_test.h5 --model_file model.h5
inputs = '/home/nick/Documents/results/cp1/inputs.txt'
model_class_ids = {}
next_id = 0
infer_upf = '/home/nick/Documents/repos/Supervisor/workflows/cp1/data/infer_upf_all.txt'
with open(infer_upf, 'w') as f_out:
    studies = ['CCLE', 'CTRP', 'gCSI', 'GDSC', 'NCI60']
    with open(inputs) as f_in:
        reader = csv.reader(f_in, delimiter="|")
        for r in reader:
            params = json.loads(r[2])
            save_path = params['save_path']
            if 'cell_feature_subset_path' in params:
                # Run trained on a cross-correlation feature subset:
                # infer on the matching exported test set.
                fsp = path.basename(params['cell_feature_subset_path'])
                fsp_prefix = fsp[:fsp.find('_features')]
                test_data = '{}_test.h5'.format(fsp_prefix)
                #if fsp.find('_NCI60_') != -1:
                f_out.write('{},{},{}\n'.format(test_data, save_path, fsp_prefix))
            else:
                # Run trained on full features: infer against every study.
                train_source = params['train_sources']
                for s in studies:
                    test_data = '{}.h5'.format(s)
                    f_out.write('{},{},{}_{}\n'.format(test_data, save_path, train_source, s))
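Each UPF line corresponds to one uno_infer.py invocation like the one in the comment at the top of the cell; a sketch that expands the file into commands (the model.h5-under-save_path layout is an assumption):
In [ ]:
# Print one inference command per UPF line
with open(infer_upf) as f_in:
    for line in f_in:
        test_data, save_path, prefix = line.strip().split(',')
        print('python uno_infer.py --data {} --model_file {}/model.h5'.format(
            test_data, save_path))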
In [185]:
f = "/home/nick/Documents/repos/Supervisor/workflows/cp1/scripts/counts_by_hpo.csv"
hp = {}

class Entry:
    """A [start, end] interval; end == -1 while the interval is still open."""
    def __init__(self, start):
        self.start = start
        self.end = -1

    def __repr__(self):
        return "[{}, {}]".format(self.start, self.end)

# Pair up start ("1") and end ("0") events per hpo_id into intervals.
with open(f) as f_in:
    reader = csv.reader(f_in)
    next(reader)  # skip the header row
    for row in reader:
        if row[1] == "1":
            hpo_id = int(row[3])
            h = float(row[2])
            if hpo_id not in hp:
                hp[hpo_id] = [Entry(h)]
            else:
                entry = hp[hpo_id][-1]
                if entry.end != -1:
                    hp[hpo_id].append(Entry(h))
        elif row[1] == "0":
            hpo_id = int(row[3])
            h = float(row[2])
            hp[hpo_id][-1].end = h

with open('/home/nick/Documents/repos/Supervisor/workflows/cp1/scripts/start_end.csv', 'w') as f_out:
    for k in hp:
        for e in hp[k]:
            f_out.write('{},{},{}\n'.format(e.start, e.end, k))
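A sketch for inspecting the intervals written above: load start_end.csv and total the per-hpo_id durations (the column names are assumptions; the CSV is written without a header).
In [ ]:
se = pd.read_csv('/home/nick/Documents/repos/Supervisor/workflows/cp1/scripts/start_end.csv',
                 header=None, names=['start', 'end', 'hpo_id'])
se['duration'] = se['end'] - se['start']
se.groupby('hpo_id')['duration'].sum()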
In [6]:
import csv
import os

# Find inference runs with run_id < 200 and print their experiment run dirs.
f = "/home/nick/Documents/results/cp1/inference_log.txt"
prefix = '/gpfs/alpine/med106/scratch/ncollier/experiments/infer_all_4/run/'
with open(f) as f_in:
    reader = csv.reader(f_in, delimiter='|')
    for i, row in enumerate(reader):
        #train_path = row[2]
        run_id = int(os.path.basename(os.path.dirname(row[2])))
        if run_id < 200:
            print('{}{}'.format(prefix, i))
In [ ]: