In [89]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np, json
In [133]:
# Inputs: the false-positive table (CSV with one header line) and the model's
# per-sightline predictions (JSON list of sightline dicts).
false_pos_csv = '../../results/false_positives_DR5_v4.3.1_gen.csv'
results_json = '../../tmp/model_4.3.1_data_dr5/predictions.json'

# ndmin=2 keeps the table two-dimensional even when the CSV holds a single data
# row; without it loadtxt returns a 1-D array and the row[...] indexing below breaks.
false_pos_matrix = np.loadtxt(false_pos_csv, skiprows=1, delimiter=',', ndmin=2)
with open(results_json, 'r') as f:
    results_array = json.load(f)

# Index the sightlines by id once, instead of rescanning results_array for every
# false-positive row. Lists are used so that repeated ids (if any) are all kept,
# in their original order.
sightlines_by_id = {}
for sl in results_array:
    sightlines_by_id.setdefault(sl['id'], []).append(sl)

# One tuple per (false-positive row, matching sightline):
#   (false-positive zabs, sightline id, list of predicted dlas for that sightline)
# Column 2 is used as zabs; columns 5 and 6 are formatted into the sightline id
# (presumably plate and fiber -- confirm against the CSV header).
sl_per_fp = [(row[2], sl['id'], sl['dlas'])
             for row in false_pos_matrix
             for sl in sightlines_by_id.get("%05d-%05d" % (row[5], row[6]), ())]

# For every false positive, take the confidence of the predicted DLA whose z_dla
# lies closest to the false positive's zabs (first one wins on ties).
fp_dla_confidence = []
for zabs, _sightline_id, dlas in sl_per_fp:
    if not dlas:
        # A sightline without any predicted DLA has nothing to match against;
        # min() on an empty sequence would raise ValueError.
        continue
    closest = min(dlas, key=lambda d: abs(d['z_dla'] - zabs))
    fp_dla_confidence.append(closest['dla_confidence'])

# Confidence of every predicted DLA across all sightlines, for comparison.
all_dla_confidence = [dla['dla_confidence'] for sl in results_array for dla in sl['dlas']]

# Histogram both populations over the same [0, 1] range and bin count so the
# per-bin counts can be compared (and subtracted) directly.
BINS = 20
fp_hist = np.histogram(fp_dla_confidence, range=[0, 1], bins=BINS)
all_hist = np.histogram(all_dla_confidence, range=[0, 1], bins=BINS)
In [131]:
# Display the per-bin counts of the false-positive confidence histogram
# (element 0 of the np.histogram result; element 1 holds the bin edges).
# Alternative view kept for reference -- counts of all predictions minus false positives:
# all_hist[0] - fp_hist[0]
fp_hist[0]
Out[131]:
In [135]:
def _confidence_bar_figure(counts, label, y_max):
    """Draw one wide bar chart of per-bin counts in a new figure.

    Bars are placed at the bin indices 0..BINS-1; the y axis is fixed to
    [0, y_max]. Returns whatever plt.ylim returns.
    """
    plt.figure(figsize=(20, 6))
    plt.bar(range(BINS), counts)
    plt.xlabel(label)
    return plt.ylim([0, y_max])


# Three figures, in order: false positives only, every predicted DLA, and the
# per-bin difference between the two.
_confidence_bar_figure(fp_hist[0], "Histogram confidence of FPs.", 40)
_confidence_bar_figure(all_hist[0], "Histogram confidence of all predicted DLAs.", 150)
_confidence_bar_figure(all_hist[0] - fp_hist[0],
                       "Histogram confidence of all predicted DLAs - FPs.", 150)
Out[135]:
In [139]:
# Print one shell `cp` command per false-positive row, copying that sightline's
# dla-spec PDF into the `false_pos` directory. Columns 5 and 6 of the table are
# formatted into the file name, the same way the sightline ids are built.
# The commands are only printed, not executed.
x = ["cp dla-spec-%05d-%05d.pdf false_pos" % (row[5], row[6]) for row in false_pos_matrix]
for cmd in x:
    # Function-call form: identical output on Python 2, and valid on Python 3
    # where the bare `print cmd` statement is a SyntaxError.
    print(cmd)