TODO (potential): write code for creating the pickles. TODO (potential): write code for downloading all the fields in advance.
Create the annotated csv catalog
In [ ]:
# Build the annotated catalog: one row per SWAP subject (across the listed
# stages), carrying subject metadata plus its flattened click annotations,
# and write it out as a CSV.
import pandas as pd
import numpy as np  # FIX: np.array is used below but numpy was never imported
import swap

base_collection_path = '/nfs/slac/g/ki/ki18/cpd/swap/pickles/15.09.02/'
base_directory = '/nfs/slac/g/ki/ki18/cpd/swap_catalog_diagnostics/'
annotated_catalog_path = base_directory + 'annotated_catalog.csv'

# When True, subjects with zero recorded clicks are dropped from the catalog.
cut_empty = True
stages = [1, 2]
# Subject attributes copied straight off the subject object (plus the two
# synthesized columns 'stage' and 'line').
categories = ['ID', 'ZooID', 'location', 'mean_probability', 'category', 'kind', 'flavor',
              'state', 'status', 'truth', 'stage', 'line']
# Per-subject annotation histories appended as list-valued columns.
annotation_categories = ['At_X', 'At_Y', 'PD', 'PL']

catalog = []
for stage in stages:
    print(stage)
    collection_path = base_collection_path + 'stage{0}'.format(stage) + '/CFHTLS_collection.pickle'
    collection = swap.read_pickle(collection_path, 'collection')
    for ID in collection.list():
        subject = collection.member[ID]
        catalog_i = []
        # for stage1 we shall skip the tests for now
        # FIX: use a boolean `and` instead of multiplying the two booleans.
        if (stage == 1) and (subject.category == 'test'):
            continue
        # flatten out x and y. also cut out empty entries
        annotationhistory = subject.annotationhistory
        x_unflat = annotationhistory['At_X']
        x = np.array([xi for xj in x_unflat for xi in xj])
        # cut out catalogs with no clicks
        if (len(x) < 1) and (cut_empty):
            continue
        # oh yeah there's that absolutely nutso entry with 50k clicks
        if len(x) > 10000:
            continue
        for category in categories:
            if category == 'stage':
                catalog_i.append(stage)
            elif category == 'line':
                # NOTE(review): `line` is not defined anywhere in this cell;
                # it must come from an earlier cell or session state. Confirm
                # before rerunning, or this raises NameError.
                catalog_i.append(line)
            else:
                catalog_i.append(subject.__dict__[category])
        for category in annotation_categories:
            catalog_i.append(list(annotationhistory[category]))
        catalog.append(catalog_i)

catalog = pd.DataFrame(catalog, columns=categories + annotation_categories)
# save catalog
catalog.to_csv(annotated_catalog_path)
Create the knownlens catalog
In [ ]:
# Build the known-lens catalog: attach each known lens's ZooID (looked up in
# the d1-d11 field listing by CFHTID) and write the result to knownlens.csv.
knownlens_dir = '/nfs/slac/g/ki/ki18/cpd/code/strongcnn/catalog/knownlens/'
knownlensID = pd.read_csv(knownlens_dir + 'knownlensID', sep=' ')
listfiles_d1_d11 = pd.read_csv(knownlens_dir + 'listfiles_d1_d11.txt', sep=' ')
knownlenspath = knownlens_dir + 'knownlens.csv'

# Restrict the field listing to rows whose CFHTID appears in the known-lens list.
X2 = listfiles_d1_d11[listfiles_d1_d11['CFHTID'].isin(knownlensID['CFHTID'])]  # cuts down to like 212 entries.

# Look up the ZooID for each known lens, in knownlensID row order.
# FIX: the original iterated `range(len(Y))` where `Y` is undefined; the loop
# body indexes knownlensID, so its length is the intended bound.
ZooID = []
for i in range(len(knownlensID)):
    ZooID.append(X2['ZooID'][X2['CFHTID'] == knownlensID['CFHTID'][i]].values[0])
knownlensID['ZooID'] = ZooID
knownlensID.to_csv(knownlenspath)
Convert the annotated catalog and knownlens catalog into cluster catalogs and cutouts
In [2]:
# Regenerate the cluster catalogs and cutout images from the annotated
# catalog plus the known-lens list, via the external create_catalogs.py script.
from glob import glob
from os import remove
from subprocess import call

base_directory = '/nfs/slac/g/ki/ki18/cpd/swap_catalog_diagnostics/'
cluster_directory = base_directory
## uncomment this line when updating the shared catalog!
# base_directory = '/nfs/slac/g/ki/ki18/cpd/swap_catalog/'
# cluster_directory = base_directory + 'clusters/'
field_directory = base_directory
knownlens_path = base_directory + 'knownlens.csv'
collection_path = base_directory + 'annotated_catalog.csv'
catalog_path = cluster_directory + 'catalog.csv'

# On a rerun, stale cluster cutouts (all named *_*.png) must be cleared
# before the script regenerates them.
for stale_png in glob(cluster_directory + '*_*.png'):
    remove(stale_png)

# Invoke the catalog-creation script. This can take a while.
command = ['python', '/nfs/slac/g/ki/ki18/cpd/code/strongcnn/code/create_catalogs.py',
           '--collection', collection_path,
           '--knownlens', knownlens_path,
           '--clusters', cluster_directory,
           '--fields', field_directory,
           #'--augment', augmented_directory,
           #'--do_a_few', '100',
           ]
call(command)
Out[2]:
In [ ]: