Run this notebook to produce the cutout catalogs!

Potential TODOs: (1) write code for creating the pickles; (2) write code for downloading all the fields in advance.

Create the annotated CSV catalog


In [ ]:
import pandas as pd
import numpy as np  # was missing: np.array is used below (NameError on fresh kernel)
import swap

base_collection_path = '/nfs/slac/g/ki/ki18/cpd/swap/pickles/15.09.02/'
base_directory = '/nfs/slac/g/ki/ki18/cpd/swap_catalog_diagnostics/'
annotated_catalog_path = base_directory + 'annotated_catalog.csv'
cut_empty = True

# One pathological subject has ~50k clicks; anything above this is discarded.
MAX_CLICKS = 10000

stages = [1, 2]
categories = ['ID', 'ZooID', 'location', 'mean_probability', 'category', 'kind', 'flavor', 
              'state', 'status', 'truth', 'stage', 'line']
annotation_categories = ['At_X', 'At_Y', 'PD', 'PL']

# Build one row per subject across both stages, then assemble a DataFrame once
# at the end (avoids growing a frame row-by-row).
catalog = []
for stage in stages:
    print(stage)
    collection_path = base_collection_path + 'stage{0}'.format(stage) + '/CFHTLS_collection.pickle'
    collection = swap.read_pickle(collection_path, 'collection')
    for ID in collection.list():

        subject = collection.member[ID]
        catalog_i = []

        # for stage1 we shall skip the tests for now
        # (was `(stage == 1) * (...)`: `and` is the idiomatic boolean form)
        if (stage == 1) and (subject.category == 'test'):
            continue

        # flatten out x and y. also cut out empty entries
        annotationhistory = subject.annotationhistory
        x_unflat = annotationhistory['At_X']
        x = np.array([xi for xj in x_unflat for xi in xj])

        # cut out catalogs with no clicks
        if (len(x) < 1) and cut_empty:
            continue
        # oh yeah there's that absolutely nutso entry with 50k clicks
        if len(x) > MAX_CLICKS:
            continue

        for category in categories:
            if category == 'stage':
                catalog_i.append(stage)
            elif category == 'line':
                # `line` was previously an undefined free variable (NameError).
                # Fall back to the subject's own attribute when present;
                # TODO confirm what `line` was meant to hold.
                catalog_i.append(subject.__dict__.get('line'))
            else:
                catalog_i.append(subject.__dict__[category])
        for category in annotation_categories:
            catalog_i.append(list(annotationhistory[category]))

        catalog.append(catalog_i)
catalog = pd.DataFrame(catalog, columns=categories + annotation_categories)

# save catalog
catalog.to_csv(annotated_catalog_path)

Create the knownlens catalog


In [ ]:
knownlens_dir = '/nfs/slac/g/ki/ki18/cpd/code/strongcnn/catalog/knownlens/'
knownlensID = pd.read_csv(knownlens_dir + 'knownlensID', sep=' ')
listfiles_d1_d11 = pd.read_csv(knownlens_dir + 'listfiles_d1_d11.txt', sep=' ')
knownlenspath = knownlens_dir + 'knownlens.csv'

# Restrict the field listing to the known lenses (cuts down to ~212 entries).
X2 = listfiles_d1_d11[listfiles_d1_d11['CFHTID'].isin(knownlensID['CFHTID'])]

# Look up the ZooID for each known lens via its CFHTID.
# (was `range(len(Y))` with `Y` undefined — NameError; the loop body indexes
# knownlensID['CFHTID'][i], so iterate over knownlensID's rows)
# NOTE(review): assumes every CFHTID in knownlensID appears in the field
# listing; `.values[0]` raises IndexError otherwise — TODO confirm.
ZooID = []
for i in range(len(knownlensID)):
    ZooID.append(X2['ZooID'][X2['CFHTID'] == knownlensID['CFHTID'][i]].values[0])

knownlensID['ZooID'] = ZooID

knownlensID.to_csv(knownlenspath)

Convert the annotated catalog and knownlens catalog into cluster catalogs and cutouts


In [2]:
# Regenerate the cluster catalogs and cutout images.
base_directory = '/nfs/slac/g/ki/ki18/cpd/swap_catalog_diagnostics/'
cluster_directory = base_directory

## uncomment these lines when updating the shared catalog!
# base_directory = '/nfs/slac/g/ki/ki18/cpd/swap_catalog/'
# cluster_directory = base_directory + 'clusters/'


field_directory = base_directory
knownlens_path = base_directory + 'knownlens.csv'
collection_path = base_directory + 'annotated_catalog.csv'
catalog_path = cluster_directory + 'catalog.csv'

# On a rerun, clear out the stale cluster cutouts first;
# they are all named *_*.png.
from glob import glob
from os import remove
for stale_png in glob(cluster_directory + '*_*.png'):
    remove(stale_png)


# Invoke the catalog-creation script. This can take a while.
# The return code (0 on success) is the cell's displayed output.
from subprocess import call
command = ['python', '/nfs/slac/g/ki/ki18/cpd/code/strongcnn/code/create_catalogs.py',
           '--collection', collection_path,
           '--knownlens', knownlens_path,
           '--clusters', cluster_directory,
           '--fields', field_directory,
           #'--augment', augmented_directory,
           #'--do_a_few', '100',
           ]
call(command)


Out[2]:
0

In [ ]: