In [1]:
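# Load HoloViews and activate the Bokeh plotting backend for this notebook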
import holoviews as hv
hv.notebook_extension('bokeh')
In [2]:
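# Connect to an already-running dask.distributed cluster via its scheduler file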
import dask
import dask.dataframe as dd
import pandas as pd
from distributed import Client
client = Client(scheduler_file='/scratch/tmorton/dask/scheduler.json')
In [3]:
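# Functors map raw catalog columns to derived quantities (PSF/Gaussian magnitudes,
# seeing, star/galaxy label); a CompositeFunctor bundles several under named keys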
from explorer.dataset import QADataset
from explorer.functors import (Mag, MagDiff, CustomFunctor, DeconvolvedMoments, Column,
                               SdssTraceSize, PsfSdssTraceSizeDiff, HsmTraceSize, Seeing,
                               PsfHsmTraceSizeDiff, CompositeFunctor, StarGalaxyLabeller)
# mag_diff = MagDiff('modelfit_CModel', 'base_PsfFlux')
psf_mag = Mag('base_PsfFlux')
gauss_mag = Mag('base_GaussianFlux')
# seeing = CustomFunctor('0.168*2.35*sqrt(0.5*(ext_shapeHSM_HsmPsfMoments_xx**2 + ext_shapeHSM_HsmPsfMoments_yy**2))')
seeing = Seeing()
label = StarGalaxyLabeller()
funcs = CompositeFunctor({'psf':psf_mag, 'gauss':gauss_mag, 'seeing':seeing, 'label':label})
In [4]:
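# Helper to build a MultiMatchedCatalog from the test data: one coadd catalog matched
# against a few per-visit catalogs (match results presumably cached in match_registry)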
# import sys
# sys.path.append('..')
from explorer.catalog import ParquetCatalog, MatchedCatalog, MultiMatchedCatalog
from explorer.utils import get_visits
import glob, re
def parse_visit(filename):
    # Extract the integer visit number from a path containing 'visit-<number>'
    m = re.search(r'visit-(\d+)', filename)
    return int(m.group(1))
def get_test_catalog(n_visits=3):
    # Coadd-level forced-photometry catalog for one tract
    coadd_cat = ParquetCatalog(['test_data/HSC-G/tract-8766/forced.parq'])
    # Per-visit catalogs, each named by its visit number
    filenames = glob.glob('test_data/HSC-G/tract-8766/visit*/catalog.parq')
    filenames.sort()
    visit_cats = [ParquetCatalog([f], name=parse_visit(f)) for f in filenames[2:2+n_visits]]
    return MultiMatchedCatalog(coadd_cat, visit_cats, match_registry='test_registry.h5')
In [5]:
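# Instantiate the test catalog (coadd plus 3 visits by default)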
cat = get_test_catalog()
In [6]:
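# Run the coadd-to-visit source matching; subsequent runs presumably reuse the match registry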
cat.match()
In [7]:
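# A QADataset ties the matched catalog to the functors to evaluate, plus flag columns,
# with computation dispatched to the dask client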
data = QADataset(cat, {'psf':psf_mag, 'gauss':gauss_mag, 'seeing':seeing}, flags=['calib_psfUsed', 'qaBad_flag'], client=client)
In [8]:
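# Accessing data.df presumably triggers the functor computation; %time reports how long it takes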
%time data.df.head()
In [9]:
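# data.ds exposes the results as a HoloViews Dataset, used by the plotting helpers below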
data.ds
Out[9]:
In [10]:
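# Set default Bokeh options for Points plots, then build an interactive per-visit explorer;
# the FilterStream is shared so that flag-based filtering presumably stays linked across plots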
%opts Points [width=600, height=400, tools=['hover'], color_index='y', colorbar=True] (cmap='coolwarm', size=4)
from holoviews.operation import decimate
from explorer.plots import FilterStream
filter_stream = FilterStream()
dmap = data.visit_explore(filter_stream=filter_stream)
dmap
Out[10]:
In [11]:
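# Scatter + sky plots for each computed quantity, split by star/galaxy label and
# linked to the same filter_stream as the visit explorer above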
from explorer.plots import scattersky, multi_scattersky, FilterStream
multi_scattersky(data.ds.groupby('label'), filter_stream=filter_stream)
Out[11]: