In [1]:
import holoviews as hv
# Render all holoviews objects in this notebook with the bokeh backend.
hv.notebook_extension('bokeh')
In [2]:
import dask
import dask.dataframe as dd
import pandas as pd
from distributed import Client
# Attach to an already-running dask.distributed scheduler via its scheduler file.
# NOTE(review): hardcoded absolute scratch path — only works on this cluster;
# consider reading the path from an environment variable.
client = Client(scheduler_file='/scratch/tmorton/dask/scheduler.json')
In [3]:
import sys
# Make the sibling `explorer` package importable (notebook lives one level below it).
sys.path.append('..')
from explorer.catalog import ParquetCatalog, MatchedCatalog
def get_coadd(rerun, filt, tracts, unforced=False):
    """Build a ParquetCatalog over per-tract coadd measurement tables.

    Parameters
    ----------
    rerun : str
        Root path of the rerun output directory.
    filt : str
        Filter name, e.g. 'HSC-Y'.
    tracts : iterable
        Tract numbers whose tables are combined into one catalog.
    unforced : bool, optional
        If True, load the 'unforced' tables; otherwise 'forced' (default).
    """
    if unforced:
        table = 'unforced'
    else:
        table = 'forced'
    paths = []
    for tract in tracts:
        paths.append('{}/plots/{}/tract-{}/{}.parq'.format(rerun, filt, tract, table))
    return ParquetCatalog(paths)
def get_visit(rerun, filt, tract, visit):
    """Build a ParquetCatalog for a single visit's catalog table.

    Parameters
    ----------
    rerun : str
        Root path of the rerun output directory.
    filt : str
        Filter name, e.g. 'HSC-Y'.
    tract, visit : int
        Tract and visit numbers identifying the catalog file.
    """
    template = '{}/plots/{}/tract-{}/visit-{}/catalog.parq'
    path = template.format(rerun, filt, tract, visit)
    return ParquetCatalog([path])
In [4]:
# Dataset under inspection; the rerun path is cluster-specific.
rerun = '/scratch/tmorton/hscRerun/DM-12043/SSP_DEEP_XMM_LSS'
filt = 'HSC-Y'
tract = 8767
visit = 39308
coadd_cat = get_coadd(rerun, filt, [tract])
visit_cat = get_visit(rerun, filt, tract, visit)
# Pair the coadd (forced) catalog with the single-visit catalog for matching.
matched = MatchedCatalog(coadd_cat, visit_cat)
In [5]:
# Run the catalog-to-catalog match (presumably positional — TODO confirm
# against explorer.catalog.MatchedCatalog).
matched.match()
In [8]:
# Parquet files backing the first (coadd) side of the match.
matched.cat1.filenames
Out[8]:
In [9]:
# Parquet files backing the second (visit) side of the match.
matched.cat2.filenames
Out[9]:
In [10]:
import hashlib
In [12]:
# md5 of the visit catalog file. Fix: the original passed an anonymous
# open(...) handle that was never closed; `with` closes it deterministically.
with open('/scratch/tmorton/hscRerun/DM-12043/SSP_DEEP_XMM_LSS/plots/HSC-Y/tract-8767/visit-39308/catalog.parq', 'rb') as _fh:
    h = hashlib.md5(_fh.read())
In [14]:
# Fold the forced-coadd file's bytes into the running md5. Fix: close the
# file handle via `with` instead of leaking it to the garbage collector.
with open('/scratch/tmorton/hscRerun/DM-12043/SSP_DEEP_XMM_LSS/plots/HSC-Y/tract-8767/forced.parq', 'rb') as _fh:
    h.update(_fh.read())
In [22]:
from functools import reduce
def get_md5(file):
    """Return a hashlib md5 object over the full binary contents of `file`.

    Parameters
    ----------
    file : str or path-like
        Path of the file to hash.

    Returns
    -------
    hashlib md5 hash object (call .hexdigest() for the digest string).
    """
    # Fix: the original never closed the file handle; `with` guarantees closure.
    with open(file, 'rb') as fh:
        return hashlib.md5(fh.read())
# One md5 hash object per parquet file, across both sides of the match.
hs = [get_md5(f) for f in matched.cat1.filenames + matched.cat2.filenames]
In [34]:
# Incrementally hash the concatenation of all files: seed with the first
# file's md5, then fold in each remaining file's bytes.
files = matched.cat1.filenames + matched.cat2.filenames
h = get_md5(files[0])
for f in files[1:]:
    # Fix: close each handle promptly (the original leaked one per iteration).
    with open(f, 'rb') as fh:
        h.update(fh.read())
In [43]:
from operator import add
def _read_bytes(path):
    """Read and return a file's full contents, closing the handle."""
    with open(path, 'rb') as fh:
        return fh.read()

# Fix: the original computed the same reduce() twice (the lambda result was
# immediately overwritten by the operator.add version) and leaked one open
# file handle per element of the comprehension.
all_str = reduce(add, [_read_bytes(f) for f in files])
# Digest of all files' bytes concatenated — should equal the incremental result.
hashlib.md5(all_str).hexdigest()
Out[43]:
In [37]:
# md5 of the first two files' bytes concatenated. Fix: the original leaked
# both open() handles; `with` closes them deterministically.
with open(files[0], 'rb') as _f0, open(files[1], 'rb') as _f1:
    h = hashlib.md5(_f0.read() + _f1.read())
In [38]:
# Digest of files[0] + files[1] concatenated (from the cell above).
h.hexdigest()
Out[38]:
In [33]:
# NOTE(review): execution count In[33] is out of order — on a fresh
# Restart-and-Run-All this shows the digest from the previous cell, not
# the incremental all-files digest it originally displayed.
h.hexdigest()
Out[33]:
In [ ]:
In [6]:
# Index pairing between matched sources — presumably row indices into the
# two catalogs; TODO confirm against explorer.catalog.MatchedCatalog.
matched.match_inds
Out[6]:
In [7]:
# Per-pair match separation (units not visible here — verify in MatchedCatalog).
matched.match_distance
Out[7]:
In [6]:
from explorer.functors import (Mag, MagDiff, CustomFunctor, DeconvolvedMoments, Column,
SdssTraceSize, PsfSdssTraceSizeDiff, HsmTraceSize,
PsfHsmTraceSizeDiff, CompositeFunctor)
# CModel-minus-PSF magnitude difference functor (defined but unused below —
# NOTE(review): consider removing if not needed).
f = MagDiff('modelfit_CModel', 'base_PsfFlux')
# PSF magnitude functor; this is the QA quantity fed to QADataset below.
mag = Mag('base_PsfFlux')
In [7]:
from explorer.plots import multi_scattersky, FlagSetter, FilterStream
In [8]:
from explorer.dataset import QADataset
# Bundle the matched catalog with the PSF-magnitude functor, carrying the
# listed flag columns through for interactive filtering.
data = QADataset(matched, mag, flags=['calib_psfUsed', 'qaBad_flag'])
In [9]:
# Underlying dataset built by QADataset — presumably a holoviews Dataset,
# given the groupby('label') usage below; TODO confirm.
data.ds
Out[9]:
In [10]:
# Stream that shares flag-filter state between the plot and the widgets below.
filter_stream = FilterStream()
# Linked scatter/sky panels, one per 'label' group in the dataset.
plot = multi_scattersky(data.ds.groupby('label'), filter_stream=filter_stream)
In [11]:
# Display the linked scatter/sky layout.
plot
Out[11]:
In [12]:
import parambokeh
# Widget panel for toggling flag filters; callbacks push updates through
# filter_stream so the plot above reacts live.
# NOTE(review): flags and bad_flags are both data.flags — confirm that
# passing the same list for both is intended.
flag_setter = FlagSetter(filter_stream=filter_stream, flags=data.flags, bad_flags=data.flags)
parambokeh.Widgets(flag_setter, callback=flag_setter.event, push=False, on_init=True)
In [ ]:
# Show the MatchedCatalog repr (scratch inspection cell).
matched
In [ ]: