In [1]:
import holoviews as hv
# Activate HoloViews' notebook integration, selecting Bokeh as the plotting backend.
hv.notebook_extension('bokeh')
In [2]:
import dask
import dask.dataframe as dd
import pandas as pd
from distributed import Client
# Connect to the Dask scheduler described by this scheduler file.
# NOTE(review): the path is absolute and user-specific, so this cell only works
# on a machine where that file exists.
client = Client(scheduler_file='/scratch/tmorton/dask/scheduler.json')
In [3]:
import glob
import sys
sys.path.append('..')
from explorer.catalog import ParquetCatalog, MatchedCatalog
def get_files(rerun, filt, tracts, unforced=False):
    """Build one Parquet table path per tract.

    Parameters
    ----------
    rerun : str
        Leading path component of every returned path.
    filt : str
        Filter name, inserted after the ``plots`` directory.
    tracts : iterable
        Tract identifiers; one path is produced for each, in order.
    unforced : bool, optional
        When true the table file is named ``unforced.parq``, otherwise
        ``forced.parq``.

    Returns
    -------
    list of str
        Paths of the form ``<rerun>/plots/<filt>/tract-<tract>/<table>.parq``.
    """
    if unforced:
        table = 'unforced'
    else:
        table = 'forced'

    template = '{}/plots/{}/tract-{}/{}.parq'
    paths = []
    for tract in tracts:
        paths.append(template.format(rerun, filt, tract, table))
    return paths
# Filter and rerun directory shared by every catalog built in this cell.
filt = 'HSC-Z'
rerun = '/scratch/tmorton/hscRerun/DM-10859'

# Table paths: one tract for the "cosmos" set, two tracts for the "wide" set,
# each in a forced and an unforced variant.
cosmos_files = get_files(rerun, filt, [9813], unforced=False)
cosmos_files_unforced = get_files(rerun, filt, [9813], unforced=True)
wide_files = get_files(rerun, filt, [8766, 8767], unforced=False)
wide_files_unforced = get_files(rerun, filt, [8766, 8767], unforced=True)

# Build the four catalogs in the same order as the path lists above, all
# sharing the Dask client created earlier.
cosmos, cosmos_unforced, wide, wide_unforced = (
    ParquetCatalog(paths, client=client)
    for paths in (cosmos_files, cosmos_files_unforced,
                  wide_files, wide_files_unforced)
)
In [4]:
import numpy as np
import fastparquet
def perturb_coords(df, scatter=0.01, random_state=None):
    """Return a copy of ``df`` with a rescaled index and randomly offset coordinates.

    The input frame is not modified. In the copy, the index is cast to ``int``
    and multiplied by 10, and the ``coord_ra`` / ``coord_dec`` columns receive
    a random offset of Gaussian-distributed size and uniformly random direction.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``coord_ra`` and ``coord_dec`` columns and an index that
        can be cast to ``int``.
    scatter : float, optional
        Scale of the Gaussian offset. It is multiplied by ``1/3600`` and
        ``pi/180`` before being added, which suggests arcseconds in and
        radians out -- TODO confirm against the catalog's coordinate units.
    random_state : int or None, optional
        Seed for a private ``numpy.random.RandomState``. With the default
        ``None`` the draws come from NumPy's global generator, as before.

    Returns
    -------
    pandas.DataFrame
        The perturbed copy.
    """
    # A seed makes the generated test catalogs reproducible; without one the
    # global generator is used so existing callers see unchanged behaviour.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    new_df = df.copy()
    # NOTE(review): the x10 presumably keeps the perturbed ids distinct from
    # the originals -- confirm with how the matched catalogs use the index.
    new_df.index = new_df.index.astype(int) * 10

    n = len(new_df)
    # Offset size (Gaussian) and direction angle (uniform in [0, 2*pi)).
    r = rng.randn(n) * scatter * 1. / 3600 * np.pi / 180
    pa = rng.random_sample(n) * 2 * np.pi

    # NOTE(review): the RA offset is applied without a cos(dec) factor.
    # Explicit item assignment avoids attribute-style column mutation.
    new_df['coord_ra'] = new_df['coord_ra'] + r * np.cos(pa)
    new_df['coord_dec'] = new_df['coord_dec'] + r * np.sin(pa)
    return new_df
def write_test_cats(frac=0.01):
    """Write small sampled test catalogs, plus perturbed copies, under ``test_data/``.

    Builds one ``ParquetCatalog`` for each of the first two entries of the
    module-level ``wide_files`` list, samples a fraction of each catalog's
    ``df_all``, and writes the samples and their ``perturb_coords`` copies as
    Parquet files.

    Parameters
    ----------
    frac : float, optional
        Fraction of rows to sample from each catalog.
    """
    import os  # local import: only this helper needs it

    def _write(path, frame):
        # Write one frame, creating the target directory first if it is missing.
        out_dir = os.path.dirname(path)
        if out_dir and not os.path.isdir(out_dir):
            os.makedirs(out_dir)
        fastparquet.write(path, frame)

    wide1 = ParquetCatalog([wide_files[0]])
    wide2 = ParquetCatalog([wide_files[1]])

    # Pass the fraction by keyword. df_all is presumably a dask DataFrame (it
    # is followed by .compute()); in current dask the first positional
    # parameter of sample() is `n`, which dask does not support.
    df1 = wide1.df_all.sample(frac=frac).compute()
    df2 = wide2.df_all.sample(frac=frac).compute()

    _write('test_data/8766/forced.parq', df1)
    _write('test_data/8767/forced.parq', df2)

    # Perturbed copies go into sibling "*_perturb" directories.
    _write('test_data/8766_perturb/forced.parq', perturb_coords(df1))
    _write('test_data/8767_perturb/forced.parq', perturb_coords(df2))
In [5]:
# Uncomment to (re)generate the sampled catalogs under test_data/ that the next cell reads.
# write_test_cats()
In [6]:
from explorer.catalog import MatchedCatalog
# Sampled test tables and their coordinate-perturbed counterparts.
files1 = [
    'test_data/8766/forced.parq',
    'test_data/8767/forced.parq',
]
files2 = [
    'test_data/8766_perturb/forced.parq',
    'test_data/8767_perturb/forced.parq',
]

# One catalog per file list (no Dask client passed here), then pair them up.
cat1, cat2 = ParquetCatalog(files1), ParquetCatalog(files2)
matched = MatchedCatalog(cat1, cat2)
In [7]:
from explorer.functors import (Mag, MagDiff, CustomFunctor, DeconvolvedMoments, Column,
SdssTraceSize, PsfSdssTraceSizeDiff, HsmTraceSize,
PsfHsmTraceSizeDiff, CompositeFunctor)
# The two functors evaluated below: Mag on 'base_PsfFlux', and MagDiff
# between 'modelfit_CModel' and 'base_PsfFlux'.
funcs = [
    Mag('base_PsfFlux'),
    MagDiff('modelfit_CModel', 'base_PsfFlux'),
]

# Composite functor exposing the two functors under the keys 'x' and 'y'.
f = CompositeFunctor(dict(zip(['x', 'y'], funcs)))
In [8]:
from explorer.catalog import MatchedCatalog
# Pair each forced catalog with its unforced counterpart for the same tracts.
cosmos_matched = MatchedCatalog(cosmos, cosmos_unforced)
wide_matched = MatchedCatalog(wide, wide_unforced)
In [9]:
# Time the matching step on its own.
# NOTE(review): _match_cats is an underscore-prefixed (private) method of
# MatchedCatalog; presumably it performs the cross-match -- confirm there is
# no public entry point that should be used instead.
%time wide_matched._match_cats()
In [10]:
from explorer.dataset import QADataset
# Build a QADataset from the matched wide catalog and the first functor in
# `funcs` (Mag('base_PsfFlux')).
matched_data = QADataset(wide_matched, funcs[0])
In [11]:
# Preview the first rows of the dataset's `df` attribute.
matched_data.df.head()
Out[11]:
In [12]:
# Display the dataset's `ds` attribute, which is what the plot below consumes.
matched_data.ds
Out[12]:
In [13]:
# NOTE(review): scattersky is imported here but not used in the visible cells.
from explorer.plots import scattersky, multi_scattersky
multi_scattersky(matched_data.ds)
Out[13]:
In [13]:
# NOTE(review): this is a second cell labelled In [13]; the execution counts
# from here on are not sequential, so verify with Restart & Run All.
# Show the mapping of names to callables that the cells below index by key.
matched_data.allfuncs
Out[13]:
In [17]:
# Evaluate each entry of matched_data.allfuncs separately on the matched
# catalog so the individual results can be inspected.
dec = matched_data.allfuncs['dec'](wide_matched)
In [18]:
label = matched_data.allfuncs['label'](wide_matched)
In [19]:
ra = matched_data.allfuncs['ra'](wide_matched)
In [40]:
# NOTE(review): execution count 40 sits between 19 and 23, so this cell was
# re-run after the cells that follow it in the file.
x = matched_data.allfuncs['x'](wide_matched)
In [23]:
y0 = matched_data.allfuncs['y0'](wide_matched)
In [24]:
# NOTE(review): `d` is not referenced again in the visible cells.
d = {'dec':dec, 'label':label, 'ra':ra, 'x':x, 'y0':y0}
In [26]:
# Compare the lengths of the five results.
len(dec), len(label), len(ra), len(x), len(y0)
Out[26]:
In [28]:
# Display the index of each result, one cell per result.
dec.index
Out[28]:
In [29]:
ra.index
Out[29]:
In [30]:
label.index
Out[30]:
In [31]:
x.index
Out[31]:
In [32]:
y0.index
Out[32]:
In [36]:
# Set the `name` attribute on both results in place.
# NOTE(review): `x` was re-evaluated later (In [40]), so this assignment
# applied to an earlier `x` object.
x.name = 'x'
In [37]:
y0.name = 'y0'
In [42]:
# Count how many index labels in `x` are duplicates.
x.index.duplicated().sum()
Out[42]:
In [38]:
# Combine the two results into one frame.
# NOTE(review): this cell ran at In [38], before `x` was re-evaluated at
# In [40], so `df` may hold an older `x` than the one currently in scope.
df = pd.DataFrame({'x':x, 'y0':y0})
In [44]:
# Evaluate the composite functor `f` on the matched catalog and on each of
# its two input catalogs, using the default arguments.
y = f(wide_matched)
In [45]:
y2 = f(wide)
In [46]:
y3 = f(wide_unforced)
In [47]:
# Compare result lengths across the three catalogs.
len(y)
Out[47]:
In [48]:
len(y2)
Out[48]:
In [49]:
len(y3)
Out[49]:
In [50]:
# Lengths of the two match-index arrays on the matched catalog, for comparison.
len(wide_matched.match_inds1)
Out[50]:
In [51]:
len(wide_matched.match_inds2)
Out[51]:
In [52]:
# Repeat the evaluations with dropna=False to compare lengths against the
# default-argument runs above.
z = f(wide_matched, dropna=False)
In [53]:
len(z)
Out[53]:
In [54]:
z2 = f(wide, dropna=False)
In [55]:
z3 = f(wide_unforced, dropna=False)
In [56]:
len(z2)
Out[56]:
In [57]:
len(z3)
Out[57]:
In [ ]: