In [1]:
import numpy as np
import os
import pandas as pd
import seaborn as sns
import fitsio
from astrometry.util.fits import fits_table, merge_tables
from glob import glob
# to make this notebook's output stable across runs
np.random.seed(7)
# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
%load_ext autoreload
%autoreload 2
In [2]:
from obiwan.common import fits2pandas
In [3]:
DATA_DIR = os.path.join(os.environ['HOME'],'Downloads',
'truth')
dr3_fns= glob(os.path.join(DATA_DIR,
'dr3.1/trimmed/decals-dr3.1-deep2-field*-trim.fits'))
dp2_fns= glob(os.path.join(DATA_DIR,
'dr3.1/trimmed/deep2-field*-trim.fits'))
dr3_fns,dp2_fns
Out[3]:
In [176]:
def stack_tables(fns):
cat=[]
assert(len(fns) > 0)
for fn in fns:
assert(os.path.exists(fn))
print('Stacking %s' % fn)
cat.append( fits_table(fn) )
return merge_tables(cat, columns='fillzero')
dr3= stack_tables(dr3_fns)
dp2= stack_tables(dp2_fns)
len(dr3),len(dp2)
Out[176]:
In [5]:
dr3.get_columns(), dp2.get_columns()
Out[5]:
In [125]:
set(dr3.type)
Out[125]:
In [177]:
grz_gt0= (np.all(dr3.decam_flux[:,[1,2,4]] > 0,axis=1) &
np.all(dr3.decam_flux_ivar[:,[1,2,4]] > 0,axis=1))
notCOMP= dr3.type != 'COMP'
len(dr3[grz_gt0]), len(dr3[~notCOMP])
Out[177]:
In [178]:
redshift_gt0= dp2.zhelio > 0
#keep= ((dp2.zhelio >= 0.6) &
# (dp2.zhelio <= 1.7) &
hasO2= ((dp2.oii_3727 >= 0.) &
(dp2.oii_3727_err > 0.))
hiO2= ((dp2.oii_3727 > 8.e-17) &
(dp2.oii_3727_err > 0.))
print(len(dp2[hasO2]),dp2[hasO2].zhelio.min(),dp2[hasO2].zhelio.max())
print(len(dp2[hiO2]),dp2[hiO2].zhelio.min(),dp2[hiO2].zhelio.max())
print(len(dp2[~hasO2]),dp2[~hasO2].zhelio.min(),dp2[~hasO2].zhelio.max())
_=plt.hist(dp2.zhelio[hasO2],bins=30,histtype='step',color='b',
label='hasO2',normed=True)
_=plt.hist(dp2.zhelio[~hasO2],bins=30,histtype='step',color='g',
label='not hasO2',normed=True)
In [156]:
set(dr3.type)
Out[156]:
In [179]:
fwhm_or_rhalf= np.zeros(len(dr3))-1 # arcsec
isPSF= dr3.type == 'PSF'
isEXP= dr3.type == 'EXP'
isSIMP= dr3.type == 'SIMP'
isDEV= dr3.type == 'DEV'
fwhm_or_rhalf[isPSF]= np.mean(dr3[isPSF].decam_psfsize[:,[1,2,4]],axis=1)
fwhm_or_rhalf[isSIMP]= 0.5
fwhm_or_rhalf[isEXP]= dr3[isEXP].shapeexp_r
fwhm_or_rhalf[isDEV]= dr3[isDEV].shapedev_r
dr3.set('fwhm_or_rhalf',fwhm_or_rhalf)
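A quick tally of how many sources fall in each category used for the size proxy (PSF gets the mean grz PSF FWHM, SIMP a fixed 0.5 arcsec, EXP/DEV their half-light radius); a small sketch, using the masks defined above:
In [ ]:
# sketch: count sources per morphological type used for fwhm_or_rhalf
for name, mask in [('PSF', isPSF), ('SIMP', isSIMP), ('EXP', isEXP), ('DEV', isDEV)]:
    print(name, mask.sum())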
In [180]:
print(set(dr3.fwhm_or_rhalf[~notCOMP]))
_= plt.hist(dr3.fwhm_or_rhalf[(fwhm_or_rhalf < 5) & (notCOMP)])
In [181]:
#print(len(dr3[grz_gt0]),len(dr3[notCOMP]),len(dr3[fwhm_or_rhalf]), len(dr3[redshift_gt0]))
keep= ((grz_gt0) &
(notCOMP) &
(fwhm_or_rhalf < 5) &
(redshift_gt0))
# (hasO2))
dr3.cut(keep)
dp2.cut(keep)
len(dr3)
Out[181]:
In [182]:
_= plt.hist(dr3.fwhm_or_rhalf)
In [183]:
def flux2mag(nmgy):
return -2.5 * (np.log10(nmgy) - 9)
def deredden_flux_ivar(flux_ivar,ext):
return flux_ivar * np.power(ext,2)
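A quick sanity check on the nanomaggy zero point (1 nanomaggy corresponds to 22.5 AB mag); mag2flux below is a hypothetical helper used only for this round-trip check, not elsewhere in the notebook:
In [ ]:
import numpy as np

def mag2flux(mag):
    # hypothetical inverse of flux2mag, for round-trip checking only
    return 10.**(9. - mag/2.5)

assert np.isclose(flux2mag(1.0), 22.5)           # 1 nmgy <-> 22.5 AB mag
assert np.isclose(mag2flux(flux2mag(3.5)), 3.5)  # round trip recovers the flux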
In [184]:
d= {}
for i,b in zip([1,2,4],'grz'):
d[b]= flux2mag(dr3.get('decam_flux')[:,i]/dr3.get('decam_mw_transmission')[:,i])
#d[b+'_ivar']= flux2mag(dr3.get('decam_flux_ivar')[:,i]/dr3.get('decam_mw_transmission')[:,i])
d['redshift']= dp2.get('zhelio')
d['fwhm_or_rhalf']= dr3.fwhm_or_rhalf
d['type']= dr3.get('type')
d['oii']= dp2.oii_3727
d['oii_err']= dp2.oii_3727_err
df= pd.DataFrame(d)
In [185]:
df['type'].value_counts()/len(df)
Out[185]:
In [186]:
df.describe()
Out[186]:
In [187]:
df.hist(bins=20,figsize=(12,8))
Out[187]:
In [188]:
def get_xy_pad(slope,pad):
"""Returns dx,dy"""
theta= np.arctan(abs(slope))
return pad*np.sin(theta), pad*np.cos(theta)
def y1_line(rz,pad=None):
slope,yint= 1.15,-0.15
if pad:
dx,dy= get_xy_pad(slope,pad)
return slope*(rz+dx) + yint + dy
else:
return slope*rz + yint
def y2_line(rz,pad=None):
slope,yint= -1.2,1.6
if pad:
dx,dy= get_xy_pad(slope,pad)
return slope*(rz-dx) + yint + dy
else:
return slope*rz + yint
def get_ELG_box(rz,gr, pad=None):
"""
Args:
rz: r-z
gr: g-r
pad: magnitudes of padding to expand TS box
"""
x1,y1= rz,y1_line(rz)
x2,y2= rz,y2_line(rz)
x3,y3= np.array([0.3]*len(rz)),gr
if pad:
dx,dy= get_xy_pad(1.15,pad)
x1,y1= x1-dx,y1+dy
dx,dy= get_xy_pad(-1.2,pad)
x2,y2= x2+dx,y2+dy
x3 -= pad
return dict(x1=x1, y1=y1,
x2=x2, y2=y2,
x3=x3, y3=y3)
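For reference, the padded box translates directly into the boolean cut applied as inBox further down; a sketch of that membership test (a pad of 0.5 mag is assumed, with the r-z >= 0.3 edge padded the same way):
In [ ]:
# sketch: padded ELG box membership for arrays of r-z and g-r colors
def in_elg_box(rz, gr, pad=0.5):
    return ((gr <= y1_line(rz, pad=pad)) &
            (gr <= y2_line(rz, pad=pad)) &
            (rz >= 0.3 - pad))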
In [189]:
df['r-z']= df['r'] - df['z']
df['g-r']= df['g'] - df['r']
In [190]:
fig,ax=plt.subplots()
ax.scatter(df['r-z'],df['g-r'],alpha=0.5)
ax.set_xlabel('r-z')
ax.set_ylabel('g-r')
ax.set_ylim(-0.3,2)
ax.set_xlim(-0.6,2.2)
rz= np.linspace(ax.get_xlim()[0],ax.get_xlim()[1])
gr= np.linspace(ax.get_ylim()[0],ax.get_ylim()[1])
nopad= get_ELG_box(rz,gr)
pad= get_ELG_box(rz,gr,pad=0.5)
for d,color in zip([nopad,pad],['k','r']):
ax.plot(d['x1'],d['y1'],c=color,ls='--',lw=2)
ax.plot(d['x2'],d['y2'],c=color,ls='--',lw=2)
ax.plot(d['x3'],d['y3'],c=color,ls='--',lw=2)
In [298]:
def inSGC_cut(rz,gr):
return ((-0.068*rz + 0.457 < gr) &
(gr < 0.112*rz + 0.773) &
(0.218*gr + 0.571 < rz) &
(rz < -0.555*gr + 1.901))
In [ ]:
def get_xy_pad(slope,pad):
"""Returns dx,dy"""
theta= np.arctan(abs(slope))
return pad*np.sin(theta), pad*np.cos(theta)
def y1_line(rz,pad=None):
slope,yint= -0.068,0.457
if pad:
dx,dy= get_xy_pad(slope,pad)
return slope*(rz+dx) + yint + dy
else:
return slope*rz + yint
def y2_line(rz,pad=None):
slope,yint= 0.112,0.773
if pad:
dx,dy= get_xy_pad(slope,pad)
return slope*(rz-dx) + yint + dy
else:
return slope*rz + yint
def y3_line(gr,pad=None):
slope,yint= 0.218,0.571
if pad:
dx,dy= get_xy_pad(slope,pad)
return slope*(gr+dx) + yint + dy
else:
return slope*gr + yint
def y4_line(gr,pad=None):
slope,yint= -0.555,1.901
if pad:
dx,dy= get_xy_pad(slope,pad)
return slope*(gr-dx) + yint + dy
else:
return slope*gr + yint
def get_ELG_box(rz,gr, pad=None):
"""
Args:
rz: r-z
gr: g-r
pad: magnitudes of padding to expand TS box
"""
x1,y1= rz,y1_line(rz)
x2,y2= rz,y2_line(rz)
x3,y3= y3_line(gr),gr
x4,y4= y4_line(gr),gr
if pad:
dx,dy= get_xy_pad(1.15,pad)
x1,y1= x1-dx,y1+dy
dx,dy= get_xy_pad(-1.2,pad)
x2,y2= x2+dx,y2+dy
x3 -= pad
return dict(x1=x1, y1=y1,
x2=x2, y2=y2,
x3=x3, y3=y3,
x4=x4, y4=y4)
In [299]:
inSGC= inSGC_cut(df['r-z'],df['g-r'])
fig,ax=plt.subplots()
ax.scatter(df.loc[inSGC,'r-z'], df.loc[inSGC,'g-r'],alpha=0.5)
#ax.plot(pad['x1'],pad['y1'],c=color,ls='--',lw=2)
#ax.plot(pad['x2'],pad['y2'],c=color,ls='--',lw=2)
#ax.plot(pad['x3'],pad['y3'],c=color,ls='--',lw=2)
ax.set_xlabel('r-z')
ax.set_ylabel('g-r')
ax.set_ylim(-0.3,2)
ax.set_xlim(-0.6,2.2)
Out[299]:
In [192]:
# inBox: padded ELG box cut (same definition used for the DR2 and DR5 sections below)
inBox= ((df['g-r'] <= y1_line(df['r-z'],pad=0.5)) &
(df['g-r'] <= y2_line(df['r-z'],pad=0.5)) &
(df['r-z'] >= 0.3 - 0.5))
df[inBox].hist(bins=20,figsize=(12,8))
Out[192]:
In [ ]:
In [204]:
attrs= ['redshift','g','r','z','fwhm_or_rhalf']
hasO2= ((df['oii'] >= 0.) &
(df['oii_err'] > 0.))
hiO2= ((df['oii'] > 8.e-17) &
(df['oii_err'] > 0.))
# pad the redshift window by 0.2 on each side so the MoG fit is better behaved at the edges
complDP2_buff= ((df['redshift'] >= 0.8-0.2) &
(df['redshift'] <= 1.4+0.2))
fig,ax= plt.subplots(2,3,figsize=(12,10))
i=-1
for row in range(2):
for col in range(3):
i+=1
if i >= len(attrs):
continue
_=ax[row,col].hist(df.loc[inBox,attrs[i]],histtype='step',normed=True,
color='b',label='inBox')
_=ax[row,col].hist(df.loc[complDP2_buff,attrs[i]],histtype='step',normed=True,
color='k',label='desiRed')
_=ax[row,col].hist(df[attrs[i]],histtype='step',normed=True,
color='r',label='All')
ax[row,col].set_xlabel(attrs[i])
if i == 0:
ax[row,col].legend()
In [205]:
df.columns
Out[205]:
In [206]:
cols=['fwhm_or_rhalf', 'g', 'r', 'redshift', 'z']
df.loc[complDP2_buff,cols].hist(bins=20,figsize=(12,8))
Out[206]:
In [210]:
# MoG fit
from sklearn import mixture
def my_mixture(X):
# Compute density via Gaussian Mixtures
# we'll try several numbers of clusters
n_components = np.arange(3, 16)
gmms = [mixture.GaussianMixture(n_components=n).fit(X)
for n in n_components]
BICs = [gmm.bic(X)/X.shape[0] for gmm in gmms]
i_min = np.argmin(BICs)
print("%d components" % n_components[i_min])
return gmms[i_min], n_components, BICs
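GaussianMixture fits depend on their random initialization, so the selected number of components can vary from run to run; here is a sketch of the same per-sample-BIC selection with a fixed random_state (an assumption about desired behaviour, not something used in the fits below):
In [ ]:
# sketch: same BIC selection as my_mixture, but seeded for reproducibility
def my_mixture_seeded(X, seed=7):
    n_components= np.arange(3, 16)
    gmms= [mixture.GaussianMixture(n_components=n, random_state=seed).fit(X)
           for n in n_components]
    BICs= [g.bic(X)/X.shape[0] for g in gmms]
    return gmms[int(np.argmin(BICs))], n_components, BICs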
In [211]:
np.argmin(np.arange(5,15))
Out[211]:
In [220]:
df_new= df.loc[(complDP2_buff) & (inBox),
['fwhm_or_rhalf', 'g-r', 'r-z','z','redshift']]
In [221]:
X= df_new.values
gmm, n_comp, BICs= my_mixture(X)
logprob= gmm.score_samples(X)
responsibilities = gmm.predict_proba(X)
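If the selected mixture is to be reused outside this notebook (e.g. as a sampling prior), it can simply be serialized; a sketch with an arbitrary, hypothetical filename:
In [ ]:
import pickle
# sketch: persist the selected mixture (filename is arbitrary)
with open('elg_dr3_mog.pkl','wb') as f:
    pickle.dump(gmm, f)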
In [222]:
plt.plot(n_comp,BICs)
Out[222]:
In [223]:
Xpred,Ypred= gmm.sample(10000)
Xpred.shape
Out[223]:
In [224]:
d={}
for i,col in enumerate(['fwhm_or_rhalf', 'g-r', 'r-z','z','redshift']):
d[col]= Xpred[:,i]
df_pred= pd.DataFrame(d)
In [238]:
fig,ax= plt.subplots(1,2,figsize=(10,5))
ax[0].scatter(df_new['r-z'],df_new['g-r'],color='b',alpha=0.05)
ax[1].scatter(df_pred['r-z'],df_pred['g-r'],color='r',alpha=0.05)
for i in range(2):
ax[i].set_xlabel('r-z')
ax[i].set_xlim(-0.5,2.5)
ax[i].set_ylim(-1.5,1.8)
ax[0].set_ylabel('g-r')
Out[238]:
In [226]:
fig,ax= plt.subplots(1,3,figsize=(14,5))
_,bins,_= ax[0].hist(df_new['z'],bins=20,histtype='step',color='b',normed=True)
_=ax[0].hist(df_pred['z'],bins=20,histtype='step',color='r',normed=True)
ax[0].set_xlabel('z mag')
_,bins,_= ax[1].hist(df_new['redshift'],bins=20,histtype='step',color='b',normed=True)
_=ax[1].hist(df_pred['redshift'],bins=20,histtype='step',color='r',normed=True)
ax[1].set_xlabel('redshift')
_,bins,_= ax[2].hist(df_new['fwhm_or_rhalf'],bins=20,histtype='step',color='b',normed=True)
_=ax[2].hist(df_pred['fwhm_or_rhalf'],bins=20,histtype='step',color='r',normed=True)
ax[2].set_xlabel('fwhm_or_rhalf')
print(df_new['fwhm_or_rhalf'].min())
In [237]:
fig,ax= plt.subplots(1,2,figsize=(10,5))
ax[0].scatter(df_new['z'],df_new['fwhm_or_rhalf'],color='b',alpha=0.05)
ax[1].scatter(df_pred['z'],df_pred['fwhm_or_rhalf'],color='r',alpha=0.05)
for i in range(2):
ax[i].set_xlabel('z mag')
ax[0].set_ylabel('fwhm_or_rhalf')
for i in range(2):
ax[i].set_xlim(19,26)
ax[i].set_ylim(-1,4.5)
In [236]:
fig,ax= plt.subplots(1,2,figsize=(10,5))
ax[0].scatter(df_new['z'],df_new['redshift'],color='b',alpha=0.05)
ax[1].scatter(df_pred['z'],df_pred['redshift'],color='r',alpha=0.05)
for i in range(2):
ax[i].set_xlabel('z mag')
ax[0].set_ylabel('redshift')
for i in range(2):
ax[i].set_xlim(19,26)
ax[i].set_ylim(0.5,1.7)
In [234]:
fig,ax= plt.subplots(2,2,figsize=(10,10))
ax[0,0].scatter(df_new['r-z'],df_new['redshift'],color='b',alpha=0.05)
ax[0,1].scatter(df_pred['r-z'],df_pred['redshift'],color='r',alpha=0.05)
for i in range(2):
ax[0,i].set_xlabel('r-z')
ax[0,i].set_xlim(-0.6,3)
ax[0,i].set_ylim(0.2,2)
ax[1,0].scatter(df_new['g-r'],df_new['redshift'],color='b',alpha=0.05)
ax[1,1].scatter(df_pred['g-r'],df_pred['redshift'],color='r',alpha=0.05)
for i in range(2):
ax[1,i].set_xlabel('g-r')
ax[1,i].set_xlim(-2,2)
ax[1,i].set_ylim(0.2,2)
for i in range(2):
ax[i,0].set_ylabel('redshift')
In [258]:
dr2_fns= glob(os.path.join(DATA_DIR,
'dr2/decals-dr2-deep2-field*.fits.gz'))
dp2_fns= glob(os.path.join(DATA_DIR,
'dr2/deep2-field*.fits.gz'))
print(dr2_fns,dp2_fns)
dr3= stack_tables(dr2_fns)
dp2= stack_tables(dp2_fns)
In [259]:
grz_gt0= (np.all(dr3.decam_flux[:,[1,2,4]] > 0,axis=1) &
np.all(dr3.decam_flux_ivar[:,[1,2,4]] > 0,axis=1))
notCOMP= dr3.type != 'COMP'
#redshift_gt0= dp2.zhelio > 0
complDP2_buff= ((dp2.zhelio >= 0.8-0.2) &
(dp2.zhelio <= 1.4+0.2))
fwhm_or_rhalf= np.zeros(len(dr3))-1 # arcsec
isPSF= np.char.strip(dr3.type) == 'PSF'
isEXP= np.char.strip(dr3.type) == 'EXP'
isSIMP= np.char.strip(dr3.type) == 'SIMP'
isDEV= np.char.strip(dr3.type) == 'DEV'
fwhm_or_rhalf[isPSF]= np.mean(dr3[isPSF].decam_psfsize[:,[1,2,4]],axis=1)
fwhm_or_rhalf[isSIMP]= 0.5
fwhm_or_rhalf[isEXP]= dr3[isEXP].shapeexp_r
fwhm_or_rhalf[isDEV]= dr3[isDEV].shapedev_r
dr3.set('fwhm_or_rhalf',fwhm_or_rhalf)
print(len(dr3),len(dp2))
print(len(dr3[grz_gt0]), len(dr3[~notCOMP]))
print(len(dr3[fwhm_or_rhalf < 5]),len(dr3[complDP2_buff]))
print(set(dr3.type))
print(pd.Series(dr3.type).value_counts()/len(dr3))
In [260]:
keep= ((grz_gt0) &
(notCOMP) &
(fwhm_or_rhalf < 5) &
(complDP2_buff))
dr3.cut(keep)
dp2.cut(keep)
len(dr3)
d= {}
for i,b in zip([1,2,4],'grz'):
d[b]= flux2mag(dr3.get('decam_flux')[:,i]/dr3.get('decam_mw_transmission')[:,i])
#d[b+'_ivar']= flux2mag(dr3.get('decam_flux_ivar')[:,i]/dr3.get('decam_mw_transmission')[:,i])
d['redshift']= dp2.get('zhelio')
d['fwhm_or_rhalf']= dr3.fwhm_or_rhalf
d['type']= dr3.get('type')
df= pd.DataFrame(d)
df['g-r']= df['g'] - df['r']
df['r-z']= df['r'] - df['z']
inBox= ((df['g-r'] <= y1_line(df['r-z'],pad=0.5)) &
(df['g-r'] <= y2_line(df['r-z'],pad=0.5)) &
(df['r-z'] >= 0.3 - 0.5))
In [261]:
fig,ax=plt.subplots()
ax.scatter(df.loc[inBox,'r-z'], df.loc[inBox,'g-r'],alpha=0.5)
ax.plot(pad['x1'],pad['y1'],c=color,ls='--',lw=2)
ax.plot(pad['x2'],pad['y2'],c=color,ls='--',lw=2)
ax.plot(pad['x3'],pad['y3'],c=color,ls='--',lw=2)
ax.set_xlabel('r-z')
ax.set_ylabel('g-r')
ax.set_ylim(-0.3,2)
ax.set_xlim(-0.6,2.2)
Out[261]:
In [262]:
attrs= ['redshift','g','r','z','fwhm_or_rhalf']
fig,ax= plt.subplots(2,3,figsize=(12,10))
i=-1
for row in range(2):
for col in range(3):
i+=1
if i >= len(attrs):
continue
_=ax[row,col].hist(df.loc[inBox,attrs[i]],histtype='step',normed=True,
color='b',label='inBox')
_=ax[row,col].hist(df[attrs[i]],histtype='step',normed=True,
color='r',label='All')
ax[row,col].set_xlabel(attrs[i])
if i == 0:
ax[row,col].legend()
In [263]:
cols=['fwhm_or_rhalf', 'g', 'r', 'redshift', 'z']
df.loc[inBox,cols].hist(bins=20,figsize=(12,8))
Out[263]:
In [264]:
fit_cols= ['fwhm_or_rhalf', 'g-r', 'r-z','z','redshift']
df_new= df.loc[inBox,fit_cols]
In [265]:
X= df_new.values
gmm, n_comp, BICs= my_mixture(X)
logprob= gmm.score_samples(X)
responsibilities = gmm.predict_proba(X)
plt.plot(n_comp,BICs)
Xpred,Ypred= gmm.sample(10000)
d={}
for i,col in enumerate(fit_cols):
d[col]= Xpred[:,i]
df_pred= pd.DataFrame(d)
In [267]:
fig,ax= plt.subplots(1,2,figsize=(10,5))
ax[0].scatter(df_new['r-z'],df_new['g-r'],color='b',alpha=0.05)
ax[1].scatter(df_pred['r-z'],df_pred['g-r'],color='r',alpha=0.05)
for i in range(2):
ax[i].set_xlabel('r-z')
ax[i].set_xlim(-0.5,2.5)
ax[i].set_ylim(-1,2)
ax[0].set_ylabel('g-r')
Out[267]:
In [268]:
fig,ax= plt.subplots(1,3,figsize=(14,5))
_,bins,_= ax[0].hist(df_new['z'],bins=20,histtype='step',color='b',normed=True)
_=ax[0].hist(df_pred['z'],bins=20,histtype='step',color='r',normed=True)
ax[0].set_xlabel('z mag')
_,bins,_= ax[1].hist(df_new['redshift'],bins=20,histtype='step',color='b',normed=True)
_=ax[1].hist(df_pred['redshift'],bins=20,histtype='step',color='r',normed=True)
ax[1].set_xlabel('redshift')
_,bins,_= ax[2].hist(df_new['fwhm_or_rhalf'],bins=20,histtype='step',color='b',normed=True)
_=ax[2].hist(df_pred['fwhm_or_rhalf'],bins=20,histtype='step',color='r',normed=True)
ax[2].set_xlabel('fwhm_or_rhalf')
print(df_new['fwhm_or_rhalf'].min())
In [269]:
fig,ax= plt.subplots(1,2,figsize=(10,5))
ax[0].scatter(df_new['z'],df_new['fwhm_or_rhalf'],color='b',alpha=0.05)
ax[1].scatter(df_pred['z'],df_pred['fwhm_or_rhalf'],color='r',alpha=0.05)
for i in range(2):
ax[i].set_xlabel('z mag')
ax[0].set_ylabel('fwhm_or_rhalf')
for i in range(2):
ax[i].set_xlim(19,26)
ax[i].set_ylim(-1,4.5)
In [270]:
fig,ax= plt.subplots(1,2,figsize=(10,5))
ax[0].scatter(df_new['z'],df_new['redshift'],color='b',alpha=0.05)
ax[1].scatter(df_pred['z'],df_pred['redshift'],color='r',alpha=0.05)
for i in range(2):
ax[i].set_xlabel('z mag')
ax[0].set_ylabel('redshift')
for i in range(2):
ax[i].set_xlim(19,26)
ax[i].set_ylim(0.5,1.7)
In [271]:
fig,ax= plt.subplots(2,2,figsize=(10,10))
ax[0,0].scatter(df_new['r-z'],df_new['redshift'],color='b',alpha=0.05)
ax[0,1].scatter(df_pred['r-z'],df_pred['redshift'],color='r',alpha=0.05)
for i in range(2):
ax[0,i].set_xlabel('r-z')
ax[0,i].set_xlim(-0.6,3)
ax[0,i].set_ylim(0.2,2)
ax[1,0].scatter(df_new['g-r'],df_new['redshift'],color='b',alpha=0.05)
ax[1,1].scatter(df_pred['g-r'],df_pred['redshift'],color='r',alpha=0.05)
for i in range(2):
ax[1,i].set_xlabel('g-r')
ax[1,i].set_xlim(-2,2)
ax[1,i].set_ylim(0.2,2)
for i in range(2):
ax[i,0].set_ylabel('redshift')
In [274]:
dr5_fns= glob(os.path.join(DATA_DIR,
'dr5.0/trimmed/decals-dr5.0-deep2-field*-trim.fits'))
dp2_fns= glob(os.path.join(DATA_DIR,
'dr5.0/trimmed/deep2-field*-trim.fits'))
print(dr5_fns,dp2_fns)
dr3= stack_tables(dr5_fns)
dp2= stack_tables(dp2_fns)
In [284]:
grz_gt0= ((dr3.flux_g > 0) &
(dr3.flux_r > 0) &
(dr3.flux_z > 0) &
(dr3.flux_ivar_g > 0) &
(dr3.flux_ivar_r > 0) &
(dr3.flux_ivar_z > 0))
notCOMP= dr3.type != 'COMP'
#redshift_gt0= dp2.zhelio > 0
complDP2_buff= ((dp2.zhelio >= 0.8-0.2) &
(dp2.zhelio <= 1.4+0.2))
fwhm_or_rhalf= np.zeros(len(dr3))-1 # arcsec
isPSF= np.char.strip(dr3.type) == 'PSF'
isEXP= pd.Series(np.char.strip(dr3.type)).isin(['EXP','REX']).values
isSIMP= np.char.strip(dr3.type) == 'SIMP'
isDEV= np.char.strip(dr3.type) == 'DEV'
fwhm_or_rhalf[isPSF]= np.mean(np.array([dr3[isPSF].psfsize_g,
dr3[isPSF].psfsize_r,
dr3[isPSF].psfsize_z]),axis=0)
fwhm_or_rhalf[isSIMP]= 0.5
fwhm_or_rhalf[isEXP]= dr3[isEXP].shapeexp_r
fwhm_or_rhalf[isDEV]= dr3[isDEV].shapedev_r
dr3.set('fwhm_or_rhalf',fwhm_or_rhalf)
print(len(dr3),len(dp2))
print(len(dr3[grz_gt0]), len(dr3[~notCOMP]))
print(len(dr3[fwhm_or_rhalf < 5]),len(dr3[complDP2_buff]))
print(set(dr3.type))
print(pd.Series(dr3.type).value_counts()/len(dr3))
In [ ]:
dr3.mw_transmission_g
In [286]:
keep= ((grz_gt0) &
(notCOMP) &
(fwhm_or_rhalf < 5) &
(complDP2_buff))
dr3.cut(keep)
dp2.cut(keep)
len(dr3)
d= {}
for b in 'grz':
d[b]= flux2mag(dr3.get('flux_'+b)/dr3.get('mw_transmission_'+b))
#d[b+'_ivar']= flux2mag(dr3.get('decam_flux_ivar')[:,i]/dr3.get('decam_mw_transmission')[:,i])
d['redshift']= dp2.get('zhelio')
d['fwhm_or_rhalf']= dr3.fwhm_or_rhalf
d['type']= dr3.get('type')
df= pd.DataFrame(d)
df['g-r']= df['g'] - df['r']
df['r-z']= df['r'] - df['z']
inBox= ((df['g-r'] <= y1_line(df['r-z'],pad=0.5)) &
(df['g-r'] <= y2_line(df['r-z'],pad=0.5)) &
(df['r-z'] >= 0.3 - 0.5))
In [287]:
fig,ax=plt.subplots()
ax.scatter(df.loc[inBox,'r-z'], df.loc[inBox,'g-r'],alpha=0.5)
ax.plot(pad['x1'],pad['y1'],c=color,ls='--',lw=2)
ax.plot(pad['x2'],pad['y2'],c=color,ls='--',lw=2)
ax.plot(pad['x3'],pad['y3'],c=color,ls='--',lw=2)
ax.set_xlabel('r-z')
ax.set_ylabel('g-r')
ax.set_ylim(-0.3,2)
ax.set_xlim(-0.6,2.2)
Out[287]:
In [288]:
attrs= ['redshift','g','r','z','fwhm_or_rhalf']
fig,ax= plt.subplots(2,3,figsize=(12,10))
i=-1
for row in range(2):
for col in range(3):
i+=1
if i >= len(attrs):
continue
_=ax[row,col].hist(df.loc[inBox,attrs[i]],histtype='step',normed=True,
color='b',label='inBox')
_=ax[row,col].hist(df[attrs[i]],histtype='step',normed=True,
color='r',label='All')
ax[row,col].set_xlabel(attrs[i])
if i == 0:
ax[row,col].legend()
In [289]:
cols=['fwhm_or_rhalf', 'g', 'r', 'redshift', 'z']
df.loc[inBox,cols].hist(bins=20,figsize=(12,8))
Out[289]:
In [290]:
fit_cols= ['fwhm_or_rhalf', 'g-r', 'r-z','z','redshift']
df_new= df.loc[inBox,fit_cols]
In [291]:
X= df_new.values
gmm, n_comp, BICs= my_mixture(X)
logprob= gmm.score_samples(X)
responsibilities = gmm.predict_proba(X)
plt.plot(n_comp,BICs)
Xpred,Ypred= gmm.sample(10000)
d={}
for i,col in enumerate(fit_cols):
d[col]= Xpred[:,i]
df_pred= pd.DataFrame(d)
In [292]:
fig,ax= plt.subplots(1,2,figsize=(10,5))
ax[0].scatter(df_new['r-z'],df_new['g-r'],color='b',alpha=0.05)
ax[1].scatter(df_pred['r-z'],df_pred['g-r'],color='r',alpha=0.05)
for i in range(2):
ax[i].set_xlabel('r-z')
ax[i].set_xlim(-0.5,2.5)
ax[i].set_ylim(-1,2)
ax[0].set_ylabel('g-r')
Out[292]:
In [293]:
fig,ax= plt.subplots(1,3,figsize=(14,5))
_,bins,_= ax[0].hist(df_new['z'],bins=20,histtype='step',color='b',normed=True)
_=ax[0].hist(df_pred['z'],bins=20,histtype='step',color='r',normed=True)
ax[0].set_xlabel('z mag')
_,bins,_= ax[1].hist(df_new['redshift'],bins=20,histtype='step',color='b',normed=True)
_=ax[1].hist(df_pred['redshift'],bins=20,histtype='step',color='r',normed=True)
ax[1].set_xlabel('redshift')
_,bins,_= ax[2].hist(df_new['fwhm_or_rhalf'],bins=20,histtype='step',color='b',normed=True)
_=ax[2].hist(df_pred['fwhm_or_rhalf'],bins=20,histtype='step',color='r',normed=True)
ax[2].set_xlabel('fwhm_or_rhalf')
print(df_new['fwhm_or_rhalf'].min())
In [294]:
fig,ax= plt.subplots(1,2,figsize=(10,5))
ax[0].scatter(df_new['z'],df_new['fwhm_or_rhalf'],color='b',alpha=0.05)
ax[1].scatter(df_pred['z'],df_pred['fwhm_or_rhalf'],color='r',alpha=0.05)
for i in range(2):
ax[i].set_xlabel('z mag')
ax[0].set_ylabel('fwhm_or_rhalf')
for i in range(2):
ax[i].set_xlim(19,26)
ax[i].set_ylim(-1,4.5)
In [295]:
fig,ax= plt.subplots(1,2,figsize=(10,5))
ax[0].scatter(df_new['z'],df_new['redshift'],color='b',alpha=0.05)
ax[1].scatter(df_pred['z'],df_pred['redshift'],color='r',alpha=0.05)
for i in range(2):
ax[i].set_xlabel('z mag')
ax[0].set_ylabel('redshift')
for i in range(2):
ax[i].set_xlim(19,26)
ax[i].set_ylim(0.5,1.7)
In [296]:
fig,ax= plt.subplots(2,2,figsize=(10,10))
ax[0,0].scatter(df_new['r-z'],df_new['redshift'],color='b',alpha=0.05)
ax[0,1].scatter(df_pred['r-z'],df_pred['redshift'],color='r',alpha=0.05)
for i in range(2):
ax[0,i].set_xlabel('r-z')
ax[0,i].set_xlim(-0.6,3)
ax[0,i].set_ylim(0.2,2)
ax[1,0].scatter(df_new['g-r'],df_new['redshift'],color='b',alpha=0.05)
ax[1,1].scatter(df_pred['g-r'],df_pred['redshift'],color='r',alpha=0.05)
for i in range(2):
ax[1,i].set_xlabel('g-r')
ax[1,i].set_xlim(-2,2)
ax[1,i].set_ylim(0.2,2)
for i in range(2):
ax[i,0].set_ylabel('redshift')
In [263]:
from matplotlib.patches import Ellipse
def draw_ellipse(position, covariance, ax=None, **kwargs):
"""Draw an ellipse with a given position and covariance"""
ax = ax or plt.gca()
# Convert covariance to principal axes
if covariance.shape == (2, 2):
U, s, Vt = np.linalg.svd(covariance)
angle = np.degrees(np.arctan2(U[1, 0], U[0, 0]))
width, height = 2 * np.sqrt(s)
else:
angle = 0
width, height = 2 * np.sqrt(covariance)
# Draw the Ellipse
for nsig in range(1, 4):
ax.add_patch(Ellipse(position, nsig * width, nsig * height,
angle, **kwargs))
In [264]:
gmm.means_.shape, gmm.covariances_.shape, gmm.weights_.shape
Out[264]:
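As a visual check of the fit itself (not just of its samples), the 2-D marginal of each component in the r-z / g-r plane can be overlaid on the data with draw_ellipse; a sketch, assuming the fit_cols column order used above (g-r is column 1, r-z is column 2):
In [ ]:
# sketch: overlay each component's (r-z, g-r) marginal on the inBox colors
fig, ax = plt.subplots()
ax.scatter(df_new['r-z'], df_new['g-r'], color='b', alpha=0.05)
for mu, cov, wt in zip(gmm.means_, gmm.covariances_, gmm.weights_):
    pos = mu[[2, 1]]                    # (r-z, g-r) means
    sub = cov[np.ix_([2, 1], [2, 1])]   # 2x2 marginal covariance
    draw_ellipse(pos, sub, ax=ax, alpha=0.3 * wt / gmm.weights_.max())
ax.set_xlabel('r-z')
ax.set_ylabel('g-r')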
In [301]:
from scipy.stats import norm
def sum_gaussians(x,means,covs,wts):
a= np.array([wt * norm.pdf(x,loc=mu,scale=var**0.5)
for mu,var,wt in zip(means,covs,wts)])
print(a.shape)
return np.sum(a,axis=0)
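Because each component pdf is unit-normalized, a weighted sum whose weights add up to one should itself integrate to about one; a small numerical check of sum_gaussians on toy inputs:
In [ ]:
# sketch: weighted sum of Gaussians with weights summing to 1 integrates to ~1
xgrid= np.linspace(-10, 10, 2001)
total= sum_gaussians(xgrid, means=[0., 2.], covs=[1., 0.25], wts=[0.3, 0.7])
print(np.trapz(total, xgrid))  # expect ~1.0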
In [214]:
a=np.array([np.arange(10) for i in range(5)])
np.sum(a,axis=0)
Out[214]:
In [302]:
_=plt.hist(df_new['redshift'],bins=20,histtype='step',color='b',normed=True)
_=plt.hist(df_pred['redshift'],bins=20,histtype='step',color='r',normed=True)
plt.xlabel('redshift')
i=-1
for mu,var,wt in zip(gmm.means_[:,-1],gmm.covariances_[:,-1,-1],
gmm.weights_):
x= np.linspace(mu-2*var**0.5,mu+2*var**0.5)
plt.plot(x,wt * norm.pdf(x,loc=mu,scale=var**0.5))
x= np.linspace(0.6,1.6,num=50)
plt.plot(x,sum_gaussians(x,gmm.means_[:,-1],gmm.covariances_[:,-1,-1],
gmm.weights_),
'k--')
# plt.plot(x,sum_gaussians(x,gmm.means_[:,-1],gmm.covariances_[:,-1,-1],
# new_wts),
# 'm--')
Out[302]:
In [229]:
new_wts= [wt + norm.cdf(0.8,loc=mu,scale=var**0.5) + norm.cdf(1.4,loc=mu,scale=var**0.5)
for mu,var,wt in zip(gmm.means_[:,-1],\
gmm.covariances_[:,-1,-1],\
gmm.weights_)]
new_wts= np.array(new_wts)/np.sum(new_wts)
# new_wts=[]
# for mu,var,wt in zip(gmm.means_[:,-1],
# gmm.covariances_[:,-1,-1],
# gmm.weights_):
# new_wts.append(wt + norm.cdf(0.8,loc=mu,scale=var**0.5))
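An alternative to re-weighting the mixture components is plain rejection sampling: draw from the fitted mixture and keep only rows whose redshift (the last column in fit_cols) lands in the 0.8-1.4 window; a sketch:
In [ ]:
# sketch: rejection-sample the mixture and keep draws with 0.8 < z < 1.4
draws,_ = gmm.sample(100000)
in_window= (draws[:,-1] > 0.8) & (draws[:,-1] < 1.4)
print(in_window.sum(), 'of', len(draws), 'draws kept')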
In [193]:
gmm.predict_proba
In [204]:
mu,std= gmm.means_[:,-1][-2], gmm.covariances_[:,-1,-1][-2]**0.5
#x= np.linspace(mu-2*std,mu+2*std)
print(mu,std)
norm.cdf(0.8,loc=mu,scale=std)
Out[204]:
In [192]:
gmm.means_[:,-1][-2], gmm.weights_.sum()
Out[192]:
In [163]:
fig,ax= plt.subplots(1,3,figsize=(14,5))
_= ax[0].hist(df_new['z'],bins=20,histtype='step',color='b',normed=True)
_=ax[0].hist(df_pred.loc[inRng,'z'],bins=20,histtype='step',color='r',normed=True)
ax[0].set_xlabel('z mag')
_,bins,_= ax[1].hist(df_new['redshift'],bins=20,histtype='step',color='b',normed=True)
_=ax[1].hist(df_pred.loc[inRng,'redshift'],bins=20,histtype='step',color='r',normed=True)
ax[1].set_xlabel('redshift')
_,bins,_= ax[2].hist(df_new['fwhm_or_rhalf'],bins=20,histtype='step',color='b',normed=True)
_=ax[2].hist(df_pred.loc[inRng,'fwhm_or_rhalf'],bins=20,histtype='step',color='r',normed=True)
ax[2].set_xlabel('fwhm_or_rhalf')
Out[163]:
In [ ]:
fig,ax= plt.subplots()
_,bins,_= ax.hist(df_new['z'],bins=20,histtype='step',color='b',normed=True)
_=ax.hist(df_pred['z'],bins,histtype='step',color='r',normed=True)
In [306]:
len(df_pred['redshift']),len(set(df_pred['redshift']))
Out[306]:
In [309]:
# ID={}
# for i,z in enumerate(df_pred['redshift']):
# len(ID.keys())
Out[309]:
In [320]:
from scipy import spatial
spatial.KDTree?
In [321]:
from scipy import spatial
tree = spatial.KDTree(df_pred['redshift'].values.reshape(-1,1))
In [337]:
_,ind= tree.query(np.linspace(0.8,1.4,num=100000).reshape(-1,1))
randoms= df_pred.loc[ind,:]
len(df_pred),len(ind),len(randoms)
Out[337]:
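Since the query grid is uniform in redshift, taking each grid point's nearest neighbour (with repeats) should leave the resampled catalogue roughly flat in z over 0.8-1.4; a quick check on the randoms table built above:
In [ ]:
# sketch: the nearest-neighbour resampled redshifts should be roughly flat over 0.8-1.4
_= plt.hist(randoms['redshift'], bins=30, histtype='step', color='g')
plt.xlabel('redshift')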
In [310]:
DATA_DIR = os.path.join(os.environ['HOME'],'Downloads',
'truth')
z_bins= np.loadtxt(os.path.join(DATA_DIR,'sanchez_kirkby_bins.txt'),
dtype=float)
pdf= np.loadtxt(os.path.join(DATA_DIR,'sanchez_kirkby_pdf.txt'),
dtype=float)
z_10k= fits_table(os.path.join(DATA_DIR,'sanchez_kirkby_z_rand10k.fits'))
In [32]:
z_bins,pdf
Out[32]:
In [62]:
plt.bar(left=z_bins[:-1],height=pdf,width=z_bins[1]-z_bins[0],fill=False)
_=plt.hist(z_10k.z_cosmo,bins=z_bins,histtype='step',color='b',normed=True)
In [43]:
np.vstack([(z_bins[1:]+z_bins[:-1])/2,
pdf]).T.shape
Out[43]:
In [7]:
mixture.GaussianMixture?
In [51]:
from sklearn.mixture import GMM
In [64]:
z_10k.z_cosmo.reshape(-1,1).shape
Out[64]:
In [57]:
np.random.seed(1)
gmm = GMM(3, n_iter=1)
gmm.means_ = np.array([[-1], [0], [3]])
gmm.covars_ = np.array([[1.5], [1], [0.5]]) ** 2
gmm.weights_ = np.array([0.3, 0.5, 0.2])
X = gmm.sample(1000)
print(X.shape)
plt.hist(X)
Out[57]:
In [58]:
import itertools
from scipy import linalg
import matplotlib as mpl
lowest_bic = np.infty
bic = []
n_components_range = range(1, 10)
cv_types = ['spherical', 'tied', 'diag', 'full']
for cv_type in cv_types:
for n_components in n_components_range:
# Fit a Gaussian mixture with EM
gmm = mixture.GaussianMixture(n_components=n_components,
covariance_type=cv_type)
gmm.fit(X)
bic.append(gmm.bic(X))
if bic[-1] < lowest_bic:
lowest_bic = bic[-1]
best_gmm = gmm
bic = np.array(bic)
color_iter = itertools.cycle(['navy', 'turquoise', 'cornflowerblue',
'darkorange'])
clf = best_gmm
bars = []
# Plot the BIC scores
spl = plt.subplot(2, 1, 1)
for i, (cv_type, color) in enumerate(zip(cv_types, color_iter)):
xpos = np.array(n_components_range) + .2 * (i - 2)
bars.append(plt.bar(xpos, bic[i * len(n_components_range):
(i + 1) * len(n_components_range)],
width=.2, color=color))
plt.xticks(n_components_range)
plt.ylim([bic.min() * 1.01 - .01 * bic.max(), bic.max()])
plt.title('BIC score per model')
xpos = np.mod(bic.argmin(), len(n_components_range)) + .65 +\
.2 * np.floor(bic.argmin() / len(n_components_range))
plt.text(xpos, bic.min() * 0.97 + .03 * bic.max(), '*', fontsize=14)
spl.set_xlabel('Number of components')
spl.legend([b[0] for b in bars], cv_types)
# Plot the winner (note: this panel assumes X has two columns; with the
# 1-D toy sample above only the BIC panel is meaningful)
splot = plt.subplot(2, 1, 2)
Y_ = clf.predict(X)
for i, (mean, cov, color) in enumerate(zip(clf.means_, clf.covariances_,
color_iter)):
v, w = linalg.eigh(cov)
if not np.any(Y_ == i):
continue
plt.scatter(X[Y_ == i, 0], X[Y_ == i, 1], .8, color=color)
# Plot an ellipse to show the Gaussian component
angle = np.arctan2(w[0][1], w[0][0])
angle = 180. * angle / np.pi # convert to degrees
v = 2. * np.sqrt(2.) * np.sqrt(v)
ell = mpl.patches.Ellipse(mean, v[0], v[1], 180. + angle, color=color)
ell.set_clip_box(splot.bbox)
ell.set_alpha(.5)
splot.add_artist(ell)
In [67]:
X= z_10k.z_cosmo.reshape(-1,1)
# fit models with 1-10 components, then score each with AIC and BIC
N = np.arange(1, 11)
models = [mixture.GaussianMixture(n_components=n).fit(X) for n in N]
AIC = [m.aic(X) for m in models]
BIC = [m.bic(X) for m in models]
#------------------------------------------------------------
# Plot the results
# We'll use three panels:
# 1) data + best-fit mixture
# 2) AIC and BIC vs number of components
# 3) probability that a point came from each component
fig = plt.figure(figsize=(5, 1.7))
fig.subplots_adjust(left=0.12, right=0.97,
bottom=0.21, top=0.9, wspace=0.5)
# plot 1: data + best-fit mixture
ax = fig.add_subplot(131)
M_best = models[np.argmin(AIC)]
x = np.linspace(X.min(), X.max(), 1000).reshape(-1, 1)
logprob = M_best.score_samples(x)
responsibilities = M_best.predict_proba(x)
pdf = np.exp(logprob)
pdf_individual = responsibilities * pdf[:, np.newaxis]
ax.hist(X, 30, normed=True, histtype='stepfilled', alpha=0.4)
ax.plot(x, pdf, '-k')
ax.plot(x, pdf_individual, '--k')
ax.text(0.04, 0.96, "Best-fit Mixture",
ha='left', va='top', transform=ax.transAxes)
ax.set_xlabel('$x$')
ax.set_ylabel('$p(x)$')
# plot 2: AIC and BIC
ax = fig.add_subplot(132)
ax.plot(N, AIC, '-k', label='AIC')
ax.plot(N, BIC, '--k', label='BIC')
ax.set_xlabel('n. components')
ax.set_ylabel('information criterion')
ax.legend(loc=2)
In [108]:
sns.distplot(pd.Series(z_10k.z_cosmo))
Out[108]:
In [142]:
hiO2= df['oii'] > 8.e-17
plt.hist(df.loc[hiO2,'redshift'])
Out[142]:
In [143]:
# ELG dN/dz [per deg^2 per unit z] from Table 2.3, DESI Science Part 1
dN_dz_ddeg= np.array([309,2269,1923,2094,1441,1353,1337,523,466,329,126,0,0])
z= np.linspace(0.65,1.85,num=len(dN_dz_ddeg))
bin_width= z[1]-z[0]
zbins= np.linspace(z[0]-bin_width/2.,z[-1]+bin_width/2,num=len(z)+1)
len(dN_dz_ddeg),len(z),z,zbins
Out[143]:
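Integrating this dN/dz over the redshift bins gives the implied surface density of ELGs with good redshifts; a quick check using only the numbers above:
In [ ]:
# quick check: integrate dN/dz [deg^-2 per unit z] over the bins
print(np.sum(dN_dz_ddeg) * bin_width)  # roughly 1.2e3 per deg^2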
In [144]:
plt.step(z,dN_dz_ddeg/10,where='post')
Out[144]:
In [116]:
plt.hist(dN_dz_ddeg,bins=zbins,histtype='step')
Out[116]:
In [117]:
plt.step?
In [24]:
from sklearn.mixture import GMM
In [25]:
gmm.predict?
In [9]:
from sklearn import mixture
mixture.GaussianMixture?
In [17]:
plt.plot(t.reshape(-1,1), true_pdf(t).reshape(-1,1),
':', color='black', zorder=3,
label="Generating Distribution")
#plt.show()
Out[17]:
In [27]:
gmm=gmms[0]
gmm.predict?
In [121]:
import numpy as np
from matplotlib import pyplot as plt
from scipy import stats
fig = plt.figure(figsize=(5, 5))
fig.subplots_adjust(bottom=0.08, top=0.95, right=0.95, hspace=0.1)
# Generate our data: a mix of several Cauchy distributions
# this is the same data used in the Bayesian Blocks figure
np.random.seed(0)
N = 10000
mu_gamma_f = [(5, 1.0, 0.1),
(7, 0.5, 0.5),
(9, 0.1, 0.1),
(12, 0.5, 0.2),
(14, 1.0, 0.1)]
true_pdf = lambda x: sum([f * stats.cauchy(mu, gamma).pdf(x)
for (mu, gamma, f) in mu_gamma_f]).reshape(-1,1)
x = np.concatenate([stats.cauchy(mu, gamma).rvs(int(f * N))
for (mu, gamma, f) in mu_gamma_f])
np.random.shuffle(x)
x = x[x > -10]
x = x[x < 30]
#plt.show()
#------------------------------------------------------------
# plot the results
N_values = (500, 5000)
subplots = (221, 223)
for N, subplot in zip(N_values, subplots):
ax = fig.add_subplot(subplot)
t = np.linspace(-10, 30, 1000).reshape(-1,1)
print(t.shape,true_pdf(t).shape)
ax.plot(t.reshape(-1,1), true_pdf(t).reshape(-1,1),
':', color='black', zorder=3,
label="Generating Distribution")
xN = x[:N].reshape(-1,1)
print(xN.shape)
# Compute density via Gaussian Mixtures
# we'll try several numbers of clusters
n_components = np.arange(3, 16)
gmms = [mixture.GaussianMixture(n_components=n).fit(xN)
for n in n_components]
BICs = [gmm.bic(xN)/xN.shape[0] for gmm in gmms]
i_min = np.argmin(BICs)
print(t.shape)
logprob= gmms[i_min].score_samples(t)
responsibilities = gmms[i_min].predict_proba(t)
# plot the results
ax.plot(xN, -0.005 * np.ones(len(xN)), '|k', lw=1.5)
ax.plot(t, np.exp(logprob), '-', color='gray',
label="Mixture Model\n(%i components)" % n_components[i_min])
# label the plot
ax.text(0.02, 0.95, "%i points" % N, ha='left', va='top',
transform=ax.transAxes)
ax.set_ylabel('$p(x)$')
ax.legend(loc='upper right')
if subplot == 223:
ax.set_xlabel('$x$')
if subplot == 221:
ax2 = fig.add_subplot(222)
else:
ax2 = fig.add_subplot(224)
ax2.plot(n_components,BICs)
ax2.set_xlabel('components')
ax2.set_ylabel('BIC')
ax.set_xlim(0, 20)
ax.set_ylim(-0.01, 0.4001)
plt.show()
In [59]:
bins.shape,h.shape
Out[59]:
In [62]:
np.histogram?
In [70]:
X= z_10k.z_cosmo[:1000]
h,bins,_=plt.hist(X,bins=np.linspace(X.min(),X.max(),num=25))
plt.plot((bins[1:]+bins[:-1])/2,h,'k--')
h,bins= np.histogram(X,bins)
plt.plot((bins[1:]+bins[:-1])/2,h,'r--')
print(h.shape,bins.shape,X.shape)
In [311]:
def my_mixture(X):
fig = plt.figure(figsize=(5, 5))
fig.subplots_adjust(bottom=0.08, top=0.95, right=0.95, hspace=0.1)
N_values = (5000,10000)
subplots = (221, 223)
t = np.linspace(X.min(),X.max(),num=25)
print(X.shape,t.shape)
pdf,bins= np.histogram(X,t,normed=True)
t= t.reshape(-1,1)
for N, subplot in zip(N_values, subplots):
ax = fig.add_subplot(subplot)
ax.plot((t[1:]+t[:-1])/2,pdf,'k:',
label="Generating Distribution")
xN = X[:N].reshape(-1,1)
print(xN.shape)
# Compute density via Gaussian Mixtures
# we'll try several numbers of clusters
n_components = np.arange(3, 16)
gmms = [mixture.GaussianMixture(n_components=n).fit(xN)
for n in n_components]
BICs = [gmm.bic(xN)/xN.shape[0] for gmm in gmms]
i_min = np.argmin(BICs)
print(t.shape)
logprob= gmms[i_min].score_samples(t)
responsibilities = gmms[i_min].predict_proba(t)
# plot the results
ax.plot(xN, -0.005 * np.ones(len(xN)), '|k', lw=1.5)
ax.plot(t, np.exp(logprob), '-', color='gray',
label="Mixture Model\n(%i components)" % n_components[i_min])
# label the plot
ax.text(0.02, 0.95, "%i points" % N, ha='left', va='top',
transform=ax.transAxes)
ax.set_ylabel('$p(x)$')
#ax.legend(loc='upper right')
if subplot == 223:
ax.set_xlabel('$x$')
if subplot == 221:
ax2 = fig.add_subplot(222)
else:
ax2 = fig.add_subplot(224)
ax2.plot(n_components,BICs)
ax2.set_xlabel('components')
ax2.set_ylabel('BIC')
#ax.set_xlim(0, 20)
#ax.set_ylim(-0.01, 0.4001)
plt.show()
# hand the last pass's fits back so they can be reused below
return gmms, n_components, BICs
In [312]:
gmms, n_components, BICs= my_mixture(z_10k.z_cosmo)
In [316]:
# Repeat for the full Sanchez & Kirkby dataset, using the fits returned above
i_min= np.argmin(BICs)
t= np.linspace(0.6,1.4,num=20)
logprob= gmms[i_min].score_samples(t.reshape(-1,1))
plt.plot(t, np.exp(logprob), '-', color='gray')
Out[316]:
In [317]:
gmm= gmms[i_min]
X,y= gmm.sample(10000)
In [318]:
plt.hist(X)
Out[318]: