In [1]:
%connect_info


{
  "stdin_port": 45118, 
  "ip": "127.0.0.1", 
  "control_port": 40350, 
  "hb_port": 43491, 
  "signature_scheme": "hmac-sha256", 
  "key": "3d92b112-fb69-4f5e-ab13-1a917d31ddc0", 
  "shell_port": 60843, 
  "transport": "tcp", 
  "iopub_port": 40453
}

Paste the above JSON into a file, and connect with:
    $> ipython <app> --existing <file>
or, if you are local, you can connect with just:
    $> ipython <app> --existing /run/user/1000/jupyter/kernel-3ba9c064-918c-4aeb-965a-c1686d92ada9.json 
or even just:
    $> ipython <app> --existing 
if this is the most recent IPython session you have started.

In [2]:
import pandas as pd

In [3]:
df = pd.read_csv('photoz.csv',header=0)

In [4]:
df.describe()


Out[4]:
z_spec z_photo z_photo_wgt z_photo_err
count 6071.000000 6071.000000 6071 6071.000000
mean 0.490558 0.489772 1 0.067040
std 0.158013 0.110906 0 0.021950
min 0.005954 0.061783 1 0.017339
25% 0.415064 0.458071 1 0.055957
50% 0.512590 0.528519 1 0.066252
75% 0.584309 0.560872 1 0.076828
max 5.810223 0.663229 1 0.198344

In [5]:
from bokeh.io import output_notebook,show,hplot,vplot
from bokeh.plotting import figure
output_notebook()


BokehJS successfully loaded.

In [6]:
from bokeh.charts import Scatter,Histogram,defaults
defaults.width = 10
defaults.height = 10

In [7]:
def scatter(df,x,y,axis_min=None,axis_max=None):
    from bokeh.models import ColumnDataSource, Range1d
    from bokeh.models.glyphs import Line
    import math as m
    if axis_min is None:
        axis_min = min(df[x].min(),df[y].min())
    if axis_max is None:
        axis_max = max(df[x].max(),df[y].max())
    p = figure()
    ray_length = m.sqrt(2*(axis_max-axis_min)**2)
    p.ray(axis_min,axis_min,length=ray_length,length_units='data',angle=45,angle_units='deg',color='black',line_width=2)
    p.circle(x=df[x],y=df[y],alpha=0.5)
    p.x_range = Range1d(start=axis_min,end=axis_max)
    p.y_range = Range1d(start=axis_min,end=axis_max)
    return p

s = scatter(df,x='z_spec',y='z_photo',axis_min=0,axis_max=1)
h = Histogram(df['z_photo_err'])
p = hplot(s,h)

show(p)


Out[7]:
<bokeh.io._CommsHandle at 0x7fec373aeb90>

In [8]:
from bokeh.plotting import figure
f = figure()
import numpy as np
bins = np.linspace(0,1,50)

h,b = np.histogram(df['z_spec'],bins=bins,density=True)
f.quad(top=h,bottom=0,left=bins[:-1],right=bins[1:],fill_color="#036564",fill_alpha=0.5,legend="z-spec")

h,b = np.histogram(df['z_photo'],bins=bins,density=True)
h = h.tolist()
hh = h+h
hh[::2] = h
hh[1::2] = h
b = b.tolist()
bb = b[:-1]+b[1:]
bb.sort()
assert len(hh)==len(bb)
f.line(x=bb,y=hh,legend="photo-z",line_color="#D95B43",line_width=2)
_b = np.diff(bins)/2+bins[:-1]
f.circle(x=_b,y=h,size=9,line_color="#D95B43",line_width=2,fill_color="white",fill_alpha=1,legend='photo-z')

show(f)


Out[8]:
<bokeh.io._CommsHandle at 0x7fec3715f0d0>

Sigma photo-z

$ \sigma_f = \frac{\Delta z}{(1 + z_{spec})} ; \Delta z = z_{photo} - z_{spec} $


In [9]:
def dispersion(x,y):
    from bokeh.models import Range1d
    f = figure()
    x_min = 0
    x_max = x.max()
    f.ray(x=int(x_min),y=0,length=int(x_max+1),angle=0,line_color='black',line_width=2)
    f.circle(x=x,y=y,alpha=0.5)
    f.y_range = Range1d(start=-1,end=1)
    return f

dz = df['z_photo'] - df['z_spec']
sfz = dz/(1+df['z_spec'])

from bokeh.models import Range1d
f = figure()
x_min = 0
x_max = df['z_spec'].max()

f.ray(x=int(x_min),y=0,length=int(x_max+1),angle=0,line_color='black',line_width=2)

# deviation guide lines
yq1,yq2 = sfz.quantile([0.25,0.75])
f.ray(x=int(x_min),y=yq1,length=int(x_max+1),angle=0,line_color='orange',line_width=1)
f.ray(x=int(x_min),y=yq2,length=int(x_max+1),angle=0,line_color='orange',line_width=1)
f.ray(x=int(x_min),y=-0.15,length=int(x_max+1),angle=0,line_color='red',line_width=1)
f.ray(x=int(x_min),y=0.15,length=int(x_max+1),angle=0,line_color='red',line_width=1)

f.circle(x=df['z_spec'],y=sfz,alpha=0.5)

f.y_range = Range1d(start=-1,end=1)
show(f)


Out[9]:
<bokeh.io._CommsHandle at 0x7fec370bb990>

remove outliers from sigma

  • soq is cleaned based on quartils
  • sof is cleaned based on a fixed distance (0.15)

In [10]:
# Figure 1
soq_val1,soq_val2 = sfz.quantile([0.05,0.95])
soq_bool = (sfz>soq_val1) & (sfz<soq_val2)

from bokeh.models import Range1d
f1 = figure()
x_min = 0
x_max = df['z_spec'].max()

f1.ray(x=int(x_min),y=0,length=int(x_max+1),angle=0,line_color='black',line_width=2)

f1.circle(x=df['z_spec'].where(soq_bool),y=sfz.where(soq_bool),alpha=0.5)

f1.y_range = Range1d(start=-1,end=1)

# Figure 2
sof_bool = (sfz>-0.15) & (sfz<0.15)
from bokeh.models import Range1d
f2 = figure()
x_min = 0
x_max = df['z_spec'].max()
f2.ray(x=int(x_min),y=0,length=int(x_max+1),angle=0,line_color='black',line_width=2)
f2.circle(x=df['z_spec'].where(sof_bool),y=sfz.where(sof_bool),alpha=0.5)
f2.y_range = Range1d(start=-1,end=1)

show(hplot(f1,f2))


Out[10]:
<bokeh.io._CommsHandle at 0x7fec35629f50>

In [11]:
p = figure(x_range=(0,1),y_range=(0,1))
H,bx,by = np.histogram2d(df['z_spec'],df['z_photo'],bins=(bins,bins),normed=True)
h2n = H/H.sum()
N=bins.size-1
img = np.empty((N,N), dtype=np.uint32)
view = img.view(dtype=np.uint8).reshape((N, N, 4))
for i in range(N):
    for j in range(N):
        view[i, j, 0] = 255
        view[i, j, 1] = 255-int(H[i,j]*255)
        view[i, j, 2] = 255-int(H[i,j]*255)
        view[i, j, 3] = 255
p.image_rgba(image=[img],x=0,y=0,dw=1,dh=1)
show(p)


Out[11]:
<bokeh.io._CommsHandle at 0x7fec352e0e50>

In [12]:
%qtconsole

In [13]:
p = figure()
#bins = np.linspace(0,df['z_spec'].max(),50)
bins = np.linspace(0,1,50)
bb = bins[:-1]+np.diff(bins)/2
h2,bx,by = np.histogram2d(x=df['z_spec'],y=df['z_photo'],bins=bins)
hw = h2[np.where(h2>0)]
hwp = 10*np.log2((hw-hw.min())/(hw.max()-hw.min())+1)+5
yind,xind = np.where(h2>0)
ybins = bins[yind]
xbins = bins[xind]
dfp = pd.DataFrame({'xbins':xbins,'ybins':ybins,'scale':hwp})
p.square(y=dfp['xbins'],x=dfp['ybins'],size=dfp['scale'],alpha=0.5)
ph = Histogram(hwp)
#p.circle(y=df['z_photo'],x=df['z_spec'],size=1,color='black')
p.x_range = Range1d(start=0,end=1)
p.y_range = Range1d(start=0,end=1)
show(vplot(p,ph))


Out[13]:
<bokeh.io._CommsHandle at 0x7fec352d7750>

Kernel density estimate (KDE)


In [16]:
import scipy
from scipy.stats import gaussian_kde
m1 = df['z_spec'].values
m2 = df['z_photo'].values
xmin = m1.min()
xmax = m1.max()
ymin = m2.min()
ymax = m2.max()
X,Y = np.mgrid[xmin:xmax:100j,ymin:ymax:50j]
positions = np.vstack([X.ravel(), Y.ravel()])
values = np.vstack([m1, m2])
kernel = gaussian_kde(values)
kernel_pos = kernel(positions).T
Z = np.reshape(kernel_pos, X.shape)

#import matplotlib.pyplot as plt
#fig, ax = plt.subplots()
#ax.imshow(np.rot90(Z), cmap=plt.cm.gist_earth_r,
#          extent=[xmin, xmax, ymin, ymax])
#ax.plot(m1, m2, 'k.', markersize=2)
#ax.set_xlim([xmin, xmax])
#ax.set_ylim([ymin, ymax])
#plt.show()
#

def hex_to_rgb(value):
    value = value.lstrip('#')
    lv = len(value)
    return tuple(int(value[i:i + lv // 3], 16) for i in range(0, lv, lv // 3))

class RGBAColorMapper(object):
    """Maps floating point values to rgb values over a palette"""

    def __init__(self, low, high, palette):
        self.range = np.linspace(low, high, len(palette))
        self.r, self.g, self.b = np.array(zip(*[hex_to_rgb(i) for i in palette]))

    def color(self, data):
        """Maps your data values to the pallette with linear interpolation"""

        red = np.interp(data, self.range, self.r)
        blue = np.interp(data, self.range, self.b)
        green = np.interp(data, self.range, self.g)
        # Style plot to return a grey color when value is 'nan'
        red[np.isnan(red)] = 240
        blue[np.isnan(blue)] = 240
        green[np.isnan(green)] = 240
        colors = np.dstack([red.astype(np.uint8),
                          green.astype(np.uint8),
                          blue.astype(np.uint8),
                          np.full_like(data, 255, dtype=np.uint8)])
        return colors.view(dtype=np.uint32).reshape(data.shape)

from scipy.interpolate import griddata
XX,YY = np.mgrid[X.min():X.max():200j,Y.min():Y.max():100j]
ZZ = griddata(positions.T,kernel_pos,(XX,YY),method='cubic')

from bokeh.palettes import YlOrBr9 as pal
colormap = RGBAColorMapper(0, 30, pal)
img = colormap.color(Z.max()-Z.T)
img2 = colormap.color(ZZ.max()-ZZ.T)

i = figure()
i.image_rgba(image=[img],x=[0],y=[0],dw=[xmax],dh=[ymax])
i.x_range = Range1d(start=0,end=1)
i.y_range = Range1d(start=0,end=ymax)
i2 = figure()
i2.image_rgba(image=[img2],x=[0],y=[0],dw=[xmax],dh=[ymax])
i2.x_range = Range1d(start=0,end=1)
i2.y_range = Range1d(start=0,end=ymax)
show(vplot(i,i2))


Out[16]:
<bokeh.io._CommsHandle at 0x7fec23ea9f10>

In [17]:
m2b = np.digitize(m2,Y[0,:])-1
m1b = np.digitize(m1,X[:,0])-1
w = Z[m1b,m2b]
f = figure()
f.circle(x=m1,y=m2,size=w,color='red',alpha=0.5)
f.x_range = Range1d(start=0,end=1)
f.y_range = Range1d(start=0,end=1)
show(f)


Out[17]:
<bokeh.io._CommsHandle at 0x7fec23c1f710>

In [ ]: