In [1]:
%connect_info
In [2]:
import pandas as pd
In [3]:
df = pd.read_csv('photoz.csv',header=0)
In [4]:
df.describe()
Out[4]:
In [5]:
from bokeh.io import output_notebook,show,hplot,vplot
from bokeh.plotting import figure
output_notebook()
In [6]:
from bokeh.charts import Scatter,Histogram,defaults
defaults.width = 10
defaults.height = 10
In [7]:
def scatter(df,x,y,axis_min=None,axis_max=None):
from bokeh.models import ColumnDataSource, Range1d
from bokeh.models.glyphs import Line
import math as m
if axis_min is None:
axis_min = min(df[x].min(),df[y].min())
if axis_max is None:
axis_max = max(df[x].max(),df[y].max())
p = figure()
ray_length = m.sqrt(2*(axis_max-axis_min)**2)
p.ray(axis_min,axis_min,length=ray_length,length_units='data',angle=45,angle_units='deg',color='black',line_width=2)
p.circle(x=df[x],y=df[y],alpha=0.5)
p.x_range = Range1d(start=axis_min,end=axis_max)
p.y_range = Range1d(start=axis_min,end=axis_max)
return p
s = scatter(df,x='z_spec',y='z_photo',axis_min=0,axis_max=1)
h = Histogram(df['z_photo_err'])
p = hplot(s,h)
show(p)
Out[7]:
In [8]:
from bokeh.plotting import figure
f = figure()
import numpy as np
bins = np.linspace(0,1,50)
h,b = np.histogram(df['z_spec'],bins=bins,density=True)
f.quad(top=h,bottom=0,left=bins[:-1],right=bins[1:],fill_color="#036564",fill_alpha=0.5,legend="z-spec")
h,b = np.histogram(df['z_photo'],bins=bins,density=True)
h = h.tolist()
hh = h+h
hh[::2] = h
hh[1::2] = h
b = b.tolist()
bb = b[:-1]+b[1:]
bb.sort()
assert len(hh)==len(bb)
f.line(x=bb,y=hh,legend="photo-z",line_color="#D95B43",line_width=2)
_b = np.diff(bins)/2+bins[:-1]
f.circle(x=_b,y=h,size=9,line_color="#D95B43",line_width=2,fill_color="white",fill_alpha=1,legend='photo-z')
show(f)
Out[8]:
In [9]:
def dispersion(x,y):
from bokeh.models import Range1d
f = figure()
x_min = 0
x_max = x.max()
f.ray(x=int(x_min),y=0,length=int(x_max+1),angle=0,line_color='black',line_width=2)
f.circle(x=x,y=y,alpha=0.5)
f.y_range = Range1d(start=-1,end=1)
return f
dz = df['z_photo'] - df['z_spec']
sfz = dz/(1+df['z_spec'])
from bokeh.models import Range1d
f = figure()
x_min = 0
x_max = df['z_spec'].max()
f.ray(x=int(x_min),y=0,length=int(x_max+1),angle=0,line_color='black',line_width=2)
# deviation guide lines
yq1,yq2 = sfz.quantile([0.25,0.75])
f.ray(x=int(x_min),y=yq1,length=int(x_max+1),angle=0,line_color='orange',line_width=1)
f.ray(x=int(x_min),y=yq2,length=int(x_max+1),angle=0,line_color='orange',line_width=1)
f.ray(x=int(x_min),y=-0.15,length=int(x_max+1),angle=0,line_color='red',line_width=1)
f.ray(x=int(x_min),y=0.15,length=int(x_max+1),angle=0,line_color='red',line_width=1)
f.circle(x=df['z_spec'],y=sfz,alpha=0.5)
f.y_range = Range1d(start=-1,end=1)
show(f)
Out[9]:
In [10]:
# Figure 1
soq_val1,soq_val2 = sfz.quantile([0.05,0.95])
soq_bool = (sfz>soq_val1) & (sfz<soq_val2)
from bokeh.models import Range1d
f1 = figure()
x_min = 0
x_max = df['z_spec'].max()
f1.ray(x=int(x_min),y=0,length=int(x_max+1),angle=0,line_color='black',line_width=2)
f1.circle(x=df['z_spec'].where(soq_bool),y=sfz.where(soq_bool),alpha=0.5)
f1.y_range = Range1d(start=-1,end=1)
# Figure 2
sof_bool = (sfz>-0.15) & (sfz<0.15)
from bokeh.models import Range1d
f2 = figure()
x_min = 0
x_max = df['z_spec'].max()
f2.ray(x=int(x_min),y=0,length=int(x_max+1),angle=0,line_color='black',line_width=2)
f2.circle(x=df['z_spec'].where(sof_bool),y=sfz.where(sof_bool),alpha=0.5)
f2.y_range = Range1d(start=-1,end=1)
show(hplot(f1,f2))
Out[10]:
In [11]:
p = figure(x_range=(0,1),y_range=(0,1))
H,bx,by = np.histogram2d(df['z_spec'],df['z_photo'],bins=(bins,bins),normed=True)
h2n = H/H.sum()
N=bins.size-1
img = np.empty((N,N), dtype=np.uint32)
view = img.view(dtype=np.uint8).reshape((N, N, 4))
for i in range(N):
for j in range(N):
view[i, j, 0] = 255
view[i, j, 1] = 255-int(H[i,j]*255)
view[i, j, 2] = 255-int(H[i,j]*255)
view[i, j, 3] = 255
p.image_rgba(image=[img],x=0,y=0,dw=1,dh=1)
show(p)
Out[11]:
In [12]:
%qtconsole
In [13]:
p = figure()
#bins = np.linspace(0,df['z_spec'].max(),50)
bins = np.linspace(0,1,50)
bb = bins[:-1]+np.diff(bins)/2
h2,bx,by = np.histogram2d(x=df['z_spec'],y=df['z_photo'],bins=bins)
hw = h2[np.where(h2>0)]
hwp = 10*np.log2((hw-hw.min())/(hw.max()-hw.min())+1)+5
yind,xind = np.where(h2>0)
ybins = bins[yind]
xbins = bins[xind]
dfp = pd.DataFrame({'xbins':xbins,'ybins':ybins,'scale':hwp})
p.square(y=dfp['xbins'],x=dfp['ybins'],size=dfp['scale'],alpha=0.5)
ph = Histogram(hwp)
#p.circle(y=df['z_photo'],x=df['z_spec'],size=1,color='black')
p.x_range = Range1d(start=0,end=1)
p.y_range = Range1d(start=0,end=1)
show(vplot(p,ph))
Out[13]:
In [16]:
import scipy
from scipy.stats import gaussian_kde
m1 = df['z_spec'].values
m2 = df['z_photo'].values
xmin = m1.min()
xmax = m1.max()
ymin = m2.min()
ymax = m2.max()
X,Y = np.mgrid[xmin:xmax:100j,ymin:ymax:50j]
positions = np.vstack([X.ravel(), Y.ravel()])
values = np.vstack([m1, m2])
kernel = gaussian_kde(values)
kernel_pos = kernel(positions).T
Z = np.reshape(kernel_pos, X.shape)
#import matplotlib.pyplot as plt
#fig, ax = plt.subplots()
#ax.imshow(np.rot90(Z), cmap=plt.cm.gist_earth_r,
# extent=[xmin, xmax, ymin, ymax])
#ax.plot(m1, m2, 'k.', markersize=2)
#ax.set_xlim([xmin, xmax])
#ax.set_ylim([ymin, ymax])
#plt.show()
#
def hex_to_rgb(value):
value = value.lstrip('#')
lv = len(value)
return tuple(int(value[i:i + lv // 3], 16) for i in range(0, lv, lv // 3))
class RGBAColorMapper(object):
"""Maps floating point values to rgb values over a palette"""
def __init__(self, low, high, palette):
self.range = np.linspace(low, high, len(palette))
self.r, self.g, self.b = np.array(zip(*[hex_to_rgb(i) for i in palette]))
def color(self, data):
"""Maps your data values to the pallette with linear interpolation"""
red = np.interp(data, self.range, self.r)
blue = np.interp(data, self.range, self.b)
green = np.interp(data, self.range, self.g)
# Style plot to return a grey color when value is 'nan'
red[np.isnan(red)] = 240
blue[np.isnan(blue)] = 240
green[np.isnan(green)] = 240
colors = np.dstack([red.astype(np.uint8),
green.astype(np.uint8),
blue.astype(np.uint8),
np.full_like(data, 255, dtype=np.uint8)])
return colors.view(dtype=np.uint32).reshape(data.shape)
from scipy.interpolate import griddata
XX,YY = np.mgrid[X.min():X.max():200j,Y.min():Y.max():100j]
ZZ = griddata(positions.T,kernel_pos,(XX,YY),method='cubic')
from bokeh.palettes import YlOrBr9 as pal
colormap = RGBAColorMapper(0, 30, pal)
img = colormap.color(Z.max()-Z.T)
img2 = colormap.color(ZZ.max()-ZZ.T)
i = figure()
i.image_rgba(image=[img],x=[0],y=[0],dw=[xmax],dh=[ymax])
i.x_range = Range1d(start=0,end=1)
i.y_range = Range1d(start=0,end=ymax)
i2 = figure()
i2.image_rgba(image=[img2],x=[0],y=[0],dw=[xmax],dh=[ymax])
i2.x_range = Range1d(start=0,end=1)
i2.y_range = Range1d(start=0,end=ymax)
show(vplot(i,i2))