In [1]:
import pandas as pd
import numpy as np
Read data from an Excel file:
In [2]:
# Load the first sheet of the workbook into a DataFrame.
# The workbook is opened in a context manager so the underlying file handle
# is released as soon as the sheet has been parsed (the original left it open).
with pd.ExcelFile("CSCEng.xls") as xl:
    dfc = xl.parse("Sheet1")

# Show the column labels as the cell output.
dfc.columns
Out[2]:
In [3]:
# Preview the first rows of the loaded sheet.
dfc.head()
Out[3]:
We estimate the joint pdf of the two columns `dfc['multiannual']` and `dfc['bachelor-th']`, using a Gaussian kernel:
In [4]:
import scipy.stats as st
def kde_scipy(vals1, vals2, ab, cd, N):
    """Estimate the joint pdf of two samples on an N x N grid (Gaussian kernel).

    Parameters
    ----------
    vals1, vals2 : 1-D sequences of equal length
        Observed values of the two variables. They are stacked in this order
        to fit the kernel, so ``vals1`` is the kernel's first coordinate.
    ab : (a, b)
        End points of the grid returned as ``x`` (N equally spaced points);
        usually wider than the range of the data plotted along x.
    cd : (c, d)
        End points of the grid returned as ``y``.
    N : int
        Number of grid points along each axis.

    Returns
    -------
    [x, y, Z]
        ``x`` and ``y`` are the 1-D grids and ``Z`` has shape (N, N).
        ``Z[i][j]`` is the estimated density at ``vals1 = y[i]``,
        ``vals2 = x[j]``: the kernel is evaluated at (Y, X), not (X, Y), so
        the first sample runs along the y grid. Callers that want ``vals1``
        on the x axis must therefore pass the samples in reversed order.

    Notes
    -----
    The intervals were tuple parameters ``(a,b), (c,d)`` in the signature,
    which is a syntax error on Python 3; they are unpacked in the body
    instead, so positional callers are unaffected.
    """
    a, b = ab
    c, d = cd
    x = np.linspace(a, b, N)
    y = np.linspace(c, d, N)
    X, Y = np.meshgrid(x, y)
    # X.ravel() concatenates the rows of X; note the (Y, X) stacking order.
    positions = np.vstack([Y.ravel(), X.ravel()])
    values = np.vstack([vals1, vals2])
    kernel = st.gaussian_kde(values)
    Z = np.reshape(kernel(positions).T, X.shape)
    # x, y, Z are returned in the form expected by a Plotly contour trace.
    return [x, y, Z]
In [5]:
# The joint pdf is evaluated at the N x N grid points of the square
# [a, b] x [a, b].
a, b = 5, 11
N = 200
In [6]:
# Pull the two analysed columns out of the frame as plain Python lists.
x, y = (list(dfc[column]) for column in ('multiannual', 'bachelor-th'))
In [7]:
# Fit a one-dimensional Gaussian KDE to each variable separately.
pdfx = st.gaussian_kde(x)  # marginal density estimated from the x-values
pdfy = st.gaussian_kde(y)  # marginal density estimated from the y-values

# Both marginals are sampled at 100 equally spaced points of [a, b].
X = np.linspace(a, b, num=100)
yy = np.linspace(a, b, num=100)

Y = pdfx(X)    # pdfx evaluated at X
xx = pdfy(yy)  # pdfy is a function of the y-variable
In [8]:
# Attention: the columns are passed in reversed order ('bachelor-th' first).
Xvals, Yvals, Zvals = kde_scipy(
    dfc['bachelor-th'],
    dfc['multiannual'],
    (a, b),
    (a, b),
    N,
)
Define Data and Layout for Plotly plot:
In [9]:
import plotly.plotly as py
from plotly.graph_objs import *
Set the text to be displayed when hovering the mouse over the contour plot of the joint pdf:
In [22]:
# Hover label for every grid node: hover_xy[i][j] describes the density
# Zvals[i][j] at the point (Xvals[j], Yvals[i]).
# The original label ended with a stray ')' ("f(x, y)= z)"); it is dropped so
# the parentheses in the displayed text are balanced.
hover_xy = [
    ['f({:0.2f}, {:0.2f})= {:0.2f}'.format(Xvals[j], Yvals[i], Zvals[i][j])
     for j in range(len(Xvals))]
    for i in range(len(Yvals))
]
In [11]:
# Spot-check one hover label (row 62, column 57 of the grid).
hover_xy[62][57]
Out[11]:
Plotly version of the matplotlib `cmocean.salinity` colormap:
In [12]:
# RGB triplets of the colormap, sampled at 21 equally spaced stops.
_SALINITY_RGB = (
    (41, 24, 107),
    (45, 27, 137),
    (40, 39, 162),
    (24, 61, 158),
    (12, 77, 150),
    (15, 91, 144),
    (24, 102, 140),
    (35, 113, 138),
    (44, 124, 136),
    (52, 135, 136),
    (59, 147, 135),
    (66, 158, 132),
    (74, 169, 128),
    (85, 181, 122),
    (100, 193, 113),
    (122, 203, 102),
    (148, 211, 93),
    (179, 217, 94),
    (208, 224, 109),
    (232, 231, 131),
    (253, 238, 153),
)

# Plotly colorscale: [stop, 'rgb(r,g,b)'] pairs with stops 0.0, 0.05, ..., 1.0.
pl_salinity = list(
    [stop / 20.0, 'rgb({},{},{})'.format(*rgb)]
    for stop, rgb in enumerate(_SALINITY_RGB)
)
Define a Contour object:
In [13]:
# Contour trace of the joint density; hovering shows the precomputed labels.
trace1 = Contour(
    x=Xvals,
    y=Yvals,
    z=Zvals,
    text=hover_xy,
    hoverinfo='text',
    colorscale=pl_salinity,
    showscale=False,
    contours=Contours(showlines=False),
)
Set hover text for the two marginal pdfs:
In [14]:
# Hover labels for the marginal curves: one "(abscissa, density)" string per
# sampled point.
textx = [
    '(x,g(x))=({:0.2f}, {:0.2f})'.format(x_pt, g_val)
    for x_pt, g_val in zip(X, Y)
]
texty = [
    '(y,h(y))=({:0.2f}, {:0.2f})'.format(y_pt, h_val)
    for y_pt, h_val in zip(yy, xx)
]
In [23]:
# Scatter trace for the marginal pdf g(x): drawn on the (x1, y2) axes and
# filled down to y = 0.
trace2 = Scatter(
    name='pdf-x',
    x=X,
    y=Y,
    xaxis='x1',
    yaxis='y2',
    mode='lines',
    line=Line(width=2, color='rgb(66,158,132)', shape='spline'),
    fill='tozeroy',
    fillcolor='rgb(122,203,102)',
    text=textx,
    hoverinfo='text',
)

# Scatter trace for the marginal pdf h(y): drawn on the (x2, y1) axes and
# filled towards x = 0, so the density runs along the horizontal axis.
trace3 = Scatter(
    name='pdf-y',
    x=xx,
    y=yy,
    xaxis='x2',
    yaxis='y1',
    mode='lines',
    line=Line(width=2, color='rgb(66,158,132)', shape='spline'),
    fill='tozerox',
    fillcolor='rgb(122,203,102)',
    text=texty,
    hoverinfo='text',
)
In [24]:
# Collect the contour trace and the two marginal traces for the figure.
data = Data([trace1, trace2, trace3])
Set the plot layout:
In [26]:
# Figure layout: the contour occupies the lower-left 80% of the canvas, and
# the two marginal curves sit in the strips above it and to its right.
layout = Layout(
    title='Kernel Density Estimation',
    font=Font(size=11),
    autosize=False,
    width=650,
    height=550,
    margin=Margin(t=50),
    showlegend=False,
    hovermode='closest',
    # Main x axis (shared by the contour and the top marginal).
    xaxis=XAxis(
        title='x',
        titlefont=Font(size=11),
        domain=[0, 0.8],
        range=[a, b],
        tickvals=[6, 7, 8, 9, 10, 11],
        showgrid=False,
        zeroline=False,
    ),
    # Main y axis (shared by the contour and the right-hand marginal).
    yaxis=YAxis(
        title='y',
        titlefont=Font(size=11),
        domain=[0, 0.8],
        range=[a, b],
        showgrid=False,
        zeroline=False,
    ),
    # Density axis of the right-hand marginal.
    xaxis2=XAxis(
        domain=[0.82, 1],
        side='top',
        ticklen=4,
        showgrid=False,
        zeroline=False,
    ),
    # Density axis of the top marginal.
    yaxis2=YAxis(
        domain=[0.82, 1],
        ticklen=4,
        showgrid=False,
        zeroline=False,
    ),
)

fig = Figure(data=data, layout=layout)
In [27]:
import plotly
# Initialise plotly's offline notebook mode before calling iplot.
plotly.offline.init_notebook_mode()
In [28]:
# Display the assembled figure in the notebook using plotly's offline mode.
plotly.offline.iplot(fig)
In [29]:
from IPython.core.display import HTML
def css_styling():
    """Read ``./custom.css`` and return its contents wrapped in an HTML object.

    The path is relative to the current working directory. The file is read
    inside a ``with`` block so the handle is closed deterministically (the
    original left it open). A missing or unreadable file raises the usual
    ``IOError``/``OSError`` from ``open``.
    """
    with open("./custom.css", "r") as css_file:
        styles = css_file.read()
    return HTML(styles)
# Last expression of the cell: the returned HTML object becomes the cell output.
css_styling()
Out[29]: