In [1]:
# Imports
import numpy as np
import pandas as pd
import xarray as xr
import hvplot.xarray
import holoviews as hv
from holoviews import opts
# Activate the Bokeh plotting backend for HoloViews
hv.extension('bokeh')
# Default options for every Curve element: 900 x 400 with a visible grid
opts.defaults(
    opts.Curve(height=400, width=900, show_grid=True)
)
(The first example below is taken from the xarray documentation on creating a Dataset: [http://xarray.pydata.org/en/stable/data-structures.html#creating-a-dataset])
In [ ]:
# Seeded generator so the example data is identical on every Restart & Run All
rng = np.random.default_rng(42)
# Normally distributed values (mean 15, std 8) and uniform values in [0, 10),
# both with shape (2, 2, 3)
temp = 15 + 8 * rng.standard_normal((2, 2, 3))
precip = 10 * rng.random((2, 2, 3))
# 2 x 2 nested lists of longitude / latitude values
lon = [[-99.83, -99.32], [-99.79, -99.23]]
lat = [[42.25, 42.21], [42.63, 42.59]]
# For real use cases it's good practice to supply array attributes such as
# units; they are left out here for the sake of brevity.
ds = xr.Dataset(
    data_vars={
        'temperature': (['x', 'y', 'time'], temp),
        'precipitation': (['x', 'y', 'time'], precip),
    },
    coords={
        'lon': (['x', 'y'], lon),
        'lat': (['x', 'y'], lat),
        'time': pd.date_range('2014-09-06', periods=3),
        'reference_time': pd.Timestamp('2014-09-05'),
    },
)
ds
In [ ]:
# Build deterministic test arrays whose values encode their own indices, so a
# selected value can be checked by eye:
#   temp[x, y, t]   = 20000 + 100*x + 10*y + t/10
#   precip[x, y, t] = 15000 + 100*x + 10*y + t/10
#   lon[x, y]       = 100 + 10*x + y/10
#   lat[x, y]       = 200 + 10*x + y/10
nt = 5
nx = 2
ny = 3

# Index vectors shaped for broadcasting to (nx, ny, nt)
x_idx = np.arange(nx)[:, None, None]
y_idx = np.arange(ny)[None, :, None]
t_idx = np.arange(nt)[None, None, :]

# The integer part is summed first so the floating-point result is bit-for-bit
# what the element-by-element formula produces
xy_part = 100*x_idx + 10*y_idx
temp = (20000 + xy_part) + t_idx/10
precip = (15000 + xy_part) + t_idx/10

# 2-D coordinate arrays of shape (nx, ny)
x_col = np.arange(nx)[:, None]
y_row = np.arange(ny)[None, :]
lon = (100 + x_col*10) + y_row/10
lat = (200 + x_col*10) + y_row/10
# Wrap the arrays in a Dataset: two data variables on (x, y, time), 2-D
# lon/lat coordinates on (x, y), a daily time axis and a scalar reference time
ds = xr.Dataset(
    data_vars={
        'temperature': (['x', 'y', 'time'], temp),
        'precipitation': (['x', 'y', 'time'], precip),
    },
    coords={
        'lon': (['x', 'y'], lon),
        'lat': (['x', 'y'], lat),
        'time': pd.date_range('2014-09-06', periods=nt),
        'reference_time': pd.Timestamp('2014-09-05'),
    },
)
ds
In [ ]:
# The temperature data variable as a DataArray
ds['temperature']
In [ ]:
# Positional selection: first entry along the time dimension
ds['temperature'].isel(time=0)
In [ ]:
# Positional selection along two dimensions at once (time index 1, y index 2)
ds['temperature'].isel(time=1, y=2)
In [ ]:
# Same time slice as above, converted to a pandas DataFrame
ds['temperature'].isel(time=0).to_dataframe()
In [3]:
# Build index-encoded test data on a (wl, T, Vb1) grid:
#   param1[wl, T, Vb1] = 20000 + 100*T + 10*wl + Vb1/10
#   param2[wl, T, Vb1] = 15000 + 100*T + 10*wl + Vb1/10
# Broadcasting replaces the triple loop, and `Vb1` is only ever the coordinate
# array (it is no longer reused as a loop index first).
nT = 4
nWL = 5
nVb1 = 3

# Index vectors shaped for broadcasting to (nWL, nT, nVb1)
wl_idx = np.arange(nWL)[:, None, None]
T_idx = np.arange(nT)[None, :, None]
Vb1 = np.arange(nVb1)

# Integer part first, then the Vb1/10 fraction, so values match the scalar formula exactly
grid_part = 100*T_idx + 10*wl_idx
param1 = (20000 + grid_part) + Vb1/10
param2 = (15000 + grid_part) + Vb1/10

# Coordinate values for the wl and T axes
wl_range = 1550 + np.arange(nWL)
T_range = 10*np.arange(nT)
# Dataset with both parameters on the (wl, T, Vb1) grid; all three dimensions
# get coordinate values
ds = xr.Dataset(
    data_vars={
        'parameter1': (['wl', 'T', 'Vb1'], param1),
        'parameter2': (['wl', 'T', 'Vb1'], param2),
    },
    coords={
        'wl': wl_range,
        'T': T_range,
        'Vb1': Vb1,
    },
)
ds
Out[3]:
In [ ]:
# Label-based selection of a single (T, wl) point, converted to a DataFrame
ds['parameter1'].sel(T=0, wl=1551).to_dataframe()
In [ ]:
# Variant of the dataset in which Vb1 is passed with the data variables
# instead of in `coords`
ds2 = xr.Dataset(
    data_vars={
        'parameter1': (['wl', 'T', 'Vb1'], param1),
        'parameter2': (['wl', 'T', 'Vb1'], param2),
        'Vb1': (['Vb1'], Vb1),
    },
    coords={
        'wl': wl_range,
        'T': T_range,
    },
)
In [ ]:
# Same selection as for ds, taken from ds2 for comparison
ds2['parameter1'].sel(T=0, wl=1551).to_dataframe()
hvPlot adds an `.hvplot` accessor directly to xarray objects (enabled by `import hvplot.xarray`) for easy plotting.
See the hvPlot documentation: [https://hvplot.pyviz.org/index.html]
Install with
conda install -c pyviz hvplot
In [4]:
# parameter1 at a single (wl, T) point, plotted with hvplot
ds['parameter1'].sel(wl=1551, T=10).hvplot()
Out[4]:
In [8]:
# Fix the wavelength and group the plot by T
ds['parameter1'].sel(wl=1551).hvplot(by='T')
Out[8]:
In [10]:
# Plot the whole parameter1 array grouped by T and wl.
# The argument-less .sel() of the original selected nothing and is dropped.
ds['parameter1'].hvplot(by=['T', 'wl'])
Out[10]:
Overlay line and scatter plots (using the `*` operator) to show the actual data points on top of the lines
In [26]:
# `*` overlays the two plots: lines first, then scatter markers on top.
# The argument-less .sel() calls of the original selected nothing and are dropped.
(
    ds['parameter1'].hvplot(by=['T', 'wl'])
    * ds['parameter1'].hvplot.scatter(by=['T', 'wl'])
)
Out[26]:
In [8]:
# Scatter of parameter2 against parameter1, grouped by Vb1
ds.hvplot.scatter(
    x='parameter1',
    y='parameter2',
    by=['Vb1'],
)
Out[8]:
In [10]:
# Same scatter, grouped by wl; the .sel() passes the dataset's own Vb1 coordinate
ds.sel(Vb1=ds['Vb1']).hvplot.scatter(
    x='parameter1',
    y='parameter2',
    by=['wl'],
)
Out[10]:
See the HoloViews user guide: [http://holoviews.org/user_guide/index.html]
Install using conda
conda install holoviews
In [ ]:
# Convert one (T, wl) point of ds2 to a DataFrame and plot parameter1 vs Vb1.
# NOTE: the variable is called `scatter` but holds an hv.Curve element.
data = ds2.sel(T=0, wl=1551).to_dataframe()
scatter = hv.Curve(data, kdims='Vb1', vdims='parameter1')
scatter
In [27]:
# DataFrame for a single (T, wl) point, the form handed to hv.Curve elsewhere in this notebook
ds['parameter1'].sel(T=10, wl=1551).to_dataframe()
Out[27]:
In [28]:
# One Curve of parameter1 vs Vb1 per temperature at wl=1551, overlaid in one plot.
# The coordinate is read as ds['T'] rather than ds.T: attribute access to a
# variable called 'T' is easy to confuse with the transpose shorthand `.T`
# that DataArray exposes, and item access always means the variable.
hv.Overlay([
    hv.Curve(
        ds['parameter1'].sel(T=TT, wl=1551).to_dataframe(),
        'Vb1',
        'parameter1',
        label='%.1f degC' % TT,
    )
    for TT in ds['T'].values
])
Out[28]:
In [3]:
# Wrap the xarray Dataset in a HoloViews Dataset so it can be sliced and
# converted into HoloViews elements
hv_ds = hv.Dataset(ds)
hv_ds
Out[3]:
In [7]:
# Scatter of parameter1 vs Vb1 for one (wl, T) point selected from the HoloViews Dataset
hv.Scatter(
    hv_ds.select(wl=1551, T=20),
    kdims='Vb1',
    vdims='parameter1',
)
Out[7]:
In [29]:
# One Curve per temperature at wl=1551, built from the HoloViews Dataset and
# overlaid with a legend on the right.
# The coordinate is read as ds['T'] rather than ds.T: attribute access to a
# variable called 'T' is easy to confuse with the transpose shorthand `.T`
# that DataArray exposes, and item access always means the variable.
hv.Overlay([
    hv.Curve(
        hv_ds.select(wl=1551, T=T_degC),
        'Vb1',
        'parameter1',
        label='%.1f degC' % T_degC,
    )
    for T_degC in ds['T'].values
]).opts(legend_position='right', title='Plot vs Vb1 at different T')
Out[29]:
In [26]:
# Convert the Dataset to Scatter elements of parameter1 vs Vb1, grouped over
# wl and T; dynamic=False is passed through as in the Curve conversion
scatter = hv_ds.to(
    hv.Scatter,
    kdims='Vb1',
    vdims='parameter1',
    groupby=['wl', 'T'],
    dynamic=False,
)
scatter
Out[26]:
In [24]:
# Same conversion as for the Scatter elements, producing Curve elements instead
curve = hv_ds.to(
    hv.Curve,
    kdims='Vb1',
    vdims='parameter1',
    groupby=['wl', 'T'],
    dynamic=False,
)
curve
Out[24]:
In [ ]:
In [8]:
# Build one single-point Dataset per (T, wl) combination, then assemble them:
# concatenate over wl for each temperature, then concatenate those over T.
# Every per-point Dataset is also kept in ds_list.
nT = 2
nWL = 3
nVb1 = 3

# Vb1 values are the same for every point, so they are created once
Vb1_range = np.arange(nVb1)

ds_list = []    # all single-point datasets, in creation order
dsT_list = []   # one wl-concatenated dataset per temperature
for iT in range(nT):
    T_range = 10*iT
    # Create Datasets over wavelength
    wl_list = []
    for iWl in range(nWL):
        wl_range = 1550 + iWl/10
        # Values along Vb1 for this point, shaped (1, 1, nVb1) to match the
        # ['wl', 'T', 'Vb1'] dims; same formula as the element-wise version:
        # 20000 (or 15000) + 100*iT + 10*iWl + Vb1/10
        point_part = 100*iT + 10*iWl
        param1 = ((20000 + point_part) + Vb1_range/10).reshape(1, 1, nVb1)
        param2 = ((15000 + point_part) + Vb1_range/10).reshape(1, 1, nVb1)
        dss = xr.Dataset(
            {
                'parameter1': (['wl', 'T', 'Vb1'], param1),
                'parameter2': (['wl', 'T', 'Vb1'], param2),
            },
            coords={
                'wl': np.array([wl_range]),
                'T': np.array([T_range]),
                'Vb1': Vb1_range,
            },
        )
        ds_list.append(dss)
        wl_list.append(dss)
    # Concatenate over wavelength for every temperature
    dsT_list.append(xr.concat(wl_list, dim='wl'))
# Concatenate over Temperature
ds_wl_T = xr.concat(dsT_list, dim='T')
In [3]:
# Display the dataset assembled by concatenating over wl and then over T
ds_wl_T
Out[3]:
In [9]:
# First single-point dataset that was appended to ds_list
ds_list[0]
Out[9]:
In [11]:
# Alternative to the nested concat: merge all single-point datasets in one call
xr.merge(ds_list)
Out[11]:
In [3]:
# Rebuild the index-encoded test data on a (wl, T, Vb1) grid:
#   param1[wl, T, Vb1] = 20000 + 100*T + 10*wl + Vb1/10
#   param2[wl, T, Vb1] = 15000 + 100*T + 10*wl + Vb1/10
# Broadcasting replaces the triple loop, and `Vb1` is only ever the coordinate
# array (it is no longer reused as a loop index first).
nT = 4
nWL = 5
nVb1 = 3

# Index vectors shaped for broadcasting to (nWL, nT, nVb1)
wl_idx = np.arange(nWL)[:, None, None]
T_idx = np.arange(nT)[None, :, None]
Vb1 = np.arange(nVb1)

# Integer part first, then the Vb1/10 fraction, so values match the scalar formula exactly
grid_part = 100*T_idx + 10*wl_idx
param1 = (20000 + grid_part) + Vb1/10
param2 = (15000 + grid_part) + Vb1/10

# Coordinate values for the wl and T axes
wl_range = 1550 + np.arange(nWL)
T_range = 10*np.arange(nT)
# Dataset with both parameters on the (wl, T, Vb1) grid; all three dimensions
# get coordinate values
ds = xr.Dataset(
    data_vars={
        'parameter1': (['wl', 'T', 'Vb1'], param1),
        'parameter2': (['wl', 'T', 'Vb1'], param2),
    },
    coords={
        'wl': wl_range,
        'T': T_range,
        'Vb1': Vb1,
    },
)
ds
Out[3]:
In [18]:
# Materialise the pandas groupby over T into a list of its items
groups = list(ds['parameter1'].sel(wl=1550).to_dataframe().groupby('T'))
In [19]:
# Each groupby item is a (key, DataFrame) pair; show the DataFrame of the first group
groups[0][1]
Out[19]:
In [22]:
# Index of the first group's DataFrame
groups[0][1].index
Out[22]:
In [27]:
# 2-D slice at wl=1550 converted to a pandas table and transposed, so the
# second dimension (Vb1) runs down the rows and T across the columns
parameter1_table = ds['parameter1'].sel(wl=1550).to_pandas().T
parameter1_table
Out[27]:
In [33]:
# Name carried by the column axis of the table (expected to be 'T' after the transpose - confirm against the output)
parameter1_table.columns.name
Out[33]:
In [34]:
# Name carried by the row index of the table (expected to be 'Vb1' after the transpose - confirm against the output)
parameter1_table.index.name
Out[34]:
In [35]:
# Column labelled 0 - with T across the columns this is the T=0 series
parameter1_table[0]
Out[35]:
In [37]:
# Shape of the 2-D slice left after fixing wl
ds['parameter1'].sel(wl=1550).shape
Out[37]:
In [4]:
# Rebuild the index-encoded test data on a (wl, T, Vb1) grid:
#   param1[wl, T, Vb1] = 20000 + 100*T + 10*wl + Vb1/10
#   param2[wl, T, Vb1] = 15000 + 100*T + 10*wl + Vb1/10
# Broadcasting replaces the triple loop, and `Vb1` is only ever the coordinate
# array (it is no longer reused as a loop index first).
nT = 4
nWL = 5
nVb1 = 3

# Index vectors shaped for broadcasting to (nWL, nT, nVb1)
wl_idx = np.arange(nWL)[:, None, None]
T_idx = np.arange(nT)[None, :, None]
Vb1 = np.arange(nVb1)

# Integer part first, then the Vb1/10 fraction, so values match the scalar formula exactly
grid_part = 100*T_idx + 10*wl_idx
param1 = (20000 + grid_part) + Vb1/10
param2 = (15000 + grid_part) + Vb1/10

# Coordinate values for the wl and T axes
wl_range = 1550 + np.arange(nWL)
T_range = 10*np.arange(nT)
# Dataset with both parameters on the (wl, T, Vb1) grid; all three dimensions
# get coordinate values
ds = xr.Dataset(
    data_vars={
        'parameter1': (['wl', 'T', 'Vb1'], param1),
        'parameter2': (['wl', 'T', 'Vb1'], param2),
    },
    coords={
        'wl': wl_range,
        'T': T_range,
        'Vb1': Vb1,
    },
)
ds
Out[4]:
In [8]:
# Select parameter values by coordinate label with .sel()
ds['parameter1'].sel(T=10, wl=1550)
Out[8]:
In [11]:
# Same selection written with .loc and a {dimension: label} mapping
ds['parameter1'].loc[{'T': 10, 'wl': 1550}]
Out[11]:
In [10]:
# Assign new values in place through .loc: one value per Vb1 entry at (T=10, wl=1550)
ds['parameter1'].loc[{'T': 10, 'wl': 1550}] = np.array([1, 2, 3])
In [38]:
# Select a subset of the data and use expand_dims() to put back the
# coordinates that were selected on:
# - by default the selected coordinates are removed from all data vars
# - the .copy(deep=True) is noted by the original author as required to be
#   able to change the values of the subset
ds_sub = (
    ds.sel(T=10, wl=1550)
    .expand_dims('wl')
    .expand_dims('T')
    .copy(deep=True)
)
ds_sub
Out[38]:
Two ways to assign new values to the subset (only the coordinate-label `.loc` approach is shown below)
In [39]:
# Address the values through their T and wl coordinate labels
ds_sub['parameter1'].loc[{'T': 10, 'wl': 1550}] = np.array([10, 20, 30])
In [41]:
# Updated subset: display ds_sub after the .loc assignment
ds_sub
Out[41]:
In [42]:
# Original dataset, displayed for comparison with ds_sub
ds
Out[42]:
In [43]:
# Split out into sub datasets (one per temperature at wl=1550) and re-merge
# - expand_dims() keeps the same coordinates as the original, so the pieces
#   can be merged back together
ds_sub1, ds_sub2, ds_sub3 = (
    ds.sel(T=T_val, wl=1550).expand_dims('wl').expand_dims('T')
    for T_val in (0, 10, 20)
)
xr.merge([ds_sub1, ds_sub2, ds_sub3])
Out[43]: