In [1]:
import numpy as np
import pandas as pd
In [2]:
import scipy as sp
import matplotlib.pyplot as plt
import os, sys
import cPickle as pickle
In [3]:
#connect echoRD Tools
pathdir='../' #path to echoRD
lib_path = os.path.abspath(pathdir)
sys.path.append(lib_path)
import vG_conv as vG
from hydro_tools import plotparticles_t,hydroprofile,plotparticles_specht
In [4]:
obs=pd.read_csv('brspecht.dat',delimiter='\t')
obs.index=np.arange(-0.05,-1.,-0.1)
plt.figure(figsize=(18,6))
plt.subplot(131)
plt.imshow(obs.iloc[:,:10],cmap='Blues')
plt.colorbar()
plt.title('Br- Tracer Recovery\nProfile 1')
plt.subplot(132)
plt.imshow(obs.iloc[:,10:],cmap='Blues')
plt.title('Br- Tracer Recovery\nProfile 2')
plt.colorbar()
plt.subplot(133)
plt.plot(obs.iloc[:,:10].mean(axis=1),obs.index,label='Profile 1',c='b')
plt.errorbar(obs.iloc[:,:10].mean(axis=1),obs.index,xerr=obs.iloc[:,:10].std(axis=1),c='b')
plt.plot(obs.iloc[:,10:].mean(axis=1),obs.index,label='Profile 2',c='r')
plt.errorbar(obs.iloc[:,10:].mean(axis=1),obs.index,xerr=obs.iloc[:,10:].std(axis=1),c='r')
plt.legend(loc=4)
plt.xlabel('c(Br-)')
plt.ylabel('depth [m]')
plt.title('Br- Tracer Recovery [mg]\nLateral Mean with Std over Depth')
plt.tight_layout()
In [5]:
#connect to echoRD
import run_echoRD as rE
#connect and load project
[dr,mc,mcp,pdyn,cinf,vG]=rE.loadconnect(pathdir='../',mcinif='mcini_specht4y')
mcp.mcpick_out(mc,'specht4y_x2.pickle')
mc.advectref='Shipitalo'
[mc,particles,npart]=dr.particle_setup(mc)
precTS=pd.read_csv(mc.precf, sep=',',skiprows=3)
In [6]:
mc.prects=False
[thS,npart]=pdyn.gridupdate_thS(particles.lat,particles.z,mc)
[A,B]=plotparticles_t(particles,thS/100.,mc,vG,store=True)
In [7]:
mc.prects=False
precTS.tstart=60
precTS.tend=60+2.3*3600
precTS.total=0.02543
precTS.intense=precTS.total/(precTS.tend-precTS.tstart)
#DEBUG ONLY:
precTS.intense*=10.
precTS
Out[7]:
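For orientation, the block rate implied by these settings is the total depth divided by the event duration: 0.02543 m over 2.3 h, i.e. roughly 3.1e-6 m/s or about 11 mm/h before the tenfold debug amplification. A minimal check, assuming the units above (metres and seconds):

#quick sanity check of the block rainfall intensity (assumes depth in m, duration in s)
duration = 2.3*3600.            #event duration [s]
intensity = 0.02543/duration    #approx. 3.07e-6 m/s
print(intensity*3600.*1000.)    #approx. 11.1 mm/h before the tenfold debug amplification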
In [8]:
#get all runs:
files=!ls picks/NechoRD_2D_Weiherbach_*L_TS.pick
In [9]:
runs=np.zeros(len(files)).astype(int)
for i in np.arange(len(files)):
    runs[i]=int(files[i][28:32])
runs
Out[9]:
In [304]:
#label each run with its combination of the four binary model hypotheses (e.g. 'aaba')
#1457=abbb
#2377=bbaa
runs_H=np.array(['abab','abaa','aaaa','aaab',
'abbb','abba','aaba','aabb',
'bbab','bbaa','baaa','baab',
'bbbb','bbba','baba','babb',
])
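For convenience, the numeric run ids can be kept side by side with these labels. This is a small optional helper, assuming the order of runs (taken from the file listing) matches the order of runs_H, as the comments above indicate (e.g. 1457 = abbb):

#hypothetical lookup table pairing run ids with hypothesis labels
run_labels = pd.Series(runs_H, index=runs)
#e.g. run_labels[1457] should then return 'abbb'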
In [19]:
#unpickle the first run to get the array dimensions
f = open(files[0],"rb")
[TS_store,TX_store,NP_store] = pickle.loads(pickle.load(f))
f.close()
TSstore=pickle.loads(TS_store)
TXstore=pickle.loads(TX_store)
NPstore=pickle.loads(NP_store)
TS=np.zeros(np.shape(TSstore)+(len(runs),))
TX=np.zeros(np.shape(TXstore)+(len(runs),))
NP=np.zeros(np.shape(NPstore)+(len(runs),))
TS[:,:,:,0]=TSstore
TX[:,:,0]=TXstore
NP[:,:,0]=NPstore
for i in np.arange(len(files))[1:]:
    #unpickle the stored arrays of the remaining runs
    f = open(files[i],"rb")
    [TS_store,TX_store,NP_store] = pickle.loads(pickle.load(f))
    f.close()
    TS[:,:,:,i]=pickle.loads(TS_store)
    TX[:,:,i]=pickle.loads(TX_store)
    NP[:,:,i]=pickle.loads(NP_store)
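The read pattern above implies a nested layout: each .pick file contains one pickled string which itself decodes to a list of three pickled arrays. A minimal sketch of a writer matching that layout; the actual writer is part of the echoRD run scripts and is not shown here, and dump_run is a hypothetical helper name:

#hypothetical writer matching the nested pickle layout assumed by the reader above
def dump_run(filename, TSstore, TXstore, NPstore):
    inner = [pickle.dumps(TSstore), pickle.dumps(TXstore), pickle.dumps(NPstore)]
    f = open(filename, "wb")
    pickle.dump(pickle.dumps(inner), f)
    f.close()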
In [134]:
import scipy.ndimage as spn
t_inst=[1,6,11,21,41,61,121,320,961]
for it in np.arange(len(runs)):
    plt.figure(figsize=(18,6))
    allplt=191
    for i in np.arange(9):
        k=allplt+i
        plt.subplot(k)
        plt.imshow(spn.filters.median_filter(TS[t_inst[i],:,:,it],size=mc.smooth),vmin=0., vmax=100., cmap='Blues',origin='upper')
        plt.title('t='+str(t_inst[i]-1)+'min',fontsize=16)
        if i==0:
            plt.text(-12,70,''.join(['RUN ',runs_H[it],' (',str(runs[it]),')']),rotation='vertical',fontsize=16)
    plt.tight_layout()
In [58]:
fig, ((ax1, ax2, ax3, ax4), (ax5, ax6, ax7, ax8), (ax9, ax10, ax11, ax12), (ax13, ax14, ax15, ax16)) = plt.subplots(nrows=4, ncols=4, figsize=(6,18))
def im_plot(ax,it,xlab_show=True,ylab_show=True):
    if (it==13) | (it==14):
        ax.imshow(spn.filters.median_filter(TS[60,:,:,it],size=mc.smooth),vmin=0., vmax=100., cmap='Blues',origin='upper')
    else:
        ax.imshow(spn.filters.median_filter(TS[121,:,:,it],size=mc.smooth),vmin=0., vmax=100., cmap='Blues',origin='upper')
    # decorate axes
    ax.set_xlim((0,34))
    ax.set_ylim((100,0))
    ax.set_xticks([0,10,20,30])
    ax.set_yticks([100,80,60,40,20,0])
    if xlab_show:
        ax.set_xticklabels([0,0.1,0.2,0.3])
    else:
        ax.set_xticklabels([])
    if ylab_show:
        ax.set_yticklabels([-1.0,-0.8,-0.6,-0.4,-0.2,0])
    else:
        ax.set_yticklabels([])
    #ax1.text(40,-5,'width [m]')
    #ax.text(-4,85,'depth [m]',rotation='vertical')
    #ax.set_title(''.join(['RUN ',str(runs_H[it])]))
    if (it==13) | (it==14):
        ax.text(5,78,'t = 60 min',color='white',fontsize=12)
        ax.text(5,85,'Hypotheses',color='white',fontsize=12)
        ax.text(5,94,runs_H[it],color='white',fontsize=25)
im_plot(ax1,0,False)
ax1.text(4,-15,'Weiherbach Simulation after 2h',fontsize=16)
im_plot(ax2,1,False,False)
im_plot(ax3,2,False,False)
im_plot(ax4,3,False,False)
im_plot(ax5,4,False)
im_plot(ax6,5,False,False)
im_plot(ax7,6,False,False)
im_plot(ax8,7,False,False)
im_plot(ax9,8,False)
im_plot(ax10,9,False,False)
im_plot(ax11,10,False,False)
im_plot(ax12,11,False,False)
im_plot(ax13,12)
im_plot(ax14,13,True,False)
im_plot(ax15,14,True,False)
im_plot(ax16,15,True,False)
plt.tight_layout()
plt.savefig('model_hypotheses_compare.pdf', bbox_inches="tight")
In [305]:
#last stored timestep per run (index of the first empty step minus one)
last_entry=np.zeros(16)
for i in range(16):
    last_entry[i]=np.where(TS[:,0,0,i]==0.)[0][0]-1
last_entry
Out[305]:
In [306]:
t_check=321 #comparison timestep (integer so it can be used as an array index below)
last_entry[last_entry>t_check]=t_check
last_entry=last_entry.astype(int)
In [307]:
last_entry
Out[307]:
In [308]:
runs_H
Out[308]:
In [309]:
#sim=pd.DataFrame(NP[1441,:,:]*precTS.conc.values*mc.particlemass*1000.,np.linspace(0.,mc.soildepth,len(NP[0,:,it])))
sim=pd.DataFrame(NP[t_check,:,:]*precTS.conc.values*mc.particlemass*1000.,np.linspace(0.,mc.soildepth,NP.shape[1]))
sim.columns=runs_H
#hydroprofile(obs,sim,fsize=(8, 12),xbound=[0.,0.04,3],ybound=[-1.,0.,5],ptitle='Bromid Recovery\nSimulation after 24h\nObservation after 24h\n',xlab='tracer recovery [mg]')
In [310]:
#get index of shorter TS
idx=np.where(last_entry<t_check)[0]
In [311]:
#replace runs that stopped before t_check with their last stored state
k=0
for i in runs_H[last_entry<t_check]:
    sim[i]=pd.DataFrame(NP[last_entry[idx[k]]-1,:,idx[k]]*precTS.conc.values*mc.particlemass*1000.,np.linspace(0.,mc.soildepth,NP.shape[1]))
    k+=1
In [312]:
#define GOF function
def GOF(o,s):
    import errlib as EL #error measure library (local python file in this folder)
    import scipy.stats as sps
    #goodness-of-fit measures
    #simple version without NA handling!
    gof_val=np.array(EL.KGE(o,s))
    #KGE: Kling-Gupta efficiency
    #cor: correlation
    #alpha: ratio of the standard deviations
    #beta: ratio of the means
    gof_val=np.append(gof_val,EL.L(o,s))
    #L: likelihood
    gof_val=np.append(gof_val,EL.NS(o,s))
    #NSE: Nash-Sutcliffe efficiency
    gof_val=np.append(gof_val,EL.mae(o,s))
    #mae: mean absolute error
    gof_val=np.append(gof_val,EL.rmse(o,s))
    #rmse: root mean squared error
    gof_val=np.append(gof_val,np.array(sps.spearmanr(s,o, axis=None)))
    #spearman: Spearman rank correlation
    #p_spear: p-value
    gof_val=np.append(gof_val,np.array(sps.kendalltau(s,o)))
    #kendall: Kendall's tau
    #p_kend: p-value
    gof_val=np.append(gof_val,np.array(sps.pearsonr(s,o)))
    #pearson: Pearson correlation coefficient
    #p_pear: p-value
    param_names=['KGE','cor','alpha','beta','L','NSE','mae','rmse','spearman','p_spear','kendall','p_kend','pearson','p_pear']
    return pd.DataFrame(data=gof_val,index=param_names,dtype=np.float64).T
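errlib is a local helper module and is not shown here. For orientation, a minimal sketch of the two main scores under their standard definitions (Kling-Gupta efficiency after Gupta et al. 2009 and Nash-Sutcliffe efficiency); the errlib implementations may differ in details such as NA handling:

#minimal reference implementations (assumed standard definitions, not the errlib code itself)
def kge_sketch(o, s):
    cc = np.corrcoef(o, s)[0, 1]          #correlation
    alpha = np.std(s)/np.std(o)           #ratio of standard deviations
    beta = np.mean(s)/np.mean(o)          #ratio of means
    return 1. - np.sqrt((cc-1.)**2 + (alpha-1.)**2 + (beta-1.)**2)

def nse_sketch(o, s):
    return 1. - np.sum((o-s)**2)/np.sum((o-np.mean(o))**2)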
In [313]:
#establish matrix of values to compare
obs.index=[-0.05, -0.15, -0.25, -0.35, -0.45, -0.55, -0.65, -0.75, -0.85, -0.95]
#dummyL=pd.DataFrame(sim.iloc[:,1].copy())
dummyL=pd.DataFrame(sim.copy())
dummyS=pd.DataFrame(obs.mean(axis=1),columns=['obs'])
dummy=pd.concat([dummyS,dummyL],axis=1)
In [314]:
#run comparison
gof_mx=GOF(dummy.obs.values,dummy.iloc[:,1].interpolate().values)
for i in np.arange(len(runs)-1):
    gof_mx=pd.concat([gof_mx,GOF(dummy.obs.values,dummy.iloc[:,i+2].interpolate().values)])
gof_mx.index=runs_H
gof_mx
Out[314]:
In [315]:
gof_mx[['KGE','spearman','NSE','rmse']]
Out[315]:
In [256]:
gof_mx['KGE'][gof_mx['KGE'][~np.isnan(gof_mx['KGE'])].argsort().values][-5:]
Out[256]:
In [259]:
gof_mx['spearman'][(-gof_mx['spearman'][~np.isnan(gof_mx['spearman'])]).argsort().values][-5:]
Out[259]:
In [260]:
gof_mx['NSE'][gof_mx['NSE'][~np.isnan(gof_mx['NSE'])].argsort().values][-5:]
Out[260]:
In [261]:
gof_mx['rmse'][(-gof_mx['rmse'][~np.isnan(gof_mx['rmse'])]).argsort().values][-5:]
Out[261]:
In [316]:
#formulate an acceptance criterion from these top scores
goodfit=(gof_mx['KGE']>0.7) & (gof_mx['NSE']>0.7) & (-gof_mx['spearman']>0.3) & (gof_mx['rmse']<0.0018)
#manually add a run that appears to fit well although it does not score in all categories
goodfit.loc['baaa']=True
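As a quick check, the boolean series can be used as a mask to list the hypothesis combinations that pass the combined criterion (including the manual addition):

#list the runs flagged as acceptable
goodfit[goodfit].index.tolist()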
In [317]:
#alternative approach:
obs_d=obs.T.describe()
lb=obs_d.loc['mean',:]-obs_d.loc['std',:]
ub=obs_d.loc['mean',:]+obs_d.loc['std',:]
o_bounds=pd.DataFrame([lb,ub]).T
o_bounds.columns=['lowb','upb']
In [318]:
dummyL=pd.DataFrame(sim.copy())
dummyX=pd.concat([o_bounds,dummyL],axis=1)
dummyX=dummyX.interpolate()
In [320]:
#count depth levels where a simulation falls outside the observed error margins (mean±std)
out_error=np.zeros(len(runs))
for i in np.arange(len(runs)):
    out_error[i]=np.sum((dummyX.iloc[:,2+i]<dummyX.lowb) | (dummyX.iloc[:,2+i]>dummyX.upb))
out_error_result=pd.DataFrame(out_error.T,index=runs).T
out_error_result<=len(dummyX.upb)/6
Out[320]:
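The acceptance test above tolerates a run leaving the observed mean±std envelope at no more than one sixth of the interpolated depth levels. Equivalently, the per-run fraction of levels outside the envelope can be inspected directly (a small convenience check on the arrays defined above; out_fraction is a new name introduced here):

#fraction of interpolated depth levels outside the observed mean±std envelope, per run
out_fraction=pd.Series(out_error/float(len(dummyX)),index=runs_H)
out_fraction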
In [321]:
#adapted local version of the plot function (overrides the imported hydroprofile)
def hydroprofile(obs,obs2=None,fsize=(6, 6),xbound=[0.,1.,2],ybound=[0.,1.,2],ptitle='Plot',xlab='feature',saving=False,ccontrast=False,colors=None,compress=True,compress2=False,grey=False):
    '''
    This is a rather simple function to plot hydrological profile data from a pandas data frame.
    It is based on some excellent examples by Randy Olson and may need heavy adaptation to your data.
    (CC BY-NC-SA 4.0) jackisch@kit.edu

    obs: dataframe to plot, index is y axis
    obs2: optional second dataframe (e.g. simulations) plotted on the same axes
    fsize: figure size as tuple
    saving: provide a file name if you want the figure saved
    xbound, ybound: axis bounds and number of tick separations as [min, max, n]
    ptitle: plot title
    xlab: x axis label
    ccontrast: option to only use high-contrast values from the palette
    colors: option to pass own colors
    compress: compress obs to mean with error band
    compress2: compress also the simulations to mean with errorbar
    grey: optional boolean array; True entries of obs2 are plotted greyed out (dashed, transparent)
    '''
    # These are the "Tableau 20" colors as RGB.
    tableau20 = [(31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120),
                 (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150),
                 (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148),
                 (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199),
                 (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229)]
    # Scale the RGB values to the [0, 1] range, which is the format matplotlib accepts.
    for i in range(len(tableau20)):
        r, g, b = tableau20[i]
        tableau20[i] = (r / 255., g / 255., b / 255.)
    if colors is None:
        colors=tableau20
    plt.figure(figsize=fsize)
    # Remove unnecessary plot frame lines.
    ax = plt.subplot(111)
    ax.spines["top"].set_visible(True)
    ax.spines["bottom"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["left"].set_visible(True)
    # Ensure that the axis ticks only show up on the bottom and left of the plot.
    ax.get_xaxis().tick_bottom()
    ax.get_yaxis().tick_left()
    # Limit the range of the plot to the data and avoid unnecessary whitespace.
    plt.xlim(xbound[0], xbound[1])
    plt.ylim(ybound[0], ybound[1])
    # Make sure the axis ticks are large enough to be easily read.
    plt.yticks(np.linspace(ybound[0], ybound[1], ybound[2]), [str(x) for x in np.linspace(ybound[0], ybound[1], ybound[2])], fontsize=14)
    plt.xticks(np.linspace(xbound[0], xbound[1], xbound[2]), [str(x) for x in np.linspace(xbound[0], xbound[1], xbound[2])], fontsize=14)
    plt.ylabel('depth [m]')
    plt.xlabel(xlab)
    ax.xaxis.grid(True)
    ax.yaxis.grid(True)
    # Now that the plot is prepared, plot the data.
    if compress:
        plt.plot(obs.mean(axis=1).values,obs.index, lw=2., color=colors[0], label='observed')
        plt.fill_betweenx(np.array(obs.index,dtype=float),(obs.mean(axis=1)-obs.std(axis=1)).values,(obs.mean(axis=1)+obs.std(axis=1)).values, color=colors[0],alpha=0.5)
        offset=2
        if obs2 is not None:
            if compress2:
                plt.plot(obs2.mean(axis=1).values,obs2.index, lw=2., color=colors[2], label='simulated')
                plt.errorbar(obs2.mean(axis=1).values,obs2.index,xerr=obs2.std(axis=1).values, lw=1., color=colors[2])
            else:
                for rank in range(len(obs2.columns)):
                    if np.any(grey):
                        if grey[rank]:
                            alphax=0.3
                            lsx='--'
                        else:
                            alphax=1.
                            lsx='-'
                    else:
                        alphax=1.
                        lsx='-'
                    plt.plot(obs2.iloc[:,rank].values,obs2.index, lw=2., color=colors[rank+offset], alpha=alphax, ls=lsx, label=str(obs2.columns[rank]))
    else:
        for rank in range(len(obs.columns)):
            # Plot each line separately with its own color from the Tableau 20 set.
            if ccontrast:
                plt.plot(obs.iloc[:,rank].values,obs.index, lw=2., color=colors[rank*2], label=str(obs.columns[rank]))
            else:
                plt.plot(obs.iloc[:,rank].values,obs.index, lw=1., color=colors[rank], label=str(obs.columns[rank]))
        offset=len(obs.columns)
        if obs2 is not None:
            for rank in range(len(obs2.columns)):
                # Plot each line separately with its own color from the Tableau 20 set.
                if ccontrast:
                    plt.plot(obs2.iloc[:,rank].values,obs2.index, lw=2., color=colors[rank*2+offset], label=str(obs2.columns[rank]))
                else:
                    plt.plot(obs2.iloc[:,rank].values,obs2.index, lw=2., color=colors[rank+offset], label=str(obs2.columns[rank]))
    plt.title(ptitle,fontsize=16)
    plt.legend(loc=4,frameon=False,ncol=int(np.ceil(len(obs2.columns)/8.)) if obs2 is not None else 1)
    if saving!=False:
        plt.savefig(saving, bbox_inches="tight")
In [322]:
hydroprofile(obs,sim,fsize=(5, 8),xbound=[0.,0.03,3],ybound=[-1.,0.,5],ptitle='Bromide Recovery\nSimulation after 5h\nObservation after 24h\n',xlab='tracer recovery [mg]',grey=~goodfit.values)
In [ ]: