In [110]:
from keras.models import load_model
import pandas as pd
import keras.backend as K
from keras.callbacks import LearningRateScheduler
from keras.callbacks import Callback
import math
import numpy as np
import matplotlib.pyplot as plt
def coeff_r2(y_true, y_pred):
    # R2 (coefficient of determination) metric for Keras
    from keras import backend as K
    SS_res = K.sum(K.square(y_true - y_pred))
    SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
    return 1 - SS_res / (SS_tot + K.epsilon())
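coeff_r2 is only needed when a saved model references it; a minimal sketch of the two usual ways to register it (commented out, with a placeholder model/file name that is not part of this notebook):
In [ ]:
# sketch only: either compile with coeff_r2 as a metric ...
# my_model.compile(optimizer='adam', loss='mse', metrics=[coeff_r2])
# ... or pass it through custom_objects when reloading a saved model
# my_model = load_model('my_model.h5', custom_objects={'coeff_r2': coeff_r2})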
In [2]:
model = load_model('./FPV_ANN_tabulated_Standard_4Res_500n.H5')
# model = load_model('../tmp/calc_100_3_3_cbrt.h5', custom_objects={'coeff_r2':coeff_r2})
model.summary()
In [3]:
import numpy as np
import pandas as pd
# MaxAbsScaler is needed by the 'tan' case below
from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler, StandardScaler

class data_scaler(object):
    def __init__(self):
        self.norm = None
        self.norm_1 = None
        self.std = None
        self.case = None
        self.scale = 1
        self.bias = 1e-20
        # self.bias = 1
        self.switcher = {
            'min_std': 'min_std',
            'std2': 'std2',
            'std_min': 'std_min',
            'min': 'min',
            'no': 'no',
            'log': 'log',
            'log_min': 'log_min',
            'log_std': 'log_std',
            'log2': 'log2',
            'sqrt_std': 'sqrt_std',
            'cbrt_std': 'cbrt_std',
            'nrt_std': 'nrt_std',
            'tan': 'tan'
        }
    def fit_transform(self, input_data, case):
        self.case = case
        if self.switcher.get(self.case) == 'min_std':
            self.norm = MinMaxScaler()
            self.std = StandardScaler()
            out = self.norm.fit_transform(input_data)
            out = self.std.fit_transform(out)
        if self.switcher.get(self.case) == 'std2':
            self.std = StandardScaler()
            out = self.std.fit_transform(input_data)
        if self.switcher.get(self.case) == 'std_min':
            self.norm = MinMaxScaler()
            self.std = StandardScaler()
            out = self.std.fit_transform(input_data)
            out = self.norm.fit_transform(out)
        if self.switcher.get(self.case) == 'min':
            self.norm = MinMaxScaler()
            out = self.norm.fit_transform(input_data)
        if self.switcher.get(self.case) == 'no':
            self.norm = MinMaxScaler()
            self.std = StandardScaler()
            out = input_data
        if self.switcher.get(self.case) == 'log_min':
            out = - np.log(np.asarray(input_data / self.scale) + self.bias)
            self.norm = MinMaxScaler()
            out = self.norm.fit_transform(out)
        if self.switcher.get(self.case) == 'log_std':
            out = - np.log(np.asarray(input_data / self.scale) + self.bias)
            self.std = StandardScaler()
            out = self.std.fit_transform(out)
        if self.switcher.get(self.case) == 'log2':
            self.norm = MinMaxScaler()
            self.std = StandardScaler()
            out = self.norm.fit_transform(input_data)
            out = np.log(np.asarray(out) + self.bias)
            out = self.std.fit_transform(out)
        if self.switcher.get(self.case) == 'sqrt_std':
            out = np.sqrt(np.asarray(input_data / self.scale))
            self.std = StandardScaler()
            out = self.std.fit_transform(out)
        if self.switcher.get(self.case) == 'cbrt_std':
            out = np.cbrt(np.asarray(input_data / self.scale))
            self.std = StandardScaler()
            out = self.std.fit_transform(out)
        if self.switcher.get(self.case) == 'nrt_std':
            out = np.power(np.asarray(input_data / self.scale), 1/4)
            self.std = StandardScaler()
            out = self.std.fit_transform(out)
        if self.switcher.get(self.case) == 'tan':
            self.norm = MaxAbsScaler()
            self.std = StandardScaler()
            out = self.std.fit_transform(input_data)
            out = self.norm.fit_transform(out)
            out = np.tan(out / (2 * np.pi + self.bias))
        return out
    def transform(self, input_data):
        if self.switcher.get(self.case) == 'min_std':
            out = self.norm.transform(input_data)
            out = self.std.transform(out)
        if self.switcher.get(self.case) == 'std2':
            out = self.std.transform(input_data)
        if self.switcher.get(self.case) == 'std_min':
            out = self.std.transform(input_data)
            out = self.norm.transform(out)
        if self.switcher.get(self.case) == 'min':
            out = self.norm.transform(input_data)
        if self.switcher.get(self.case) == 'no':
            out = input_data
        if self.switcher.get(self.case) == 'log_min':
            out = - np.log(np.asarray(input_data / self.scale) + self.bias)
            out = self.norm.transform(out)
        if self.switcher.get(self.case) == 'log_std':
            out = - np.log(np.asarray(input_data / self.scale) + self.bias)
            out = self.std.transform(out)
        if self.switcher.get(self.case) == 'log2':
            out = self.norm.transform(input_data)
            out = np.log(np.asarray(out) + self.bias)
            out = self.std.transform(out)
        if self.switcher.get(self.case) == 'sqrt_std':
            out = np.sqrt(np.asarray(input_data / self.scale))
            out = self.std.transform(out)
        if self.switcher.get(self.case) == 'cbrt_std':
            out = np.cbrt(np.asarray(input_data / self.scale))
            out = self.std.transform(out)
        if self.switcher.get(self.case) == 'nrt_std':
            out = np.power(np.asarray(input_data / self.scale), 1/4)
            out = self.std.transform(out)
        if self.switcher.get(self.case) == 'tan':
            out = self.std.transform(input_data)
            out = self.norm.transform(out)
            out = np.tan(out / (2 * np.pi + self.bias))
        return out
    def inverse_transform(self, input_data):
        if self.switcher.get(self.case) == 'min_std':
            out = self.std.inverse_transform(input_data)
            out = self.norm.inverse_transform(out)
        if self.switcher.get(self.case) == 'std2':
            out = self.std.inverse_transform(input_data)
        if self.switcher.get(self.case) == 'std_min':
            out = self.norm.inverse_transform(input_data)
            out = self.std.inverse_transform(out)
        if self.switcher.get(self.case) == 'min':
            out = self.norm.inverse_transform(input_data)
        if self.switcher.get(self.case) == 'no':
            out = input_data
        if self.switcher.get(self.case) == 'log_min':
            out = self.norm.inverse_transform(input_data)
            out = (np.exp(-out) - self.bias) * self.scale
        if self.switcher.get(self.case) == 'log_std':
            out = self.std.inverse_transform(input_data)
            out = (np.exp(-out) - self.bias) * self.scale
        if self.switcher.get(self.case) == 'log2':
            out = self.std.inverse_transform(input_data)
            out = np.exp(out) - self.bias
            out = self.norm.inverse_transform(out)
        if self.switcher.get(self.case) == 'sqrt_std':
            out = self.std.inverse_transform(input_data)
            out = np.power(out, 2) * self.scale
        if self.switcher.get(self.case) == 'cbrt_std':
            out = self.std.inverse_transform(input_data)
            out = np.power(out, 3) * self.scale
        if self.switcher.get(self.case) == 'nrt_std':
            out = self.std.inverse_transform(input_data)
            out = np.power(out, 4) * self.scale
        if self.switcher.get(self.case) == 'tan':
            out = (2 * np.pi + self.bias) * np.arctan(input_data)
            out = self.norm.inverse_transform(out)
            out = self.std.inverse_transform(out)
        return out
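A quick sanity check of data_scaler on synthetic data (a sketch, assuming non-negative inputs as in the table; the array and names below are illustrative only): the cbrt_std case should round-trip through fit_transform / inverse_transform.
In [ ]:
# sketch: round-trip check of data_scaler with the 'cbrt_std' case on random non-negative data
rng = np.random.RandomState(0)
demo = np.abs(rng.randn(100, 3))      # synthetic stand-in for table values
sc = data_scaler()
scaled = sc.fit_transform(demo, 'cbrt_std')
restored = sc.inverse_transform(scaled)
print(np.abs(restored - demo).max())  # expected ~0 up to floating-point error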
In [4]:
def read_h5_data(fileName, input_features, labels):
    df = pd.read_hdf(fileName)
    # df = df[df['f']<0.45]
    # for i in range(5):
    #     pv_101 = df[df['pv']==1]
    #     pv_101['pv'] = pv_101['pv'] + 0.002*(i+1)
    #     df = pd.concat([df, pv_101])

    input_df = df[input_features]
    in_scaler = data_scaler()
    input_np = in_scaler.fit_transform(input_df.values, 'std2')

    label_df = df[labels].clip(0)
    # if 'PVs' in labels:
    #     label_df['PVs'] = np.log(label_df['PVs']+1)
    out_scaler = data_scaler()
    label_np = out_scaler.fit_transform(label_df.values, 'cbrt_std')

    return input_np, label_np, df, in_scaler, out_scaler
In [179]:
labels = ['H2', 'H', 'O', 'O2', 'OH', 'H2O', 'HO2', 'CH3', 'CH4', 'CO', 'CO2', 'CH2O', 'N2', 'T', 'PVs']
print(labels)
input_features=['f','zeta','pv']
# read in the data
x_input, y_label, df, in_scaler, out_scaler = read_h5_data('../data/tables_of_fgm_psi.h5',input_features=input_features, labels = labels)
In [212]:
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x_input, y_label, test_size=0.01)

x_test_df = pd.DataFrame(in_scaler.inverse_transform(x_test), columns=input_features)
y_test_df = pd.DataFrame(out_scaler.inverse_transform(y_test), columns=labels)

predict_val = model.predict(x_test, batch_size=1024*8)
predict_df = pd.DataFrame(out_scaler.inverse_transform(predict_val), columns=labels)

df_test = pd.concat([x_test_df, y_test_df], axis=1)
df_pred = pd.concat([x_test_df, predict_df], axis=1)
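Because the labels were clipped at zero before scaling (see read_h5_data), any negative values in the inverse-transformed predictions are scaling artefacts; a small optional check (a sketch, not part of the original flow):
In [ ]:
# sketch: fraction of inverse-transformed predictions that fall below zero, per species
neg_frac = (predict_df[labels] < 0).mean()
print(neg_frac.sort_values(ascending=False).head())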
Considering the scaling applied to the inputs, rescale zeta back to its original tabulated levels.
In [44]:
# map the inverse-scaled zeta values back onto the original tabulated levels
# (positional match after sorting both sets of unique values)
zeta_test = list(set(df_test['zeta']))
zeta_test.sort()
zeta_df = list(set(df['zeta']))
zeta_df.sort()
zeta_level = zeta_df

df_pred.zeta = df_pred.zeta.replace(zeta_test, zeta_df)
df_test.zeta = df_test.zeta.replace(zeta_test, zeta_df)
df_pred.head(5)
Out[44]:
In [191]:
r2_stats = pd.DataFrame()
r2s = []
r2s_i = []
maxs_0 = []
maxs_9 = []
for r2, name in zip(r2_score(df_test, df_pred, multioutput='raw_values'), df_test.columns):
    r2s.append(r2)
    maxs_0.append(df_test[df_test['zeta'] == zeta_level[0]][name].max())
    maxs_9.append(df_test[df_test['zeta'] == zeta_level[9]][name].max())
    for i in zeta_level:
        # r2_score expects (y_true, y_pred)
        r2s_i.append(r2_score(df_test[df_test['zeta'] == i][name],
                              df_pred[df_pred['zeta'] == i][name]))

r2_stats['name'] = df_test.columns
r2_stats['z_scale'] = [m_9 / (m_0 + 1e-20) for m_9, m_0 in zip(maxs_9, maxs_0)]
r2_stats['total r2 = '] = r2s

tmp = np.asarray(r2s_i).reshape(-1, len(zeta_level))
for idx, z in enumerate(zeta_level):
    r2_stats['r2 = ' + str(z)] = tmp[:, idx]

# drop the input-feature rows (f, zeta, pv) so only the species remain
df_dnn_r2 = r2_stats.drop(columns=['z_scale'])[3:]
df_dnn_r2.to_csv('r2_table.csv')
df_dnn_r2
Out[191]:
In [211]:
df_test_show = df_test.sample(frac=0.3)
df_pred_show = df_pred.iloc[df_test_show.index]

for sp in labels:
    plt.figure()  # new figure per species so points do not accumulate across plots
    x = df_test_show[sp]
    y = df_pred_show[sp]
    plt.scatter(x, y, c='k')
    ax = plt.gca()
    ax.set_aspect('equal')
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
    plt.savefig('{0}_r2.eps'.format(sp), format='eps', bbox_inches='tight', pad_inches=0)
    plt.close()
!mkdir figs
!mv *.eps figs
In [204]:
from mpl_toolkits import mplot3d
def wireframe_plot(x, y, z, sp):
    fig = plt.figure()  # new figure per call so 3D axes do not accumulate across plots
    ax = plt.axes(projection='3d')
    # make the panes transparent
    ax.xaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
    ax.yaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
    ax.zaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
    # make the grid lines transparent
    # ax.xaxis._axinfo["grid"]['color'] = (1,1,1,0)
    # ax.yaxis._axinfo["grid"]['color'] = (1,1,1,0)
    # ax.zaxis._axinfo["grid"]['color'] = (1,1,1,0)
    ax.set_axis_off()
    ax.plot_wireframe(x, y, z)
    plt.savefig('{0}_wireframe.eps'.format(sp), format='eps', bbox_inches='tight', pad_inches=0)
    plt.close(fig)
!rm *.eps
In [205]:
x = df[df.zeta == 0]['f'].values
y = df[df.zeta == 0]['pv'].values
x = x.reshape(501, -1)
y = y.reshape(501, -1)

for sp in labels:
    for zl in zeta_level:
        z = df[df.zeta == zl][sp].values
        z = z.reshape(501, -1)
        wireframe_plot(x, y, z, '{0}_zeta_{1}_tab'.format(sp, zl))
In [206]:
dnn_val = model.predict(x_input, batch_size=1024*8)
dnn_df = pd.DataFrame(out_scaler.inverse_transform(dnn_val), columns=labels)
df_dnn = pd.concat([df[input_features], dnn_df], axis=1)
# df_dnn.head(5)

for sp in labels:
    for zl in zeta_level:
        z = df_dnn[df_dnn.zeta == zl][sp].values
        z = z.reshape(501, -1)
        wireframe_plot(x, y, z, '{0}_zeta_{1}_dnn'.format(sp, zl))
In [207]:
for sp in labels:
    for zl in zeta_level:
        z = df_dnn[df_dnn.zeta == zl][sp].values - df[df.zeta == zl][sp].values
        z = z.reshape(501, -1)
        wireframe_plot(x, y, z, '{0}_zeta_{1}_diff'.format(sp, zl))
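To complement the difference wireframes, a scalar error summary can be useful; the sketch below (names and the range normalization are assumptions, not from the original notebook) assumes df and df_dnn share the same row ordering, as the diff plots above already do.
In [ ]:
# sketch: maximum absolute table-vs-DNN error per species, normalized by the tabulated range
err_summary = {}
for sp in labels:
    diff = df_dnn[sp].values - df[sp].values
    span = df[sp].max() - df[sp].min() + 1e-20
    err_summary[sp] = np.abs(diff).max() / span
print(pd.Series(err_summary).sort_values(ascending=False))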
In [208]:
!mv *.eps figs
In [ ]: