In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from decimal import Decimal as deci
from sklearn.cross_decomposition import PLSRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score as rsquare
from sklearn.model_selection import train_test_split
from scipy.stats import randint as sp_randint
from sklearn.model_selection import GridSearchCV as gsv
from sklearn.ensemble import RandomForestRegressor as rfr
# Render matplotlib figures inline in the notebook. `run_line_magic` is the
# supported programmatic equivalent of `%matplotlib inline`; the older
# `get_ipython().magic(...)` entry point is deprecated.
get_ipython().run_line_magic('matplotlib', 'inline')
# Default figure size (width, height) in inches for every plot in this notebook.
plt.rcParams['figure.figsize'] = [10, 8]
In [2]:
# Random-forest regressor. A fixed seed makes the fitted forest (and therefore
# the grid-search winner) reproducible, matching the seeded train/test split
# used later in the notebook.
cf = rfr(n_jobs=-1, random_state=42)

# Candidate values for `max_features`: 1..69 features per split, plus "use all
# features". The latter is spelled `None`; the legacy string "auto" meant the
# same thing for regressors but is rejected by recent scikit-learn releases.
# NOTE(review): integer candidates larger than the number of columns in X will
# fail to fit -- confirm X has at least 69 features.
max_f = list(np.arange(1, 70, 1))
max_f.append(None)

# Search grid for the random forest: 1..24 trees crossed with every
# `max_features` candidate above.
cf_params = {"n_estimators": list(np.arange(1, 25, 1)), "max_features": max_f}

# Partial least squares regressor; inputs are not rescaled before fitting.
pls = PLSRegression(scale=False)

# Search grid for PLS: 1..100 latent components.
# NOTE(review): scikit-learn bounds n_components by the data dimensions --
# confirm the upper limit of 100 is valid for this dataset.
pls_params = {"n_components": list(np.arange(1, 101, 1))}
In [3]:
# Directory holding the CSV inputs, relative to the notebook's working
# directory. Built with os.path.join so it works on any OS and avoids the
# invalid "\." / "\D" / "\c" escape sequences of a backslash string literal.
csv_path = os.path.join("..", "..", "Data", "csv")

# Working directory at load time (kept because the name was defined here originally).
here = os.getcwd()


def _read_csv_array(filename):
    """Read a header-less CSV from `csv_path` and return its values as a squeezed NumPy array.

    The file is opened by its full path instead of changing the process working
    directory, so a missing or unreadable file cannot leave the kernel stranded
    in the data directory.
    """
    return pd.read_csv(os.path.join(csv_path, filename), header=None).values.squeeze()


zspectra = _read_csv_array('fitted_cest.csv')
diff = _read_csv_array('diff.csv')
conc = _read_csv_array('conc.csv')
pH = _read_csv_array('pH.csv')
concs = _read_csv_array('concs.csv')
pHs = _read_csv_array('pHs.csv')
rsq = _read_csv_array('rsq.csv')
In [4]:
def mymetric(yexp, ypred):
    """Return the RMSE of `ypred` against `yexp`, as a percentage of mean(`yexp`).

    Both inputs are squeezed first, so column vectors and flat arrays are
    treated alike.
    """
    observed = yexp.squeeze()
    predicted = ypred.squeeze()
    rmse = np.sqrt(mean_squared_error(observed, predicted))
    # Normalise by the mean observed value, then express as a percentage.
    return 100 * (rmse / np.mean(observed))
In [5]:
def mystddev(yexp, ypred):
    """Return a non-negative spread figure combining residual bias and prediction spread.

    The summed residual is divided by the residual L2 norm, by sqrt(number of
    predictions) and by mean(`yexp`), scaled to percent, and its magnitude is
    then multiplied by the standard deviation of `ypred`.
    Both inputs are squeezed before use.
    """
    observed = yexp.squeeze()
    predicted = ypred.squeeze()
    spread = np.std(predicted)

    residual = observed - predicted
    scaled = np.sum(residual) / np.sqrt(np.sum(residual ** 2))
    scaled = scaled / np.sqrt(predicted.shape[0])
    scaled = scaled / np.mean(observed)
    scaled = 100 * scaled

    # sqrt(a^2 * b^2) discards the sign of the scaled bias.
    return np.sqrt(np.square(scaled) * np.square(spread))
In [6]:
# Model inputs and regression target: the array loaded from diff.csv is used as
# the feature matrix and the array from pH.csv as the target.
X, Y = diff, pH
# Sorted copy of the values loaded from pHs.csv.
Ys = np.sort(pHs)
In [7]:
# Hold out 10% of the samples for testing; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.1,
    random_state=42,
)

# Grid search over the random-forest parameter grid using all available cores.
grid_rfr = gsv(estimator=cf, param_grid=cf_params, n_jobs=-1)
grid_rfr.fit(X_train, y_train)

# Score the held-out predictions with the percentage-RMSE metric; as the last
# expression of the cell, its value is what the notebook displays.
y_hat_rfr = grid_rfr.predict(X_test)
mymetric(y_test, y_hat_rfr)
In [9]:
# Display the random-forest estimator that the grid search selected as best.
grid_rfr.best_estimator_
Out[9]:
In [10]:
# Grid search over the PLS component counts using all available cores, fitted
# on the same training split as the random forest.
grid_pls = gsv(estimator=pls, param_grid=pls_params, n_jobs=-1)
grid_pls.fit(X_train, y_train)

# Score the held-out predictions with the percentage-RMSE metric; as the last
# expression of the cell, its value is what the notebook displays.
y_hat_pls = grid_pls.predict(X_test)
mymetric(y_test, y_hat_pls)
Out[10]:
In [11]:
# Display the PLS estimator that the grid search selected as best.
grid_pls.best_estimator_
Out[11]:
In [ ]: