In [1]:
lavec = [[20.406, 0, 0],
[0, 20.406, 0],
[0, 0, 20.406]]
xcoord = [[ 0.0000000000000000, 0.0000000000000000, 0.0000000000000000],
[ 0.0000000000000000, 0.0000000000000000, 0.5000000000000000],
[ 0.0000000000000000, 0.2500000000000000, 0.2500000000000000],
[ 0.0000000000000000, 0.2500000000000000, 0.7500000000000000],
[ 0.0000000000000000, 0.5000000000000000, 0.0000000000000000],
[ 0.0000000000000000, 0.5000000000000000, 0.5000000000000000],
[ 0.0000000000000000, 0.7500000000000000, 0.2500000000000000],
[ 0.0000000000000000, 0.7500000000000000, 0.7500000000000000],
[ 0.1250000000000000, 0.1250000000000000, 0.1250000000000000],
[ 0.1250000000000000, 0.1250000000000000, 0.6250000000000000],
[ 0.1250000000000000, 0.3750000000000000, 0.3750000000000000],
[ 0.1250000000000000, 0.3750000000000000, 0.8750000000000000],
[ 0.1250000000000000, 0.6250000000000000, 0.1250000000000000],
[ 0.1250000000000000, 0.6250000000000000, 0.6250000000000000],
[ 0.1250000000000000, 0.8750000000000000, 0.3750000000000000],
[ 0.1250000000000000, 0.8750000000000000, 0.8750000000000000],
[ 0.2500000000000000, 0.0000000000000000, 0.2500000000000000],
[ 0.2500000000000000, 0.0000000000000000, 0.7500000000000000],
[ 0.2500000000000000, 0.2500000000000000, 0.0000000000000000],
[ 0.2500000000000000, 0.2500000000000000, 0.5000000000000000],
[ 0.2500000000000000, 0.5000000000000000, 0.2500000000000000],
[ 0.2500000000000000, 0.5000000000000000, 0.7500000000000000],
[ 0.2500000000000000, 0.7500000000000000, 0.0000000000000000],
[ 0.2500000000000000, 0.7500000000000000, 0.5000000000000000],
[ 0.3750000000000000, 0.1250000000000000, 0.3750000000000000],
[ 0.3750000000000000, 0.1250000000000000, 0.8750000000000000],
[ 0.3750000000000000, 0.3750000000000000, 0.1250000000000000],
[ 0.3750000000000000, 0.3750000000000000, 0.6250000000000000],
[ 0.3750000000000000, 0.6250000000000000, 0.3750000000000000],
[ 0.3750000000000000, 0.6250000000000000, 0.8750000000000000],
[ 0.3750000000000000, 0.8750000000000000, 0.1250000000000000],
[ 0.3750000000000000, 0.8750000000000000, 0.6250000000000000],
[ 0.5000000000000000, 0.0000000000000000, 0.0000000000000000],
[ 0.5000000000000000, 0.0000000000000000, 0.5000000000000000],
[ 0.5000000000000000, 0.2500000000000000, 0.2500000000000000],
[ 0.5000000000000000, 0.2500000000000000, 0.7500000000000000],
[ 0.5000000000000000, 0.5000000000000000, 0.0000000000000000],
[ 0.5000000000000000, 0.5000000000000000, 0.5000000000000000],
[ 0.5000000000000000, 0.7500000000000000, 0.2500000000000000],
[ 0.5000000000000000, 0.7500000000000000, 0.7500000000000000],
[ 0.6250000000000000, 0.1250000000000000, 0.1250000000000000],
[ 0.6250000000000000, 0.1250000000000000, 0.6250000000000000],
[ 0.6250000000000000, 0.3750000000000000, 0.3750000000000000],
[ 0.6250000000000000, 0.3750000000000000, 0.8750000000000000],
[ 0.6250000000000000, 0.6250000000000000, 0.1250000000000000],
[ 0.6250000000000000, 0.6250000000000000, 0.6250000000000000],
[ 0.6250000000000000, 0.8750000000000000, 0.3750000000000000],
[ 0.6250000000000000, 0.8750000000000000, 0.8750000000000000],
[ 0.7500000000000000, 0.0000000000000000, 0.2500000000000000],
[ 0.7500000000000000, 0.0000000000000000, 0.7500000000000000],
[ 0.7500000000000000, 0.2500000000000000, 0.0000000000000000],
[ 0.7500000000000000, 0.2500000000000000, 0.5000000000000000],
[ 0.7500000000000000, 0.5000000000000000, 0.2500000000000000],
[ 0.7500000000000000, 0.5000000000000000, 0.7500000000000000],
[ 0.7500000000000000, 0.7500000000000000, 0.0000000000000000],
[ 0.7500000000000000, 0.7500000000000000, 0.5000000000000000],
[ 0.8750000000000000, 0.1250000000000000, 0.3750000000000000],
[ 0.8750000000000000, 0.1250000000000000, 0.8750000000000000],
[ 0.8750000000000000, 0.3750000000000000, 0.1250000000000000],
[ 0.8750000000000000, 0.3750000000000000, 0.6250000000000000],
[ 0.8750000000000000, 0.6250000000000000, 0.3750000000000000],
[ 0.8750000000000000, 0.6250000000000000, 0.8750000000000000],
[ 0.8750000000000000, 0.8750000000000000, 0.1250000000000000],
[ 0.8750000000000000, 0.8750000000000000, 0.6250000000000000]]
nat = len(xcoord)
kd = [14] * nat
In [2]:
import numpy as np
ndata = 30
force = np.loadtxt("force_random.dat").reshape((-1, nat, 3))[:ndata]
disp = np.loadtxt("disp_random.dat").reshape((-1, nat, 3))[:ndata]
samples = np.arange(30)
In [3]:
from alm import ALM
maxorder = 5
cutoff = [-1, -1, 15, 8, 8]
nbody = [2, 3, 3, 2, 2]
with ALM(lavec, xcoord, kd) as alm:
alm.set_verbosity(1)
alm.define(maxorder, cutoff, nbody)
alm.set_constraint(translation=True)
alm.set_displacement_and_force(disp, force)
X, y = alm.get_matrix_elements()
In [4]:
from sklearn.model_selection import GroupKFold
from sklearn.linear_model import lasso_path
from sklearn.preprocessing import StandardScaler
import math
force_ravel = force.reshape(-1)
print("force shape: ", np.shape(force_ravel))
print("Amat shape : ", np.shape(X))
groups = []
for i in range(30):
groups.extend([i] * 3 * nat)
eps = 1.0e-7
gkf = GroupKFold(n_splits=5)
n_alphas = 100
rms_train = np.zeros((n_alphas,5))
rms_test = np.zeros((n_alphas, 5))
counter = 0
alphas = np.logspace(-1, -6, num=n_alphas)
standardize = True
sc = StandardScaler()
for train, test in gkf.split(X,y,groups=groups):
force_train = force_ravel[train]
force_test = force_ravel[test]
fnorm_train = np.dot(force_train, force_train)
fnorm_test = np.dot(force_test, force_test)
# Set displacement and force data
X_train = X[train, :]
X_test = X[test, :]
y_train = y[train]
y_test = y[test]
if standardize:
scaler = sc.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
alphas_lasso, coefs_lasso, _ = lasso_path(X_train, y_train, eps=eps,
alphas = alphas,
verbose = True,
fit_intercept=False)
y_model_train = np.dot(X_train, coefs_lasso)
y_model_test = np.dot(X_test, coefs_lasso)
for ialpha in range(n_alphas):
y_diff_train = y_model_train[:,ialpha] - y_train
y_diff_test = y_model_test[:,ialpha] - y_test
residual_train = np.dot(y_diff_train, y_diff_train)
residual_test = np.dot(y_diff_test, y_diff_test)
rms_train[ialpha, counter] = math.sqrt(residual_train/fnorm_train)
rms_test[ialpha, counter] = math.sqrt(residual_test/fnorm_test)
counter += 1
rmse_mean = np.mean(rms_train, axis=1)
rmse_std = np.std(rms_train, axis=1, ddof=1)
cv_mean = np.mean(rms_test, axis=1)
cv_std = np.std(rms_test, axis=1, ddof=1)
print(rmse_mean)
print(cv_mean)
In [5]:
%matplotlib inline
import matplotlib.pyplot as plt
ax = plt.subplot(111)
ax.plot(alphas_lasso, rmse_mean, linestyle='--', marker='o', ms=5)
ax.plot(alphas_lasso, cv_mean, linestyle='--', marker='o', ms=5)
ax.set_xscale('log')
plt.show()
data = np.zeros((n_alphas, 5), dtype=np.float64)
data[:,0] = alphas
data[:,1] = rmse_mean
data[:,2] = rmse_std
data[:,3] = cv_mean
data[:,4] = cv_std
np.savetxt('sklearn_cvscore.dat', data)
In [6]:
from sklearn.linear_model import Lasso
alpha_opt = alphas[np.argmin(cv_mean)]
print(alpha_opt)
clf = Lasso(alpha=alpha_opt, fit_intercept=False, tol=eps)
if standardize:
scaler = sc.fit(X)
X = scaler.transform(X)
clf.fit(X, y)
fc = np.true_divide(clf.coef_, scaler.scale_)
else:
clf.fit(X, y)
fc = clf.coef_
print(fc)
print(np.linalg.norm(fc, 0))
In [7]:
alm = ALM(lavec, xcoord, kd)
alm.alm_new()
alm.define(maxorder, cutoff, nbody)
alm.set_constraint(translation=True)
alm.set_fc(fc)
fc_values1, elem_indices1 = alm.get_fc(1, mode='origin')
c = "xyz"
for (val, elem) in zip(fc_values1, elem_indices1):
v1 = elem[0] // 3
c1 = elem[0] % 3
v2 = elem[1] // 3
c2 = elem[1] % 3
print("%15.7f %d%s %d%s" % ((val, v1 + 1, c[c1], v2 + 1, c[c2])))
alm.alm_delete()