One of the long-standing "skeletons in the closet" of GP techniques is the constant finding problem. It is widely acknowledged that the "ephemeral random constant", de facto the main solution proposed for this problem, is far from satisfactory.
Using dCGP, we are able to successfully learn constants as well as expressions during evolution, thanks to the hybridization of the evolutionary strategy with what may essentially be seen as a second-order back-propagation algorithm.
NOTE: since v1.4, symbolic regression is performed via dedicated classes rather than by manipulating the dcgpy.expression directly.
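As a pointer only, the sketch below shows roughly what that newer workflow looks like, pairing a symbolic regression problem with an evolutionary strategy through pygmo. The class names (dcgpy.symbolic_regression, dcgpy.es4cgp) belong to the v1.4+ API, but the exact parameter names and defaults used here are assumptions and should be checked against the current dcgpy documentation; this tutorial keeps the older, manual approach.

import numpy as np
import dcgpy
import pygmo as pg

# Toy data for the P5 problem; shapes assumed to be (N, 1)
points = np.linspace(1, 3, 10).reshape(-1, 1)
labels = np.e * points**5 - np.pi * points**3 + points

# Parameter names below are assumptions, to be checked against the dcgpy docs
udp = dcgpy.symbolic_regression(points, labels, rows=1, cols=15, levels_back=16, arity=2,
                                kernels=dcgpy.kernel_set_double(["sum", "mul", "diff", "pdiv"])(),
                                n_eph=2)
uda = dcgpy.es4cgp(gen=10000, max_mut=2)
pop = pg.population(pg.problem(udp), size=4)
pop = pg.algorithm(uda).evolve(pop)
print(pop.champion_f)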
Let's first import dcgpy and pyaudi, and set things up so that our CGP is computed using the "gdual" type, thus getting all derivatives for free. A minimal illustration of gduals follows the imports below.
In [1]:
from dcgpy import expression_gdual_vdouble as expression
from dcgpy import kernel_set_gdual_vdouble as kernel_set
from pyaudi import gdual_vdouble as gdual
import pyaudi
from matplotlib import pyplot as plt
import numpy as np
from random import randint
%matplotlib inline
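As a quick, standalone illustration (not part of the original experiments): a vectorized gdual carries Taylor coefficients alongside the values, so differentiating a toy expression with respect to a constant requires no extra code.

xs = gdual([0.5, 1.0, 1.5])            # inputs as order-0 gduals: values only, no expansion w.r.t. x
c = gdual([2.0], "c", 2)               # a constant expanded to order 2: first and second derivatives are available
f = xs * c**2                          # a toy expression
print(f.constant_cf)                   # values of f at the three points
print(f.get_derivative({"dc": 1}))     # df/dc = 2*c*x at the three points
print(f.get_derivative({"dc": 2}))     # d2f/dc2 = 2*x at the three points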
In [2]:
def run_experiment(dCGP, offsprings, max_gen, x, yt, screen_output):
    # The offspring chromosomes, fitnesses and constants
    chromosome = [1] * offsprings
    fitness = [1] * offsprings
    constant = [[1., 1.1]] * offsprings
    # Init the best as the initial random dCGP
    best_chromosome = dCGP.get()
    best_constants = [1, 1]
    fit, _ = err2(dCGP, x, yt, best_constants)
    best_fitness = fit
    # Main loop over generations
    for g in range(max_gen):
        # Mutation: create each offspring from the current best
        for i in range(offsprings):
            dCGP.set(best_chromosome)
            dCGP.mutate_active(i + 1)
            fit, constant[i] = err2(dCGP, x, yt, best_constants)
            fitness[i] = fit
            chromosome[i] = dCGP.get()
        # Selection: keep the best individual found so far
        for i in range(offsprings):
            if fitness[i] <= best_fitness:
                if (fitness[i] != best_fitness):
                    best_chromosome = chromosome[i]
                    best_fitness = fitness[i]
                    best_constants = constant[i]
                    dCGP.set(best_chromosome)
                    if screen_output:
                        print("New best found: gen: ", g, " value: ", fitness[i], dCGP.simplify(["x", "c1", "c2"]), "c =", best_constants)
        if best_fitness < 1e-14:
            break
    return g, best_chromosome, best_constants
In [28]:
# The following functions create the target values for a gridded input x for different test problems
def data_P5(x):
    return np.e * x**5 - np.pi * x**3 + x
def data_P6(x):
    return (np.e * x**2 - 1) / (np.pi * (x + 2))
def data_P7(x):
    return pyaudi.sin(np.e * x) + pyaudi.cos(np.pi * x)
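Purely as a visual check (not part of the original experiments), the three targets can be plotted over the grids used below. The expressions are re-typed here with numpy ufuncs, since data_P7 relies on pyaudi's sin/cos, which expect gduals; note also that P6 has a pole at x = -2, just outside its grid.

fig, axes = plt.subplots(1, 3, figsize=(12, 3))
xg = np.linspace(1, 3, 100)
axes[0].plot(xg, np.e * xg**5 - np.pi * xg**3 + xg)
axes[0].set_title("P5")
xg = np.linspace(-2.1, 1, 100)
axes[1].plot(xg, (np.e * xg**2 - 1) / (np.pi * (xg + 2)))
axes[1].set_title("P6")
xg = np.linspace(-1, 1, 100)
axes[2].plot(xg, np.sin(np.e * xg) + np.cos(np.pi * xg))
axes[2].set_title("P7")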
In [4]:
# This sums over the components of a vectorized coefficient, accounting for the fact that if its dimension
# is 1, then [a] actually represents the constant vector [a, a, ..., a] of length N
def collapse_vectorized_coefficient(x, N):
    if len(x) == N:
        return sum(x)
    return x[0] * N
# This is the quadratic error of a dCGP expression when the constants have values c1 and c2. The error is computed
# over the input points xin (of type gdual, order 0, as we are not interested in expanding the program w.r.t. these).
# The target values are contained in yt (also of type gdual, order 0).
def err(dCGP, xin, yt, c1, c2):
    y = dCGP([xin, c1, c2])[0]
    return (y - yt)**2 / len(xin.constant_cf)
# This is the quadratic error of the expression when the values of the constants are learned using a one-step,
# second-order (Newton) method, falling back to a few gradient descent steps if that fails.
def err2(dCGP, xin, yt, constants):
    cin1 = constants[0]
    cin2 = constants[1]
    # Let's compute the Taylor expansion (order 2) of the quadratic error around the point cin1, cin2
    c1 = gdual([cin1], "c1", 2)
    c2 = gdual([cin2], "c2", 2)
    error = err(dCGP, xin, yt, c1, c2)
    # Initial error for the current values of the constants
    ierr = sum(error.constant_cf)
    # Number of points used in the data
    N = len(xin.constant_cf)
    # We try one step of the Newton method
    for i in range(1):
        G1 = error.get_derivative({"dc1": 1})
        G2 = error.get_derivative({"dc2": 1})
        H11 = error.get_derivative({"dc1": 2})
        H22 = error.get_derivative({"dc2": 2})
        H12 = error.get_derivative({"dc1": 1, "dc2": 1})
        G1 = collapse_vectorized_coefficient(G1, N)
        G2 = collapse_vectorized_coefficient(G2, N)
        H11 = collapse_vectorized_coefficient(H11, N)
        H22 = collapse_vectorized_coefficient(H22, N)
        H12 = collapse_vectorized_coefficient(H12, N)
        H = [[H11, H12], [H12, H22]]
        det = np.linalg.det(H)
        if det != 0:
            invH = np.linalg.inv(H)
            # We write the Newton update formula explicitly
            dc1 = - invH[0, 0] * G1 - invH[0, 1] * G2
            dc2 = - invH[1, 0] * G1 - invH[1, 1] * G2
            c1 += dc1
            c2 += dc2
            error = err(dCGP, xin, yt, c1, c2)
        else:
            break
    # Error after the (attempted) Newton step
    aerr = sum(error.constant_cf)
    # If no improvement, take 5 steps of gradient descent with learning rate 0.05
    if aerr >= ierr:
        c1 = gdual([cin1], "c1", 1)
        c2 = gdual([cin2], "c2", 1)
        # Recompute the error around the original constants (order 1 is enough for the gradient)
        error = err(dCGP, xin, yt, c1, c2)
        for i in range(5):
            G1 = sum(error.get_derivative({"dc1": 1}))
            G2 = sum(error.get_derivative({"dc2": 1}))
            dc1 = - 0.05 * G1
            dc2 = - 0.05 * G2
            c1 += dc1
            c2 += dc2
            error = err(dCGP, xin, yt, c1, c2)
        aerr = sum(error.constant_cf)
    return aerr, [c1.constant_cf[0], c2.constant_cf[0]]
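To see what err2 is doing under the hood, here is a small standalone check (not part of the original notebook, and using a hypothetical toy model rather than a dCGP expression): for the model y = c1*x + c2 and a quadratic error, the order-2 gdual expansion in c1 and c2 directly exposes the gradient and Hessian entries consumed by the Newton update, via collapse_vectorized_coefficient.

xs = gdual([1., 2., 3.])                                  # grid points, order 0
ys = 2. * xs + 0.5                                        # data generated with c1 = 2, c2 = 0.5
c1 = gdual([0.], "c1", 2)                                 # order-2 expansion around the guess (0, 0)
c2 = gdual([0.], "c2", 2)
e = (c1 * xs + c2 - ys)**2 / 3.                           # quadratic error of the toy model c1*x + c2
N = len(xs.constant_cf)
g1 = collapse_vectorized_coefficient(e.get_derivative({"dc1": 1}), N)
g2 = collapse_vectorized_coefficient(e.get_derivative({"dc2": 1}), N)
h11 = collapse_vectorized_coefficient(e.get_derivative({"dc1": 2}), N)
h22 = collapse_vectorized_coefficient(e.get_derivative({"dc2": 2}), N)
h12 = collapse_vectorized_coefficient(e.get_derivative({"dc1": 1, "dc2": 1}), N)
H = np.array([[h11, h12], [h12, h22]])
step = -np.linalg.inv(H) @ np.array([g1, g2])
print(step)                                               # [2.  0.5]: one Newton step is exact here, since the error is quadratic in (c1, c2)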
In [5]:
x = np.linspace(1,3,10)
x = gdual(x)
yt = data_P5(x)
In [6]:
# We run nexp experiments and accumulate statistics for the ERT
nexp = 100
offsprings = 4
max_gen = 2000
res = []
kernels = kernel_set(["sum", "mul", "diff", "pdiv"])()
print("restart: \t gen: \t expression:")
for i in range(nexp):
    dCGP = expression(inputs=3, outputs=1, rows=1, cols=15, levels_back=16, arity=2, kernels=kernels, seed=randint(0, 1233456))
    g, best_chromosome, best_constant = run_experiment(dCGP, offsprings, max_gen, x, yt, screen_output=False)
    res.append(g)
    dCGP.set(best_chromosome)
    if g < (max_gen - 1):
        print(i, "\t\t", res[i], "\t", dCGP(["x", "c1", "c2"]), " a.k.a ", dCGP.simplify(["x", "c1", "c2"]), "c = ", best_constant)
res = np.array(res)
In [8]:
# ERT: total generations spent across all restarts, divided by the number of successful runs, times the offspring evaluated per generation
mean_gen = sum(res) / sum(res < (max_gen - 1))
print("ERT (expected run time) = avg. number of dCGP evaluations needed: ", mean_gen * offsprings)
In [13]:
x = np.linspace(-2.1,1,10)
x = gdual(x)
yt = data_P6(x)
In [14]:
# We run nexp experiments and accumulate statistics for the ERT
nexp = 100
offsprings = 4
max_gen = 10000
res = []
kernels = kernel_set(["sum", "mul", "diff", "pdiv"])()
print("restart: \t gen: \t expression:")
for i in range(nexp):
    dCGP = expression(inputs=3, outputs=1, rows=1, cols=15, levels_back=16, arity=2, kernels=kernels, seed=randint(0, 1233456))
    g, best_chromosome, best_constant = run_experiment(dCGP, offsprings, max_gen, x, yt, screen_output=False)
    res.append(g)
    dCGP.set(best_chromosome)
    if g < (max_gen - 1):
        print(i, "\t\t", res[i], "\t", dCGP(["x", "c1", "c2"]), " a.k.a ", dCGP.simplify(["x", "c1", "c2"]), "c = ", best_constant)
res = np.array(res)
In [16]:
# ERT: total generations spent across all restarts, divided by the number of successful runs, times the offspring evaluated per generation
mean_gen = sum(res) / sum(res < (max_gen - 1))
print("ERT (expected run time) = avg. number of dCGP evaluations needed: ", mean_gen * offsprings)
In [39]:
x = np.linspace(-1,1,10)
x = gdual(x)
yt = data_P7(x)
In [47]:
# We run nexp experiments and accumulate statistics for the ERT
nexp = 100
offsprings = 4
max_gen = 5000
res = []
kernels = kernel_set(["sum", "mul", "diff", "div", "sin", "cos"])()
print("restart: \t gen: \t expression:")
for i in range(nexp):
    dCGP = expression(inputs=3, outputs=1, rows=1, cols=15, levels_back=16, arity=2, kernels=kernels, seed=randint(0, 1233456))
    g, best_chromosome, best_constant = run_experiment(dCGP, offsprings, max_gen, x, yt, screen_output=False)
    res.append(g)
    dCGP.set(best_chromosome)
    if g < (max_gen - 1):
        print(i, "\t\t", res[i], "\t", dCGP(["x", "c1", "c2"]), " a.k.a ", dCGP.simplify(["x", "c1", "c2"]), "c = ", best_constant)
res = np.array(res)
In [48]:
# ERT: total generations spent across all restarts, divided by the number of successful runs, times the offspring evaluated per generation
mean_gen = sum(res) / sum(res < (max_gen - 1))
print("ERT (expected run time) = avg. number of dCGP evaluations needed: ", mean_gen * offsprings)