In [ ]:
import numpy as np
import pandas as pd

# Settings ####
n_original = 10
n_test = 100
n_search = 1
entropy_search_size = 10
search_iterations = 10
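# True parameters of the synthetic generating process (assumed values, introduced
# here only so the generating_function sketch below is runnable; any fixed values
# would do)
true_intercept = 0.5
true_beta_x = 2.0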
# Functions ####
# Generate synthetic data from a true distribution
def input_prior(n):
    # Assumed prior: standard normal draws over a one-dimensional input space
    return np.random.normal(size=n)
def generating_function(input_space):
    # Assumed true process: Bernoulli outcomes through a logistic link, using the
    # true parameters defined in the settings above
    input_space = np.asarray(input_space)
    p = 1.0 / (1.0 + np.exp(-(true_intercept + true_beta_x * input_space)))
    return pd.DataFrame({"input": input_space, "output": np.random.binomial(1, p)})
# Estimate a model using logistic regression
def estimate_model(data):
    # Stub: fit the model to `data` and return a fitted object exposing .trace and
    # .predict(); a PyMC-based sketch follows below
    raise NotImplementedError
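
# A minimal sketch filling in estimate_model, under assumptions: PyMC is the
# intended backend (matching the .trace attribute used throughout), and the
# parameter names "intercept" and "beta_x" match the entropy_mp calls below.
# The small wrapper class supplies the .predict() interface entropy_test_points
# expects; this is illustrative, not the only possible design. Note: the
# "entropy_noise" metric tracked below assumes a likelihood with a noise term,
# which this Bernoulli sketch does not have.
import pymc as pm

class FittedLogisticModel:
    def __init__(self, trace):
        self.trace = trace

    def predict(self, input_space):
        # Posterior-mean probabilities thresholded to binary point predictions
        post = self.trace.posterior
        b0 = float(post["intercept"].mean())
        b1 = float(post["beta_x"].mean())
        p = 1.0 / (1.0 + np.exp(-(b0 + b1 * np.asarray(input_space))))
        return (p > 0.5).astype(int)

def estimate_model(data):
    with pm.Model():
        intercept = pm.Normal("intercept", 0.0, 10.0)
        beta_x = pm.Normal("beta_x", 0.0, 10.0)
        p = pm.math.sigmoid(intercept + beta_x * data["input"].values)
        pm.Bernoulli("y", p=p, observed=data["output"].values)
        trace = pm.sample(1000, progressbar=False)
    return FittedLogisticModel(trace)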
# Compute entropy of a predicted posterior over the input space
def entropy_pp(trace, samples=None, model=None, var_names=None, size=None):
    # Stub: return the scalar entropy of the posterior predictive over the input
    # space; a sketch follows below
    raise NotImplementedError
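
# A minimal sketch of entropy_pp, assuming the logistic model sketched above:
# average the Bernoulli entropy of the posterior-mean predictive probability over
# a fixed reference grid standing in for the input space (the grid is an
# assumption; evaluating at input_space_original would be a natural alternative).
def entropy_pp(trace, samples=None, model=None, var_names=None, size=None):
    post = trace.posterior
    b0 = post["intercept"].values.ravel()[:, None]
    b1 = post["beta_x"].values.ravel()[:, None]
    grid = np.linspace(-3.0, 3.0, 50)[None, :]
    p = (1.0 / (1.0 + np.exp(-(b0 + b1 * grid)))).mean(axis=0)
    p = p.clip(1e-9, 1.0 - 1e-9)
    return float(np.mean(-p * np.log(p) - (1.0 - p) * np.log(1.0 - p)))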
# Compute entropy of joint posterior parameter distribution
def entropy_jp(trace, samples=None, model=None):
    # Stub: return the scalar entropy of the joint posterior over all parameters;
    # a sketch follows below
    raise NotImplementedError
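
# A minimal sketch of entropy_jp: differential entropy of the joint posterior
# under a multivariate-Gaussian approximation, 0.5 * log((2*pi*e)^d * det(cov)).
# The Gaussian approximation is an assumption; k-NN or kernel entropy estimators
# are alternatives.
def entropy_jp(trace, samples=None, model=None):
    draws = np.column_stack([trace.posterior[v].values.ravel()
                             for v in trace.posterior.data_vars])
    cov = np.atleast_2d(np.cov(draws, rowvar=False))
    return float(0.5 * np.log(np.linalg.det(2.0 * np.pi * np.e * cov)))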
# Compute entropy of marginal posterior parameter distribution
def entropy_mp(trace, samples=None, model=None, parameters=None):
    # Stub: return the scalar entropy of one parameter's marginal posterior;
    # a sketch follows below
    raise NotImplementedError
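
# A minimal sketch of entropy_mp under the same Gaussian approximation, applied
# to the marginal posterior of a single named parameter.
def entropy_mp(trace, samples=None, model=None, parameters=None):
    draws = trace.posterior[parameters].values.ravel()
    return float(0.5 * np.log(2.0 * np.pi * np.e * draws.var()))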
# Compute model accuracy on a data set
def evaluate_model_error(model, data):
    # Misclassification rate of the model's point predictions (assumed error metric)
    return float(np.mean(model.predict(data["input"].values) != data["output"].values))
# Sample a potential test point in input space
def entropy_test_points(model, data, input_generator, n):
    input_candidate = input_generator(n)
    # Predict the output of the potential test point using the current model
    output_candidate = model.predict(input_candidate)
    # Update the model using this new synthetic point
    data_candidate = pd.DataFrame({"input": input_candidate, "output": output_candidate})
    model_candidate = estimate_model(pd.concat([data, data_candidate], ignore_index=True))
    # Compute entropy of the estimated posterior over the original input space
    entropy_candidate = entropy_pp(model_candidate.trace, model=model_candidate)
    return {"input": input_candidate, "entropy": entropy_candidate}
# Identify a new point in input space to examine
def entropy_search(model, data):
    # Compute entropy of the current posterior over the input space
    entropy_current = entropy_pp(model.trace, model=model)
    # Sample and score potential points
    entropy_test = [entropy_test_points(model, data, input_prior, n_search)
                    for _ in range(entropy_search_size)]
    # Pick a new point in input space to sample, weighted by the expected decrease
    # in posterior entropy
    # Or maybe based on KL-divergence (== conditional entropy) still?
    entropy_difference = [entropy_current - candidate["entropy"] for candidate in entropy_test]
    # Force negative weights to 0, then normalise into sampling probabilities
    # (falling back to uniform sampling if no candidate decreases entropy)
    weights = np.clip(entropy_difference, 0.0, None)
    probabilities = weights / weights.sum() if weights.sum() > 0 else None
    input_new = entropy_test[np.random.choice(len(entropy_test), p=probabilities)]["input"]
    return input_new
# Procedures ####
# Generate synthetic training and test data
input_space_original = input_prior(n_original)
data_original = generating_function(input_space_original)
input_space_test = input_prior(n_test)
data_test = generating_function(input_space_test)
# Search for and explore new points
data_current = data_original
search_metrics = pd.DataFrame({"iteration": range(search_iterations),
                               "entropy_posterior": np.nan,
                               "entropy_joint_parameters": np.nan,
                               "entropy_beta_x": np.nan,
                               "entropy_intercept": np.nan,
                               "entropy_noise": np.nan,
                               "error_training": np.nan,
                               "error_test": np.nan})
for iteration in range(search_iterations):
    # Estimate the model
    model_current = estimate_model(data_current)
    # Evaluate model performance and certainty
    search_metrics.loc[iteration, "entropy_posterior"] = entropy_pp(model_current.trace, model=model_current)
    search_metrics.loc[iteration, "entropy_joint_parameters"] = entropy_jp(model_current.trace)
    search_metrics.loc[iteration, "entropy_beta_x"] = entropy_mp(model_current.trace, parameters="beta_x")
    search_metrics.loc[iteration, "entropy_intercept"] = entropy_mp(model_current.trace, parameters="intercept")
    search_metrics.loc[iteration, "entropy_noise"] = entropy_mp(model_current.trace, parameters="noise")
    search_metrics.loc[iteration, "error_training"] = evaluate_model_error(model_current, data_current)
    search_metrics.loc[iteration, "error_test"] = evaluate_model_error(model_current, data_test)
    # Select a new data point to explore based on expected entropy decrease
    input_new = entropy_search(model_current, data_current)
    # Generate a value for the newly explored point from the true distribution
    data_new = generating_function(input_new)
    data_current = pd.concat([data_current, data_new], ignore_index=True)
# Save results
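# A sketch of persisting the run (assumptions: CSV output via pandas; the
# filenames are illustrative)
search_metrics.to_csv("entropy_search_metrics.csv", index=False)
data_current.to_csv("entropy_search_data.csv", index=False)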
# Plot results
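# A sketch of the plots (assumption: matplotlib; the panel layout is
# illustrative): entropy trajectories and train/test error across iterations
import matplotlib.pyplot as plt

fig, (ax_entropy, ax_error) = plt.subplots(1, 2, figsize=(10, 4))
for column in ["entropy_posterior", "entropy_joint_parameters",
               "entropy_beta_x", "entropy_intercept", "entropy_noise"]:
    ax_entropy.plot(search_metrics["iteration"], search_metrics[column], label=column)
ax_entropy.set_xlabel("iteration")
ax_entropy.set_ylabel("entropy")
ax_entropy.legend()
for column in ["error_training", "error_test"]:
    ax_error.plot(search_metrics["iteration"], search_metrics[column], label=column)
ax_error.set_xlabel("iteration")
ax_error.set_ylabel("error")
ax_error.legend()
plt.tight_layout()
plt.show()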