In this notebook, we'll learn a common pattern for storing the results of a model run. Specifically, we'll:
- learn to save tabular data
We'll do this using the basic HIV model.
N.B.: We won't be dealing with RNG seeds in this notebook. However, please see the supplemental notebook for instructions on properly setting, using, and recording the RNG seed.
In [33]:
# Imports
from hiv_model import Model, Person
In [2]:
# Imports
import datetime
import os
import time
# Scientific computing imports
import numpy
import matplotlib.pyplot as plt
import networkx
import pandas
import seaborn; seaborn.set()
# Import widget methods
from IPython.html.widgets import *
In [3]:
# Create our test model and advance it ten steps.
m = Model(grid_size=10, num_people=10)
# range() iterates exactly like xrange() here, but also exists on Python 3.
for t in range(10):
    m.step()
In [30]:
# Now, we'll define our methods to store a model's output
def store_model_parameters(model, run_output_path):
    """
    Store model parameters from a model to the run output path.

    Writes "parameters.csv" into run_output_path with one row per parameter
    and the columns "parameter" and "value" (plus the default pandas index
    column).

    :param model: object exposing the parameter attributes listed below
    :param run_output_path: directory that "parameters.csv" is written into
    """
    # Names of the model attributes to record, in the order they are written.
    parameter_names = [
        "grid_size",
        "num_people",
        "min_subsidy",
        "max_subsidy",
        "min_condom_budget",
        "max_condom_budget",
        "condom_cost",
        "min_prob_hookup",
        "max_prob_hookup",
        "prob_transmit",
        "prob_transmit_condom",
    ]

    # Build an explicit list of (name, value) rows. Handing dict.items()
    # straight to DataFrame depends on it being a real list (true only on
    # Python 2) and yields an arbitrary row order wherever dicts are unordered.
    parameter_rows = [(name, getattr(model, name)) for name in parameter_names]

    # Convert to dataframe and save
    model_parameters_df = pandas.DataFrame(parameter_rows,
                                           columns=["parameter", "value"])
    model_parameters_df.to_csv(os.path.join(run_output_path, "parameters.csv"))
def store_model_csv(model, run_output_path):
    """
    Write a model's history to CSV files in the run output path.

    Two files are produced: "interactions.csv", built from
    model.history_interactions, and "timeseries.csv", built from
    model.history_num_infected, model.history_num_interactions and
    model.history_num_interactions_condoms.
    """
    interaction_columns = ["time", "agent_a", "agent_b", "use_condom", "is_transmission"]

    # Interaction records. If pandas rejects the history with a ValueError
    # (seen with "sparse" parameter configurations that produce no
    # interactions), fall back to an empty frame with the same columns.
    try:
        interactions = pandas.DataFrame(model.history_interactions,
                                        columns=interaction_columns)
    except ValueError:
        interactions = pandas.DataFrame(columns=interaction_columns)

    # Time series: start from the infection counts, then attach the
    # interaction counts as additional columns.
    timeseries = pandas.DataFrame(model.history_num_infected,
                                  columns=["num_infected"])
    timeseries["num_interactions"] = model.history_num_interactions
    timeseries["num_interactions_condoms"] = model.history_num_interactions_condoms

    # Write both frames next to each other in the run directory.
    for frame, file_name in ((interactions, "interactions.csv"),
                             (timeseries, "timeseries.csv")):
        frame.to_csv(os.path.join(run_output_path, file_name))
def _store_space_figure(model, t, figure_path, cmap):
    """
    Plot model.get_space_infected(t) on its own figure and save it to figure_path.
    """
    # A fresh figure per snapshot keeps earlier plots and colorbars out of this one.
    f = plt.figure(figsize=(10, 10))
    plt.title("Infected space at t={0}".format(t))
    plt.pcolor(model.get_space_infected(t), vmin=-1, vmax=1, cmap=cmap)
    ax = f.gca()
    ax.set_aspect(1./ax.get_data_ratio())
    plt.tight_layout()
    plt.colorbar()
    # Save
    plt.savefig(figure_path)
    # Release the figure; this function is called once per run in long sweeps.
    plt.close(f)


def store_model_figures(model, run_output_path):
    """
    Store figures data from a model to the run output path.

    Three PNG files are written into run_output_path:
    "infections_interactions.png" (time series of infections and of
    interactions with/without condoms), "space_initial.png" (infected space
    at t=0) and "space_final.png" (infected space at t=model.t-1).

    Every figure is closed after it has been saved, so no figures are left
    open once the function returns.

    :param model: model providing history_num_infected,
        history_num_interactions, history_num_interactions_condoms, t and
        get_space_infected(t)
    :param run_output_path: directory that the PNG files are written into
    """
    # Plot time series of infections and interactions.
    f = plt.figure(figsize=(10, 8))

    # Create our top panel
    plt.subplot(211)
    plt.plot(model.history_num_infected)
    # The trailing comma makes this a one-element tuple; a bare string would
    # be treated as a sequence of single-character labels.
    plt.legend(("Number of infections",), loc="best")

    # Create our bottom panel and add the legend
    plt.subplot(212)
    plt.plot(numpy.array(model.history_num_interactions) - numpy.array(model.history_num_interactions_condoms))
    plt.plot(model.history_num_interactions_condoms)
    plt.legend(("Number of interactions without condoms",
                "Number of interactions with condoms"),
               loc="best")
    plt.tight_layout()

    # Save
    plt.savefig(os.path.join(run_output_path, "infections_interactions.png"))
    plt.close(f)

    # Next, plot the initial and final space timesteps.
    # Get colormap
    cmap = seaborn.cubehelix_palette(light=1, as_cmap=True)

    # Plot initial step.
    _store_space_figure(model, 0,
                        os.path.join(run_output_path, "space_initial.png"),
                        cmap)

    # Plot final step
    _store_space_figure(model, model.t-1,
                        os.path.join(run_output_path, "space_final.png"),
                        cmap)
def store_model(model, output_path="output"):
    """
    Store a model to the model output path.

    Each call creates a new run directory "<output_path>/run-YYYYMMDD-N",
    where N is the first integer, counting up from 0, for which the directory
    could be created, and then writes the model's parameters, CSV data and
    figures into it.

    :param model: model to store; passed on to store_model_parameters,
        store_model_csv and store_model_figures
    :param output_path: base directory for all runs; created if missing
    :raises OSError: if a directory cannot be created for any reason other
        than it already existing
    """
    # First, we need to make sure the directory exists.
    # An empty output_path means "current directory", which always exists.
    base_path = output_path or os.curdir
    try:
        os.makedirs(base_path)
    except OSError:
        # An existing directory is fine; anything else (permissions, a plain
        # file in the way, ...) is a real error and must not be hidden.
        if not os.path.isdir(base_path):
            raise

    # Next, we need to create a unique timestamp for the model.
    # We'll do that using a timestamp of the form: YYYYMMDD-Run#
    # We then need to create that directory too.
    timestamp_suffix = time.strftime("%Y%m%d")
    run_id = 0

    # Get a unique run #. Creating the directory is itself the uniqueness
    # test: if it already exists (possibly created by a concurrent run since
    # the last attempt), makedirs fails and we move on to the next run id
    # instead of silently sharing a directory.
    while True:
        run_output_path = os.path.join(output_path,
                                       "run-{0}-{1}".format(timestamp_suffix,
                                                            run_id))
        try:
            os.makedirs(run_output_path)
        except OSError:
            if not os.path.exists(run_output_path):
                # Failure was not "already exists"; retrying would not help.
                raise
            run_id += 1
        else:
            break

    # Finally, we need to store data and figures to the path.
    store_model_parameters(model, run_output_path)
    store_model_csv(model, run_output_path)
    store_model_figures(model, run_output_path)
In [36]:
# Exercise the storage routine on the test model, using the default output path.
store_model(model=m)
In [37]:
# Set number of samples per value and steps per sample
num_samples = 10
num_steps = 100

# Set basic model parameters
grid_size = 10
num_people = 10

# Set subsidy and prob_hookup values to "sweep" over
subsidy_sweep_values = [0.0, 0.33, 0.66, 1.0]
prob_hookup_values = [0.1, 0.5, 0.9]
subsidy_sweep_output = []

# Iterate over subsidy
for subsidy_value in subsidy_sweep_values:
    # Iterate over prob_hookup
    for prob_hookup_value in prob_hookup_values:
        # Output info
        print("Running {0} samples for subsidy value {1}, prob_hookup value {2}"
              .format(num_samples, subsidy_value, prob_hookup_value))

        # range() iterates exactly like xrange() here, but also exists on Python 3.
        for n in range(num_samples):
            # Create a model whose subsidy is fixed at the swept value and
            # whose hookup probability lies within +/- 0.1 of the swept value.
            m = Model(grid_size=grid_size,
                      num_people=num_people,
                      min_condom_budget=0.0,
                      max_condom_budget=1.0,
                      min_prob_hookup=prob_hookup_value-0.1,
                      max_prob_hookup=prob_hookup_value+0.1,
                      min_subsidy=subsidy_value,
                      max_subsidy=subsidy_value)

            # Run the model for num_steps
            for t in range(num_steps):
                m.step()

            # Output our model
            store_model(m)