In [4]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import random
In [5]:
class GaussianSampler(object):
def __init__(self, mean, var):
self.mean = mean
self.var = var
def sample(self):
return random.normalvariate(self.mean, self.var)
For each product, we'll model its usage time using a Gaussian distribution in days.
In [6]:
products = {"food" : GaussianSampler(30.0, 5.0),
"poop bags" : GaussianSampler(14.0, 2.0)}
The customer will have an inventory with a remaining time (given in days) for each item.
In [6]:
Probability of a purchase given the number of days remaining will be modeled using an exponential distribution.
In [7]:
class ExponentialPDF(object):
def __init__(self, avg_rate):
self.lambd = 1.0 / avg_rate
def probability(self, t):
return self.lambd * np.exp(-self.lambd * t)
In [8]:
customer_inventory = {"food" : 0,
"poop bags" : 0}
item_purchases = {"food": [],
"poop bags" : []}
pdf = ExponentialPDF(1.0)
for t in xrange(1, 101):
items = customer_inventory.keys()
print "Day", t
for item in items:
customer_inventory[item] = max(0, customer_inventory[item] - 1)
prob = pdf.probability(customer_inventory[item])
print item, customer_inventory[item], prob
prob = pdf.probability(customer_inventory[item])
r = random.random()
if r < prob:
additional_days = products[item].sample()
additional_days = max(0, additional_days)
print "Purchased", additional_days
customer_inventory[item] += additional_days
item_purchases[item].append(1)
else:
item_purchases[item].append(0)
In [9]:
plt.hold(True)
plt.plot(item_purchases["food"], "r-")
plt.plot(item_purchases["poop bags"], "g-")
Out[9]:
In the second version of the model, we'll have 4 structures:
In [136]:
class ExhaustableItem(object):
def __init__(self, usage_rate=None, amount_used_average=None, amount_used_variance=None, average_time_for_purchase_before_exhaustion=None):
"""
usage_rate is given in times/day -- used to determine when an item is used
amount_used_average and amount_used_variance are given in units/day -- used to determine how
much is used per usage.
"""
self.usage_rate = usage_rate
self.amount_used_average = amount_used_average
self.amount_used_variance = amount_used_variance
self.current_amount = 0.0
self.last_amount_increase_time = 0.0
self.exhaustion_time = 0.0
self.epsilon = 1e-5
self.average_time_for_purchase_before_exhaustion = average_time_for_purchase_before_exhaustion
def increase_amount_and_simulate(self, purchase_time, purchased_amount):
"""
Increase current amount, from a purchase.
purchase_time -- given in seconds since start of model
purchased_amount -- given in units
"""
# simulate forward to current time
# record amount left
# add new amount
# simulate forward until we run out of product
# record run out time
# (discard results of second simulation)
# if we don't have any, don't bother simulating
previous_amount = 0.0
previous_time = self.last_amount_increase_time
if self.current_amount != 0.0:
# we want the last step that does not go past time
new_time = self.last_amount_increase_time
new_amount = self.current_amount
while new_time < purchase_time:
previous_time = new_time
previous_amount = new_amount
new_time, new_amount = self._simulate_usage_step(previous_time, previous_amount)
self.current_amount = previous_amount + purchased_amount
self.last_amount_increase_time = purchase_time
# start next simulation from last accepted simulation step
new_time = previous_time
new_amount = self.current_amount
while new_amount > self.epsilon:
previous_time = new_time
previous_amount = new_amount
new_time, new_amount = self._simulate_usage_step(previous_time, previous_amount)
self.exhaustion_time = new_time
def _simulate_usage_step(self, start_time, initial_amount):
"""
Simulate 1 step of usage.
da/dt = -R(amount_used_average, amount_used_variance)
timestep is determined from exponential distribution:
f = \lambda \exp (-\lambda t) where \lambda = 1.0 / usage_rate
\Delta t sampled from f
Returns time after 1 step and remaining amount
"""
# given in days since last usage
timestep = random.expovariate(1.0 / self.usage_rate)
# Might be more realistic to model usage
# in units/time used since items are used
# in discrete rather than continuous
# quantities
# given in units/day
usage_rate = random.normalvariate(self.amount_used_average, self.amount_used_variance)
# can't use a negative amount :)
if usage_rate < 0.0:
usage_rate = 0.0
# given in units
usage_amount = usage_rate * np.sqrt(timestep)
amount = initial_amount - min(usage_amount, initial_amount)
time = start_time + timestep
return time, amount
def probability_purchase(self, time):
"""
Probability of a purchase at given time.
time is seconds since model start.
Probability is given by an exponential distribution:
f = \lambda \exp (-\lambda t) where \lambda = 1.0 / average_time_for_purchase_before_exhaustion
t is the amount of time from the curent time until the item
is exhausted
"""
time_until_exhaustion = max(self.exhaustion_time - time, 0.0)
lambd = 1.0 / self.average_time_for_purchase_before_exhaustion
prob = lambd * np.exp(-lambd * time_until_exhaustion)
return prob
In [137]:
model = ExhaustableItem(usage_rate=1.0, amount_used_average=0.5, amount_used_variance=0.2, average_time_for_purchase_before_exhaustion=3.0)
In [138]:
previous_time = 0.0
previous_amount = 30.0 #lbs
times = [0.0]
amounts = [30.0]
while previous_amount > model.epsilon:
new_time, new_amount = model._simulate_usage_step(previous_time, previous_amount)
print new_time, new_amount
times.append(new_time)
amounts.append(new_amount)
previous_time = new_time
previous_amount = new_amount
In [139]:
model.increase_amount_and_simulate(0.0, 30.0)
In [140]:
print model.exhaustion_time
In [141]:
model.exhaustion_time = 0.0
model.current_amount = 0.0
model.increase_amount_and_simulate(0.0, 30.0)
print model.exhaustion_time
In [142]:
times = range(0, 100)
probs = [model.probability_purchase(float(t)) for t in times]
plt.plot(times, probs)
Out[142]:
In [145]:
poopbag_model = ExhaustableItem(usage_rate=1.0, amount_used_average=2.5, amount_used_variance=0.5, average_time_for_purchase_before_exhaustion=3.0)
poopbag_model.exhaustion_time = 0.0
poopbag_model.current_amount = 0.0
poopbag_model.increase_amount_and_simulate(0.0, 120.0)
print poopbag_model.exhaustion_time
Transaction occurences will be modeled as:
$$p(t) = \max_{i \in items} p_i(t)$$
In [93]:
class TransactionSimulator(object):
def __init__(self, item_models=None, timestep=None):
self.item_models = item_models
self.timestep = timestep
self.time = 0.0 # days?
def probability(self, time):
prob = 0.0
for model in self.item_models:
prob = max(prob, model.probability_purchase(time))
return prob
def accept(self, time):
prob = self.probability(time)
r = random.random()
return r < prob
def simulate(self, max_steps=None):
for i in xrange(max_steps):
accepted = self.accept(self.time)
self.time += self.timestep
if accepted:
return self.time
return None
In [147]:
poopbag_model = ExhaustableItem(usage_rate=1.0, amount_used_average=2.5, amount_used_variance=0.5, average_time_for_purchase_before_exhaustion=3.0)
food_model = ExhaustableItem(usage_rate=1.0, amount_used_average=0.5, amount_used_variance=0.2, average_time_for_purchase_before_exhaustion=3.0)
poopbag_model.increase_amount_and_simulate(0.0, 120.0)
food_model.increase_amount_and_simulate(0.0, 30.0)
simulator = TransactionSimulator(item_models=[food_model, poopbag_model], timestep=1.0)
In [148]:
transactions = []
steps = []
for i in xrange(100):
trans_time = simulator.simulate(max_steps=1)
steps.append(i)
if trans_time is not None:
transactions.append(1.0)
else:
transactions.append(0.0)
In [149]:
plt.plot(steps, transactions)
Out[149]:
In [101]:
In [ ]: