# choice_with_sampling_testing

The objective of this notebook is to test the ability of sampling-based weighted choice to reproduce probabilities under different conditions.

Vary the inputs for the sample size, the number of agents, the number of alternatives, or the probability function itself to see how this performs.
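For orientation, the mechanics being tested can be sketched roughly as follows (a minimal stand-in, not the `smartpy_core` implementation): each chooser gets a random sample of alternatives, a probability function scores each chooser's sample, and one alternative is drawn per chooser.

```python
import numpy as np

rng = np.random.default_rng(0)

def sampled_choice(num_choosers, alt_ids, sample_size, prob_func):
    """For each chooser: sample alternatives, score them, draw one."""
    # independently sample alternative ids for each chooser
    sampled = np.array([
        rng.choice(alt_ids, size=sample_size, replace=False)
        for _ in range(num_choosers)
    ])
    # prob_func returns shape (num_choosers, sample_size), rows summing to 1
    probs = prob_func(sampled)
    # inverse-CDF draw of one sampled alternative per chooser
    cum = probs.cumsum(axis=1)
    picks = (rng.random((num_choosers, 1)) < cum).argmax(axis=1)
    return sampled[np.arange(num_choosers), picks]

# uniform scoring: every sampled alternative is equally likely
uniform = lambda s: np.full(s.shape, 1.0 / s.shape[1])
choices = sampled_choice(5, np.arange(100), 10, uniform)
```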

``````

In :

import numpy as np
import pandas as pd

from smartpy_core.sampling import *
from smartpy_core.choice import *

``````
``````

time: 445 ms

``````
``````

In :

# define some inputs
num_tests = 1000
sample_size = 50
num_choosers = 500
num_alts = 100
cap_per_alt = 10

``````
``````

time: 3 ms

``````
``````

In :

# create our choosers
choosers = pd.DataFrame({'col': np.arange(num_choosers) / 10.0})
choosers.head(10)

``````
``````

Out:

   col
0  0.0
1  0.1
2  0.2
3  0.3
4  0.4
5  0.5
6  0.6
7  0.7
8  0.8
9  0.9

10 rows × 1 columns

time: 11 ms

``````
``````

In :

# create alternatives
alts = pd.DataFrame({'alt_col': np.arange(num_alts) / 10.0})
alts['cap'] = cap_per_alt
alts['p'] = get_probs(np.exp(alts['alt_col']))
alts.head(10)

``````
``````

Out:

   alt_col  cap         p
0      0.0   10  0.000005
1      0.1   10  0.000005
2      0.2   10  0.000006
3      0.3   10  0.000006
4      0.4   10  0.000007
5      0.5   10  0.000008
6      0.6   10  0.000009
7      0.7   10  0.000010
8      0.8   10  0.000011
9      0.9   10  0.000012

10 rows × 3 columns

time: 16 ms

``````
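`get_probs` (imported from `smartpy_core.sampling`) is assumed here to normalize a series of positive weights into probabilities that sum to one; a minimal stand-in under that assumption:

```python
import numpy as np
import pandas as pd

def get_probs(weights):
    """Normalize positive weights so they sum to one (assumed behavior)."""
    return weights / weights.sum()

p = get_probs(pd.Series(np.exp(np.arange(100) / 10.0)))
```

With the `exp(alt_col)` weights used above, the highest alternative comes out near 0.095167, matching the `p` column shown for the tail of `alts` later in the notebook.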

### test 1 - just alternatives

define a simple probability function that uses only the alternative values, then see how well the resulting choice shares match the alternatives' probabilities

``````

In :

# create a simple probability function that just uses the alternative values as utils
def simple_probs(interaction_data, num_choosers, sample_size):
    u = np.exp(interaction_data['alt_col'].values).reshape(num_choosers, sample_size)
    return u / u.sum(axis=1, keepdims=True)

``````
``````

time: 2 ms

``````
``````

In :

# run the tests
res = alts.copy()
res['choices'] = 0
res['choices_w_cap'] = 0

for i in range(num_tests):
    # 1st run w/out capacities
    choices = choice_with_sampling(
        choosers,
        alts,
        simple_probs,
        sample_size=sample_size
    )

    if i % 100 == 0:
        print('on test: {}'.format(i))

    choice_sums = choices.groupby('alternative_id').size().reindex(res.index).fillna(0)
    res['choices'] += choice_sums

    # next run with capacities
    choices, capacities = capacity_choice_with_sampling(
        choosers,
        alts,
        'cap',
        simple_probs,
        sample_size=sample_size
    )
    choice_sums = choices.value_counts().reindex(res.index).fillna(0)
    res['choices_w_cap'] += choice_sums
    if (choice_sums > cap_per_alt).any():
        print('we blew capacity!')
        print(choice_sums.max())
        print(capacities.min())

res['avg_choices'] = res['choices'] / num_tests
res['avg_choices_w_cap'] = res['choices_w_cap'] / num_tests
res['choices_p'] = get_probs(res['choices'])
res['choices_w_cap_p'] = get_probs(res['choices_w_cap'])

``````
``````

on test: 0
on test: 100
on test: 200
on test: 300
on test: 400
on test: 500
on test: 600
on test: 700
on test: 800
on test: 900
time: 1min 42s

``````
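The capacity check in the loop above relies on `capacity_choice_with_sampling` never over-assigning an alternative. One way capacity-constrained assignment can work (an assumption for illustration, not the library's actual code) is to draw choosers sequentially and zero out an alternative's weight once it fills:

```python
import numpy as np

rng = np.random.default_rng(1)

def capped_assign(weights, caps, num_choosers):
    """Sequentially assign choosers, zeroing out alternatives that fill up."""
    remaining = caps.astype(float).copy()
    w = weights.astype(float).copy()
    out = np.empty(num_choosers, dtype=int)
    for i in range(num_choosers):
        p = w / w.sum()
        pick = rng.choice(len(w), p=p)
        out[i] = pick
        remaining[pick] -= 1
        if remaining[pick] <= 0:
            # alternative is full; remove it from further draws
            w[pick] = 0.0
    return out

choices = capped_assign(np.ones(5), np.full(5, 2), num_choosers=8)
counts = np.bincount(choices, minlength=5)
```

However it is implemented internally, no per-alternative count should ever exceed its cap, which is exactly what the `we blew capacity!` branch guards against.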
``````

In :

res.head(10)
``````
``````

Out:

   alt_col  cap         p  choices  choices_w_cap  avg_choices  avg_choices_w_cap  choices_p  choices_w_cap_p
0      0.0   10  0.000005        3             33        0.003              0.033   0.000006         0.000066
1      0.1   10  0.000005        8             38        0.008              0.038   0.000016         0.000076
2      0.2   10  0.000006        3             35        0.003              0.035   0.000006         0.000070
3      0.3   10  0.000006        1             40        0.001              0.040   0.000002         0.000080
4      0.4   10  0.000007        4             50        0.004              0.050   0.000008         0.000100
5      0.5   10  0.000008        2             39        0.002              0.039   0.000004         0.000078
6      0.6   10  0.000009        6             47        0.006              0.047   0.000012         0.000094
7      0.7   10  0.000010        7             51        0.007              0.051   0.000014         0.000102
8      0.8   10  0.000011        3             75        0.003              0.075   0.000006         0.000150
9      0.9   10  0.000012        7             79        0.007              0.079   0.000014         0.000158

10 rows × 9 columns

time: 27 ms

``````
``````

In :

res.tail(10)

``````
``````

Out:

    alt_col  cap         p  choices  choices_w_cap  avg_choices  avg_choices_w_cap  choices_p  choices_w_cap_p
90      9.0   10  0.038692    19672          10000       19.672                 10   0.039344             0.02
91      9.1   10  0.042761    21856          10000       21.856                 10   0.043712             0.02
92      9.2   10  0.047258    23411          10000       23.411                 10   0.046822             0.02
93      9.3   10  0.052229    25844          10000       25.844                 10   0.051688             0.02
94      9.4   10  0.057722    28797          10000       28.797                 10   0.057594             0.02
95      9.5   10  0.063792    31434          10000       31.434                 10   0.062868             0.02
96      9.6   10  0.070501    34604          10000       34.604                 10   0.069208             0.02
97      9.7   10  0.077916    37975          10000       37.975                 10   0.075950             0.02
98      9.8   10  0.086111    41382          10000       41.382                 10   0.082764             0.02
99      9.9   10  0.095167    44929          10000       44.929                 10   0.089858             0.02

10 rows × 9 columns

time: 28 ms

``````
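As a sanity check on test 1: with utilities `exp(alt_col)`, the full-choice-set probabilities can be computed in closed form, and that is what `choices_p` should approach as the number of tests grows. The capacitated run, by contrast, is pinned at `cap_per_alt * num_tests / (num_choosers * num_tests) = 0.02` for every high-utility alternative, which is what the `choices_w_cap_p` column shows.

```python
import numpy as np

alt_col = np.arange(100) / 10.0
expected_p = np.exp(alt_col) / np.exp(alt_col).sum()
# the top alternative's probability is about 0.095, in line with the
# p column above; the observed choices_p (~0.0899) sits close to it
```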

### test 2

use a social-distance-style approach to test matching agents with alternatives based on the characteristics of both
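The matching utility used in this test rewards small group distance. For the four groups, the pairwise weights of the `exp(1 / (1 + |c_grp - a_grp|)**2)` form used below work out as:

```python
import numpy as np

groups = np.arange(4)
# distance matrix: 1 + |c_grp - a_grp| for every group pair
dist = 1.0 + np.abs(groups[:, None] - groups[None, :])
u = np.exp(1.0 / dist**2)
# same-group pairs get weight e ≈ 2.718; distance-1 pairs get exp(1/4) ≈ 1.284
```

So same-group matches should be overrepresented in the choice counts, which is the pattern the cross-tabulations below are checking for.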

``````

In :

choosers['c_grp'] = np.random.randint(0, 4, len(choosers))
choosers.head(10)

``````
``````

Out:

   col  c_grp
0  0.0      0
1  0.1      1
2  0.2      3
3  0.3      2
4  0.4      1
5  0.5      3
6  0.6      3
7  0.7      3
8  0.8      2
9  0.9      1

10 rows × 2 columns

time: 11 ms

``````
``````

In :

alts['a_grp'] = np.random.randint(0, 4, len(alts))
alts.head(10)

``````
``````

Out:

   alt_col  cap         p  a_grp
0      0.0   10  0.000005      1
1      0.1   10  0.000005      1
2      0.2   10  0.000006      2
3      0.3   10  0.000006      0
4      0.4   10  0.000007      0
5      0.5   10  0.000008      1
6      0.6   10  0.000009      0
7      0.7   10  0.000010      3
8      0.8   10  0.000011      3
9      0.9   10  0.000012      2

10 rows × 4 columns

time: 14 ms

``````
``````

In :

# a probability function that matches choosers to alternatives in a social-distance-like way
def match_probs(interaction_data, num_choosers, sample_size):
    dist = 1.0 + (interaction_data['c_grp'] - interaction_data['a_grp']).abs()
    u = np.exp(1 / dist**2).values.reshape(num_choosers, sample_size)
    return u / u.sum(axis=1, keepdims=True)

``````
``````

time: 4 ms

``````
``````

In :

# test without capacity
choices = choice_with_sampling(
    choosers,
    alts,
    match_probs,
    sample_size=sample_size
)

``````
``````

time: 77 ms

``````
``````

In :

choices['c_grp'] = choosers['c_grp']
choices.head(10)

``````
``````

Out:

   alternative_id      prob  c_grp  a_grp
0              72  0.036804      0      0
1               8  0.013499      1      3
2              12  0.016737      3      2
3              84  0.035745      2      2
4              90  0.015277      1      2
5              29  0.016045      3      2
6              83  0.016088      3      2
7               9  0.016395      3      2
8              41  0.033484      2      2
9              92  0.015849      1      2

10 rows × 4 columns

time: 18 ms

``````
``````

In :

choices.groupby(['c_grp', 'a_grp']).size().to_frame()

``````
``````

Out:

              0
c_grp a_grp
0     0      46
      1      38
      2      30
      3      18
1     0      20
      1      61
      2      26
      3      29
2     0      17
      1      27
      2      40
      3      24
3     0      17
      1      22
      2      34
      3      51

16 rows × 1 columns

time: 13 ms

``````
``````

In :

choices, capacities = capacity_choice_with_sampling(
    choosers,
    alts,
    'cap',
    match_probs,
    sample_size=sample_size
)

``````
``````

time: 60 ms

``````
``````

In :

c = choosers.copy()
c['alternative_id'] = choices
# attach the chosen alternative's group so we can cross-tabulate below
c['a_grp'] = alts['a_grp'].reindex(c['alternative_id']).values

``````
``````

time: 4 ms

``````
``````

In :

c.groupby(['c_grp', 'a_grp']).size().to_frame()

``````
``````

Out:

              0
c_grp a_grp
0     0      49
      1      36
      2      28
      3      19
1     0      22
      1      52
      2      31
      3      31
2     0      20
      1      18
      2      46
      3      24
3     0      20
      1      30
      2      25
      3      49

16 rows × 1 columns

time: 14 ms

``````

### test 3 - mnl style

This is the typical way we would apply these, via coefficients from an estimation; here, however, I'm just making them up.
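Under the hood, `mnl_choice_with_sampling` presumably forms a linear-in-parameters utility from the chooser and alternative columns named in `coeff`, then softmaxes over each chooser's sample. A minimal sketch of that probability step (an assumption about the implementation, for illustration only):

```python
import numpy as np
import pandas as pd

def mnl_probs(interaction_data, coeff, num_choosers, sample_size):
    # linear-in-parameters utility, then a per-chooser softmax
    u = interaction_data[coeff.index].dot(coeff).values
    eu = np.exp(u.reshape(num_choosers, sample_size))
    return eu / eu.sum(axis=1, keepdims=True)

coeff = pd.Series([0.000001, 0.5], index=['col', 'alt_col'])
# two choosers, each seeing a sample of two alternatives
data = pd.DataFrame({'col': [1.0, 1.0, 2.0, 2.0],
                     'alt_col': [0.0, 1.0, 0.0, 1.0]})
p = mnl_probs(data, coeff, num_choosers=2, sample_size=2)
```

With the tiny `col` coefficient, the chooser attribute barely matters and `alt_col` drives the probabilities, which is why the results below track `alt_col` so closely.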

``````

In :

coeff = pd.Series(
    [0.000001, .5],
    index=['col', 'alt_col']
)
coeff

``````
``````

Out:

col        0.000001
alt_col    0.500000
dtype: float64

time: 5 ms

``````
``````

In :

# run some tests
res = alts.copy()
res['choices'] = 0
res['choices_w_cap'] = 0

for i in range(num_tests):

    if i % 100 == 0:
        print('on test: {}'.format(i))

    # 1st run w/out capacities
    choices = mnl_choice_with_sampling(
        choosers,
        alts,
        coeff,
        sample_size=sample_size
    )
    choice_sums = choices.value_counts().reindex(res.index).fillna(0)
    res['choices'] += choice_sums

    # next run with capacities
    choices = mnl_choice_with_sampling(
        choosers,
        alts,
        coeff,
        sample_size=sample_size,
        cap_col='cap'
    )

    choice_sums = choices.value_counts().reindex(res.index).fillna(0)
    res['choices_w_cap'] += choice_sums
    if (choice_sums > cap_per_alt).any():
        print('we blew capacity!')
        print(choice_sums.max())

res['avg_choices'] = res['choices'] / num_tests
res['avg_choices_w_cap'] = res['choices_w_cap'] / num_tests
res['choices_p'] = get_probs(res['choices'])
res['choices_w_cap_p'] = get_probs(res['choices_w_cap'])

``````
``````

on test: 0
on test: 100
on test: 200
on test: 300
on test: 400
on test: 500
on test: 600
on test: 700
on test: 800
on test: 900
time: 1min 35s

``````
``````

In :

res.head(10)
``````
``````

Out:

   alt_col  cap         p  a_grp  choices  choices_w_cap  avg_choices  avg_choices_w_cap  choices_p  choices_w_cap_p
0      0.0   10  0.000005      1      169            338        0.169              0.338   0.000338         0.000676
1      0.1   10  0.000005      1      211            348        0.211              0.348   0.000422         0.000696
2      0.2   10  0.000006      2      193            371        0.193              0.371   0.000386         0.000742
3      0.3   10  0.000006      0      217            404        0.217              0.404   0.000434         0.000808
4      0.4   10  0.000007      0      241            395        0.241              0.395   0.000482         0.000790
5      0.5   10  0.000008      1      242            431        0.242              0.431   0.000484         0.000862
6      0.6   10  0.000009      0      202            430        0.202              0.430   0.000404         0.000860
7      0.7   10  0.000010      3      278            503        0.278              0.503   0.000556         0.001006
8      0.8   10  0.000011      3      286            488        0.286              0.488   0.000572         0.000976
9      0.9   10  0.000012      2      270            524        0.270              0.524   0.000540         0.001048

10 rows × 10 columns

time: 31 ms

``````
``````

In :

res.tail(10)

``````
``````

Out:

    alt_col  cap         p  a_grp  choices  choices_w_cap  avg_choices  avg_choices_w_cap  choices_p  choices_w_cap_p
90      9.0   10  0.038692      2    15579          10000       15.579                 10   0.031158             0.02
91      9.1   10  0.042761      3    16204          10000       16.204                 10   0.032408             0.02
92      9.2   10  0.047258      2    17117          10000       17.117                 10   0.034234             0.02
93      9.3   10  0.052229      3    17796          10000       17.796                 10   0.035592             0.02
94      9.4   10  0.057722      1    18912          10000       18.912                 10   0.037824             0.02
95      9.5   10  0.063792      0    19690          10000       19.690                 10   0.039380             0.02
96      9.6   10  0.070501      3    20550          10000       20.550                 10   0.041100             0.02
97      9.7   10  0.077916      1    21828          10000       21.828                 10   0.043656             0.02
98      9.8   10  0.086111      1    23193          10000       23.193                 10   0.046386             0.02
99      9.9   10  0.095167      0    23962          10000       23.962                 10   0.047924             0.02

10 rows × 10 columns

time: 76 ms

``````