Monte Carlo Simulation with Python

Notebook to accompany article on Practical Business Python

Update to use numpy for faster loops based on comments here


In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

In [2]:
sns.set_style('whitegrid')

In [3]:
# Define the variables for the Percent to target based on historical results
avg = 1
std_dev = .1
num_reps = 500
num_simulations = 100000

In [4]:
# Show an example of calculating the percent to target
pct_to_target = np.random.normal(
    avg,
    std_dev,
    size=(num_reps, num_simulations)
)

In [5]:
pct_to_target[0:10]


Out[5]:
array([[1.00168889, 0.84322867, 0.89764919, ..., 1.01640703, 1.20373747,
        0.97037204],
       [1.028913  , 0.90536114, 0.924947  , ..., 1.04301131, 1.00578572,
        0.82234308],
       [0.92013836, 0.89966565, 0.9061088 , ..., 0.95344094, 1.04113555,
        1.19014847],
       ...,
       [0.90638344, 0.92727774, 1.06040697, ..., 0.98925006, 0.7699789 ,
        1.13980837],
       [1.05623333, 0.90113527, 1.17673089, ..., 0.88190302, 0.75553421,
        1.01923352],
       [1.0517061 , 1.02548497, 1.06281586, ..., 1.01414038, 1.03138342,
        1.01631576]])

In [6]:
# Another example for the sales target distribution
sales_target_values = [75_000, 100_000, 200_000, 300_000, 400_000, 500_000]
sales_target_prob = [.3, .3, .2, .1, .05, .05]
sales_target = np.random.choice(sales_target_values, p=sales_target_prob, 
                                size=(num_reps, num_simulations))

In [7]:
sales_target[0:10]


Out[7]:
array([[ 75000, 400000,  75000, ..., 100000, 400000,  75000],
       [ 75000, 300000, 200000, ..., 200000, 200000, 500000],
       [ 75000,  75000,  75000, ..., 200000, 100000, 300000],
       ...,
       [ 75000, 100000, 200000, ...,  75000, 200000, 200000],
       [ 75000,  75000,  75000, ..., 500000,  75000,  75000],
       [ 75000,  75000,  75000, ...,  75000, 100000, 500000]])

In [8]:
commission_percentages = np.take(
    np.array([0.02, 0.03, 0.04]),
    np.digitize(pct_to_target, bins=[.9, .99, 10])
)

In [9]:
commission_percentages[0:10]


Out[9]:
array([[0.04, 0.02, 0.02, ..., 0.04, 0.04, 0.03],
       [0.04, 0.03, 0.03, ..., 0.04, 0.04, 0.02],
       [0.03, 0.02, 0.03, ..., 0.03, 0.04, 0.04],
       ...,
       [0.03, 0.03, 0.04, ..., 0.03, 0.02, 0.04],
       [0.04, 0.03, 0.04, ..., 0.02, 0.02, 0.04],
       [0.04, 0.04, 0.04, ..., 0.04, 0.04, 0.04]])

In [10]:
total_commissions = (commission_percentages * sales_target).sum(axis=0)

In [11]:
total_commissions.std()


Out[11]:
96204.21537404737

In [12]:
# Show how to create the dataframe
df = pd.DataFrame(data={'Total_Commissions': total_commissions})
df.head()


Out[12]:
Total_Commissions
0 2982500.0
1 3003500.0
2 2950250.0
3 2902000.0
4 2762500.0

In [13]:
df.plot(kind='hist', title='Commissions Distribution')


Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f6e5f13d2e8>

In [14]:
df.describe()


Out[14]:
Total_Commissions
count 1.000000e+05
mean 2.832281e+06
std 9.620470e+04
min 2.447500e+06
25% 2.766750e+06
50% 2.831250e+06
75% 2.896500e+06
max 3.279750e+06

In [ ]: