In [20]:
# Capacity-planning inputs for a typical Python service.
#
# Python services usually scale by running multiple worker processes.
# The main vulnerability is workers blocking on slow downstream
# services, which causes requests to queue up on *your* service.

# Number of worker processes available to serve requests.
num_workers = 40

# Concurrent requests each worker can handle. For most Python
# servers this is 1 (unless you use gevent or similar).
requests_per_worker = 1

# Expected load on this service: typical and worst-case QPS.
typical_qps = 100
max_qps = 200
assert typical_qps <= max_qps

# Average response time of this service, in seconds. A rough guess
# is fine -- it is not really used by the analysis, only drawn as a
# reference line in the plots.
typical_response_time_s = 0.100

# Downstream services that this service calls. We care about their
# **worst-case** latency, because when a downstream gets slow, our
# Python workers block on it. A good worst-case value is your load
# balancer (or otherwise enforced) timeout.
#
# Entries are (name, worst-case latency in seconds, QPS we send it).
downstream_services = [
    ('database', 1, typical_qps),
    ('svc', 0.2, typical_qps / 2.0),
]
In [21]:
import random  # NOTE(review): unused in this notebook -- candidate for removal
import matplotlib.pyplot as plt
import numpy as np

# Model the capacity/latency tradeoff using Little's Law:
#   in-flight requests = avg QPS * avg latency (s)
# With a fixed worker pool, the latency we can sustain at a given QPS
# is (workers we are willing to occupy) / QPS.
effective_workers = num_workers * requests_per_worker
safe_workers = 0.5 * effective_workers     # <=50% busy: healthy headroom
danger_workers = 0.75 * effective_workers  # 50-75% busy: warning zone
all_workers = 1.0 * effective_workers      # 100% busy: saturated, queueing

# QPS range to plot: half the typical load up to 1.5x the max load.
QPS = np.arange(max(1, int(typical_qps * 0.5)), int(max_qps * 1.50), 1)

fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111)
ax.set_title(
    'Service Capacity with {0} Workers @ {1} Accept Concurrency'.format(
        num_workers, requests_per_worker
    )
)

# Latency we could sustain at max QPS while staying in the safe zone.
safe_response_time = safe_workers / max_qps
# Draw the estimated response time green if it fits inside the safe
# zone, red if it does not.
trt_c = 'green' if safe_response_time >= typical_response_time_s else 'red'
ax.axhline(
    y=typical_response_time_s, color=trt_c,
    label='Estimated Response Time: {0:.3f} s'.format(typical_response_time_s)
)
ax.axvline(
    x=typical_qps, color='lightblue',
    label='Estimated Typical QPS: {0:.2f} qps'.format(typical_qps)
)
ax.axvline(
    x=max_qps,
    label='Estimated Max QPS: {0:.2f} qps'.format(max_qps)
)
ax.axhline(
    y=safe_response_time, color='green',
    label='Safe Response Time: {0:.3f} s'.format(safe_response_time)
)

# Littles Law: queue size = avg qps * latency in s
# Shade the latency zone implied by each utilization threshold.
green = [safe_workers / qps for qps in QPS]
red = [danger_workers / qps for qps in QPS]
total = [all_workers / qps for qps in QPS]
ax.fill_between(
    QPS, 0, green, interpolate=True, color='green', alpha=0.2,
    label='Safe Latency given QPS'
)
ax.fill_between(
    QPS, green, red, interpolate=True, color='yellow', alpha=0.2,
    label='Warning Latency given QPS'
)
ax.fill_between(
    QPS, red, total, interpolate=True, color='red', alpha=0.2,
    label='Dangerous Latency given QPS'
)
ax.set_ylim([0, max(typical_response_time_s * 10, safe_response_time + 1)])
ax.set_xlim([min(QPS), max(QPS)])
ax.set_xlabel('QPS')
ax.set_ylabel('Required Average Latency (S)')
plt.legend(loc='upper right')

if safe_response_time < typical_response_time_s:
    # Overloaded: annotate the plot with the worker count that would
    # keep max QPS inside the safe (50% utilization) zone. This is the
    # inverse of safe_response_time = 0.5 * workers / max_qps, i.e.
    # workers >= 2 * max_qps * latency.
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    better_worker_count = (
        (max_qps * typical_response_time_s * 2) / requests_per_worker
    )
    msg = (
        'Service overloaded!!\n'
        'Estimated latency of {0:.0f}ms and\n'
        'Estimated max QPS of {1}qps requires:\n'
        '{2:0.2f} workers, not {3}'
    ).format(
        # BUG FIX: the message says "ms" but the value was formatted in
        # seconds (printed e.g. "0.1ms" instead of "100ms"); convert
        # seconds -> milliseconds before formatting.
        typical_response_time_s * 1000,
        max_qps, better_worker_count, num_workers
    )
    ax.text(0.05, 0.95, msg, transform=ax.transAxes, fontsize=14,
            verticalalignment='top', bbox=props)
plt.show()
In [22]:
# Slow-downstream modeling: how much downstream latency can we absorb?
# Workers occupied by typical load (Little's Law), and the spare
# workers left over to absorb blocking on downstream calls.
typical_workers = typical_qps * typical_response_time_s
spare = effective_workers - typical_workers

# QPS range covering all downstream services, with margin on each side.
min_svc_qps = min(d[2] for d in downstream_services)
max_svc_qps = max(d[2] for d in downstream_services)
svc_QPS = np.arange(max(1, int(min_svc_qps * 0.5)), int(max_svc_qps * 1.50), 1)
# Worst-case downstream latency we can tolerate at each QPS before the
# spare workers are exhausted.
svc_latency = [spare / qps for qps in svc_QPS]

# Partition downstream services into safe (green) and dangerous (red)
# buckets: a service is safe when its worst-case latency fits within
# the spare capacity at the QPS we send it.
good = {'x': [], 'y': [], 'labels': [], 'color': 'green'}
bad = {'x': [], 'y': [], 'labels': [], 'color': 'red'}
for name, worst_latency_s, qps in downstream_services:
    bucket = good if worst_latency_s < (spare / qps) else bad
    bucket['x'].append(qps)
    bucket['y'].append(worst_latency_s)
    bucket['labels'].append(name)

fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111)
ax.set_title('Vulnerability to Downstream Services')
for dat in (good, bad):
    ax.scatter(dat['x'], dat['y'], color=dat['color'], picker=True)
    for i, txt in enumerate(dat['labels']):
        xy = (dat['x'][i], dat['y'][i])
        # BUG FIX: the label jitter used hash(txt), which varies between
        # interpreter runs (string hash randomization, PYTHONHASHSEED),
        # so the figure was not reproducible. Alternate labels by their
        # deterministic index instead.
        xytext = (xy[0] + 1, xy[1] - (i % 2) / 10.0)
        ax.annotate(txt, xy=xy, xytext=xytext, color=dat['color'])

zero = np.zeros(len(svc_latency))
ax.fill_between(
    svc_QPS, zero, svc_latency, interpolate=True, color='green', alpha=0.2,
    label='Safe Service Latency given QPS'
)
# NOTE: safe_response_time is defined in the capacity cell above; this
# cell must run after it.
ax.set_ylim([0, max(typical_response_time_s * 10, safe_response_time + 1)])
ax.set_xlim([min(svc_QPS), max(svc_QPS)])
ax.set_xlabel('QPS')
ax.set_ylabel('Service Average Latency (S)')
plt.legend(loc='upper right')
plt.show()
In [ ]:
In [ ]: