In [ ]:
# Set up imports

import os
import glob
import itertools

import pandas as pd
import seaborn

In [ ]:
# Set up plotting

%matplotlib inline

import matplotlib
import matplotlib.pyplot as plt

In [ ]:
# Defaults for running the notebook manually - edit as needed.
# setdefault() only fills in a value when the variable is not already present,
# so settings exported by the calling shell or script are not clobbered.
os.environ.setdefault('REQUEST_SAMPLE', '1000')
os.environ.setdefault('REQUESTS_PATTERN', '/home/cpennington/work/loadtests/csm/2015-09-16-soak-with-gets-and-sets/requests-*')
#os.environ.setdefault('REQUESTS_PATTERN', '/home/cpennington/work/edx-load-tests/requests-*')

In [ ]:
# Use globbing to find the list of request logs.
# The pattern comes from REQUESTS_PATTERN, falling back to the current directory.
request_log_pattern = os.environ.get('REQUESTS_PATTERN', './requests-*')
# glob returns matches in arbitrary order; sort them so the order in which the
# logs are read (and therefore the row order of the loaded data) is reproducible.
request_logs = sorted(glob.glob(request_log_pattern))

In [ ]:
# Keep one row out of every `sample_rate` rows of each log (1 keeps every row).
sample_rate = int(os.environ.get('REQUEST_SAMPLE', 1))
if sample_rate < 1:
    raise ValueError("REQUEST_SAMPLE must be a positive integer, got %d" % sample_rate)

# Number of rows read per chunk, so a log never has to fit in memory at once.
chunk_size = 10000

# Number of equal-width start_time intervals used to group requests.
bucket_count = 50

# Read the raw request logs, collecting the sampled slice of every chunk.
sampled_chunks = []
for log in request_logs:
    for chunk_ix, chunk in enumerate(pd.read_csv(log, chunksize=chunk_size)):
        # The chunk starts at file row `chunk_ix * chunk_size`. To continue the
        # every-`sample_rate`-th-row stride across chunk boundaries we need the
        # first local offset whose file row number is a multiple of sample_rate,
        # i.e. the negated chunk start modulo sample_rate.
        starting_row = (-chunk_ix * chunk_size) % sample_rate
        sampled_chunks.append(chunk.iloc[starting_row::sample_rate])

if not sampled_chunks:
    raise ValueError("No request rows were loaded; check that REQUESTS_PATTERN matches at least one log file")

# Concatenate once instead of growing a DataFrame inside the loop.
raw_requests = pd.concat(sampled_chunks)

# Assign every request to one of `bucket_count` equal-width start_time intervals.
raw_requests["bucket"] = pd.cut(raw_requests['start_time'], bucket_count)
raw_requests

In [ ]:
# Keep only the columns the plots below need.
requests = raw_requests[["request_type", "name", "result", "response_time", "bucket", "start_time"]]


def plot_response_times(requests, showfliers=True):
    """Draw box plots of response time per time bucket, faceted by name and result.

    One facet row is drawn per value of ``name`` and one column per value of
    ``result``. Within a facet there is one box per ``bucket`` with whiskers at
    the 5th and 95th percentiles. Only the first and last x ticks are labelled,
    with the earliest and latest ``start_time`` rendered as datetimes
    (``start_time`` is interpreted as seconds since the epoch).

    Parameters:
        requests: DataFrame with "name", "result", "bucket", "response_time"
            and "start_time" columns.
        showfliers: whether points beyond the whiskers are drawn.

    Returns:
        The seaborn FacetGrid that was drawn.
    """
    # Fix the box order explicitly so every facet draws the same buckets in the
    # same positions, and so the tick labels below line up with them.
    bucket_order = list(requests["bucket"].astype("category").cat.categories)

    first_seen = pd.to_datetime(requests["start_time"].min(), unit="s")
    last_seen = pd.to_datetime(requests["start_time"].max(), unit="s")
    # One label per bucket: blank everywhere except the two ends.
    if len(bucket_order) > 1:
        tick_labels = [first_seen] + [""] * (len(bucket_order) - 2) + [last_seen]
    else:
        tick_labels = [first_seen][:len(bucket_order)]

    grid = seaborn.FacetGrid(
        data=requests,
        row="name",
        col="result",
        margin_titles=True,
        sharey=False,
        height=10,  # named `size` in seaborn releases before 0.9
        aspect=2,
    )
    grid.map(
        seaborn.boxplot,
        "bucket",
        "response_time",
        order=bucket_order,
        showfliers=showfliers,
        whis=[5, 95],
    )
    grid.set_xticklabels(tick_labels, rotation=90)
    return grid


# First with outliers drawn, then without them so the boxes are easier to read.
grid = plot_response_times(requests)
grid = plot_response_times(requests, showfliers=False)

In [ ]: