In [17]:
%matplotlib inline
import re
import pandas as pd
import math
import matplotlib.pyplot as plt
import seaborn as sns
In [4]:
def process_log(log):
    """Parse ping output and yield one row per recognised line.

    Args:
        log: Iterable of text lines (for example an open log file).

    Yields:
        Three-element lists ``[seq, latency, timeout]``:

        * ``[seq, 0, True]`` when the line carries an ``icmp_seq`` number and
          contains the word ``timeout``;
        * ``[seq, latency, False]`` when the line carries an ``icmp_seq``
          number and a ``time=<number>`` field; ``latency`` is that number
          as a float.

        Lines without an ``icmp_seq`` number, and non-timeout lines without a
        ``time=`` value, are skipped.
    """
    # `.` between "icmp_seq" and the digits accepts any single separator
    # character (the pattern is unchanged from the original, now a raw string).
    seq_pattern = re.compile(r'icmp_seq.(\d+)')
    # The fractional part is optional so whole-number values such as
    # "time=123" are parsed instead of being silently dropped.
    latency_pattern = re.compile(r'time=(\d+(?:\.\d+)?)')

    for line in log:
        seq_match = seq_pattern.search(line)
        if seq_match is None:
            continue
        seq = int(seq_match.group(1))

        if 'timeout' in line:
            # Timeouts have no measured latency; 0 is used as a placeholder.
            yield [seq, 0, True]
            continue

        latency_match = latency_pattern.search(line)
        if latency_match is not None:
            yield [seq, float(latency_match.group(1)), False]
In [5]:
# Read ping.log and build one DataFrame row per parsed line.
pings = None
with open("ping.log") as log_file:
    parsed_rows = list(process_log(log_file))
    pings = pd.DataFrame(parsed_rows, columns=['seq', 'latency', 'timeout'])
In [6]:
# 99.9th-percentile latency, computed over every row of `pings`.
latency_p999 = pings['latency'].quantile(.999)
# Keep only the rows strictly below that cut-off to trim the extreme tail.
below_p999 = pings['latency'] < latency_p999
pings_p999 = pings.loc[below_p999]
In [24]:
# Histogram of log-latency for the pings that actually got a reply.
# Build the frame in one chained expression: `.assign` returns a new
# DataFrame, so the new column is never written onto a slice of
# `pings_p999` (which is what raised SettingWithCopyWarning before).
non_timeout = (
    pings_p999
    .loc[~pings_p999['timeout']]
    .assign(log_latency=lambda replies: replies['latency'].apply(math.log))
)
non_timeout.hist(column='log_latency', bins=25)
Out[24]:
In [21]:
Out[21]: