In [1]:
import numpy as np
import pandas as pd
from cStringIO import StringIO
def parse_raw_graphite_metric(line):
    """Parse one line of Graphite ``rawData`` output into a float Series.

    A line has the form ``name,start,end,step|v1,v2,...``. ``start`` and
    ``end`` are epoch seconds, ``step`` is the sample interval in seconds,
    and a missing sample is spelled ``None``.

    Parameters
    ----------
    line : str
        A single raw line; a trailing newline is tolerated.

    Returns
    -------
    pandas.Series
        Float values (``None`` samples become NaN), named after the target
        and indexed by a UTC ``DatetimeIndex`` that starts at ``start`` and
        has one timestamp per value, ``step`` seconds apart.

    Raises
    ------
    ValueError
        If the line has no ``|`` separator, the header does not end in
        three comma-separated fields, or a field/value is not numeric
        (other than the literal ``None`` for values).
    """
    head, body = line.rstrip('\r\n').split('|', 1)
    # Target names may themselves contain commas (e.g. ``sumSeries(a,b)``),
    # so peel the three trailing fields off the right-hand side only.
    # Stray newlines inside the header are tolerated.
    name, since, _until, step = head.replace('\n', '').rsplit(',', 3)

    start = pd.Timestamp(float(since), unit='s', tz='UTC')
    # An empty body means "no samples", not one empty-string sample.
    raw_values = body.split(',') if body else []
    values = [np.nan if raw == 'None' else float(raw) for raw in raw_values]

    # Size the index from the values themselves rather than from the
    # header's end time, so index and data can never disagree in length.
    index = pd.date_range(
        start=start,
        periods=len(values),
        freq='%ds' % int(step),
    )
    return pd.Series(values, index=index, name=name, dtype=float)
def parse_raw_graphite(content):
    """Parse a complete Graphite ``rawData`` response into a DataFrame.

    Parameters
    ----------
    content : str
        The response body: one metric per line, each in the format
        accepted by ``parse_raw_graphite_metric``. Blank lines are ignored.

    Returns
    -------
    pandas.DataFrame
        One column per metric line, in input order, named after the
        metric and aligned on the metrics' timestamps. An empty frame is
        returned when ``content`` holds no metric lines.
    """
    # In testing on a 16MiB, 350-metric dump, cStringIO iterated the lines
    # in ~6ms against ~11ms for str.split. That gap is accepted here so the
    # function carries no Python-2-only dependency.
    series = [
        parse_raw_graphite_metric(line)
        for line in content.split('\n')
        if line.strip()
    ]
    if not series:
        return pd.DataFrame()
    # Column-wise concat keeps input order and uses each Series' name as
    # the column label (DataFrame.from_items no longer exists in pandas).
    return pd.concat(series, axis=1)
In [35]:
import requests

# Seconds to wait on the metrics server before giving up.
REQUEST_TIMEOUT_S = 30

# Render query for four summed reqpersec targets over the last 14 days.
# The response body is what parse_raw_graphite consumes.
url = (
    'https://metric.mtsvc.net/render/'
    '?width=586&height=308&_salt=1399928295.7&from=-14days'
    '&target=sum(servers.cs.c01.web.c*.reqpersec)'
    '&target=sum(servers.cs.c02.web.c*.reqpersec)'
    '&target=sum(servers.cs.c03.web.n*.reqpersec)'
    '&target=sum(servers.cs.c10.web.n*.reqpersec)'
    '&rawData=True'
)

# Without a timeout requests waits indefinitely on an unresponsive server.
rv = requests.get(url, timeout=REQUEST_TIMEOUT_S)
# Surface HTTP errors here instead of as a confusing parse failure later.
rv.raise_for_status()
In [6]:
# Parse the response text into a DataFrame with one column per metric line.
df = parse_raw_graphite(rv.text)
# Preview the first rows as the cell output.
df.head()
Out[6]:
In [12]:
# Map each column to the third dot-separated component of its name
# (e.g. 'sum(servers.cs.c01.web.c*.reqpersec)' -> 'c01'). Names with fewer
# than three components map to themselves, so re-running this cell after
# the columns have already been shortened is a no-op, not an IndexError.
rename = {
    x: (x.split('.')[2] if x.count('.') >= 2 else x)
    for x in df.columns
}
rename
Out[12]:
In [13]:
# Apply the column-name mapping; `df` is rebound to the renamed frame, so
# cells below this point see the shortened column names.
df = df.rename(columns=rename)
# Preview the first rows with the new column names.
df.head()
Out[13]:
In [20]:
%pylab inline
try:
df['c01'].plot()
except:
"There was a formatter problem"
In [24]:
# Histogram of the 'c01' column using 100 bins.
df['c01'].hist(bins=100)
Out[24]:
In [27]:
import seaborn as sns

# Hexbin joint distribution of the 'c01' and 'c10' columns. The two series
# are passed by keyword because current seaborn releases make x/y
# keyword-only; the keyword form is accepted by older releases as well.
sns.jointplot(x=df['c01'], y=df['c10'], kind="hex")
Out[27]:
In [34]:
# One box per column. The frame is passed explicitly as `data` so it is
# treated as wide-form input however a given seaborn release interprets
# its first positional argument.
sns.boxplot(data=df)
Out[34]: