In [132]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
%matplotlib inline
pd.set_option('display.max_rows',8)

from pandas import *
from pandas.io.parsers import read_csv

In [133]:
# Load the CSV. The file is read without a header row, so the four column
# names are supplied here; only 'timestamp' and 'duration' are kept.
# NOTE(review): the variable is called `ral` but the file is 'cern30min.csv'
# -- confirm this is the intended input.
ral = read_csv('cern30min.csv', index_col=False, header=None,
               names=['timestamp', 'host', 'size', 'duration'],
               usecols=['timestamp', 'duration'])

# Interpret the numeric timestamps as seconds since the Unix epoch.
ral['timestamp'] = pd.to_datetime(ral['timestamp'], unit='s')

# Elapsed time relative to the earliest sample. Series.min() is vectorised and
# skips NaT, whereas the builtin min() compares element by element and can
# return NaT (turning the whole column into NaT) if a timestamp is missing.
ral['time'] = ral['timestamp'] - ral['timestamp'].min()

# Index by elapsed time; plain assignment instead of inplace=True.
ral = ral.set_index('time')

In [134]:
# Show summary statistics (count, mean, std, min, quartiles, max) for `ral`.
ral.describe()


Out[134]:
duration
count 255.000000
mean 23.380024
std 2.256339
min 18.405646
25% 21.832625
50% 23.031074
75% 24.549370
max 38.850552

In [135]:
# Plot 'duration' against elapsed time in seconds.
# DataFrame.plot expects `x` to be a column label (or position), not an array,
# and astype('timedelta64[s]') on a TimedeltaIndex does not yield plain numbers
# on every pandas version. So materialise the elapsed seconds as an explicit
# float column with total_seconds() and refer to it by name.
ax = (
    ral.assign(elapsed_s=ral.index.total_seconds())
       .plot(x='elapsed_s', y='duration')
)
# Label the axes so the figure can be read on its own.
# NOTE(review): the unit of 'duration' is not visible here -- add it once confirmed.
ax.set_xlabel('time since first sample (s)')
ax.set_ylabel('duration')
ax


Out[135]:
<matplotlib.axes._subplots.AxesSubplot at 0x118e5ed68>