This notebook shows you step by step how you can transform text data from a vmstat output file into a pandas DataFrame.
In [1]:
# Page through the raw log file to inspect its layout before it is parsed below.
%less ../datasets/vmstat_loadtest.log
In [2]:
import pandas as pd

# Read the log with read_csv's default comma separator. This assumes the file
# contains no commas, so every line ends up as one string in a single column.
# skiprows=1 drops the first line of the file; the line after it becomes the
# (single) column label.
log_path = "../datasets/vmstat_loadtest.log"
raw = pd.read_csv(log_path, skiprows=1)
raw.head()
Out[2]:
In [3]:
# The first column label of `raw` holds the whole header line as one string;
# splitting it on whitespace yields the individual column names.
columns = raw.columns[0].split()
print(columns)
In [4]:
# Split every line of the first column on whitespace into one field per
# column name. Limiting the number of splits to len(columns) - 1 keeps
# whatever follows the last split together in the final field.
# expand=True makes pandas build the DataFrame directly instead of creating
# a separate Series for each row via .apply(pd.Series).
data = raw.iloc[:, 0].str.split(n=len(columns) - 1, expand=True)
data.head()
Out[4]:
In [5]:
# Replace the positional column labels of `data` with the names held in
# `columns`. This relabels `data` in place; the lengths must match or pandas
# raises a ValueError.
data.columns = columns
data.head()
Out[5]:
In [6]:
# Convert every column except the last one to numeric dtypes, then append
# the 'UTC' column parsed as datetimes as the final column.
numeric_part = data.iloc[:, :-1].apply(pd.to_numeric)
vmstat = numeric_part.assign(UTC=pd.to_datetime(data['UTC']))
vmstat.head()
Out[6]:
In [7]:
# Select only the CPU-related columns from the parsed table.
cpu_columns = ['us', 'sy', 'id', 'wa', 'st']
cpu = vmstat[cpu_columns]
cpu.head()
Out[7]:
In [8]:
%matplotlib inline
cpu.plot.area();