In [ ]:
import gzip
from collections import defaultdict
from collections import namedtuple
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import numpy as np
import math
from bokeh.charts import Scatter, output_file, show
from bokeh.models import HoverTool
from bokeh.plotting import figure, output_file, show, ColumnDataSource
from collections import OrderedDict
%matplotlib
In [ ]:
def windowEvents(outFile, logLines, windowSec=120):
    """Group timestamped template ids into fixed-width time windows.

    Reads the gzip file ``logLines``, whose lines have the form
    ``<time>,<tid>``, and assigns every line to the window
    ``int(float(time) / windowSec)``.  Each time the window index changes,
    the distinct ids collected so far are written to the gzip file
    ``outFile`` as one space-separated, sorted line.  The last window is
    written when the input is exhausted.

    Args:
        outFile: Path of the gzip file to create.
        logLines: Path of the gzip input file (decoded as UTF-8).
        windowSec: Window width, in the same unit as the time column.

    Raises:
        ValueError: If a non-blank line is not exactly ``<time>,<tid>`` or
            the time field is not numeric.
    """
    pending = set()
    previous = None  # window index of the previous line; None before the first

    # gzip streams are binary: decode on read and encode on write so the same
    # code behaves identically whether str is bytes or text.
    with gzip.open(logLines, 'rb') as src, gzip.open(outFile, 'wb') as dst:

        def flush():
            # Sorted so the output file is reproducible between runs.
            if pending:
                dst.write((' '.join(sorted(pending)) + '\n').encode('utf-8'))
                pending.clear()

        for raw in src:
            line = raw.decode('utf-8').strip()
            if not line:
                continue  # tolerate blank lines instead of failing the unpack
            time, tid = line.split(',')
            current = int(float(time) / windowSec)
            if previous is not None and current != previous:
                flush()
            pending.add(tid)
            previous = current

        # Without this the ids of the final window would be silently dropped.
        flush()
In [ ]:
def readPatterns(patterns):
    """Load the pattern file as a list of token sets.

    Every line of ``patterns`` is stripped and split on single spaces; the
    resulting tokens form one set per line.  Blank lines are kept (as the set
    ``{''}``) so that list positions stay aligned with the raw lines.

    Args:
        patterns: Path of the text file, one pattern per line.

    Returns:
        A tuple ``(listSetPat, pat)`` where ``listSetPat[i]`` is the token set
        of line ``i`` and ``pat`` is the list of raw lines exactly as read
        (trailing newlines included).
    """
    # The context manager closes the handle, which the bare open() never did.
    with open(patterns, 'r') as handle:
        pat = handle.readlines()
    listSetPat = [set(raw.strip().split(' ')) for raw in pat]
    return (listSetPat, pat)
In [ ]:
def makeWindowSet(outFile):
    """Read a gzip file of space-separated tokens into a list of sets.

    Args:
        outFile: Path of the gzip file; each line holds the tokens of one
            window, separated by single spaces (decoded as UTF-8).

    Returns:
        A list with one set of text tokens per line, in file order.  A blank
        line yields the set ``{''}``.
    """
    # Decode explicitly: the binary gzip stream yields bytes, and byte tokens
    # would never compare equal to the text tokens of the pattern sets.
    with gzip.open(outFile, 'rb') as handle:
        return [set(raw.decode('utf-8').strip().split(' ')) for raw in handle]
In [ ]:
def makeSwimLanes(swimOutFile, windowSet, patternSets=None):
    """Record, for every window, which patterns it fully contains.

    A pattern matches a window when the window's token set is a superset of
    the pattern's token set.  For each window one line is produced holding
    the matching pattern indices in ascending order, space separated (an
    empty line when nothing matches).

    Args:
        swimOutFile: Path of the gzip file to create, one line per window.
        windowSet: Iterable of token sets, one per window.
        patternSets: Sequence of pattern token sets.  When omitted, the
            module-level ``listSetPat`` is used, as the function did before
            this parameter existed.

    Returns:
        A list with the same lines that were written, without newlines.
    """
    if patternSets is None:
        patternSets = listSetPat  # backward-compatible global fallback

    window = []
    with gzip.open(swimOutFile, 'wb') as out:
        # Iterate the argument, not the global windowListSet.
        for wset in windowSet:
            matched = [str(index)
                       for index, pset in enumerate(patternSets)
                       if wset.issuperset(pset)]
            outText = ' '.join(matched)
            window.append(outText)
            # The gzip stream is binary, so encode before writing.
            out.write((outText + '\n').encode('utf-8'))
    return window
In [ ]:
def getTemplates(templateFile):
    """Load the ``<tid>,<text>`` template file into a dictionary.

    Each line is stripped and split at the first comma only, so the text part
    may itself contain commas.  Blank lines are skipped.

    Args:
        templateFile: Path of the template text file.

    Returns:
        A dict mapping template id (str) to template text (str).  When an id
        appears more than once the last occurrence wins.

    Raises:
        ValueError: If a non-blank line contains no comma.
    """
    templateDict = {}
    # The context manager closes the handle, which the bare open() never did.
    with open(templateFile, 'r') as templates:
        for raw in templates:
            entry = raw.strip()
            if not entry:
                continue  # a blank line would otherwise break the unpacking
            tid, text = entry.split(',', 1)
            templateDict[tid] = text
    return templateDict
In [ ]:
def makeEventWords(e, templateDict, e2p):
    """Collect the distinct words of every template that makes up event ``e``.

    Args:
        e: Event index (anything ``int()`` accepts) into ``e2p``.
        templateDict: Mapping of template id to template text.
        e2p: Sequence of strings; entry ``int(e)`` lists the template ids of
            the event, separated by single spaces.

    Returns:
        The set of space-separated words found in the texts of those
        templates.
    """
    template_ids = e2p[int(e)].strip().split(' ')
    return {
        word
        for template_id in template_ids
        for word in templateDict[template_id].split(' ')
    }
In [ ]:
def samplemat(swim, y, x, templateDict, e2p, plot=True):
    """Build the event-by-window occupancy matrix and the rows for the viz file.

    Args:
        swim: Iterable of strings, one per window, each holding the
            space-separated event indices active in that window.
        y: Number of matrix rows (event indices must be below this).
        x: Number of matrix columns; only the first ``x`` windows are used.
        templateDict: Mapping of template id to template text, forwarded to
            ``makeEventWords``.
        e2p: Sequence mapping event index to its template-id string,
            forwarded to ``makeEventWords``.
        plot: When true, show the matrix with ``plt.matshow``.

    Returns:
        A tuple ``(mat, vizLine)``: ``mat`` is a ``(y, x)`` array holding
        10000 where an event is active and 0 elsewhere; ``vizLine`` is a list
        of ``(window, event, words)`` tuples, ``words`` being the event's
        words joined by spaces.

    Raises:
        IndexError: If an event index is outside ``range(y)``.
    """
    mat = np.zeros((y, x))
    vizLine = []
    columns = int(x)

    for xval, s in enumerate(swim):
        if xval >= columns:
            break  # windows beyond the matrix width are ignored
        for items in s.strip().split(' '):
            if not items:
                continue  # a window with no events is an empty string
            mat[int(items), xval] = 10000
            words = makeEventWords(items, templateDict, e2p)
            vizLine.append((xval, items, ' '.join(words)))

    # Plot and return after the loop so that inputs with x windows or fewer
    # also produce a result instead of falling through and returning None.
    if plot:
        plt.matshow(mat, cmap=plt.cm.bone_r)
        plt.xlabel('time')
        plt.ylabel('events')
    return (mat, vizLine)
In [ ]:
def writeVizFile(vizFile, samplematViz):
    """Write the ``(window, event, words)`` rows as a three-column CSV file.

    No header row is written.  Fields containing commas, quotes or newlines
    are quoted so that a CSV reader gets back exactly three columns; fields
    without such characters are written as before.

    Args:
        vizFile: Path of the text file to create.
        samplematViz: Iterable of indexable rows; only items 0, 1 and 2 of
            each row are written.
    """
    import csv  # local import: the file's import cell does not provide csv

    with open(vizFile, 'w') as outFile:
        # lineterminator='\n' keeps the original line endings (csv defaults
        # to '\r\n').
        writer = csv.writer(outFile, lineterminator='\n')
        for row in samplematViz:
            writer.writerow((row[0], row[1], row[2]))
In [ ]:
# Hex colour codes used to tint the swim-lane markers, addressed by position.
colors = [
    '#FFC0CB', '#FFB6C1', '#FF69B4', '#FF1493', '#DB7093', '#C71585', '#FFA07A', '#FA8072', '#E9967A', '#F08080', '#CD5C5C', '#DC143C', '#B22222',
    '#8B0000', '#FF0000', '#FF4500', '#FF6347', '#FF7F50', '#FF8C00', '#FFA500', '#FFFF00', '#FFFFE0', '#FFFACD', '#FAFAD2', '#FFEFD5', '#FFE4B5',
    '#FFDAB9', '#EEE8AA', '#F0E68C', '#BDB76B', '#FFD700', '#FFF8DC', '#FFEBCD', '#FFE4C4', '#FFDEAD', '#F5DEB3', '#DEB887', '#D2B48C', '#BC8F8F',
    '#F4A460', '#DAA520', '#B8860B', '#CD853F', '#D2691E', '#8B4513', '#A0522D', '#A52A2A', '#800000', '#556B2F', '#808000', '#6B8E23', '#9ACD32',
    '#32CD32', '#00FF00', '#7CFC00', '#7FFF00', '#ADFF2F', '#00FF7F', '#00FA9A', '#90EE90', '#98FB98', '#8FBC8F', '#3CB371', '#2E8B57', '#228B22',
    '#008000', '#006400', '#66CDAA', '#00FFFF', '#00FFFF', '#E0FFFF', '#AFEEEE', '#7FFFD4', '#40E0D0', '#48D1CC', '#00CED1', '#20B2AA', '#5F9EA0',
    '#008B8B', '#008080', '#B0C4DE', '#B0E0E6', '#ADD8E6', '#87CEEB', '#87CEFA', '#00BFFF', '#1E90FF', '#6495ED', '#4682B4', '#4169E1', '#0000FF',
    '#0000CD', '#00008B', '#000080', '#191970', '#E6E6FA', '#D8BFD8', '#DDA0DD', '#EE82EE', '#DA70D6', '#FF00FF', '#FF00FF', '#BA55D3', '#9370DB',
    '#8A2BE2', '#9400D3', '#9932CC', '#8B008B', '#800080', '#4B0082', '#483D8B', '#6A5ACD', '#7B68EE', '#FFFFFF', '#FFFAFA', '#F0FFF0', '#F5FFFA',
    '#F0FFFF', '#F0F8FF', '#F8F8FF', '#F5F5F5', '#FFF5EE', '#F5F5DC', '#FDF5E6', '#FFFAF0', '#FFFFF0', '#FAEBD7', '#FAF0E6', '#FFF0F5', '#FFE4E1',
    '#DCDCDC', '#D3D3D3', '#C0C0C0', '#A9A9A9', '#808080', '#696969', '#778899', '#708090', '#2F4F4F', '#000000',
]

# Position -> colour lookup table consumed by pal().
pallete = dict(enumerate(colors))
def pal(x, pallete):
    """Return the palette entry assigned to event ``x``.

    The slot is ``(int(x) * 20 + 5) % 100``, so for integer events only the
    slots 5, 25, 45, 65 and 85 are ever looked up.

    Args:
        x: Event identifier; anything ``int()`` accepts.
        pallete: Mapping from slot number to colour.

    Returns:
        The value stored in ``pallete`` at the computed slot.
    """
    slot = (20 * int(x) + 5) % 100
    return pallete[slot]
def plotSwimLanes(swimFile, pal=pal):
    """Render the swim-lane scatter plot from a three-column viz file.

    Args:
        swimFile: Path of a CSV file with the columns window, event, words
            and no header row (the layout ``writeVizFile`` produces).
        pal: Callable ``pal(event, pallete)`` returning the marker colour for
            an event; it is called with the module-level ``pallete``.

    The figure is handed to bokeh's ``show``; nothing is returned.
    """
    # header=None: the file has no header row.  With header=0 plus names=,
    # pandas treats the first line as a header to be replaced and drops it.
    # Malformed rows still raise, as that is read_csv's default.
    df = pd.read_csv(swimFile, header=None,
                     names=["window", "event", 'words'])

    source = ColumnDataSource(
        data=dict(
            x=df['window'],
            y=df['event'],
            desc=df['words'],
            color=df['event'].map(lambda x: pal(x, pallete)),
        )
    )

    hover = HoverTool(
        tooltips=[
            ("cluster", "@color:@y:@desc")
        ]
    )

    p = figure(tools="pan,wheel_zoom,box_zoom,reset,resize",
               title="swim lanes", x_axis_label="Time",
               y_axis_label="Events (sets of logLines)")
    p.add_tools(hover)
    p.circle('x', 'y', color='color', size=3, source=source)
    show(p)
In [ ]:
# Input files.
patterns = './PARIS_10sec_complete.results'  # patterns, one per line
logLines = './timeId.out.gz'                 # gzip of "<time>,<tid>" lines
templateFile = './templates.txt'             # "<tid>,<text>" lines

# Files produced by the pipeline.
outFile = './windowLines.gz'      # ids grouped per time window
swimOutFile = './swimlanes.gz'    # matching pattern indices per window
vizOutputFile = './viz.out'       # CSV consumed by plotSwimLanes
In [ ]:
# Group the raw "<time>,<tid>" lines into 120-unit windows.
windowEvents(outFile, logLines, 120)

# Load the patterns (token sets plus raw lines) and the windowed ids.
listSetPat, pat = readPatterns(patterns)
windowListSet = makeWindowSet(outFile)

# For every window, find the patterns it fully contains.
windowedSwimLanes = makeSwimLanes(swimOutFile, windowListSet)

# Build the 450 x 2000 event/window matrix and the rows for the viz file.
templateDict = getTemplates(templateFile)
a, w = samplemat(windowedSwimLanes, 450, 2000, templateDict, pat, plot=False)
writeVizFile(vizOutputFile, w)

# Select the HTML target before plotting: plotSwimLanes calls show(), so
# configuring output_file afterwards has no effect on that plot.
output_file('swimLanes.html')
plotSwimLanes(vizOutputFile)
In [ ]: