In [ ]:
import gzip
from collections import defaultdict
from collections import namedtuple
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import numpy as np

import math
from bokeh.charts import Scatter, output_file, show
from bokeh.models import HoverTool
from bokeh.plotting import figure, output_file, show, ColumnDataSource
from collections import OrderedDict

%matplotlib

In [ ]:
def windowEvents(outFile,logLines,windowSec=120):
    """Group log-line ids into fixed-width time windows.

    Reads the gzip file ``logLines``, whose non-blank lines have the form
    ``<time>,<id>``, and writes the gzip file ``outFile`` with one line per
    window: the distinct ids seen in that window, sorted and separated by
    single spaces.

    A line belongs to window ``int(float(time) / windowSec)``. A new output
    line is started whenever that value differs from the previous input
    line's value, so the input is expected to be ordered by time.

    Parameters:
        outFile:   path of the gzip file to create.
        logLines:  path of the gzip file to read.
        windowSec: window width, in the same unit as the time column.

    Raises:
        ValueError: if a non-blank line does not split into exactly two
            comma-separated fields, or its time field is not a number.
    """
    def flush(dst, ids):
        # Sorting makes the file reproducible between runs; downstream
        # readers turn each line back into a set, so order carries no meaning.
        if ids:
            dst.write((' '.join(sorted(ids)) + '\n').encode('utf-8'))
            ids.clear()

    # Binary mode plus explicit decode/encode behaves the same on Python 2 and 3.
    with gzip.open(logLines,'rb') as src, gzip.open(outFile,'wb') as dst:
        oldTime = None
        outSet = set()
        for raw in src:
            line = raw.decode('utf-8').strip()
            if not line:
                continue
            timestamp, tid = line.split(',')
            currentTime = int(float(timestamp)/windowSec)

            if oldTime is not None and oldTime != currentTime:
                flush(dst, outSet)

            outSet.add(tid)
            oldTime = currentTime

        # The final window has no following line to trigger a flush.
        flush(dst, outSet)

In [ ]:
def readPatterns(patterns):
    """Load the pattern file.

    Parameters:
        patterns: path of a text file with one pattern per line, tokens
            separated by single spaces.

    Returns:
        A tuple ``(listSetPat, pat)`` where ``pat`` is the list of raw lines
        (line endings included) and ``listSetPat[i]`` is the set of tokens
        on ``pat[i]``. Both lists share the same indexing.
    """
    # The context manager closes the handle even if reading fails.
    with open(patterns,'r') as handle:
        pat = handle.readlines()

    # Split on a single space (not arbitrary whitespace) so a blank line
    # yields {''} rather than an empty set that every window would match.
    listSetPat = [set(p.strip().split(' ')) for p in pat]
    return (listSetPat,pat)

In [ ]:
def makeWindowSet(outFile):
    """Read a gzip window file back into a list of sets.

    Parameters:
        outFile: path of a gzip file holding one window per line, ids
            separated by single spaces.

    Returns:
        A list with one set of id strings per line, in file order.
    """
    # Decode each line so the ids are text on Python 3 as well; the context
    # manager closes the gzip handle.
    with gzip.open(outFile,'rb') as w:
        return [set(raw.decode('utf-8').strip().split(' ')) for raw in w]

In [ ]:
def makeSwimLanes(swimOutFile,windowSet,patternSets=None):
    """Record, for every window, which patterns it fully contains.

    Parameters:
        swimOutFile: path of the gzip file to create; it receives one line
            per window.
        windowSet:   iterable of sets, one per window.
        patternSets: sequence of sets, one per pattern. When omitted, the
            notebook-level ``listSetPat`` is used, as in earlier revisions.

    Returns:
        A list with one string per window: the indices of the patterns that
        are subsets of that window, ascending and space-separated (empty
        string when none match). The same strings are written to
        ``swimOutFile``, one per line.
    """
    if patternSets is None:
        patternSets = listSetPat

    window = list()
    with gzip.open(swimOutFile,'wb') as o:
        # Iterate the argument, not a notebook global, so the function works
        # on whatever windows the caller passes.
        for wset in windowSet:
            matched = [index for index, pset in enumerate(patternSets)
                       if wset.issuperset(pset)]
            outText = ' '.join(str(index) for index in matched)
            window.append(outText)
            o.write((outText + '\n').encode('utf-8'))
    return window

In [ ]:
def getTemplates(templateFile):
    """Load the template file into a dictionary.

    Parameters:
        templateFile: path of a text file whose lines have the form
            ``<id>,<text>``. Only the first comma separates the fields, so
            the text may itself contain commas.

    Returns:
        A dict mapping each id string to its text.

    Raises:
        ValueError: if a non-blank line contains no comma.
    """
    templateDict = dict()
    with open(templateFile,'r') as templates:
        for row in templates:
            row = row.strip()
            if not row:
                # Tolerate blank lines, such as a trailing newline at the end of the file.
                continue
            tid,text = row.split(',',1)
            templateDict[tid]=text
    return templateDict

In [ ]:
def makeEventWords(e,templateDict,e2p):
    """Collect the distinct words of every template referenced by one event.

    ``e2p[int(e)]`` is a space-separated string of template ids; each id is
    looked up in ``templateDict`` and the resulting text is split on single
    spaces. The union of all those tokens is returned as a set.
    """
    template_ids = e2p[int(e)].strip().split(' ')
    return {token
            for template_id in template_ids
            for token in templateDict[template_id].split(' ')}

In [ ]:
def samplemat(swim,y,x,templateDict,e2p,plot=True):
    """Build the event-by-window matrix and the matching plot records.

    Parameters:
        swim:         sequence of strings, one per window, each holding
                      space-separated event indices (may be empty).
        y:            number of matrix rows (event indices must be < y).
        x:            number of matrix columns; windows beyond the first
                      ``x`` are ignored.
        templateDict: mapping passed through to ``makeEventWords``.
        e2p:          sequence passed through to ``makeEventWords``.
        plot:         when true, draw the matrix with ``plt.matshow``.

    Returns:
        ``(mat, vizLine)``: ``mat`` is a ``(y, x)`` array with 10000 at
        ``[event, window]`` for every event seen, and ``vizLine`` is a list
        of ``(window, event, words)`` tuples where ``words`` is the
        space-joined result of ``makeEventWords`` for that event.
    """
    mat = np.zeros((y,x))
    vizLine = list()
    for xval, s in enumerate(swim):
        if xval >= int(x):
            break

        for items in s.strip().split(' '):
            if items:
                mat[int(items),xval] = 10000
                words = makeEventWords(items,templateDict,e2p)
                vizLine.append((xval,items,' '.join(words)))

    # Plot and return after the loop so inputs with x windows or fewer still
    # produce a result instead of falling through and returning None.
    if plot:
        plt.matshow(mat,cmap=plt.cm.bone_r)
        plt.xlabel('time')
        plt.ylabel('events')
    return (mat,vizLine)

In [ ]:
def writeVizFile(vizFile,samplematViz):
    """Write the plot records as a three-column CSV file without a header.

    Parameters:
        vizFile:      path of the text file to create.
        samplematViz: iterable of records; the first three items of each
                      record are written as one row.

    Fields containing commas, quotes or newlines are quoted, so the file
    stays parseable by CSV readers; all other rows are written exactly as
    ``field0,field1,field2``.
    """
    import csv  # local import: only this function needs it

    with open(vizFile,'w') as outFile:
        writer = csv.writer(outFile, lineterminator='\n')
        for record in samplematViz:
            writer.writerow(['%s' % (field,) for field in (record[0], record[1], record[2])])

In [ ]:
# Hex colour codes available to the swim-lane plot, addressed by position.
colors = ['#FFC0CB', '#FFB6C1', '#FF69B4', '#FF1493', '#DB7093', '#C71585', '#FFA07A', '#FA8072', '#E9967A', '#F08080', '#CD5C5C', '#DC143C', '#B22222',
    '#8B0000', '#FF0000', '#FF4500', '#FF6347', '#FF7F50', '#FF8C00', '#FFA500', '#FFFF00', '#FFFFE0', '#FFFACD', '#FAFAD2', '#FFEFD5', '#FFE4B5',
    '#FFDAB9', '#EEE8AA', '#F0E68C', '#BDB76B', '#FFD700', '#FFF8DC', '#FFEBCD', '#FFE4C4', '#FFDEAD', '#F5DEB3', '#DEB887', '#D2B48C', '#BC8F8F',
    '#F4A460', '#DAA520', '#B8860B', '#CD853F', '#D2691E', '#8B4513', '#A0522D', '#A52A2A', '#800000', '#556B2F', '#808000', '#6B8E23', '#9ACD32',
    '#32CD32', '#00FF00', '#7CFC00', '#7FFF00', '#ADFF2F', '#00FF7F', '#00FA9A', '#90EE90', '#98FB98', '#8FBC8F', '#3CB371', '#2E8B57', '#228B22',
    '#008000', '#006400', '#66CDAA', '#00FFFF', '#00FFFF', '#E0FFFF', '#AFEEEE', '#7FFFD4', '#40E0D0', '#48D1CC', '#00CED1', '#20B2AA', '#5F9EA0',
    '#008B8B', '#008080', '#B0C4DE', '#B0E0E6', '#ADD8E6', '#87CEEB', '#87CEFA', '#00BFFF', '#1E90FF', '#6495ED', '#4682B4', '#4169E1', '#0000FF',
    '#0000CD', '#00008B', '#000080', '#191970', '#E6E6FA', '#D8BFD8', '#DDA0DD', '#EE82EE', '#DA70D6', '#FF00FF', '#FF00FF', '#BA55D3', '#9370DB',
    '#8A2BE2', '#9400D3', '#9932CC', '#8B008B', '#800080', '#4B0082', '#483D8B', '#6A5ACD', '#7B68EE', '#FFFFFF', '#FFFAFA', '#F0FFF0', '#F5FFFA',
    '#F0FFFF', '#F0F8FF', '#F8F8FF', '#F5F5F5', '#FFF5EE', '#F5F5DC', '#FDF5E6', '#FFFAF0', '#FFFFF0', '#FAEBD7', '#FAF0E6', '#FFF0F5', '#FFE4E1',
    '#DCDCDC', '#D3D3D3', '#C0C0C0', '#A9A9A9', '#808080', '#696969', '#778899', '#708090', '#2F4F4F', '#000000']

# Position -> hex code lookup used by pal().
pallete = dict(enumerate(colors))

def pal(x,pallete):
    """Return the ``pallete`` entry chosen for event id ``x``.

    The slot is ``(int(x) * 20 + 5) % 100``, so only the keys 5, 25, 45, 65
    and 85 are ever looked up.
    """
    slot = (20 * int(x) + 5) % 100
    return pallete[slot]

def plotSwimLanes(swimFile,pal=pal):
    """Draw the swim-lane scatter plot from a viz CSV file and show it.

    Parameters:
        swimFile: path of a header-less CSV file with three columns:
                  window, event, words.
        pal:      callable ``pal(event, pallete)`` returning the colour for
                  an event; it is given the notebook-level ``pallete``.

    Each row becomes one circle at (window, event), coloured by ``pal``;
    the hover tool shows the colour, the event and the words.
    """
    # header=None: the file has no header row. With header=0 pandas would
    # treat the first record as a header to be replaced by `names`, silently
    # dropping that record. error_bad_lines=True only restated the default
    # (raise on malformed rows) and is rejected by pandas 2.x, so it is omitted.
    df = pd.read_csv(swimFile, header=None, names=["window", "event",'words'])
    source = ColumnDataSource(
    data=dict(
            x=df['window'],
            y=df['event'],
            desc=df['words'],
            color =  df['event'].map(lambda x: pal(x,pallete))
        )
    )

    hover = HoverTool(
        tooltips=[
            ("cluster", "@color:@y:@desc")
        ]
    )

    p = figure(tools="pan,wheel_zoom,box_zoom,reset,resize",
               title="swim lanes" , x_axis_label = "Time",
               y_axis_label = "Events (sets of logLines)")

    p.add_tools(hover)

    p.circle('x', 'y',color='color', size=3, source=source)

    show(p)

In [ ]:
# Input files.
patterns = './PARIS_10sec_complete.results'   # read by readPatterns: one space-separated pattern per line
logLines = './timeId.out.gz'                  # read by windowEvents: gzip of "<time>,<id>" lines
templateFile = './templates.txt'              # read by getTemplates: "<id>,<text>" lines


# Output files produced by the cells below.
outFile = './windowLines.gz'      # written by windowEvents, read back by makeWindowSet
swimOutFile = './swimlanes.gz'    # written by makeSwimLanes
vizOutputFile = './viz.out'       # written by writeVizFile, read by plotSwimLanes

In [ ]:
# Bucket the "<time>,<id>" log into 120-second windows and write them to outFile.
windowEvents(outFile,logLines,120)

In [ ]:
# listSetPat: one token set per pattern line; pat: the raw pattern lines, same indexing.
listSetPat,pat = readPatterns(patterns)

In [ ]:
# Reload the window file as a list of id sets, one per window.
windowListSet = makeWindowSet(outFile)

In [ ]:
# One string of matching pattern indices per window; also written to swimOutFile.
windowedSwimLanes = makeSwimLanes(swimOutFile,windowListSet)

In [ ]:
# Map each template id to its text.
templateDict = getTemplates(templateFile)

In [ ]:
# Build a 450 x 2000 (event index x window) matrix `a` and the list `w` of
# (window, event, words) records; the matplotlib plot is disabled here.
a,w = samplemat(windowedSwimLanes,450,2000,templateDict,pat,plot=False)

In [ ]:
# Save the (window, event, words) records as comma-separated lines for plotSwimLanes.
writeVizFile(vizOutputFile,w)

In [ ]:
# Register the HTML target before plotting: show() renders to whatever output
# is configured when it is called, so configuring it afterwards would not
# affect this plot.
output_file('swimLanes.html')
plotSwimLanes(vizOutputFile)

In [ ]: