In [130]:
from collections import defaultdict
from collections import namedtuple
import matplotlib.pyplot as plt
import numpy as np
%matplotlib
# Record returned by makeMatrix.  `left` / `top` are the lists of per-line token
# sets, `matrix` is the sparse overlap dictionary keyed by (topIndex, leftIndex),
# and `topName` / `leftName` are the paths the sets were read from.
# Note the field order: makeMatrix passes the left list first, then the top list.
Returns = namedtuple('Returns',['left','top','matrix','topName','leftName'])


Using matplotlib backend: MacOSX

In [212]:
# Input files for the comparison.  makeMatrix reads every line of each file and
# splits it on whitespace into a set of tokens.
# NOTE(review): these are absolute, machine-specific paths -- adjust them before
# running the notebook anywhere else.
left = '/Users/dgrossman/data/matrix/PARIS_10secwindows_partial_clean.txt'
top = '/Users/dgrossman/data/matrix/tbird.log.out.logCluster.processed.10secwindows.loghound.300.out'

In [213]:
def makeMatrix(top,left,scaled=False):
    """Build a sparse token-overlap matrix between the lines of two files.

    Every line of ``top`` and ``left`` is split on whitespace and turned into
    a set of tokens.  For each (top line ``i``, left line ``j``) pair the
    number of shared tokens is computed and, when it is positive, stored in
    the result under the key ``(i, j)``.

    Parameters
    ----------
    top, left : str
        Paths of the two text files to compare.
    scaled : bool, optional
        When true, the shared-token count is divided by
        ``(#tokens only in the top line) + (#tokens only in the left line) + 1``
        using true (floating point) division, so the stored values are
        floats.  When false (default) the raw integer counts are stored.

    Returns
    -------
    Returns
        ``left`` / ``top`` hold the per-line token sets, ``matrix`` is a
        ``defaultdict(int)`` keyed by ``(topIndex, leftIndex)`` and
        ``topName`` / ``leftName`` are the input paths.

    Side effects
    ------------
    Prints the number of lines read from ``top`` and ``left``.
    """
    # Context managers close the handles even when reading raises.
    with open(top,'r') as topF:
        topList = [set(line.split()) for line in topF]
    with open(left,'r') as leftF:
        leftList = [set(line.split()) for line in leftF]

    mat = defaultdict(int)

    # One pre-formatted string prints the same text on Python 2 and Python 3.
    print('%d %d' % (len(topList), len(leftList)))

    for i, topSet in enumerate(topList):
        for j, leftSet in enumerate(leftList):
            score = len(topSet.intersection(leftSet))

            if scaled:
                # The intersection is a subset of both sets, so the sizes of
                # the two "only" parts follow from the lengths directly.
                topOnly = len(topSet) - score
                leftOnly = len(leftSet) - score
                # float() forces true division; plain int / int truncates on
                # Python 2 and would turn almost every scaled score into 0.
                score = float(score) / (topOnly + leftOnly + 1)

            # Only non-zero overlaps are stored, keeping the matrix sparse.
            if score > 0:
                mat[(i,j)] = score
    return Returns(leftList,topList,mat,top,left)
    
def interesting(returns,thresh,dumpSize=16):
    """Print a summary of the overlap matrix and its strongest pairs.

    First prints a histogram mapping each positive matrix value to the number
    of cells holding it.  Then, for every cell whose value is at least
    ``thresh``, prints how many tokens the two lines share and how many are
    exclusive to the left and to the top line.

    Parameters
    ----------
    returns : Returns
        Result of ``makeMatrix``; ``matrix``, ``left`` and ``top`` are read.
    thresh : number
        Minimum matrix value a cell needs in order to be reported.
    dumpSize : int, optional
        When a reported pair shares exactly this many tokens, the full top
        and left token sets are printed as well.  Defaults to 16, the value
        that used to be hard-coded.

    Returns
    -------
    None
        All results are written to standard output.
    """
    out = defaultdict(int)

    # .values() / .items() are available on both Python 2 and Python 3.
    for x in returns.matrix.values():
        if x > 0:
            out[x] += 1
    print(out)

    for key, value in returns.matrix.items():
        if value >= thresh:
            # Keys are (topIndex, leftIndex) pairs.
            topSet = returns.top[key[0]]
            leftSet = returns.left[key[1]]
            intersection = leftSet.intersection(topSet)
            leftOnly = leftSet.difference(intersection)
            topOnly = topSet.difference(intersection)
            # A single formatted string keeps the output identical on
            # Python 2 and Python 3.
            print('same: %d ,leftOnly: %d ,topOnly: %d'
                  % (len(intersection), len(leftOnly), len(topOnly)))
            if len(intersection) == dumpSize:
                print(topSet)
                print(leftSet)

In [214]:
def samplemat(rtrn):
    """Expand the sparse overlap dictionary into a dense NumPy array.

    Parameters
    ----------
    rtrn : Returns
        Result of ``makeMatrix``; ``left``, ``top`` and ``matrix`` are read.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(rtrn.left), len(rtrn.top))``.  Rows follow
        the left sets and columns the top sets, so the entry stored under
        ``(topIndex, leftIndex)`` lands at ``[leftIndex, topIndex]``.  Cells
        without an entry in ``rtrn.matrix`` stay 0.
    """
    dense = np.zeros((len(rtrn.left), len(rtrn.top)))
    # .items() works on Python 2 and 3; iteritems() exists on Python 2 only.
    for (topIdx, leftIdx), value in rtrn.matrix.items():
        dense[leftIdx, topIdx] = value
    return dense

In [215]:
def plotPowerCorner(a,mat):
    """Show ``a`` as an image after sorting its columns and rows by their sums.

    Columns are reordered by ascending column sum, then rows by ascending row
    sum, so the rows and columns with the largest totals end up at the
    highest indices.  The axes are labelled with the last path component of
    ``mat.topName`` (x) and ``mat.leftName`` (y).

    Parameters
    ----------
    a : array-like, 2-D
        Matrix to display.
    mat : Returns
        Only ``topName`` and ``leftName`` are read, for the axis labels.
    """
    # Order the columns by their totals.
    columnOrder = np.argsort(np.sum(a, axis=0))
    byColumns = np.take(a, columnOrder, axis=1)

    # Then order the rows of the column-sorted matrix by their totals.
    rowOrder = np.argsort(np.sum(byColumns, axis=1))
    ordered = np.take(byColumns, rowOrder, axis=0)

    topLabel = mat.topName.rsplit('/', 1)[-1]
    leftLabel = mat.leftName.rsplit('/', 1)[-1]

    plt.matshow(ordered)
    plt.xlabel(topLabel)
    plt.ylabel(leftLabel)
    plt.show()

In [216]:
# Unscaled run: matrix values are raw shared-token counts.  makeMatrix also
# prints the number of lines read from `top` and from `left`.
outx = makeMatrix(top,left)


1853 318

In [217]:
# Print the histogram of overlap values, then one summary line for every
# (top, left) pair whose matrix value is at least 5.
interesting(outx,5)


defaultdict(<type 'int'>, {1: 21234, 2: 4558, 3: 1023, 4: 391, 5: 167, 6: 63, 7: 1, 10: 8, 11: 3, 30: 5})
same: 5 ,leftOnly: 13 ,topOnly: 0
same: 6 ,leftOnly: 4 ,topOnly: 1
same: 6 ,leftOnly: 4 ,topOnly: 3
same: 6 ,leftOnly: 0 ,topOnly: 1
same: 6 ,leftOnly: 12 ,topOnly: 0
same: 5 ,leftOnly: 19 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 7
same: 5 ,leftOnly: 1 ,topOnly: 11
same: 5 ,leftOnly: 1 ,topOnly: 6
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 6 ,leftOnly: 0 ,topOnly: 9
same: 5 ,leftOnly: 1 ,topOnly: 8
same: 5 ,leftOnly: 13 ,topOnly: 0
same: 5 ,leftOnly: 13 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 5
same: 10 ,leftOnly: 0 ,topOnly: 2
same: 5 ,leftOnly: 1 ,topOnly: 7
same: 5 ,leftOnly: 1 ,topOnly: 6
same: 5 ,leftOnly: 13 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 8
same: 5 ,leftOnly: 1 ,topOnly: 8
same: 5 ,leftOnly: 1 ,topOnly: 7
same: 5 ,leftOnly: 13 ,topOnly: 0
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 1
same: 5 ,leftOnly: 1 ,topOnly: 8
same: 5 ,leftOnly: 1 ,topOnly: 4
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 9
same: 5 ,leftOnly: 1 ,topOnly: 3
same: 5 ,leftOnly: 13 ,topOnly: 0
same: 6 ,leftOnly: 0 ,topOnly: 9
same: 5 ,leftOnly: 1 ,topOnly: 3
same: 5 ,leftOnly: 1 ,topOnly: 9
same: 5 ,leftOnly: 1 ,topOnly: 9
same: 11 ,leftOnly: 13 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 3
same: 5 ,leftOnly: 1 ,topOnly: 7
same: 5 ,leftOnly: 1 ,topOnly: 8
same: 6 ,leftOnly: 0 ,topOnly: 9
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 3
same: 6 ,leftOnly: 0 ,topOnly: 10
same: 5 ,leftOnly: 1 ,topOnly: 8
same: 5 ,leftOnly: 3 ,topOnly: 0
same: 6 ,leftOnly: 0 ,topOnly: 2
same: 6 ,leftOnly: 0 ,topOnly: 1
same: 6 ,leftOnly: 12 ,topOnly: 0
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 5 ,leftOnly: 13 ,topOnly: 0
same: 5 ,leftOnly: 2 ,topOnly: 1
same: 6 ,leftOnly: 12 ,topOnly: 0
same: 6 ,leftOnly: 262 ,topOnly: 0
same: 6 ,leftOnly: 0 ,topOnly: 10
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 10 ,leftOnly: 0 ,topOnly: 2
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 5 ,leftOnly: 13 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 8
same: 6 ,leftOnly: 18 ,topOnly: 0
same: 6 ,leftOnly: 0 ,topOnly: 9
same: 5 ,leftOnly: 1 ,topOnly: 9
same: 5 ,leftOnly: 1 ,topOnly: 9
same: 6 ,leftOnly: 0 ,topOnly: 9
same: 5 ,leftOnly: 1 ,topOnly: 9
same: 5 ,leftOnly: 22 ,topOnly: 7
same: 5 ,leftOnly: 1 ,topOnly: 3
same: 5 ,leftOnly: 2 ,topOnly: 0
same: 5 ,leftOnly: 13 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 30
same: 5 ,leftOnly: 1 ,topOnly: 6
same: 5 ,leftOnly: 1 ,topOnly: 7
same: 5 ,leftOnly: 1 ,topOnly: 6
same: 5 ,leftOnly: 1 ,topOnly: 8
same: 6 ,leftOnly: 262 ,topOnly: 0
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 8
same: 5 ,leftOnly: 1 ,topOnly: 10
same: 5 ,leftOnly: 13 ,topOnly: 0
same: 5 ,leftOnly: 3 ,topOnly: 0
same: 5 ,leftOnly: 13 ,topOnly: 0
same: 6 ,leftOnly: 0 ,topOnly: 7
same: 6 ,leftOnly: 0 ,topOnly: 8
same: 6 ,leftOnly: 262 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 3
same: 10 ,leftOnly: 0 ,topOnly: 2
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 3
same: 30 ,leftOnly: 0 ,topOnly: 5
same: 5 ,leftOnly: 13 ,topOnly: 0
same: 6 ,leftOnly: 0 ,topOnly: 8
same: 5 ,leftOnly: 1 ,topOnly: 5
same: 5 ,leftOnly: 13 ,topOnly: 0
same: 6 ,leftOnly: 12 ,topOnly: 0
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 11 ,leftOnly: 3 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 8
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 6 ,leftOnly: 0 ,topOnly: 4
same: 5 ,leftOnly: 1 ,topOnly: 7
same: 6 ,leftOnly: 12 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 8
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 5 ,leftOnly: 22 ,topOnly: 7
same: 6 ,leftOnly: 0 ,topOnly: 9
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 8
same: 5 ,leftOnly: 1 ,topOnly: 8
same: 5 ,leftOnly: 22 ,topOnly: 7
same: 5 ,leftOnly: 1 ,topOnly: 8
same: 6 ,leftOnly: 0 ,topOnly: 7
same: 10 ,leftOnly: 0 ,topOnly: 2
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 6 ,leftOnly: 4 ,topOnly: 2
same: 5 ,leftOnly: 1 ,topOnly: 7
same: 5 ,leftOnly: 3 ,topOnly: 0
same: 6 ,leftOnly: 0 ,topOnly: 7
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 10 ,leftOnly: 0 ,topOnly: 1
same: 5 ,leftOnly: 13 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 9
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 6
same: 5 ,leftOnly: 3 ,topOnly: 0
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 6 ,leftOnly: 0 ,topOnly: 10
same: 5 ,leftOnly: 13 ,topOnly: 0
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 9
same: 6 ,leftOnly: 12 ,topOnly: 0
same: 5 ,leftOnly: 3 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 5
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 5 ,leftOnly: 22 ,topOnly: 7
same: 5 ,leftOnly: 1 ,topOnly: 8
same: 30 ,leftOnly: 0 ,topOnly: 1
same: 5 ,leftOnly: 1 ,topOnly: 8
same: 5 ,leftOnly: 1 ,topOnly: 7
same: 5 ,leftOnly: 22 ,topOnly: 7
same: 5 ,leftOnly: 13 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 2
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 6 ,leftOnly: 12 ,topOnly: 0
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 6 ,leftOnly: 2 ,topOnly: 0
same: 6 ,leftOnly: 0 ,topOnly: 1
same: 5 ,leftOnly: 1 ,topOnly: 8
same: 5 ,leftOnly: 1 ,topOnly: 8
same: 5 ,leftOnly: 1 ,topOnly: 4
same: 5 ,leftOnly: 1 ,topOnly: 5
same: 5 ,leftOnly: 1 ,topOnly: 8
same: 5 ,leftOnly: 13 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 4
same: 6 ,leftOnly: 0 ,topOnly: 10
same: 6 ,leftOnly: 0 ,topOnly: 10
same: 6 ,leftOnly: 4 ,topOnly: 3
same: 6 ,leftOnly: 4 ,topOnly: 3
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 8
same: 5 ,leftOnly: 1 ,topOnly: 9
same: 30 ,leftOnly: 0 ,topOnly: 2
same: 5 ,leftOnly: 1 ,topOnly: 6
same: 5 ,leftOnly: 1 ,topOnly: 7
same: 6 ,leftOnly: 0 ,topOnly: 10
same: 6 ,leftOnly: 4 ,topOnly: 2
same: 6 ,leftOnly: 0 ,topOnly: 5
same: 6 ,leftOnly: 12 ,topOnly: 0
same: 5 ,leftOnly: 13 ,topOnly: 0
same: 7 ,leftOnly: 0 ,topOnly: 0
same: 6 ,leftOnly: 262 ,topOnly: 0
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 6 ,leftOnly: 0 ,topOnly: 9
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 1
same: 5 ,leftOnly: 1 ,topOnly: 5
same: 5 ,leftOnly: 3 ,topOnly: 0
same: 6 ,leftOnly: 262 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 7
same: 5 ,leftOnly: 1 ,topOnly: 3
same: 5 ,leftOnly: 13 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 8
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 6 ,leftOnly: 0 ,topOnly: 7
same: 5 ,leftOnly: 13 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 2
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 4
same: 5 ,leftOnly: 1 ,topOnly: 9
same: 6 ,leftOnly: 12 ,topOnly: 0
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 6 ,leftOnly: 0 ,topOnly: 8
same: 5 ,leftOnly: 1 ,topOnly: 1
same: 5 ,leftOnly: 1 ,topOnly: 5
same: 5 ,leftOnly: 1 ,topOnly: 8
same: 6 ,leftOnly: 0 ,topOnly: 8
same: 5 ,leftOnly: 3 ,topOnly: 0
same: 30 ,leftOnly: 0 ,topOnly: 2
same: 5 ,leftOnly: 1 ,topOnly: 5
same: 10 ,leftOnly: 14 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 1
same: 5 ,leftOnly: 13 ,topOnly: 0
same: 6 ,leftOnly: 2 ,topOnly: 0
same: 5 ,leftOnly: 22 ,topOnly: 7
same: 5 ,leftOnly: 13 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 9
same: 6 ,leftOnly: 262 ,topOnly: 0
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 5 ,leftOnly: 13 ,topOnly: 0
same: 6 ,leftOnly: 0 ,topOnly: 10
same: 5 ,leftOnly: 1 ,topOnly: 8
same: 6 ,leftOnly: 0 ,topOnly: 8
same: 11 ,leftOnly: 7 ,topOnly: 0
same: 6 ,leftOnly: 0 ,topOnly: 9
same: 5 ,leftOnly: 1 ,topOnly: 8
same: 5 ,leftOnly: 1 ,topOnly: 8
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 6
same: 6 ,leftOnly: 2 ,topOnly: 0
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 6 ,leftOnly: 0 ,topOnly: 8
same: 5 ,leftOnly: 1 ,topOnly: 3
same: 6 ,leftOnly: 0 ,topOnly: 1
same: 5 ,leftOnly: 13 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 9
same: 6 ,leftOnly: 0 ,topOnly: 10
same: 5 ,leftOnly: 3 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 8
same: 5 ,leftOnly: 22 ,topOnly: 6
same: 5 ,leftOnly: 1 ,topOnly: 9
same: 10 ,leftOnly: 0 ,topOnly: 2
same: 6 ,leftOnly: 0 ,topOnly: 9
same: 10 ,leftOnly: 0 ,topOnly: 2
same: 30 ,leftOnly: 0 ,topOnly: 3
same: 5 ,leftOnly: 13 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 9
same: 5 ,leftOnly: 263 ,topOnly: 0
same: 6 ,leftOnly: 0 ,topOnly: 9
same: 6 ,leftOnly: 0 ,topOnly: 1
same: 5 ,leftOnly: 1 ,topOnly: 5
same: 5 ,leftOnly: 1 ,topOnly: 6
same: 6 ,leftOnly: 262 ,topOnly: 0
same: 5 ,leftOnly: 1 ,topOnly: 9
same: 6 ,leftOnly: 262 ,topOnly: 0

In [218]:
# Dense array of the overlap values, rows = left lines, columns = top lines.
a = samplemat(outx)
# Same array multiplied by 1000.
# NOTE(review): presumably to stretch the values for plotting -- confirm.
b=a*1000

In [219]:
# Display the scaled matrix with rows and columns sorted by their sums.
plotPowerCorner(b,outx)

In [195]:
# `top` is assigned a path string in this notebook, so this is the character
# count of the path, not the number of lines in that file.
len(top)


Out[195]:
69

In [170]:
# `left` is assigned a path string in this notebook, so this is the character
# count of the path, not the number of lines in that file.
len(left)


Out[170]:
69

In [149]:
# Number of token sets (one per line) read from the left file.
# NOTE(review): this cell's execution count is lower than that of the
# makeMatrix run above, so the recorded output may be stale -- re-run to confirm.
len(outx.left)


Out[149]:
80

In [150]:
# Number of token sets (one per line) read from the top file.
# NOTE(review): this cell's execution count is lower than that of the
# makeMatrix run above, so the recorded output may be stale -- re-run to confirm.
len(outx.top)


Out[150]:
318

In [ ]: