In [1]:
# Load external Jupyter extensions
%load_ext autoreload
%load_ext snakeviz
In [2]:
# jupyter settings
%autoreload 2
%matplotlib inline
%autocall 1
# imports
import numpy
import scipy
import pandas
import sklearn
import sklearn.cluster
import os
import glob
import json
from matplotlib import pyplot as plt
import itertools
import collections
import music21
import Levenshtein
from django.core.wsgi import get_wsgi_application
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "contourMetrics.settings.production")
application = get_wsgi_application()
import lib.contour
import cProfile
from django.db.models import Q
from lib.contour import ContourPoint as CP
from lib.contour import Contour as C
import lib.generator
import lib.utils
import lib.analysis
import lib
from apps.calculator import models
In [3]:
# Auxiliary functions and classes
def log_progress(sequence, every=None, size=None):
    # https://github.com/alexanderkuk/log-progress
    from ipywidgets import IntProgress, HTML, VBox
    from IPython.display import display

    is_iterator = False
    if size is None:
        try:
            size = len(sequence)
        except TypeError:
            is_iterator = True
    if size is not None:
        if every is None:
            if size <= 200:
                every = 1
            else:
                every = int(size / 200)  # every 0.5%
    else:
        assert every is not None, 'sequence is iterator, set every'

    if is_iterator:
        progress = IntProgress(min=0, max=1, value=1)
        progress.bar_style = 'info'
    else:
        progress = IntProgress(min=0, max=size, value=0)
    label = HTML()
    box = VBox(children=[label, progress])
    display(box)

    index = 0
    try:
        for index, record in enumerate(sequence, 1):
            if index == 1 or index % every == 0:
                if is_iterator:
                    label.value = '{index} / ?'.format(index=index)
                else:
                    progress.value = index
                    label.value = u'{index} / {size}'.format(
                        index=index,
                        size=size
                    )
            yield record
    except:
        progress.bar_style = 'danger'
        raise
    else:
        progress.bar_style = 'success'
        progress.value = index
        label.value = str(index or '?')
class PieceAux(object):
    """Wrapper around a Piece model: collects the normalized contours of one
    voice and caches pairwise similarity data for the OSC and AGP algorithms."""

    def __init__(self, piece_model, voice=0):
        self.contours = [p.contour.normalized for p in piece_model.phrase_set.filter(voice=voice)]
        self.piece_id = piece_model.id
        self.piece_name = piece_model.name
        self.comparison = lib.analysis.GeneralComparison([lib.contour.Contour(c) for c in self.contours])
        self.algorithms = ['OSC', 'AGP']
        self.similarity_series = {algorithm: self.comparison.similarity_series(algorithm) for algorithm in self.algorithms}
        self._similarity_maps = {}

    def __repr__(self):
        return '<P {}>'.format(self.piece_name)

    def get_mean(self):
        # Mean similarity for each algorithm, in the order of self.algorithms
        return numpy.array([self.similarity_series[a].mean() for a in self.algorithms])

    def get_similarity_map(self, algorithm='OSC'):
        # Cache the pairwise similarity matrix per algorithm
        if algorithm not in self._similarity_maps:
            self._similarity_maps[algorithm] = lib.utils.ExtendedDataFrame(self.comparison.similarity_map(algorithm))
        return self._similarity_maps[algorithm]

    def plot_similarity_map(self, algorithm='OSC'):
        edf = self.get_similarity_map(algorithm)
        return edf.heat_plot()

    def get_adjacent_similarity(self, algorithm='OSC'):
        # Similarity between each contour and its predecessor (first superdiagonal)
        df = self.get_similarity_map(algorithm)
        arr = numpy.array(df).diagonal(1)
        return pandas.Series(arr, index=df.index[1:])

    def plot_adjacent_similarity(self, algorithm='OSC'):
        s = self.get_adjacent_similarity(algorithm)
        return s.plot(grid=True, title='Similarity with previous contour P.{} ({})'.format(self.piece_name, algorithm))
def make_similarity_df(phrases):
    # Get similarity means by piece, indexed by piece name
    s = []
    i = []
    for p in phrases:
        s.append(p.get_mean())
        i.append(p.piece_name)
    return pandas.DataFrame(s, index=i, columns=['OSC', 'AGP'])
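For orientation, here is a minimal sketch of what a pairwise similarity map over contours looks like, using a normalized Levenshtein ratio as a stand-in metric; the actual OSC/AGP computations live in lib.analysis.GeneralComparison and are not reproduced here. The sketch relies on the numpy, pandas and Levenshtein imports above.

def toy_similarity_map(contours):
    # Illustrative stand-in: string-encode each contour and compare with
    # Levenshtein.ratio; this is NOT the OSC or AGP metric.
    labels = [str(c) for c in contours]
    n = len(labels)
    mat = numpy.zeros((n, n))
    for a in range(n):
        for b in range(n):
            mat[a, b] = Levenshtein.ratio(labels[a], labels[b])
    return pandas.DataFrame(mat, index=labels, columns=labels)

# e.g. toy_similarity_map([[0, 2, 1], [0, 1, 2], [2, 1, 0]])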
In [4]:
# Only contours with 2 to 5 points (size > 1 and size < 6)
csegs = [lib.contour.Contour(c['normalized']) for c in models.Contour.objects.filter(size__gt=1).filter(size__lt=6).values()]
gc = lib.analysis.GeneralComparison(csegs)
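A quick sanity check on the filter above, sketched under the assumption that the Contour model exposes its size field through .values(): tally how many contours of each size were selected.

# Sketch: size distribution of the selected contours (should contain only 2-5)
collections.Counter(
    c['size'] for c in models.Contour.objects.filter(size__gt=1, size__lt=6).values()
)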
In [5]:
algorithms = ['OSC', 'AGP', 'EMB']
ind = [(tuple(a.sequence), tuple(b.sequence)) for a, b in itertools.combinations(csegs, 2)]
seq = [gc.similarity_series(a) for a in log_progress(algorithms)]
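The index built above follows the ordering of itertools.combinations; assigning it to the similarity DataFrame in the next cell assumes that similarity_series returns its values in that same pair order. A tiny illustration of the ordering on plain labels:

# Combination order used for the (contour, contour) index
list(itertools.combinations(['A', 'B', 'C'], 2))
# [('A', 'B'), ('A', 'C'), ('B', 'C')]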
In [6]:
gc_df = pandas.DataFrame(seq).T
gc_df.columns = algorithms
gc_df.index = ind
In [7]:
# OSC < 0.1 AND AGP > 0.9
gc_df[(gc_df.OSC<0.1)&(gc_df.AGP>0.9)]
Out[7]:
In [8]:
# AGP < 0.1 AND OSC > 0.9
gc_df[(gc_df.AGP<0.1)&(gc_df.OSC>0.9)]
Out[8]:
In [9]:
gc_df.describe()
Out[9]:
In [10]:
gc_df.boxplot()
Out[10]:
In [11]:
gc_df.hist()
Out[11]:
In [12]:
gc_df.corr()
Out[12]:
In [13]:
gc_df.plot(kind='scatter', x='OSC', y='AGP')
Out[13]:
In [14]:
gc_df.plot(kind='scatter', x='OSC', y='EMB')
Out[14]:
In [15]:
gc_df.plot(kind='scatter', x='EMB', y='AGP')
Out[15]:
In [16]:
chorales_pieces = models.Piece.objects.filter(collection__name='Chorales')
soprano_phrases = [PieceAux(p, 0) for p in log_progress(chorales_pieces)]
bass_phrases = [PieceAux(p, 3) for p in log_progress(chorales_pieces)]
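The voice indices assume SATB ordering (0 = soprano, 3 = bass). A quick sketch for checking that convention on a single chorale by counting phrases per voice:

# Sketch: phrases per voice for the first chorale (indices 0-3 assumed S, A, T, B)
piece = chorales_pieces.first()
{v: piece.phrase_set.filter(voice=v).count() for v in range(4)}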
In [17]:
s_df = make_similarity_df(soprano_phrases)
b_df = make_similarity_df(bass_phrases)
In [18]:
s_df.describe()
Out[18]:
In [19]:
b_df.describe()
Out[19]:
In [20]:
s_df.boxplot()
Out[20]:
In [21]:
b_df.boxplot()
Out[21]:
In [22]:
s_df.hist(), b_df.hist()
Out[22]:
In [23]:
s_df.corr().OSC.AGP, b_df.corr().OSC.AGP
Out[23]:
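The same values can be read directly as the pairwise Pearson correlation between the two columns:

# Equivalent to .corr().OSC.AGP, per voice
s_df['OSC'].corr(s_df['AGP']), b_df['OSC'].corr(b_df['AGP'])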
In [24]:
s_df.plot(kind='scatter', x='OSC', y='AGP', grid=True), b_df.plot(kind='scatter', x='OSC', y='AGP', grid=True)
Out[24]:
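To see which pieces sit at the extremes of these scatter plots, the points can be labeled with their piece names (a sketch on the soprano DataFrame; the same works for b_df):

# Sketch: annotate each point with its piece name to spot outliers
ax = s_df.plot(kind='scatter', x='OSC', y='AGP', grid=True)
for name, row in s_df.iterrows():
    ax.annotate(name, (row.OSC, row.AGP), fontsize=6)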
In [25]:
v = 0.9
s_df[(s_df.OSC > v)&(s_df.AGP > v)]
Out[25]:
In [26]:
v = 0.4
s_df[(s_df.OSC < v)&(s_df.AGP < v)]
Out[26]:
In [27]:
s1 = [s for s in soprano_phrases if s.piece_name=='233'][0]
s1.contours
Out[27]:
In [28]:
s1.get_similarity_map('OSC')
Out[28]:
In [29]:
s1.plot_similarity_map('OSC')
In [30]:
s1.plot_adjacent_similarity('OSC')
Out[30]:
In [31]:
# AGP internal similarities
s1.get_similarity_map('AGP')
Out[31]:
In [32]:
s1.plot_similarity_map('AGP')
In [33]:
s1.plot_adjacent_similarity('AGP')
Out[33]:
In [34]:
s2 = [s for s in soprano_phrases if s.piece_name=='124'][0]
s2.contours
Out[34]:
In [35]:
# OSC internal similarities
s2.get_similarity_map('OSC')
Out[35]:
In [36]:
s2.plot_similarity_map('OSC')
In [37]:
s2.plot_adjacent_similarity('OSC')
Out[37]:
In [38]:
# AGP internal similarities
s2.get_similarity_map('AGP')
Out[38]:
In [39]:
s2.plot_similarity_map('AGP')
In [40]:
s2.plot_adjacent_similarity('AGP')
Out[40]: