In [1]:
# Load external Jupyter extensions
%load_ext autoreload
%load_ext snakeviz
In [2]:
# jupyter settings
%autoreload 2
%matplotlib inline
%autocall 1
# imports
import numpy
import scipy
import pandas
import sklearn
import sklearn.cluster
import os
import glob
import json
from matplotlib import pyplot as plt
import itertools
import collections
import music21
import Levenshtein
from django.core.wsgi import get_wsgi_application
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "contourMetrics.settings.production")
application = get_wsgi_application()
import lib.contour
import cProfile
from django.db.models import Q
from lib.contour import ContourPoint as CP
from lib.contour import Contour as C
import lib.generator
import lib.utils
import lib.analysis
import lib
from apps.calculator import models
In [3]:
# Auxiliary functions and classes
def log_progress(sequence, every=None, size=None):
    # https://github.com/alexanderkuk/log-progress
    from ipywidgets import IntProgress, HTML, VBox
    from IPython.display import display

    is_iterator = False
    if size is None:
        try:
            size = len(sequence)
        except TypeError:
            is_iterator = True
    if size is not None:
        if every is None:
            if size <= 200:
                every = 1
            else:
                every = int(size / 200)  # every 0.5%
    else:
        assert every is not None, 'sequence is iterator, set every'

    if is_iterator:
        progress = IntProgress(min=0, max=1, value=1)
        progress.bar_style = 'info'
    else:
        progress = IntProgress(min=0, max=size, value=0)
    label = HTML()
    box = VBox(children=[label, progress])
    display(box)

    index = 0
    try:
        for index, record in enumerate(sequence, 1):
            if index == 1 or index % every == 0:
                if is_iterator:
                    label.value = '{index} / ?'.format(index=index)
                else:
                    progress.value = index
                    label.value = u'{index} / {size}'.format(
                        index=index,
                        size=size
                    )
            yield record
    except:
        progress.bar_style = 'danger'
        raise
    else:
        progress.bar_style = 'success'
        progress.value = index
        label.value = str(index or '?')
class PieceAux(object):
    """Wrapper around a Piece model: collects the normalized contours of one
    voice and caches pairwise similarity data for the OSC and AGP algorithms."""

    def __init__(self, piece_model, voice=0):
        self.contours = [p.contour.normalized for p in piece_model.phrase_set.filter(voice=voice)]
        self.piece_id = piece_model.id
        self.piece_name = piece_model.name
        self.comparison = lib.analysis.GeneralComparison([lib.contour.Contour(c) for c in self.contours])
        self.algorithms = ['OSC', 'AGP']
        self.similarity_series = {algorithm: self.comparison.similarity_series(algorithm) for algorithm in self.algorithms}
        self._similarity_maps = {}

    def __repr__(self):
        return '<P {}>'.format(self.piece_name)

    def get_mean(self):
        # Mean similarity for each algorithm, in the order of self.algorithms
        return numpy.array([self.similarity_series[a].mean() for a in self.algorithms])

    def get_similarity_map(self, algorithm='OSC'):
        # Cache the pairwise similarity matrix per algorithm
        if algorithm not in self._similarity_maps:
            self._similarity_maps[algorithm] = lib.utils.ExtendedDataFrame(self.comparison.similarity_map(algorithm))
        return self._similarity_maps[algorithm]

    def plot_similarity_map(self, algorithm='OSC'):
        edf = self.get_similarity_map(algorithm)
        return edf.heat_plot()

    def get_adjacent_similarity(self, algorithm='OSC'):
        # Similarity between each contour and its predecessor (first superdiagonal)
        df = self.get_similarity_map(algorithm)
        arr = numpy.array(df).diagonal(1)
        return pandas.Series(arr, index=df.index[1:])

    def plot_adjacent_similarity(self, algorithm='OSC'):
        s = self.get_adjacent_similarity(algorithm)
        return s.plot(grid=True, title='Similarity with previous contour P.{} ({})'.format(self.piece_name, algorithm))
def make_similarity_df(phrases):
    # Get similarity means by piece, indexed by piece name
    s = []
    i = []
    for p in phrases:
        s.append(p.get_mean())
        i.append(p.piece_name)
    return pandas.DataFrame(s, index=i, columns=['OSC', 'AGP'])
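For orientation, here is a minimal sketch of what a pairwise similarity map over contours looks like, using a normalized Levenshtein ratio as a stand-in metric; the actual OSC/AGP computations live in lib.analysis.GeneralComparison and are not reproduced here. The sketch relies on the numpy, pandas and Levenshtein imports above.

def toy_similarity_map(contours):
    # Illustrative stand-in: string-encode each contour and compare with
    # Levenshtein.ratio; this is NOT the OSC or AGP metric.
    labels = [str(c) for c in contours]
    n = len(labels)
    mat = numpy.zeros((n, n))
    for a in range(n):
        for b in range(n):
            mat[a, b] = Levenshtein.ratio(labels[a], labels[b])
    return pandas.DataFrame(mat, index=labels, columns=labels)

# e.g. toy_similarity_map([[0, 2, 1], [0, 1, 2], [2, 1, 0]])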
In [4]:
# Only contours with 2 to 5 points (size > 1 and size < 6)
csegs = [lib.contour.Contour(c['normalized']) for c in models.Contour.objects.filter(size__gt=1).filter(size__lt=6).values()]
gc = lib.analysis.GeneralComparison(csegs)
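A quick sanity check on the filter above, sketched under the assumption that the Contour model exposes its size field through .values(): tally how many contours of each size were selected.

# Sketch: size distribution of the selected contours (should contain only 2-5)
collections.Counter(
    c['size'] for c in models.Contour.objects.filter(size__gt=1, size__lt=6).values()
)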
In [5]:
algorithms = ['OSC', 'AGP', 'EMB']
ind = [(tuple(a.sequence), tuple(b.sequence)) for a, b in itertools.combinations(csegs, 2)]
seq = [gc.similarity_series(a) for a in log_progress(algorithms)]
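The index built above follows the ordering of itertools.combinations; assigning it to the similarity DataFrame in the next cell assumes that similarity_series returns its values in that same pair order. A tiny illustration of the ordering on plain labels:

# Combination order used for the (contour, contour) index
list(itertools.combinations(['A', 'B', 'C'], 2))
# [('A', 'B'), ('A', 'C'), ('B', 'C')]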
In [6]:
gc_df = pandas.DataFrame(seq).T
gc_df.columns = algorithms
gc_df.index = ind
In [7]:
# OSC < 0.1 AND AGP > 0.9
gc_df[(gc_df.OSC<0.1)&(gc_df.AGP>0.9)]
Out[7]:
In [8]:
# AGP < 0.1 AND OSC > 0.9
gc_df[(gc_df.AGP<0.1)&(gc_df.OSC>0.9)]
Out[8]:
In [9]:
gc_df.describe()
Out[9]:
In [10]:
gc_df.boxplot()
Out[10]:
In [11]:
gc_df.hist()
Out[11]:
In [12]:
gc_df.corr()
Out[12]:
In [13]:
gc_df.plot(kind='scatter', x='OSC', y='AGP')
Out[13]:
In [14]:
gc_df.plot(kind='scatter', x='OSC', y='EMB')
Out[14]:
In [15]:
gc_df.plot(kind='scatter', x='EMB', y='AGP')
Out[15]:
In [16]:
chorales_pieces = models.Piece.objects.filter(collection__name='Chorales')
soprano_phrases = [PieceAux(p, 0) for p in log_progress(chorales_pieces)]
bass_phrases = [PieceAux(p, 3) for p in log_progress(chorales_pieces)]
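The voice indices assume SATB ordering (0 = soprano, 3 = bass). A quick sketch for checking that convention on a single chorale by counting phrases per voice:

# Sketch: phrases per voice for the first chorale (indices 0-3 assumed S, A, T, B)
piece = chorales_pieces.first()
{v: piece.phrase_set.filter(voice=v).count() for v in range(4)}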
In [17]:
s_df = make_similarity_df(soprano_phrases)
b_df = make_similarity_df(bass_phrases)
In [18]:
s_df.describe()
Out[18]:
In [19]:
b_df.describe()
Out[19]:
In [20]:
s_df.boxplot()
Out[20]:
In [21]:
b_df.boxplot()
Out[21]:
In [22]:
s_df.hist(), b_df.hist()
Out[22]:
In [23]:
s_df.corr().OSC.AGP, b_df.corr().OSC.AGP
Out[23]:
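The same values can be read directly as the pairwise Pearson correlation between the two columns:

# Equivalent to .corr().OSC.AGP, per voice
s_df['OSC'].corr(s_df['AGP']), b_df['OSC'].corr(b_df['AGP'])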
In [24]:
s_df.plot(kind='scatter', x='OSC', y='AGP', grid=True), b_df.plot(kind='scatter', x='OSC', y='AGP', grid=True)
Out[24]:
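To see which pieces sit at the extremes of these scatter plots, the points can be labeled with their piece names (a sketch on the soprano DataFrame; the same works for b_df):

# Sketch: annotate each point with its piece name to spot outliers
ax = s_df.plot(kind='scatter', x='OSC', y='AGP', grid=True)
for name, row in s_df.iterrows():
    ax.annotate(name, (row.OSC, row.AGP), fontsize=6)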
In [25]:
v = 0.9
s_df[(s_df.OSC > v)&(s_df.AGP > v)]
Out[25]:
In [26]:
v = 0.4
s_df[(s_df.OSC < v)&(s_df.AGP < v)]
Out[26]:
In [27]:
s1 = [s for s in soprano_phrases if s.piece_name=='233'][0]
s1.contours
Out[27]:
In [28]:
s1.get_similarity_map('OSC')
Out[28]:
In [29]:
s1.plot_similarity_map('OSC')
In [30]:
s1.plot_adjacent_similarity('OSC')
Out[30]:
In [31]:
# AGP internal similarities
s1.get_similarity_map('AGP')
Out[31]:
In [32]:
s1.plot_similarity_map('AGP')
In [33]:
s1.plot_adjacent_similarity('AGP')
Out[33]:
In [34]:
s2 = [s for s in soprano_phrases if s.piece_name=='124'][0]
s2.contours
Out[34]:
In [35]:
# OSC internal similarities
s2.get_similarity_map('OSC')
Out[35]:
In [36]:
s2.plot_similarity_map('OSC')
In [37]:
s2.plot_adjacent_similarity('OSC')
Out[37]:
In [38]:
# AGP internal similarities
s2.get_similarity_map('AGP')
Out[38]:
In [39]:
s2.plot_similarity_map('AGP')
In [40]:
s2.plot_adjacent_similarity('AGP')
Out[40]: