In [1]:
from pylab import *
from bregman.suite import *
import scipy.signal as signal
import pdb
In [2]:
%matplotlib inline
Below we define the function `ideal_chromatic_dissonance`, which has 2 possible parameters: `num_harmonics` and `f0`. We set these parameters to 7 and 440 as default values. Whenever we "call" this function, it returns an array of chromatic dissonance values based on the parameters we have sent it for the number of harmonics and the fundamental frequency.
In [3]:
def ideal_chromatic_dissonance(num_harmonics=7, f0=440):
    """
    Compute one octave of chromatic dissonance values.

    For each of the 13 chromatic steps (root through octave), the harmonic
    series of the shifted tone is merged with the harmonic series of the
    root tone and the combined, frequency-sorted spectrum is passed to
    dissonance_fun.

    Parameters
    ----------
    num_harmonics : int
        Number of partials per tone (default 7).
    f0 : float
        Fundamental frequency in Hz (default 440).

    Returns
    -------
    ndarray
        One dissonance value per chromatic step (13 values).
    """
    partials = arange(num_harmonics) + 1
    root_freqs = [f0 * p for p in partials]
    # Partial amplitudes decay exponentially with partial number
    root_amps = [exp(-.5 * p) for p in partials]
    # Harmonic series for each of the 13 equal-tempered chromatic steps
    step_freqs = array([[f0 * 2 ** (step / 12.) * p for p in partials]
                        for step in range(0, 13)])
    # Both tones share the same amplitude envelope
    combined_amps = r_[root_amps, root_amps]
    diss_values = []
    for shifted in step_freqs:
        combined_freqs = r_[root_freqs, shifted]
        order = combined_freqs.argsort()
        diss_values.append(dissonance_fun(combined_freqs[order],
                                          combined_amps[order]))
    return array(diss_values)
Next we define a function that takes a parameter `F`, representing the features from a signal. Stored in `F.X` is the Short-Time Fourier Transform. By finding peaks in our frequency transform, we can better understand where most of the energy of a signal is located in terms of its frequencies, rather than working with all frequency values. We'll need this function when calculating the dissonance.
In [4]:
def get_peaks(F):
    """
    Extract peaks from the linear spectrum in F.

    Algorithm 1: zero-crossings of the derivative of the smoothed spectrum.

    Parameters
    ----------
    F : object with attribute X
        F.X is a 2D array of spectral magnitudes
        (frequency bins x frames).

    Returns
    -------
    list of ndarray
        One array of peak bin indices per frame, sorted by descending
        magnitude in the smoothed spectrum.
    """
    X = F.X.copy()
    b, a = signal.butter(10, .25)  # low-pass filter coefficients
    # Smoothing: filtfilt returns the filtered array rather than working
    # in place, so its result must be assigned back to X.
    X = signal.filtfilt(b, a, X, axis=0)
    # Derivative along the frequency axis
    Xd = diff(X, axis=0)
    # Zero crossings of the derivative: positive-to-negative = local maximum
    thresh = 1e-9
    peak_idx = []
    for i, x in enumerate(Xd.T):
        idx = where((x[:-1] > thresh) & (x[1:] < -thresh))[0] + 1
        if len(idx):
            # Order this frame's peaks by descending spectral magnitude
            idx = idx[X[idx, i].argsort()][::-1]
        peak_idx.append(idx)
    return peak_idx
In [5]:
def audio_chromatic_scale(f0=440, num_harmonics=7):
    """
    Synthesize one octave of a chromatic scale as harmonic tones.

    Renders 13 tones (root through octave, equal temperament) starting
    at f0, each 11025 samples long, and stacks them row-wise.

    Parameters
    ----------
    f0 : float
        Fundamental frequency of the root tone in Hz (default 440).
    num_harmonics : int
        Number of partials per tone (default 7).

    Returns
    -------
    ndarray
        Array of shape (13, 11025), one tone per row.
    """
    num_samples = 11025
    tones = [harmonics(f0=f0 * 2 ** (step / 12.),
                       num_harmonics=num_harmonics,
                       num_points=num_samples)
             for step in arange(13)]
    return vstack(tones)
In [6]:
def audio_chromatic_dissonance(f0=440, num_harmonics=7, num_peaks=10):
    """
    Estimate chromatic dissonance from synthesized audio.

    Mixes the root tone with each of the 13 chromatic-scale tones,
    extracts spectral peaks from each analysis frame of the mixture,
    and averages the per-frame dissonance values.

    Parameters
    ----------
    f0 : float
        Fundamental frequency in Hz (default 440).
    num_harmonics : int
        Number of partials per tone (default 7).
    num_peaks : int
        Number of strongest spectral peaks used per frame (default 10).

    Returns
    -------
    ndarray
        One mean dissonance value per chromatic step (13 values).
    """
    sr = 44100
    nfft = 8192
    # Bin width in Hz. Use true division: under Python 2, sr/nfft is
    # integer division (44100/8192 == 5), which would skew every peak
    # frequency by ~7%.
    afreq = float(sr) / nfft
    H = audio_chromatic_scale(f0=f0, num_harmonics=num_harmonics)
    h0 = H[0]
    diss = []
    for i, h in enumerate(H):
        # Analyze the average of the root tone and the interval tone.
        # nfft//2 and nfft//4 keep the window/hop sizes integral on
        # both Python 2 and 3.
        F = LinearFrequencySpectrum((h0 + h) / 2., nfft=nfft,
                                    wfft=nfft // 2, nhop=nfft // 4)
        P = get_peaks(F)
        frame = []
        for j, p in enumerate(P):
            freqs = afreq * p[:num_peaks]       # peak bin indices -> Hz
            mags = F.X[p[:num_peaks], j]
            idx = freqs.argsort()
            frame.append(dissonance_fun(freqs[idx], mags[idx]))
        # Average the dissonance over all analysis frames
        diss.append(array(frame).mean())
    return array(diss)
In [7]:
def dissonance_plot(f0=440, num_harmonics=7, num_peaks=10):
    """
    Plot ideal vs. audio-estimated chromatic dissonance curves.

    Each curve is normalized by its own maximum so the two are
    directly comparable on one axis.

    Parameters
    ----------
    f0 : float
        Fundamental frequency in Hz (default 440).
    num_harmonics : int
        Number of partials per tone (default 7).
    num_peaks : int
        Number of spectral peaks used by the audio estimate (default 10).
    """
    figure()
    ideal = ideal_chromatic_dissonance(f0=f0, num_harmonics=num_harmonics)
    estimated = audio_chromatic_dissonance(f0=f0,
                                           num_harmonics=num_harmonics,
                                           num_peaks=num_peaks)
    # Dashed line = ideal curve, solid line = audio estimate
    plot(ideal / ideal.max(), linestyle='--', linewidth=2)
    plot(estimated / estimated.max())
    t_str = 'f0=%d, partials=%d, peaks=%d' % (f0, num_harmonics, num_peaks)
    title('Dissonance (chromatic): ' + t_str, fontsize=16)
    legend(['ideal', 'estimated'])
    xlabel('Pitch class (chroma)', fontsize=14)
    ylabel('Dissonance', fontsize=14)
    grid()
In [8]:
dissonance_plot(f0=440, num_harmonics=7, num_peaks=10)
Try changing `f0`, `num_harmonics`, and `num_peaks` to see how they affect the resulting plot. What are each of these values doing?
In [9]:
# Render the chromatic scale: expect 13 notes x 11025 samples each.
notes = audio_chromatic_scale(f0 = 440, num_harmonics = 7)
# print(...) with one argument behaves identically on Python 2 and 3.
print(notes.shape)
We can use the `hstack` function to combine all notes into one long sequence, and the `play` function to play the entire sequence:
In [10]:
play(hstack(notes))
In [11]:
plt.plot(hstack(notes))
Out[11]:
The `notes` array is 13 x 11025. To access the first note, we can say `notes[0]`. We index the first note with 0 and the last one with 12. The dimension of each note is 11025 samples. Let's try visualizing just the first note and the first 400 samples of that note:
In [12]:
plt.plot(notes[0][:400])
Out[12]:
In [13]:
# With a single harmonic (N=1) each note is a pure sine tone
F = 440
N = 1
notes = audio_chromatic_scale(f0 = F, num_harmonics = N)
# Waveform of the first note's first 400 samples
plt.plot(notes[0][:400])
# Listen to the whole scale
play(hstack(notes))
# Dissonance of pure tones, using only 2 spectral peaks
dissonance_plot(f0 = F, num_harmonics = N, num_peaks = 2)
In [14]:
# Let's try a fundamental frequency of 440
F = 440
# With 7 harmonics. Use a distinct name for the count so it is not
# clobbered when H is reassigned to the rendered signal below.
NUM_HARMONICS = 7
# sr, or sample rate, describes how many samples we'd like in a single second of audio data
sr = 44100
# this is how many samples we'll use when calculating the FFT
nfft = 8192
# Let's get the signal
H = audio_chromatic_scale(f0 = F, num_harmonics = NUM_HARMONICS)
# The first note is stored in H[0]
h0 = H[0]
In [15]:
F = LinearFrequencySpectrum(h0,nfft=nfft,wfft=nfft/2,nhop=nfft/4)
In [16]:
print F.X.shape
In [17]:
stfts = F.X[:100,:]
In [18]:
plt.plot(np.arange(stfts.shape[0]) * sr / nfft, stfts)
Out[18]:
In [19]:
P = get_peaks(F)
In [20]:
P[0] * sr / nfft
Out[20]:
In [20]: