In [84]:
%pylab inline
from scipy import *
import aifc
sound_file = "/Users/sean/Desktop/sound_files/8.aif" # C4+A4_PopOrgan F3_PopOrgan F4_CathedralOrgan A4_PopOrgan
######################### Useful Functions #########################
def notes_freqs(octaves=8):
""" generates a dictionary of notes for 'octaves' number of octaves, with their
corresponding frequency [Hz] """
note_counting = {'A':9, 'B':11, 'C':0, 'D':2, 'E':4, 'F':5, 'G':7}
notes = note_counting.keys()
notes.sort()
all_notes = {}
for octave in range(octaves):
for note in notes:
note_name = note + str(octave)
note_freq = 440.0*2**((note_counting[note]+3+12*(octave - 5))/12.0)
# note that frequencies are centered on A4 = 440 Hz
all_notes[note_name] = note_freq
return all_notes
def smooth(data, window):
""" Takes in a 1-D data array and uses a moving average to smooth it with window size
'window' and returns the smoothed data """
new_data = list(data)[:] # make sure we're making a copy, not a view. Need a copy for book-ends data
space = (window+1)/2
for i in range(space, len(data) - space):
new_data[i] = mean(data[i - space: i + space])
return new_data
######################### Load Data & FFT #########################
# load file and read in data
a = aifc.open(sound_file, 'rb')
RATE = a.getframerate()
data = a.readframes(a.getnframes())
# format data for plotting and analysis
integer_data = fromstring(data, dtype=np.int32).byteswap() # this line took a while to figure out properly
time = arange(size(integer_data)) / float(RATE) # time in seconds
# power spectrum analysis
fft_data = fft(integer_data)
n = len(integer_data)
nUniquePts = ceil((n+1)/2.0)
fft_data = abs(fft_data[:nUniquePts]) / float(n) # just take first half, and take magnitude
power_data = 2*(fft_data**2) # convert to power
freqArray = arange(0, nUniquePts, 1.0) * (RATE / float(n)) # create and noramlize / fit frequency vectory
######################### Peak Finding #########################
thresh = 100*mean(power_data) # peak threshold -- analyze on linear scale
freq_thresh = 2 # threshold in Hz for an identified peak to match a note's frequency
peaks_data = []
p_old = 0
# first, find number of peaks and their footprints
smoothed_data = smooth(power_data, 200)
for i, p in enumerate(smoothed_data): # smooth out peak clusters
if (p_old < thresh) and (p > thresh): # we just entered a new peak cluster
temp = [] # reset temp
temp.append( (freqArray[i], power_data[i], smoothed_data[i]) ) # and record the index and true value
elif p > thresh: # we are inside the peak cluster
temp.append( (freqArray[i], power_data[i], smoothed_data[i]) )
elif (p_old > thresh) and (p < thresh): # we just exited the peak cluster
peaks_data.append(temp) # add the peak cluster
p_old = p
# Next, find the actual positions of the peaks (for better accuracy: fit gaussian)
peaks = []
possible_notes = notes_freqs(20) # generate 20 octaves' worth of notes to consider
for peak in peaks_data:
peak.sort(key = lambda x: x[1]) # sort based on peak value
peak = peak[-1] # pull out the max
for note, freq in possible_notes.items(): # decide if it's a note, and if so, which one
if abs(peak[0] - freq) < freq_thresh:
freq, val, smoothpk = peak
peaks.append( (freq, val, note, smoothpk) )
######################### Plotting #########################
# Plot actual sound waveforms
title = "Plotted File: " + sound_file.split('/')[-1].split('.')[0]
fig, (axt, axf, axpeaks) = plt.subplots(3,1,figsize=(7, 13.5)) # fig size was (7, 9)
axt.plot(time, integer_data, color="red", linestyle="-")
my_title = axt.set_title(title)
axt.set_xlabel("Time [s]")
axt.set_ylabel("Amplitude")
axt.set_xlim(min(time), max(time))
# Plot power spectrum and label peaks
axf.plot(freqArray, 10*log10(power_data), color="blue", linestyle="-")
for peak in peaks: # plot and label peaks
axf.plot(peak[0], 10*log10(peak[1]), 'ro', ms=5)
axf.annotate(peak[2], xy=(peak[0], 10*log10(peak[1])), xycoords='data',
xytext=(-5, 15), textcoords='offset points',size=10)
axf.set_xlabel("Frequency [Hz]")
axf.set_ylabel("Power [dB]")
axf.set_xlim(min(freqArray), 5000)
# Plot smoothed power spectrum on linear axese with threshold, to show peak detection
axpeaks.plot(freqArray, smoothed_data, color="green", linestyle="-")
axpeaks.annotate("Peak detection visualization", xy=(.3, .7), xycoords='axes fraction', fontsize=14)
for peak in peaks: # plot and label peaks
axpeaks.annotate(peak[2], xy=(peak[0], peak[3]), xycoords='data',
xytext=(-5, 3), textcoords='offset points',size=10)
axpeaks.set_xlabel("Frequency [Hz]")
axpeaks.hlines(thresh, min(freqArray), max(freqArray))
axpeaks.set_ylabel("Power [Arbitrary Linear]")
axpeaks.set_xlim(min(freqArray), 2*peaks[-1][0])
######################### Output #########################
print("\nFor the file '{0:s}', the following possible notes were detected:".format(sound_file.split('/')[-1].split('.')[0]))
for peak in peaks:
print("{0:s}, at {1:.2f} Hz ({2:.2f} Hz is exact frequency for a {0:s})".format(peak[2], peak[0], possible_notes[peak[2]]))
In [83]: