In [84]:
%pylab inline
from scipy import *
import aifc

sound_file = "/Users/sean/Desktop/sound_files/8.aif" # C4+A4_PopOrgan F3_PopOrgan F4_CathedralOrgan A4_PopOrgan

######################### Useful Functions #########################
def notes_freqs(octaves=8):
    """ generates a dictionary of notes for 'octaves' number of octaves, with their 
        corresponding frequency [Hz] """
    note_counting = {'A':9, 'B':11, 'C':0, 'D':2, 'E':4, 'F':5, 'G':7}
    notes = note_counting.keys()
    notes.sort()
    all_notes = {}
    for octave in range(octaves):
        for note in notes:
            note_name = note + str(octave)
            note_freq = 440.0*2**((note_counting[note]+3+12*(octave - 5))/12.0)
            # note that frequencies are centered on A4 = 440 Hz
            all_notes[note_name] = note_freq
    return all_notes

def smooth(data, window):
    """ Takes in a 1-D data array and uses a moving average to smooth it with window size 
        'window' and returns the smoothed data """
    new_data = list(data)[:] # make sure we're making a copy, not a view.  Need a copy for book-ends data
    space = (window+1)/2
    for i in range(space, len(data) - space):
        new_data[i] = mean(data[i - space: i + space])
    return new_data

######################### Load Data & FFT #########################
# load file and read in data
a = aifc.open(sound_file, 'rb')
RATE = a.getframerate()
data = a.readframes(a.getnframes())

# format data for plotting and analysis
integer_data = fromstring(data, dtype=np.int32).byteswap() # this line took a while to figure out properly
time = arange(size(integer_data)) / float(RATE) # time in seconds

# power spectrum analysis
fft_data = fft(integer_data)
n = len(integer_data)
nUniquePts = ceil((n+1)/2.0)
fft_data = abs(fft_data[:nUniquePts]) / float(n) # just take first half, and take magnitude
power_data = 2*(fft_data**2) # convert to power
freqArray = arange(0, nUniquePts, 1.0) * (RATE / float(n)) # create and noramlize / fit frequency vectory

######################### Peak Finding #########################
thresh = 100*mean(power_data) # peak threshold -- analyze on linear scale
freq_thresh = 2 # threshold in Hz for an identified peak to match a note's frequency
peaks_data = []
p_old = 0

# first, find number of peaks and their footprints
smoothed_data = smooth(power_data, 200)
for i, p in enumerate(smoothed_data): # smooth out peak clusters
    if (p_old < thresh) and (p > thresh): # we just entered a new peak cluster
        temp = [] # reset temp
        temp.append( (freqArray[i], power_data[i], smoothed_data[i]) ) # and record the index and true value
    elif p > thresh: # we are inside the peak cluster
        temp.append( (freqArray[i], power_data[i], smoothed_data[i]) )
    elif (p_old > thresh) and (p < thresh): # we just exited the peak cluster
        peaks_data.append(temp) # add the peak cluster
    p_old = p

# Next, find the actual positions of the peaks (for better accuracy: fit gaussian)
peaks = []
possible_notes = notes_freqs(20) # generate 20 octaves' worth of notes to consider
for peak in peaks_data:
    peak.sort(key = lambda x: x[1]) # sort based on peak value
    peak = peak[-1] # pull out the max
    for note, freq in possible_notes.items(): # decide if it's a note, and if so, which one
        if abs(peak[0] - freq) < freq_thresh:
            freq, val, smoothpk = peak
            peaks.append( (freq, val, note, smoothpk) )
            
######################### Plotting #########################
# Plot actual sound waveforms
title = "Plotted File: " + sound_file.split('/')[-1].split('.')[0]
fig, (axt, axf, axpeaks) = plt.subplots(3,1,figsize=(7, 13.5)) # fig size was (7, 9)
axt.plot(time, integer_data, color="red", linestyle="-")
my_title = axt.set_title(title)
axt.set_xlabel("Time [s]")
axt.set_ylabel("Amplitude")
axt.set_xlim(min(time), max(time))

# Plot power spectrum and label peaks
axf.plot(freqArray, 10*log10(power_data), color="blue", linestyle="-")
for peak in peaks: # plot and label peaks
    axf.plot(peak[0], 10*log10(peak[1]), 'ro', ms=5)
    axf.annotate(peak[2], xy=(peak[0], 10*log10(peak[1])),  xycoords='data',
                 xytext=(-5, 15), textcoords='offset points',size=10)
axf.set_xlabel("Frequency [Hz]")
axf.set_ylabel("Power [dB]")
axf.set_xlim(min(freqArray), 5000)

# Plot smoothed power spectrum on linear axese with threshold, to show peak detection
axpeaks.plot(freqArray, smoothed_data, color="green", linestyle="-")
axpeaks.annotate("Peak detection visualization", xy=(.3, .7),  xycoords='axes fraction', fontsize=14)
for peak in peaks: # plot and label peaks
    axpeaks.annotate(peak[2], xy=(peak[0], peak[3]),  xycoords='data',
                 xytext=(-5, 3), textcoords='offset points',size=10)
axpeaks.set_xlabel("Frequency [Hz]")
axpeaks.hlines(thresh, min(freqArray), max(freqArray))
axpeaks.set_ylabel("Power [Arbitrary Linear]")
axpeaks.set_xlim(min(freqArray), 2*peaks[-1][0])

######################### Output #########################
print("\nFor the file '{0:s}', the following possible notes were detected:".format(sound_file.split('/')[-1].split('.')[0]))
for peak in peaks:
    print("{0:s}, at {1:.2f} Hz ({2:.2f} Hz is exact frequency for a {0:s})".format(peak[2], peak[0], possible_notes[peak[2]]))


Populating the interactive namespace from numpy and matplotlib

For the file '8', the following possible notes were detected:
F4, at 348.50 Hz (349.23 Hz is exact frequency for a F4)
F5, at 699.44 Hz (698.46 Hz is exact frequency for a F5)
C6, at 1047.44 Hz (1046.50 Hz is exact frequency for a C6)
WARNING: pylab import has clobbered these variables: ['power', 'show_config', 'log2', 'arccos', 'arctanh', 'title', 'fft', 'sqrt', '__version__', 'test', 'log', 'log10', 'arcsin']
`%pylab --no-import-all` prevents importing * from pylab and numpy

In [83]: