In [1]:

    
import sys
sys.path.insert(2, '../src/')
# project imports
import read_audio, fingerprinting, match
from scipy.io import wavfile

Are two files the same?



In [3]:

    
fingerprinting = reload(fingerprinting)



In [5]:

    
mono1 = read_audio.get_mono('../data/Sor1929.wav')
mono2 = read_audio.get_mono('../data/Sor1929.wav')



In [44]:

    
fingerprints = [fingerprinting.get_fingerprints(m) for m in (mono1, mono2)]



In [11]:

    
fingerprints[0] == fingerprints[1]









    Out[11]:





True

Testing how different the fingerprints of two files can be



In [96]:

    
def jaccard(s1, s2):
    """Return the Jaccard similarity |s1 & s2| / |s1 | s2| as a float.

    Args:
        s1, s2: collections of hashable items. Any iterable is accepted;
            both are converted to sets before comparing.

    Returns:
        A float in [0.0, 1.0]. Two empty inputs are treated as identical and
        yield 1.0 (the ratio would otherwise be 0/0).
    """
    s1, s2 = set(s1), set(s2)
    union = s1 | s2
    if not union:
        # Both inputs are empty: avoid ZeroDivisionError.
        return 1.0
    return float(len(s1 & s2)) / len(union)



In [118]:

    
def get_difference(f1, f2):
    """Compare the fingerprints of two audio files.

    Despite the name, the value returned is a *similarity*: the Jaccard
    index of the two fingerprint sets, as computed by ``jaccard``.

    Args:
        f1, f2: paths handed to ``match.get_mono``.

    Returns:
        The result of ``jaccard`` on the two fingerprint sets.
    """
    mono1, mono2 = match.get_mono(f1), match.get_mono(f2)
    # Build the sets eagerly in a list comprehension: indexing the result of
    # map() only works on Python 2, where map returns a list.
    prints1, prints2 = [
        set(fingerprinting.get_fingerprints(m, plot=[True, True]))
        for m in (mono1, mono2)
    ]
    return jaccard(prints1, prints2)



In [119]:

    
get_difference('../data/z03.wav', '../data/z04.wav')









    












    












    












    












    Out[119]:





0.5166908563134979

Testing updated matching



In [34]:

    
import match
import datastore
import fingerprinting



In [35]:

    
%load_ext autoreload
%autoreload 2









    



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload



In [37]:

    
os.listdir('../data/A4new')









    Out[37]:





['curieuse.mp3',
 'curieuse.wav',
 'curieuse2.wav',
 'janacek.mp3',
 'janacek.wav',
 'janacek2.wav',
 'JanacekSinfoniettaPart1.mp3',
 'maynard.mp3',
 'maynard.wav',
 'maynard2.wav',
 'MMW.mp3',
 'Piste1.mp3',
 'README',
 'rimsky.mp3',
 'rimsky.wav',
 'rimsky2.wav',
 'RussianEasterOverture.mp3',
 'sons.mp3',
 'sons.wav',
 'sons2.wav',
 'WhoopeeTiYiYo.mp3']



In [15]:

    
# Fingerprint the selected files in ../data/ and add them to the datastore.
for f in os.listdir('../data/'):
    # os.path.splitext copes with entries that have no dot at all (e.g.
    # sub-directories), where f.split('.')[1] raises IndexError, and with
    # names containing several dots.
    if os.path.splitext(f)[1] == '.mp3':
        continue
    # Only names containing 'Sor' or 'bad' are fingerprinted...
    if 'Sor' not in f and 'bad' not in f:
        continue
    # ...except those whose name contains '4959' or '1929'.
    if '4959' in f or '1929' in f:
        continue
    datastore.add_fingerprints(os.path.join('../data/', f))



In [12]:

    
def get_match(fpath):
    """Return the stored song file that the audio at ``fpath`` resolves to.

    The file is read via ``read_audio.get_mono`` and fingerprinted; the
    fingerprints go to ``match.get_matches``, and the song id it returns is
    translated to a file by ``datastore.get_song_file_from_id``.
    """
    hashes = fingerprinting.get_fingerprints(read_audio.get_mono(fpath))
    return datastore.get_song_file_from_id(match.get_matches(hashes))



In [63]:

    
from collections import defaultdict

def is_match(f1, f2, threshold=5):
    """Return True if f1 matches f2, False otherwise.

    Both files are read with ``read_audio.get_mono`` and fingerprinted with
    ``fingerprinting.get_fingerprints``, which is expected to yield
    ``(fingerprint, offset)`` pairs. For every pair of equal fingerprints
    (one from each file) the absolute difference of their offsets is
    tallied; the files match when some offset difference is seen more than
    ``threshold`` times.

    Args:
        f1, f2: paths of the two audio files to compare.
        threshold: number of agreeing fingerprint pairs that must be
            exceeded for a match (default 5, the original hard-coded value).

    Returns:
        bool: always True or False, never None.
    """
    mono1, mono2 = read_audio.get_mono(f1), read_audio.get_mono(f2)
    prints1, prints2 = [fingerprinting.get_fingerprints(m) for m in (mono1, mono2)]

    # Index file 2 by fingerprint so each fingerprint of file 1 only visits
    # its equal counterparts instead of rescanning all of file 2.
    # Assumes fingerprints are hashable.
    offsets_by_print = defaultdict(list)
    for print2, offset2 in prints2:
        offsets_by_print[print2].append(offset2)

    # {offset_diff -> number of equal-fingerprint pairs with that difference}
    match_counter = defaultdict(int)
    for print1, offset1 in prints1:
        for offset2 in offsets_by_print.get(print1, ()):
            match_counter[abs(offset1 - offset2)] += 1

    max_count = max(match_counter.values()) if match_counter else 0
    return max_count > threshold



In [62]:

    
is_match('../data/A4new/curieuse.wav', '../data/A4new/curieuse.wav')









    



# of prints in file 1:  72
# of prints in file 2:  72






    Out[62]:





True



In [96]:

    
import subprocess
from os.path import basename

def resample_wav(fpath):
    """Resample the given WAV file to 44100 Hz using LAME.

    This is a slow, two-step round trip: LAME encodes ``fpath`` to a
    temporary mp3 while resampling, then that mp3 is converted back to WAV
    through ``read_audio.create_temp_wav_file``. The intermediate mp3 is
    deleted in every case.
    TODO: make this faster.

    Args:
        fpath: path of the WAV file to resample.

    Returns:
        Whatever ``read_audio.create_temp_wav_file`` returns for the
        resampled mp3 (presumably the path of the new WAV file).

    Raises:
        Exception: if the LAME command exits with a non-zero status.
    """
    # Local import: no top-level `import os` is visible in this notebook.
    import os

    # Prefix the resampled file name with "44-1_".
    resampled_mp3 = '/tmp/44-1_' + basename(fpath) + '.mp3'
    # Resample and convert to mp3.
    args = ['-V2', '--silent', '--abr', '24', '--vbr-new', '--resample', '44.1', fpath, resampled_mp3]
    # The context manager closes the devnull handle once LAME has finished.
    with open(os.devnull, 'w') as fnull:
        res = subprocess.call(LAME_CMD + args,
                              stdout=fnull,
                              stderr=subprocess.STDOUT)

    # Check the exit status before touching LAME's output.
    if res != 0:
        if os.path.exists(resampled_mp3):
            os.remove(resampled_mp3)
        raise Exception("Call to lame failed. It's either not installed or it failed.")

    # A 44.1 kHz mp3 now exists at resampled_mp3; convert it back to WAV and
    # remove the intermediate file even if the conversion fails.
    try:
        resamp_fpath = read_audio.create_temp_wav_file(resampled_mp3)
    finally:
        os.remove(resampled_mp3)
    return resamp_fpath



In [97]:

    
# NOTE(review): `resample` is not defined in any cell visible here (only
# resample_wav and resample_mp3 are) — confirm which function this should call.
resample('../data/A4new/rimsky.wav')









    Out[97]:





'/tmp/44-1_rimsky.wav'



In [98]:

    
wavfile.read('/tmp/44-1_rimsky.wav')









    Out[98]:





(44100, array([[   0,    0],
        [   0,    0],
        [   0,    0],
        ..., 
        [-428, -344],
        [-374, -384],
        [-255, -328]], dtype=int16))



In [101]:

    
LAME_CMD = ['/usr/bin/env', 'lame']



In [119]:

    
import resampling



In [ ]:

    
resampling.resample_mp3



In [117]:

    
def resample_mp3(fpath):
    """Resample the mp3 at ``fpath`` by way of a temporary WAV file.

    ``fpath`` is first passed to ``read_audio.create_temp_wav_file``; the
    result is then handed to ``resample_wav``, whose return value is
    returned unchanged.
    """
    return resample_wav(read_audio.create_temp_wav_file(fpath))



In [124]:

    
samplerate, channels = wavfile.read('/tmp/44-1_rimsky.wav')



In [125]:

    
samplerate









    Out[125]:





44100



In [128]:

    
len(channels) / samplerate









    Out[128]:





20