In [2]:
import numpy as np
import matplotlib.pyplot as plt
import scikits.audiolab as audiolab
import os
import os.path as path
import fnmatch
import itertools
from subprocess import call
from scipy.io import wavfile
from collections import defaultdict
from sklearn.cross_validation import train_test_split
from sklearn.svm import SVC
from collections import namedtuple
import sklearn.metrics as metrics
In [3]:
import spectral
from textgrid import TextGrid
In [4]:
def rglob(rootdir, pattern):
    """Recursively yield the paths of files below ``rootdir`` matching ``pattern``.

    ``pattern`` is a shell-style wildcard as understood by ``fnmatch`` and is
    tested against the bare file name only, not the directory part. Paths are
    produced lazily, in the order in which ``os.walk`` visits them.
    """
    for dirpath, _dirnames, filenames in os.walk(rootdir):
        # fnmatch.filter applies the same rule as fnmatch.fnmatch to each name
        # and keeps the listing order.
        for matched in fnmatch.filter(filenames, pattern):
            yield path.join(dirpath, matched)
In [5]:
def play(sig, fs, start_sec, end_sec):
    """Play the part of ``sig`` between ``start_sec`` and ``end_sec``.

    sig: sample array, sliced along its first axis.
    fs: sampling rate in samples per second; also handed to ``audiolab.play``.
    start_sec, end_sec: window boundaries in seconds; fractional values are allowed.
    """
    # Fractional seconds would produce float slice bounds, which numpy rejects,
    # so convert both boundaries to whole sample indices first.
    first = int(start_sec * fs)
    last = int(end_sec * fs)
    audiolab.play(sig[first:last], fs)
In [6]:
# Root directory of the blue monkey data (absolute, machine-specific path).
# The cells below search it recursively for wav files and read from its
# 'textgrids' and 'audio' subdirectories.
bluemonkeydir = '/home/mwv/data/monkey_sounds/Blue_monkeys/'
In [6]:
# sox the wav files
for wav in itertools.chain(rglob(bluemonkeydir, '*.wav'), rglob(bluemonkeydir, '*.WAV')):
call(['sox',
wav,
path.join(bluemonkeydir,
path.splitext(path.basename(wav))[0] + '_soxed.wav')])
In [7]:
# read textgrids
annot = defaultdict(list) # dict from filename to list of intervals
for textgridfile in rglob(path.join(bluemonkeydir, 'textgrids'), '*.TextGrid'):
key = path.splitext(path.basename(textgridfile))[0]
try:
tg = TextGrid.read(textgridfile)
except Exception as e:
print key
raise e
tier = tg.tiers[0]
for interval in tier:
if interval.mark.strip() != '':
annot[key].append((interval.mark.strip(), interval.start-tier.start))
In [9]:
labelset = sorted(list(set(x[0] for f in annot.itervalues() for x in f)))
label2idx = dict(zip(labelset, range(len(labelset))))
idx2label = dict(zip(range(len(labelset)), labelset))
nsamples = sum(map(len, annot.values()))
print 'number of samples:', nsamples
print 'unique labels:', labelset
nsamples_per_label = defaultdict(int)
for vals in annot.itervalues():
for mark, _ in vals:
nsamples_per_label[mark] += 1
print nsamples_per_label
In [11]:
# load one wave file to get samplerate
wav = rglob(bluemonkeydir, '*.wav').next()
fs, sound = wavfile.read(wav)
print 'sampling rate:', fs
print 'nchannels:', len(sound.shape)
In [12]:
# Spectral front-end settings; the same values size the feature matrix below.
nfilt = 40    # handed to Spectral as nfilt; also the width of one feature frame
wlen = 0.025  # handed to Spectral as wlen (presumably window length in seconds — confirm)
frate = 100   # handed to Spectral as frate; start times are multiplied by it to get frame indices
NFRAMES = 50 # 500 ms window after call start
encoder = spectral.Spectral(
    nfilt=nfilt,
    fs=fs,
    wlen=wlen,
    frate=frate,
    compression='log',
    nfft=1024,
    do_dct=False,
    do_deltas=False,
    do_deltasdeltas=False,
)
In [16]:
# load data
X = np.empty((nsamples, NFRAMES*nfilt), dtype=np.double)
y = np.empty((nsamples,), dtype=np.uint8)
idx = 0
for fname in annot:
wav = path.join(bluemonkeydir, 'audio', fname + '.wav')
if not path.exists(wav):
print 'missing wave file:', fname
continue
_, sig = wavfile.read(wav)
if len(sig.shape) > 1:
sig = (sig[:,0] + sig[:,1]) / 2
spec = encoder.transform(sig)
for mark, start in annot[fname]:
if int(start*frate) + NFRAMES >= spec.shape[0]:
s = spec[int(start*frate): int(start*frate)+NFRAMES]
extra_zeros = np.zeros((int(start*frate)+NFRAMES-spec.shape[0], nfilt), dtype=s.dtype)
x = np.vstack((s, extra_zeros))
X[idx] = np.hstack(x)
else:
X[idx] = np.hstack(spec[int(start*frate):int(start*frate)+NFRAMES])
y[idx] = label2idx[mark]
idx += 1
In [19]:
# Mean spectrogram over all samples with label index 1, drawn as filters x the
# first 30 frames. plt.imshow is used explicitly so the cell does not depend on
# pylab having injected a bare `imshow`; reshape(-1, ...) takes the row count
# from X itself.
plt.imshow(np.mean(X.reshape(-1, NFRAMES, nfilt)[y == 1], axis=0).T[:, :30],
           interpolation='nearest')
Out[19]:
In [20]:
# Mean spectrogram over all samples with label index 0, drawn as filters x the
# first 30 frames. plt.imshow is used explicitly so the cell does not depend on
# pylab having injected a bare `imshow`; reshape(-1, ...) takes the row count
# from X itself.
plt.imshow(np.mean(X.reshape(-1, NFRAMES, nfilt)[y == 0], axis=0).T[:, :30],
           interpolation='nearest')
Out[20]:
In [32]:
# split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
# and magic
clf = SVC(kernel='linear', C=1)
clf.fit(X_train, y_train)
print clf
In [33]:
print X_train.shape
print X_test.shape
In [34]:
y_pred = clf.predict(X_test)
print metrics.confusion_matrix(y_test, y_pred)
print metrics.classification_report(y_test, y_pred)
In [16]:
# Root directory of the titi monkey data (absolute, machine-specific path).
# The cells below look for '<name>_soxed.wav' files in it.
titimonkeydir = '/home/mwv/data/monkey_sounds/Titi_monkey_new/'
In [23]:
annot_file = '/home/mwv/data/monkey_sounds/titi_monkeys.csv'
annot = defaultdict(list)  # maps file name -> list of (label, start) pairs
# the context manager closes the file even when a line fails to parse
with open(annot_file, 'r') as annot_lines:
    for line in annot_lines:
        line = line.strip()
        if not line:
            # tolerate blank lines instead of failing on the 3-way unpack
            continue
        # each record has three ':'-separated fields: file name, start, label
        fname, start, label = line.split(':')
        fname = fname.strip()
        start = float(start)
        label = label.strip()
        annot[fname].append((label, start))
In [138]:
labelset = sorted(list(set(x[0] for f in annot.itervalues() for x in f)))
label2idx = dict(zip(labelset, range(len(labelset))))
idx2label = dict(zip(range(len(labelset)), labelset))
nsamples = sum(map(len, annot.values()))
print 'number of samples:', nsamples
print 'unique labels:', labelset
nsamples_per_label = defaultdict(int)
for vals in annot.itervalues():
for mark, _ in vals:
nsamples_per_label[mark] += 1
print nsamples_per_label
In [185]:
# load one wave file to get samplerate
wav = rglob(titimonkeydir, '*_soxed.wav').next()
fs, sound = wavfile.read(wav)
print 'sampling rate:', fs
print 'nchannels:', len(sound.shape)
In [140]:
# Spectral front-end settings; the same values size the feature matrix below.
nfilt = 40    # handed to Spectral as nfilt; also the width of one feature frame
wlen = 0.025  # handed to Spectral as wlen (presumably window length in seconds — confirm)
frate = 100   # handed to Spectral as frate; start times are multiplied by it to get frame indices
NFRAMES = 50 # 500 ms window after call start
encoder = spectral.Spectral(
    nfilt=nfilt,
    fs=fs,
    wlen=wlen,
    frate=frate,
    compression='log',
    nfft=1024,
    do_dct=False,
    do_deltas=False,
    do_deltasdeltas=False,
)
In [141]:
# load data
X = np.empty((nsamples, NFRAMES*nfilt), dtype=np.double)
y = np.empty((nsamples,), dtype=np.uint8)
idx = 0
for fname in annot:
wav = path.join(titimonkeydir, fname + '_soxed.wav')
if not path.exists(wav):
print fname
continue
_, sig = wavfile.read(wav)
if len(sig.shape) > 1:
sig = (sig[:,0] + sig[:,1]) / 2
spec = encoder.transform(sig)
for mark, start in annot[fname]:
if int(start*frate) + NFRAMES >= spec.shape[0]:
s = spec[int(start*frate): int(start*frate)+NFRAMES]
extra_zeros = np.zeros((int(start*frate)+NFRAMES-spec.shape[0], nfilt), dtype=s.dtype)
x = np.vstack((s, extra_zeros))
X[idx] = np.hstack(x)
else:
X[idx] = np.hstack(spec[int(start*frate):int(start*frate)+NFRAMES])
y[idx] = label2idx[mark]
idx += 1
if idx < nsamples:
X = X[:idx]
y = y[:idx]
In [156]:
# split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
# and magic
clf = SVC(kernel='linear', C=0.5)
clf.fit(X_train, y_train)
print clf
y_pred = clf.predict(X_test)
print metrics.confusion_matrix(y_test, y_pred)
print metrics.classification_report(y_test, y_pred)
In [120]:
# Imports used by the detection-threshold evaluation and plotting cells below.
import seaborn
import cPickle as pickle
from scipy.special import expit
from sklearn.metrics import roc_curve, auc, f1_score, precision_score, recall_score
import eval_mad as em
# re-execute eval_mad so edits made to it since the first import are picked up by this kernel
reload(em)
Out[120]:
In [132]:
# Fetch the two per-key collections that the next cell intersects and stacks.
monkey = 'Blue_monkeys'  # data set name handed to both eval_mad helpers
pred_lambda = em.get_lambda_est(monkey)
# NOTE(review): 0.05 and 0.025 are unexplained positional arguments — confirm
# their meaning against eval_mad.get_exp_call_activity.
exp_calls = em.get_exp_call_activity(monkey, 0.05, 0.025)
In [133]:
# only keys present in both the expected and the predicted collections can be compared
keys = sorted(set(exp_calls.keys()).intersection(pred_lambda.keys()))
# np.hstack needs a real sequence of arrays: passing a generator is deprecated
# and rejected by newer NumPy releases. Both stacks follow the same key order.
pred_all = np.hstack([pred_lambda[k] for k in keys])
exp_all = np.hstack([exp_calls[k] for k in keys])
In [121]:
def get_score_at(exp, pred_lambda, threshold):
    """Score thresholded predictions against the expected labels.

    ``pred_lambda`` is binarised with a strict ``> threshold`` comparison and
    the result is compared with ``exp``.

    Returns a tuple ``(f1, precision, recall)``, in that order.
    """
    decisions = (pred_lambda > threshold).astype(np.uint8)
    scorers = (f1_score, precision_score, recall_score)
    return tuple(scorer(exp, decisions) for scorer in scorers)
In [137]:
# Sweep the decision threshold over [0, 1) in steps of 0.05 and plot the three
# scores returned by get_score_at (columns: f1, precision, recall).
ths = np.arange(0., 1.0, .05)
scores = np.zeros((ths.shape[0], 3))
# dedicated loop names so the sweep does not overwrite the globals `idx` and `x`
for row, th in enumerate(ths):
    scores[row] = get_score_at(exp_all, pred_all, th)
# explicit plt.* calls so the cell does not depend on pylab-injected names
plt.figure(figsize=(10,10))
plt.plot(ths, scores[:, 0], label='fscore')
plt.plot(ths, scores[:, 1], label='precision')
plt.plot(ths, scores[:, 2], label='recall')
plt.xlabel('threshold')
plt.ylabel('score')
plt.legend(loc='best')
Out[137]:
In [226]:
import scipy.optimize
res = scipy.optimize.minimize_scalar(get_fscore_at, bounds=(-10,10), method='bounded')
p = res.x
print 'recall: {0}\nprecision: {1}\nfscore: {2}\n'.format(*get_score_at(p))
In [35]:
import mielke_replication as mr
# re-execute the module so edits made to it since the first import are picked up by this kernel
reload(mr)
Out[35]:
In [36]:
# NOTE(review): this rebinds X, y and labelset from the cells above. The next
# cell iterates labelset and indexes it by monkey name, so it is no longer the
# flat sorted list built earlier — confirm the return types of mr.load_all.
X, y, labelset = mr.load_all()
In [39]:
from itertools import repeat, chain
# Flatten the per-monkey labels into a single list of "<monkey>-<label>" names,
# keeping the iteration order of labelset and of each of its entries.
combined_labels = []
for monkey in labelset:
    combined_labels.extend('-'.join((monkey, label)) for label in labelset[monkey])
In [40]:
# display the list of "<monkey>-<label>" names built in the previous cell
combined_labels
Out[40]:
In [27]:
# Count how many annotated fragments carry each mark in the titi monkey data.
annot = mr.get_annotation('Titi_monkeys')
labelcounts = defaultdict(int)
# walk the fragments of all entries in one flat pass
for fragment in itertools.chain.from_iterable(annot.itervalues()):
    labelcounts[fragment.mark] += 1
In [28]:
# call keys() so the cell shows the marks themselves rather than the bound method object
labelcounts.keys()
Out[28]:
In [193]:
# For each data set, plot the distribution of fragment durations per mark and
# save the figure as duration_<monkey>.png in the working directory.
seaborn.set(style='white', palette='muted')
for monkey in ['Titi_monkeys', 'Blue_monkeys', 'colobus']:
    fig = plt.figure()
    annot = mr.get_annotation(monkey)
    lengths = defaultdict(list)
    for fragments in annot.itervalues():
        for fragment in fragments:
            lengths[fragment.mark].append(fragment.interval.end - fragment.interval.start)
    # only marks with more than 50 fragments are plotted
    lengths = {k: np.array(v) for k, v in lengths.iteritems() if len(v) > 50}
    for label in lengths:
        seaborn.distplot(lengths[label], label='{0} [{1:.2f}s-{2:.2f}s]'.format(
            label, lengths[label].min(), lengths[label].max()))
    plt.legend(loc='best')
    plt.yticks([])
    # despine only touches axes that already exist, so it has to run after the
    # plots are drawn; on a freshly created, empty figure it does nothing
    seaborn.despine(fig=fig, left=True)
    fig.savefig('duration_{0}.png'.format(monkey))
In [ ]: