In [1]:
from configparser import ConfigParser
import os
from pathlib import Path
import json
import shutil

import numpy as np
import tqdm

import vak

In [2]:
BIRDS = [
#    'bl26lb16',
    'gy6or6',
#    'or60yw70',
#    'gr41rd51',
]

CONFIGS_DIR = Path('../../src/configs/')
BF_CONFIGS = sorted(list(CONFIGS_DIR.glob('*BFSongRepository*ini')))


configs_by_bird = {
    bird: [bf_config for bf_config in BF_CONFIGS if bird in str(bf_config)][0]
    for bird in BIRDS
}

In [3]:
BFSongRepo = Path('~/Documents/data/BFSongRepository/').expanduser()

all_notmats = list(BFSongRepo.glob('*/*/*.not.mat'))
bird_date_dirs = set([notmat.parents[0] for notmat in all_notmats])

copy all .cbins with .not.mats into a sub-directory


In [4]:
# for bird_date_dir in bird_date_dirs:
#     has_notmat = bird_date_dir.joinpath('has_notmat')
#     has_notmat.mkdir(exist_ok=True)
#     notmats_this_date_dir = sorted(list(bird_date_dir.glob('*.not.mat')))
#     print(f'\ncopying annotated songs in {bird_date_dir} into sub-directory')
#     for notmat in tqdm.tqdm(notmats_this_date_dir):
#         shutil.copy(notmat, dst=has_notmat)
#         cbin = notmat.parent.joinpath(
#             Path(notmat.stem).stem
#         )
#         shutil.copy(cbin, dst=has_notmat)  # cbin_file, stem.stem removes .not.mat
#         rec = notmat.parent.joinpath(
#             Path(Path(notmat.stem).stem).stem + '.rec'
#         )
#         shutil.copy(rec, dst=has_notmat)
#         tmp = notmat.parent.joinpath(
#             Path(Path(notmat.stem).stem).stem + '.tmp'
#         )
#        shutil.copy(tmp, dst=has_notmat)


  6%|▌         | 11/196 [00:00<00:01, 99.01it/s]
copying annotated songs in /home/nickledave/Documents/data/BFSongRepository/gr41rd51/062112 into sub-directory
100%|██████████| 196/196 [00:01<00:00, 104.82it/s]
 22%|██▏       | 11/51 [00:00<00:00, 104.13it/s]
copying annotated songs in /home/nickledave/Documents/data/BFSongRepository/or60yw70/100112 into sub-directory
100%|██████████| 51/51 [00:00<00:00, 114.32it/s]
 11%|█         | 8/75 [00:00<00:00, 76.89it/s]
copying annotated songs in /home/nickledave/Documents/data/BFSongRepository/bl26lb16/042112 into sub-directory
100%|██████████| 75/75 [00:01<00:00, 64.44it/s]
 15%|█▌        | 11/73 [00:00<00:00, 107.84it/s]
copying annotated songs in /home/nickledave/Documents/data/BFSongRepository/or60yw70/092812 into sub-directory
100%|██████████| 73/73 [00:00<00:00, 110.07it/s]
 29%|██▉       | 61/212 [00:00<00:00, 602.51it/s]
copying annotated songs in /home/nickledave/Documents/data/BFSongRepository/gy6or6/032412 into sub-directory
100%|██████████| 212/212 [00:00<00:00, 646.38it/s]
  9%|▊         | 10/117 [00:00<00:01, 98.25it/s]
copying annotated songs in /home/nickledave/Documents/data/BFSongRepository/or60yw70/092912 into sub-directory
100%|██████████| 117/117 [00:01<00:00, 99.67it/s]
  3%|▎         | 11/378 [00:00<00:03, 109.46it/s]
copying annotated songs in /home/nickledave/Documents/data/BFSongRepository/gr41rd51/062212 into sub-directory
100%|██████████| 378/378 [00:03<00:00, 100.65it/s]
  2%|▏         | 3/162 [00:00<00:06, 23.00it/s]
copying annotated songs in /home/nickledave/Documents/data/BFSongRepository/bl26lb16/041912 into sub-directory
100%|██████████| 162/162 [00:02<00:00, 59.42it/s]
  9%|▉         | 10/111 [00:00<00:01, 96.75it/s]
copying annotated songs in /home/nickledave/Documents/data/BFSongRepository/or60yw70/093012 into sub-directory
100%|██████████| 111/111 [00:01<00:00, 110.34it/s]
 23%|██▎       | 50/215 [00:00<00:00, 496.61it/s]
copying annotated songs in /home/nickledave/Documents/data/BFSongRepository/gy6or6/032212 into sub-directory
100%|██████████| 215/215 [00:00<00:00, 574.96it/s]
 43%|████▎     | 74/172 [00:00<00:00, 736.47it/s]
copying annotated songs in /home/nickledave/Documents/data/BFSongRepository/gy6or6/032512 into sub-directory
100%|██████████| 172/172 [00:00<00:00, 686.11it/s]
 12%|█▏        | 11/92 [00:00<00:00, 105.18it/s]
copying annotated songs in /home/nickledave/Documents/data/BFSongRepository/gr41rd51/061912 into sub-directory
100%|██████████| 92/92 [00:00<00:00, 110.25it/s]
  4%|▍         | 8/202 [00:00<00:02, 76.18it/s]
copying annotated songs in /home/nickledave/Documents/data/BFSongRepository/bl26lb16/042012 into sub-directory
100%|██████████| 202/202 [00:03<00:00, 58.52it/s]
 17%|█▋        | 12/70 [00:00<00:00, 114.33it/s]
copying annotated songs in /home/nickledave/Documents/data/BFSongRepository/gr41rd51/062012 into sub-directory
100%|██████████| 70/70 [00:00<00:00, 111.72it/s]
 39%|███▉      | 63/162 [00:00<00:00, 619.36it/s]
copying annotated songs in /home/nickledave/Documents/data/BFSongRepository/gy6or6/032312 into sub-directory
100%|██████████| 162/162 [00:00<00:00, 631.60it/s]
  5%|▌         | 11/203 [00:00<00:01, 101.53it/s]
copying annotated songs in /home/nickledave/Documents/data/BFSongRepository/gr41rd51/062312 into sub-directory
100%|██████████| 203/203 [00:01<00:00, 108.29it/s]
  9%|▉         | 8/87 [00:00<00:01, 76.57it/s]
copying annotated songs in /home/nickledave/Documents/data/BFSongRepository/or60yw70/092712 into sub-directory
100%|██████████| 87/87 [00:00<00:00, 96.31it/s]
100%|██████████| 39/39 [00:00<00:00, 552.91it/s]
copying annotated songs in /home/nickledave/Documents/data/BFSongRepository/gy6or6/032612 into sub-directory

get dirs to predict for each bird


In [4]:
dirs_to_predict = {}
for bird in BIRDS:
    these = [
        bird_date_dir for bird_date_dir in bird_date_dirs
        if bird in str(bird_date_dir)
    ]
    these = [path.joinpath('has_notmat')
             for path in these]
    dirs_to_predict[bird] = these

In [5]:
spect_params = {'fft_size': 512,
                'step_size': 62,
                'freq_cutoffs': [500, 10000],
                'thresh': 6.25,
                'transform_type': 'log_spect'}
sp_nt = vak.config.spectrogram.SpectConfig(**spect_params)

In [6]:
NETWORKS = vak.network._load()

In [7]:
for bird in BIRDS:
    print(f'predicting segments and labels for bird: {bird}')
    config_ini = configs_by_bird[bird]
    config_obj = ConfigParser()
    config_obj.read(config_ini)

    data_config = vak.config.data.parse_data_config(config_obj, config_ini)
    train_config = vak.config.train.parse_train_config(config_obj, config_ini)
    net_config = vak.config.parse._get_nets_config(config_obj, train_config.networks)

    results_dir = config_obj['OUTPUT']['results_dir_made_by_main_script']
    checkpoint_path = str(Path(results_dir).joinpath('TweetyNet'))
    spect_scaler_path = str(Path(results_dir).joinpath('spect_scaler'))

    # TODO: fix path
    print(f'\tgetting labelmap from {train_config.train_vds_path}')
    train_vds = vak.dataset.VocalizationDataset.load(train_config.train_vds_path)
    train_vds = train_vds.load_spects()
    labelmap = train_vds.labelmap

    bird_dirs_predict = dirs_to_predict[bird]    
    for dir_to_predict in bird_dirs_predict:
        stem = f'{dir_to_predict.parents[0].name}.{dir_to_predict.name}'

        X_train = train_vds.spects_list()
        X_train = np.concatenate(X_train, axis=1)
        Y_train = train_vds.lbl_tb_list()
        Y_train = np.concatenate(Y_train)
        # transpose so rows are time bins
        X_train = X_train.T
        freq_bins = X_train.shape[-1]  # number of columns
        
        test_vds_fname = str(dir_to_predict.joinpath(
            f'{stem}.test.vds.json'
        ))

        test_vds = vak.dataset.prep(str(dir_to_predict),
                                    annot_format='notmat',
                                    labelset=data_config.labelset,
                                    output_dir=dir_to_predict,
                                    save_vds=False,
                                    vds_fname=test_vds_fname,
                                    return_vds=True,
                                    return_path=False,
                                    audio_format='cbin',
                                    spect_params=sp_nt)

        net_name, net_config = list(net_config.items())[0]
        n_classes = len(labelmap)
        net_config_dict = net_config._asdict()
        net_config_dict['n_syllables'] = n_classes
        if 'freq_bins' in net_config_dict:
            net_config_dict['freq_bins'] = freq_bins

        X_test = test_vds.spects_list()
        X_test = np.concatenate(X_test, axis=1)
        # transpose so rows are time bins
        X_test = X_test.T
        Y_test = test_vds.lbl_tb_list()
        Y_test = np.concatenate(Y_test)

        (X_train,
         _,
         num_batches_train) = vak.utils.data.reshape_data_for_batching(X_train,
                                                                       net_config.batch_size,
                                                                       net_config.time_bins,
                                                                       Y_train)

        # Notice we don't reshape Y_test
        (X_test,
         _,
         num_batches_test) = vak.utils.data.reshape_data_for_batching(X_test,
                                                                      net_config.batch_size,
                                                                      net_config.time_bins,
                                                                      Y_test)
        
        
        print("running test on data from {dir_to_predict}")
        (Y_pred_train,
         Y_pred_test,
         Y_pred_train_labels,
         Y_pred_test_labels,
         train_err,
         train_lev,
         train_syl_err_rate,
         test_err,
         test_lev,
         test_syl_err_rate) = vak.core.learncurve.test_one_model(net_name,
                                                                      net_config_dict,
                                                                      NETWORKS,
                                                                      n_classes,
                                                                      labelmap,
                                                                      checkpoint_path,
                                                                      X_train,
                                                                      Y_train,
                                                                      num_batches_train,
                                                                      X_test,
                                                                      Y_test,
                                                                      num_batches_test)

        print(f'error on training set: {train_err}')
        print(f'Levenstein distance on training set: {train_lev}')
        print(f'syllable error rate on training set: {train_syl_err_rate}')
        print(f'error on test set: {test_err}')
        print(f'Levenstein distance on test set: {test_lev}')
        print(f'syllable error rate on test set: {test_syl_err_rate}')
        
        err_dict = {
            'train_err': train_err,
            'train_lev': train_lev,
            'train_syl_err_rate': train_syl_err_rate,
            'test_err': test_err,
            'test_lev': test_lev,
            'test_syl_err_rate': test_syl_err_rate,
        }
        with open(os.path.joinpath(dir_to_predict,'test.json')) as fp:
            json.dump(err_dict, fp)
        
        predict_vds_fname = str(dir_to_predict.joinpath(
            f'{stem}.predict.vds.json'
        ))
        print(f'\tmaking dataset for predictions from {dir_to_predict}')
        predict_vds = vak.dataset.prep(str(dir_to_predict),
                                       audio_format='cbin',
                                       spect_params=sp_nt,
                                       return_vds=True,
                                       return_path=False)
        predict_vds = predict_vds.clear_spects()
        predict_vds.save(json_fname=predict_vds_fname)

        print(f'\trunning vak.core.predict on {dir_to_predict}')
        vak.core.predict(
            predict_vds_path=vds_fname,
            checkpoint_path=checkpoint_path,
            networks=net_config,
            labelmap=labelmap,
            spect_scaler_path=spect_scaler_path)


  File "<ipython-input-7-e37b62efa538>", line 116
    with open(os.path.joinpath(dir_to_predict,'test.json') as fp:
                                                            ^
SyntaxError: invalid syntax

In [ ]: