In [1]:
import numpy as np
import librosa
import torch
from torch import nn
from torch.autograd import Variable  # a no-op wrapper in PyTorch >= 0.4
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
files = librosa.util.find_files("pcsnpny-20150204-mkj/wav")
file = files[0]
sig1, sr1 = librosa.load(file, sr=None)      # sr=None keeps the native sampling rate
sig2, sr2 = librosa.load(files[1], sr=None)
print(sig1.shape, sr1, sig2.shape, sr2)
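As a sanity check, the number of frames the strided Conv1d below should carve out of each signal follows the usual output-length formula, floor((n_samples - kernel) / stride) + 1, with no padding or dilation. The ws/hs values here are an assumption that mirrors the model cell further down.
In [ ]:
# Expected Conv1d frame counts (assumes the no-padding, no-dilation formula)
ws, hs = 640, 320  # same window/hop as the model cell below
for sig in (sig1, sig2):
    print((len(sig) - ws) // hs + 1)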
In [3]:
class CNN2RNN(nn.Module):
    def __init__(self, conv_in_channels, conv_out_features, ws, hs, rnn_hidden_size, rnn_output_size):
        super(CNN2RNN, self).__init__()
        self.cin_channels = conv_in_channels
        self.cout_features = conv_out_features
        self.rnn_hid = rnn_hidden_size
        self.rnn_out = rnn_output_size  # stored but currently unused; the dense layer outputs one unit
        # hard coding vars so I know what they are.
        n_layers = 2    # number of stacked GRU layers
        batch_size = 1
        kernel2d = 3    # square kernel of the 2D conv
        # hidden initialization
        self.hidden = self.init_hidden(n_layers, batch_size, rnn_hidden_size)
        # net layer types: c1 emits conv_out_features + kernel2d - 1 channels so that
        # the valid 2D convolution shrinks that axis back down to conv_out_features
        self.c1 = nn.Conv1d(conv_in_channels, conv_out_features + kernel2d - 1, ws, stride=hs)
        self.c2 = nn.Conv2d(1, conv_out_features, kernel2d)
        self.gru = nn.GRU(conv_out_features * conv_out_features, rnn_hidden_size, n_layers,
                          batch_first=True, bidirectional=False)
        self.dense = nn.Linear(rnn_hidden_size, 1)

    def forward(self, input):
        print("input size: {}".format(input.size()))
        conv_out = self.c1(input)                   # (batch, cout + 2, frames)
        print("conv1d out: {}".format(conv_out.size()))
        conv2_out = self.c2(conv_out.unsqueeze(1))  # add a channel dim for Conv2d -> (batch, cout, cout, frames - 2)
        print("conv2d out: {}".format(conv2_out.size()))
        # move time to the sequence axis before flattening, so each GRU step
        # sees one time slice of cout * cout features
        gru_in = conv2_out.permute(0, 3, 1, 2).contiguous().view(
            input.size(0), -1, self.cout_features * self.cout_features)
        print("gru in: {}".format(gru_in.size()))
        gru_out, self.hidden = self.gru(gru_in, self.hidden)
        print("gru out: {}".format(gru_out.size()))
        dense_in = gru_out.contiguous().view(gru_in.size(1) * input.size(0), -1)
        print("dense in: {}".format(dense_in.size()))
        out_space = self.dense(dense_in)
        out = torch.sigmoid(out_space)  # F.sigmoid is deprecated
        out = out.view(input.size(0), -1)
        return out

    def init_hidden(self, nl, bat_dim, hid_dim):
        # axes: (num_layers, minibatch_size, hidden_dim); see the nn.GRU docs
        return Variable(torch.zeros(nl, bat_dim, hid_dim))
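# Shape walkthrough for a signal of n samples (assumption: no padding anywhere):
#   Conv1d:  (1, 1, n)           -> (1, nb + 2, f)       with f = (n - ws) // hs + 1
#   Conv2d:  (1, 1, nb + 2, f)   -> (1, nb, nb, f - 2)
#   GRU:     (1, f - 2, nb * nb) -> (1, f - 2, 1024)
#   Dense:   (f - 2, 1024)       -> (f - 2, 1), sigmoid, reshaped to (1, f - 2)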
ws = 640      # analysis window: Conv1d kernel size in samples
hs = ws // 2  # hop size: Conv1d stride (50% overlap)
nb = 64       # number of conv feature maps
net = CNN2RNN(1, nb, ws, hs, 1024, 2)
inputs1 = torch.Tensor(sig1)
inputs1.unsqueeze_(0)
inputs1.unsqueeze_(0)  # (n_samples,) -> (1, 1, n_samples): batch and channel dims
print(net(Variable(inputs1)).size())
# note: net.hidden carries over, so the second signal starts
# from the first one's final GRU state
inputs2 = torch.Tensor(sig2)
inputs2.unsqueeze_(0)
inputs2.unsqueeze_(0)
print(net(Variable(inputs2)).size())
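Nothing is trained yet; the two calls above only check that shapes flow through. Below is a minimal sketch of what one optimization step might look like, assuming per-frame binary targets. The all-zeros target, BCELoss, and Adam are placeholders for illustration, not part of the original experiment.
In [ ]:
# Hypothetical training step; placeholder targets, not from the original notebook.
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(net.parameters(), lr=1e-4)

net.hidden = net.init_hidden(2, 1, 1024)    # fresh state so we don't backprop into earlier forwards
out = net(Variable(inputs1))                # (1, n_frames) of per-frame probabilities
target = Variable(torch.zeros(out.size()))  # placeholder labels, same shape as out
loss = criterion(out, target)

optimizer.zero_grad()
loss.backward()
optimizer.step()
print(loss.item())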