In [6]:
import numpy as np
import torch
import torchaudio
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import os

import matplotlib.pyplot as plt
from IPython.display import display, Audio
%matplotlib notebook

In [4]:
def find_files(path):
    return [os.path.join(path, x) for x in os.listdir(path) if os.path.isfile(os.path.join(path, x))]

files = find_files("data/waves_yesno")

In [9]:
sig, sr = torchaudio.load(files[2])
sig = torchaudio.transforms.Scale()(sig)

# Take 1000 samples, scale them up, and reshape to (batch, channel, length)
X = sig[7900:8900].transpose(0, 1) * 10
X.unsqueeze_(0)
display(X.size())

# Local maxima via max pooling (kernel 100, stride 25, padding 13);
# pooling the negated signal yields the local minima.
x_max, idx_max = F.max_pool1d(X, 100, 25, 13, return_indices=True)
x_min, idx_min = F.max_pool1d(-X, 100, 25, 13, return_indices=True)
max_list = list(zip(idx_max.data.squeeze().numpy(), x_max.data.squeeze()))
min_list = list(zip(idx_min.data.squeeze().numpy(), -x_min.data.squeeze()))
combined_list = sorted(max_list + min_list)
display(combined_list)


torch.Size([1, 1, 1000])
[(39, -0.648193359375),
 (53, 0.966796875),
 (53, 0.966796875),
 (104, -0.9051513671875),
 (104, -0.9051513671875),
 (104, -0.9051513671875),
 (118, 1.31011962890625),
 (118, 1.31011962890625),
 (168, -1.84478759765625),
 (168, -1.84478759765625),
 (168, -1.84478759765625),
 (168, -1.84478759765625),
 (178, 1.91741943359375),
 (178, 1.91741943359375),
 (178, 1.91741943359375),
 (178, 1.91741943359375),
 (226, -1.46392822265625),
 (226, -1.46392822265625),
 (239, 1.43524169921875),
 (239, 1.43524169921875),
 (239, 1.43524169921875),
 (282, -0.94512939453125),
 (343, -1.25518798828125),
 (343, -1.25518798828125),
 (343, -1.25518798828125),
 (361, 1.47003173828125),
 (378, 1.640625),
 (378, 1.640625),
 (414, -1.5948486328125),
 (414, -1.5948486328125),
 (423, 2.0025634765625),
 (423, 2.0025634765625),
 (423, 2.0025634765625),
 (423, 2.0025634765625),
 (476, -1.59820556640625),
 (485, 1.990966796875),
 (508, -1.619873046875),
 (508, -1.619873046875),
 (538, -2.08831787109375),
 (538, -2.08831787109375),
 (546, 2.00653076171875),
 (546, 2.00653076171875),
 (599, -2.55279541015625),
 (599, -2.55279541015625),
 (599, -2.55279541015625),
 (599, -2.55279541015625),
 (608, 2.11395263671875),
 (622, 2.373046875),
 (622, 2.373046875),
 (661, -2.41485595703125),
 (684, 2.3974609375),
 (684, 2.3974609375),
 (722, -2.6776123046875),
 (722, -2.6776123046875),
 (730, 2.626953125),
 (745, 2.7105712890625),
 (745, 2.7105712890625),
 (784, -2.9150390625),
 (784, -2.9150390625),
 (784, -2.9150390625),
 (784, -2.9150390625),
 (807, 2.7520751953125),
 (807, 2.7520751953125),
 (807, 2.7520751953125),
 (807, 2.7520751953125),
 (845, -2.86590576171875),
 (845, -2.86590576171875),
 (845, -2.86590576171875),
 (853, 2.7276611328125),
 (906, -2.56103515625),
 (914, 2.86346435546875),
 (914, 2.86346435546875),
 (914, 2.86346435546875),
 (914, 2.86346435546875),
 (967, -2.66265869140625),
 (967, -2.66265869140625)]
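
The repeated (index, value) pairs above come from overlapping windows (kernel 100 vs. stride 25), so the same extremum can win several adjacent windows. A minimal sanity check of the window count, assuming the standard pooling output-length formula:

# L_out = floor((L_in + 2*padding - kernel_size) / stride) + 1
L_in, kernel, stride, padding = 1000, 100, 25, 13
L_out = (L_in + 2 * padding - kernel) // stride + 1
print(L_out)  # 38 windows each for maxima and minima -> 76 (index, value) pairs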

In [83]:
X2 = sig[8000:8100].transpose(0, 1).unsqueeze(0)
display(X2)
conv_op = nn.Conv1d(1, 3, kernel_size=10, dilation=2, stride=9, bias=False)
# The replacement weight has shape (3, 1, 1) rather than (3, 1, 10), so the
# effective kernel size becomes 1; the first channel's single tap is fixed at 1.0.
conv_op.weight = nn.Parameter(torch.cat((torch.ones(1, 1, 1), torch.rand(2, 1, 1))))
print(conv_op)
X2_conv = conv_op(Variable(X2))
X2_conv


( 0 ,.,.) = 

Columns 0 to 8 
  -0.0624 -0.0716 -0.0797 -0.0856 -0.0905 -0.0900 -0.0818 -0.0692 -0.0501

Columns 9 to 17 
  -0.0287 -0.0072  0.0182  0.0451  0.0731  0.0965  0.1147  0.1244  0.1286

Columns 18 to 26 
   0.1310  0.1280  0.1182  0.1014  0.0768  0.0516  0.0299  0.0112 -0.0106

Columns 27 to 35 
  -0.0326 -0.0532 -0.0644 -0.0667 -0.0625 -0.0581 -0.0517 -0.0411 -0.0269

Columns 36 to 44 
  -0.0071  0.0119  0.0245  0.0345  0.0388  0.0443  0.0449  0.0386  0.0284

Columns 45 to 53 
   0.0139  0.0010 -0.0069 -0.0127 -0.0211 -0.0263 -0.0274 -0.0225 -0.0141

Columns 54 to 62 
  -0.0041  0.0028  0.0041  0.0074  0.0062 -0.0039 -0.0194 -0.0416 -0.0692

Columns 63 to 71 
  -0.0992 -0.1273 -0.1520 -0.1707 -0.1809 -0.1845 -0.1731 -0.1436 -0.0975

Columns 72 to 80 
  -0.0462  0.0043  0.0543  0.1037  0.1487  0.1815  0.1917  0.1761  0.1512

Columns 81 to 89 
   0.1140  0.0734  0.0276 -0.0191 -0.0643 -0.0964 -0.1084 -0.1012 -0.0851

Columns 90 to 98 
  -0.0621 -0.0371 -0.0034  0.0344  0.0718  0.0935  0.0977  0.0902  0.0768

Columns 99 to 99 
   0.0605
[torch.FloatTensor of size 1x1x100]
Conv1d(1, 3, kernel_size=(10,), stride=(9,), dilation=(2,), bias=False)
Out[83]:
Variable containing:
(0 ,.,.) = 

Columns 0 to 8 
  -0.0624 -0.0287  0.1310 -0.0326 -0.0071  0.0139 -0.0041 -0.0992 -0.0462
 -0.0210 -0.0097  0.0441 -0.0110 -0.0024  0.0047 -0.0014 -0.0334 -0.0156
 -0.0180 -0.0083  0.0378 -0.0094 -0.0020  0.0040 -0.0012 -0.0286 -0.0133

Columns 9 to 11 
   0.1140 -0.0621  0.0605
  0.0384 -0.0209  0.0204
  0.0329 -0.0179  0.0175
[torch.FloatTensor of size 1x3x12]
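
Because the effective kernel size is 1, the first output channel is just a strided copy of the input (X2[0, 0, ::9]). A quick check of the 12-column width, assuming the usual Conv1d output-length formula:

# L_out = floor((L_in + 2*padding - dilation*(kernel - 1) - 1) / stride) + 1
L_in, kernel, stride, dilation, padding = 100, 1, 9, 2, 0
L_out = (L_in + 2 * padding - dilation * (kernel - 1) - 1) // stride + 1
print(L_out)  # 12, matching the 1x3x12 result above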

In [144]:
# Convolutions
X = torch.arange(0, 10).view(1, 1, -1)
display(X)
conv_op = nn.Conv1d(1, 1, 2, dilation=3, padding=1, bias=False)
conv_op.weight = nn.Parameter(torch.ones(conv_op.weight.size()))  # all-ones kernel: each output is a sum of its taps
display(conv_op(Variable(X)))
tconv_op = nn.ConvTranspose1d(1, 1, 2, dilation=2, bias=False)
tconv_op.weight = nn.Parameter(torch.ones(tconv_op.weight.size()))
display(tconv_op(Variable(X)))


(0 ,.,.) = 
   0   1   2   3   4   5   6   7   8   9
[torch.FloatTensor of size 1x1x10]
Variable containing:
(0 ,.,.) = 
   2   3   5   7   9  11  13  15   7
[torch.FloatTensor of size 1x1x9]
Variable containing:
(0 ,.,.) = 
   0   1   2   4   6   8  10  12  14  16   8   9
[torch.FloatTensor of size 1x1x12]
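
The output lengths (9 and 12) follow from the assumed length formulas for the two ops:

# Conv1d:          L_out = floor((L_in + 2*pad - dilation*(k - 1) - 1) / stride) + 1
# ConvTranspose1d: L_out = (L_in - 1)*stride - 2*pad + dilation*(k - 1) + 1
conv_len  = (10 + 2 * 1 - 3 * (2 - 1) - 1) // 1 + 1   # 9
tconv_len = (10 - 1) * 1 - 2 * 0 + 2 * (2 - 1) + 1    # 12
print(conv_len, tconv_len)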

In [155]:
# Pooling
X = torch.cat((torch.arange(0, 10).view(1, 1, -1), torch.rand(1, 1, 10)))
display(X)
pool_op = nn.AdaptiveMaxPool1d(5)
pool_op(X)


(0 ,.,.) = 

Columns 0 to 8 
   0.0000  1.0000  2.0000  3.0000  4.0000  5.0000  6.0000  7.0000  8.0000

Columns 9 to 9 
   9.0000

(1 ,.,.) = 

Columns 0 to 8 
   0.1731  0.8068  0.1422  0.7546  0.9261  0.5195  0.4176  0.6777  0.1002

Columns 9 to 9 
   0.1254
[torch.FloatTensor of size 2x1x10]
Out[155]:
Variable containing:
(0 ,.,.) = 
  1.0000  3.0000  5.0000  7.0000  9.0000

(1 ,.,.) = 
  0.8068  0.7546  0.9261  0.6777  0.1254
[torch.FloatTensor of size 2x1x5]
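
With an input length of 10 and a target size of 5, the adaptive pooling here reduces to a fixed MaxPool1d(kernel_size=2, stride=2) (an assumption that holds when the input length is an exact multiple of the target size):

fixed_pool = nn.MaxPool1d(kernel_size=2, stride=2)
print(fixed_pool(X))  # same values as the adaptive result above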

In [163]:
# Padding
X = torch.arange(0, 10).view(1, 1, 1, -1)
nn.ReflectionPad2d((1, 1, 0, 0))(X)


Out[163]:
Variable containing:
(0 ,0 ,.,.) = 
   1   0   1   2   3   4   5   6   7   8   9   8
[torch.FloatTensor of size 1x1x1x12]
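
The 1-D equivalent, assuming nn.ReflectionPad1d is available in this PyTorch version, produces the same reflected borders on an (N, C, L) input:

X1d = torch.arange(0, 10).view(1, 1, -1)
nn.ReflectionPad1d((1, 1))(X1d)
# -> 1 0 1 2 3 4 5 6 7 8 9 8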
