%load_ext autoreload
import matplotlib.pyplot as plt
from matplotlib import rcParams
import numpy as np
import pandas as pd
import seaborn as sns

import os
import torch
from torch import nn
from torchvision import models

from torchsummary import summary
from ray import tune

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
  %reload_ext autoreload

# Experiment configuration handed to the VGG19 constructor below.
exp_config = dict(
    #### model related
    # network=tune.grid_search(["vgg19_bn_kw", "vgg19_bn"]),
    model=tune.grid_search(["BaseModel", "SparseModel", "SET", "DSNN"]),
    # model=tune.grid_search(["SET", "DSNN", "DSNN_Flip", "DSNN_Correct"]),
    # Per-channel normalisation statistics
    # (presumably CIFAR-10 RGB mean/std -- TODO confirm against the dataset).
    stats_mean=(0.4914, 0.4822, 0.4465),
    stats_std=(0.2023, 0.1994, 0.2010),
    #### optimizer related
    #### sparse related
    kwinners=True,  # moved to a parameter
)

from networks import VGG19

# Build the network from the experiment configuration.
model = VGG19(exp_config)

<bound method VGG19._kwinners of VGG19()>
<class 'nupic.torch.modules.k_winners.KWinners2d'>
KWinners2d(channels=10, n=0, percent_on=0.3, boost_strength=1.4, duty_cycle_period=1000)

How to replace ReLUs with another non-linear activation function

# Start from torchvision's VGG19-with-batch-norm constructor (default arguments).
model = models.vgg19_bn()

from nupic.torch.modules import KWinners2d

model = models.vgg19_bn()

# Hyper-parameters forwarded to every KWinners2d layer created below.
config = {
    'percent_on': 0.3,
    'boost_strength': 1.4,
    'boost_strength_factor': 0.7,
}

# Rebuild the feature extractor layer by layer: max pooling is dropped,
# each ReLU is replaced by a KWinners2d sized to the preceding convolution,
# and every other layer is carried over unchanged.
new_features = []
last_conv_out_channels = None
for layer in model.features:
    # remove max pooling
    if isinstance(layer, nn.MaxPool2d):
        continue
    elif isinstance(layer, nn.Conv2d):
        new_features.append(layer)
        # Remember the width so the following activation can be sized to it.
        last_conv_out_channels = layer.out_channels
    # switch ReLU to kWinners2d
    elif isinstance(layer, nn.ReLU):
        if last_conv_out_channels is None:
            raise ValueError("ReLU encountered before any Conv2d layer")
        # KWinners2d needs the real channel count; passing 0 makes its
        # per-channel state empty and the forward pass fails.
        new_features.append(
            KWinners2d(
                last_conv_out_channels,
                percent_on=config['percent_on'],
                boost_strength=config['boost_strength'],
                boost_strength_factor=config['boost_strength_factor'],
            )
        )
    # otherwise add it as normal
    else:
        new_features.append(layer)
model.features = nn.Sequential(*new_features)

  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): KWinners2d(channels=64, n=0, percent_on=0.3, boost_strength=1.4, duty_cycle_period=1000)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): KWinners2d(channels=64, n=0, percent_on=0.3, boost_strength=1.4, duty_cycle_period=1000)
    (6): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): KWinners2d(channels=128, n=0, percent_on=0.3, boost_strength=1.4, duty_cycle_period=1000)
    (9): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (10): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): KWinners2d(channels=128, n=0, percent_on=0.3, boost_strength=1.4, duty_cycle_period=1000)
    (12): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (14): KWinners2d(channels=256, n=0, percent_on=0.3, boost_strength=1.4, duty_cycle_period=1000)
    (15): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (16): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (17): KWinners2d(channels=256, n=0, percent_on=0.3, boost_strength=1.4, duty_cycle_period=1000)
    (18): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (19): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (20): KWinners2d(channels=256, n=0, percent_on=0.3, boost_strength=1.4, duty_cycle_period=1000)
    (21): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (22): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (23): KWinners2d(channels=256, n=0, percent_on=0.3, boost_strength=1.4, duty_cycle_period=1000)
    (24): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (25): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (26): KWinners2d(channels=512, n=0, percent_on=0.3, boost_strength=1.4, duty_cycle_period=1000)
    (27): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (28): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (29): KWinners2d(channels=512, n=0, percent_on=0.3, boost_strength=1.4, duty_cycle_period=1000)
    (30): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (31): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (32): KWinners2d(channels=512, n=0, percent_on=0.3, boost_strength=1.4, duty_cycle_period=1000)
    (33): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (34): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (35): KWinners2d(channels=512, n=0, percent_on=0.3, boost_strength=1.4, duty_cycle_period=1000)
    (36): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (37): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (38): KWinners2d(channels=512, n=0, percent_on=0.3, boost_strength=1.4, duty_cycle_period=1000)
    (39): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (40): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (41): KWinners2d(channels=512, n=0, percent_on=0.3, boost_strength=1.4, duty_cycle_period=1000)
    (42): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (43): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (44): KWinners2d(channels=512, n=0, percent_on=0.3, boost_strength=1.4, duty_cycle_period=1000)
    (45): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (46): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (47): KWinners2d(channels=512, n=0, percent_on=0.3, boost_strength=1.4, duty_cycle_period=1000)
  (avgpool): AdaptiveAvgPool2d(output_size=(7, 7))
  (classifier): Sequential(
    (0): Linear(in_features=25088, out_features=4096, bias=True)
    (1): ReLU(inplace)
    (2): Dropout(p=0.5)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU(inplace)
    (5): Dropout(p=0.5)
    (6): Linear(in_features=4096, out_features=1000, bias=True)

# Print the per-layer output shapes and parameter counts for a 3x32x32 input.
summary(model, input_size=(3,32,32))

        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,792
       BatchNorm2d-2           [-1, 64, 32, 32]             128
        KWinners2d-3           [-1, 64, 32, 32]               0
            Conv2d-4           [-1, 64, 32, 32]          36,928
       BatchNorm2d-5           [-1, 64, 32, 32]             128
        KWinners2d-6           [-1, 64, 32, 32]               0
            Conv2d-7          [-1, 128, 32, 32]          73,856
       BatchNorm2d-8          [-1, 128, 32, 32]             256
        KWinners2d-9          [-1, 128, 32, 32]               0
           Conv2d-10          [-1, 128, 32, 32]         147,584
      BatchNorm2d-11          [-1, 128, 32, 32]             256
       KWinners2d-12          [-1, 128, 32, 32]               0
           Conv2d-13          [-1, 256, 32, 32]         295,168
      BatchNorm2d-14          [-1, 256, 32, 32]             512
       KWinners2d-15          [-1, 256, 32, 32]               0
           Conv2d-16          [-1, 256, 32, 32]         590,080
      BatchNorm2d-17          [-1, 256, 32, 32]             512
       KWinners2d-18          [-1, 256, 32, 32]               0
           Conv2d-19          [-1, 256, 32, 32]         590,080
      BatchNorm2d-20          [-1, 256, 32, 32]             512
       KWinners2d-21          [-1, 256, 32, 32]               0
           Conv2d-22          [-1, 256, 32, 32]         590,080
      BatchNorm2d-23          [-1, 256, 32, 32]             512
       KWinners2d-24          [-1, 256, 32, 32]               0
           Conv2d-25          [-1, 512, 32, 32]       1,180,160
      BatchNorm2d-26          [-1, 512, 32, 32]           1,024
       KWinners2d-27          [-1, 512, 32, 32]               0
           Conv2d-28          [-1, 512, 32, 32]       2,359,808
      BatchNorm2d-29          [-1, 512, 32, 32]           1,024
       KWinners2d-30          [-1, 512, 32, 32]               0
           Conv2d-31          [-1, 512, 32, 32]       2,359,808
      BatchNorm2d-32          [-1, 512, 32, 32]           1,024
       KWinners2d-33          [-1, 512, 32, 32]               0
           Conv2d-34          [-1, 512, 32, 32]       2,359,808
      BatchNorm2d-35          [-1, 512, 32, 32]           1,024
       KWinners2d-36          [-1, 512, 32, 32]               0
           Conv2d-37          [-1, 512, 32, 32]       2,359,808
      BatchNorm2d-38          [-1, 512, 32, 32]           1,024
       KWinners2d-39          [-1, 512, 32, 32]               0
           Conv2d-40          [-1, 512, 32, 32]       2,359,808
      BatchNorm2d-41          [-1, 512, 32, 32]           1,024
       KWinners2d-42          [-1, 512, 32, 32]               0
           Conv2d-43          [-1, 512, 32, 32]       2,359,808
      BatchNorm2d-44          [-1, 512, 32, 32]           1,024
       KWinners2d-45          [-1, 512, 32, 32]               0
           Conv2d-46          [-1, 512, 32, 32]       2,359,808
      BatchNorm2d-47          [-1, 512, 32, 32]           1,024
       KWinners2d-48          [-1, 512, 32, 32]               0
AdaptiveAvgPool2d-49            [-1, 512, 7, 7]               0
           Linear-50                 [-1, 4096]     102,764,544
             ReLU-51                 [-1, 4096]               0
          Dropout-52                 [-1, 4096]               0
           Linear-53                 [-1, 4096]      16,781,312
             ReLU-54                 [-1, 4096]               0
          Dropout-55                 [-1, 4096]               0
           Linear-56                 [-1, 1000]       4,097,000
Total params: 143,678,248
Trainable params: 143,678,248
Non-trainable params: 0
Input size (MB): 0.01
Forward/backward pass size (MB): 129.39
Params size (MB): 548.09
Estimated Total Size (MB): 677.49

# Attempt at swapping the ReLUs in model.features for KWinners2d layers.
# This cell is kept as the record of a failing experiment: the traceback that
# follows it in this file comes from the summary() call at the end.
from nupic.torch.modules import KWinners2d
new_features = []
for layer in model.features:
    # NOTE(review): the body of this branch is missing from this excerpt, and
    # no branch that carries over the remaining layers is visible either.
    if isinstance(layer, nn.MaxPool2d):
    elif isinstance(layer, nn.ReLU):
        # The first argument (0) is the channel count. The RuntimeError below
        # reports a size mismatch of 64 vs 0 at dimension 1, i.e. the layer's
        # per-channel state is empty while the incoming tensor has 64 channels.
        new_features.append(KWinners2d(0, percent_on=0.3))

# Install the rebuilt layer list as the model's feature extractor.
model.features = nn.Sequential(*new_features)
# Runs a forward pass on a 3x32x32 input; this is the call that raises.
summary(model, input_size=(3,32,32))

RuntimeError                              Traceback (most recent call last)
<ipython-input-46-c069c7653900> in <module>
      1 model.features = nn.Sequential(*new_features)
----> 2 summary(model, input_size=(3,32,32))

~/miniconda3/envs/numenta/lib/python3.7/site-packages/torchsummary/ in summary(model, input_size, batch_size, device)
     70     # make a forward pass
     71     # print(x.shape)
---> 72     model(*x)
     74     # remove these hooks

~/miniconda3/envs/numenta/lib/python3.7/site-packages/torch/nn/modules/ in __call__(self, *input, **kwargs)
    491             result = self._slow_forward(*input, **kwargs)
    492         else:
--> 493             result = self.forward(*input, **kwargs)
    494         for hook in self._forward_hooks.values():
    495             hook_result = hook(self, input, result)

~/miniconda3/envs/numenta/lib/python3.7/site-packages/torchvision/models/ in forward(self, x)
     41     def forward(self, x):
---> 42         x = self.features(x)
     43         x = self.avgpool(x)
     44         x = x.view(x.size(0), -1)

~/miniconda3/envs/numenta/lib/python3.7/site-packages/torch/nn/modules/ in __call__(self, *input, **kwargs)
    491             result = self._slow_forward(*input, **kwargs)
    492         else:
--> 493             result = self.forward(*input, **kwargs)
    494         for hook in self._forward_hooks.values():
    495             hook_result = hook(self, input, result)

~/miniconda3/envs/numenta/lib/python3.7/site-packages/torch/nn/modules/ in forward(self, input)
     90     def forward(self, input):
     91         for module in self._modules.values():
---> 92             input = module(input)
     93         return input

~/miniconda3/envs/numenta/lib/python3.7/site-packages/torch/nn/modules/ in __call__(self, *input, **kwargs)
    491             result = self._slow_forward(*input, **kwargs)
    492         else:
--> 493             result = self.forward(*input, **kwargs)
    494         for hook in self._forward_hooks.values():
    495             hook_result = hook(self, input, result)

~/nta/nupic.torch/nupic/torch/modules/ in forward(self, x)
    268         if
--> 269             x = F.KWinners2d.apply(x, self.duty_cycle, self.k, self.boost_strength)
    270             self.update_duty_cycle(x)

~/nta/nupic.torch/nupic/torch/functions/ in forward(ctx, x, duty_cycles, k, boost_strength)
    165             target_density = float(k) / (x.shape[1] * x.shape[2] * x.shape[3])
    166             boost_factors = torch.exp((target_density - duty_cycles) * boost_strength)
--> 167             boosted = x.detach() * boost_factors
    168         else:
    169             boosted = x.detach()

RuntimeError: The size of tensor a (64) must match the size of tensor b (0) at non-singleton dimension 1

In [44]:

from nupic.torch.modules import KWinners

# Replace each ReLU in the feature extractor with a KWinners module in place.
# All other layers (including MaxPool2d) are left untouched, which matches the
# module listing printed after this cell.
# NOTE(review): KWinners is constructed with n=0 here, like the failing
# KWinners2d(0, ...) experiment above -- confirm the intended size before
# running a forward pass through this model.
for idx, layer in enumerate(model.features):
    if isinstance(layer, nn.ReLU):
        model.features[idx] = KWinners(0, 0.3)

  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): KWinners(n=0, percent_on=0.3, boost_strength=1.0, duty_cycle_period=1000)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): KWinners(n=0, percent_on=0.3, boost_strength=1.0, duty_cycle_period=1000)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): KWinners(n=0, percent_on=0.3, boost_strength=1.0, duty_cycle_period=1000)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): KWinners(n=0, percent_on=0.3, boost_strength=1.0, duty_cycle_period=1000)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (16): KWinners(n=0, percent_on=0.3, boost_strength=1.0, duty_cycle_period=1000)
    (17): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (18): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (19): KWinners(n=0, percent_on=0.3, boost_strength=1.0, duty_cycle_period=1000)
    (20): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (21): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (22): KWinners(n=0, percent_on=0.3, boost_strength=1.0, duty_cycle_period=1000)
    (23): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (24): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (25): KWinners(n=0, percent_on=0.3, boost_strength=1.0, duty_cycle_period=1000)
    (26): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (27): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (28): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (29): KWinners(n=0, percent_on=0.3, boost_strength=1.0, duty_cycle_period=1000)
    (30): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (31): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (32): KWinners(n=0, percent_on=0.3, boost_strength=1.0, duty_cycle_period=1000)
    (33): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (34): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (35): KWinners(n=0, percent_on=0.3, boost_strength=1.0, duty_cycle_period=1000)
    (36): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (37): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (38): KWinners(n=0, percent_on=0.3, boost_strength=1.0, duty_cycle_period=1000)
    (39): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (40): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (41): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (42): KWinners(n=0, percent_on=0.3, boost_strength=1.0, duty_cycle_period=1000)
    (43): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (44): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (45): KWinners(n=0, percent_on=0.3, boost_strength=1.0, duty_cycle_period=1000)
    (46): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (47): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (48): KWinners(n=0, percent_on=0.3, boost_strength=1.0, duty_cycle_period=1000)
    (49): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (50): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (51): KWinners(n=0, percent_on=0.3, boost_strength=1.0, duty_cycle_period=1000)
    (52): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (avgpool): AdaptiveAvgPool2d(output_size=(7, 7))
  (classifier): Sequential(
    (0): Linear(in_features=25088, out_features=4096, bias=True)
    (1): ReLU(inplace)
    (2): Dropout(p=0.5)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU(inplace)
    (5): Dropout(p=0.5)
    (6): Linear(in_features=4096, out_features=1000, bias=True)

class cauchy_activation(nn.Module):
    """Activation module that applies ``pdf_cauchy_distribution`` to its input.

    NOTE(review): ``pdf_cauchy_distribution`` is not defined in the visible
    part of this file; it must be in scope when ``activation`` is called.
    """

    def __init__(self):
        super().__init__()

    def activation(self, inp):
        """Return ``pdf_cauchy_distribution(inp)``.

        The argument is used directly: the module never stores an ``inp``
        attribute, so reading ``self.inp`` would raise ``AttributeError``.
        """
        return pdf_cauchy_distribution(inp)