In [12]:
import math
import os

import ray
from ray import tune

import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchsummary import summary

import models
import networks
from datasets import PreprocessedSpeechDataLoader, VaryingDataLoader
from networks_module.layers import DSConv2d, RandDSConv2d, SparseConv2d
from nupic.research.frameworks.pytorch.image_transforms import RandomNoise
from nupic.torch.modules import Flatten, KWinners, KWinners2d

In [2]:
config = dict(
    device=("cuda" if torch.cuda.device_count() > 0 else "cpu"),
    dataset_name="PreprocessedGSC",
    data_dir="~/nta/datasets/gsc",
    batch_size_train=(4, 16),
    batch_size_test=1000,

    # ----- Network Related -----
    # model=tune.grid_search(["BaseModel", "SparseModel", "DSNNMixedHeb", "DSNNConvHeb"]),
    model="DSNNConvHeb",
    network="gsc_conv_heb",

    # ----- Optimizer Related -----
    optim_alg="SGD",
    momentum=0,
    learning_rate=0.01,
    weight_decay=0.01,
    lr_scheduler="StepLR",
    lr_gamma=0.90,
    use_kwinners=True,
    # use_kwinners=tune.grid_search([True, False]),

    # ----- Dynamic-Sparse Related: FC layers -----
    epsilon=184.61538 / 3,  # ~61.54, i.e. 10% density in the 1600-1000 linear layer
    sparse_linear_only=True,
    start_sparse=1,
    end_sparse=-1,  # skip the last layer
    weight_prune_perc=0.15,
    hebbian_prune_perc=0.60,
    pruning_es=True,
    pruning_es_patience=0,
    pruning_es_window_size=5,
    pruning_es_threshold=0.02,
    pruning_interval=1,

    # ----- Dynamic-Sparse Related: conv layers -----
    prune_methods="dynamic",
    hebbian_prune_frac=0.99,
    magnitude_prune_frac=0.0,
    sparsity=0.98,
    update_nsteps=50,
    prune_dims=tuple(),

    # ----- Additional Validation -----
    test_noise=False,
    noise_level=0.1,

    # ----- Debugging -----
    debug_weights=True,
    debug_sparse=True,
)
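
As a sanity check on the epsilon comment: SET-style dynamic-sparse schemes typically set a layer's density to epsilon * (n_in + n_out) / (n_in * n_out). This is a minimal sketch under that assumption; the framework's exact formula may differ.

In [ ]:
# Assumed SET-style density formula (not verified against the framework):
# density = epsilon * (n_in + n_out) / (n_in * n_out)
epsilon = 184.61538 / 3
n_in, n_out = 1600, 1000
print(epsilon)                                    # ~61.54
print(epsilon * (n_in + n_out) / (n_in * n_out))  # ~0.1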

In [3]:
network = networks.gsc_conv_heb(config=config)

In [4]:
summary(network, input_size=(1, 32, 32))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
          DSConv2d-1           [-1, 64, 28, 28]           1,664
         _NullConv-2           [-1, 25, 28, 28]             625
       BatchNorm2d-3           [-1, 64, 28, 28]               0
         MaxPool2d-4           [-1, 64, 14, 14]               0
        KWinners2d-5           [-1, 64, 14, 14]               0
          DSConv2d-6           [-1, 64, 10, 10]         102,464
         _NullConv-7         [-1, 1600, 10, 10]       2,560,000
       BatchNorm2d-8           [-1, 64, 10, 10]               0
         MaxPool2d-9             [-1, 64, 5, 5]               0
       KWinners2d-10             [-1, 64, 5, 5]               0
          Flatten-11                 [-1, 1600]               0
           Linear-12                 [-1, 1000]       1,601,000
      BatchNorm1d-13                 [-1, 1000]               0
         KWinners-14                 [-1, 1000]               0
           Linear-15                   [-1, 12]          12,012
================================================================
Total params: 4,277,765
Trainable params: 1,717,140
Non-trainable params: 2,560,625
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 2.48
Params size (MB): 16.32
Estimated Total Size (MB): 18.81
----------------------------------------------------------------
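
The non-trainable count above (2,560,625 = 625 + 2,560,000) is exactly the two _NullConv bookkeeping convolutions inside the DSConv2d layers. A quick check, assuming torchsummary's split mirrors each parameter's requires_grad flag:

In [ ]:
# Trainable vs. frozen parameters; expect 1,717,140 and 2,560,625.
trainable = sum(p.numel() for p in network.parameters() if p.requires_grad)
frozen = sum(p.numel() for p in network.parameters() if not p.requires_grad)
print(trainable, frozen)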

In [5]:
network(torch.rand(10, 1, 32, 32));
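
The trailing semicolon suppresses the cell's output. The forward pass itself returns one logit per GSC class (12, matching the final Linear layer in the summary):

In [ ]:
out = network(torch.rand(10, 1, 32, 32))
print(out.shape)  # torch.Size([10, 12])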

In [6]:
network.features


Out[6]:
Sequential(
  (0): DSConv2d(
    1, 64, kernel_size=(5, 5), stride=(1, 1)
    (grouped_conv): _NullConv(25, 25, kernel_size=(5, 5), stride=(1, 1), groups=25, bias=False)
  )
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
  (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): KWinners2d(channels=64, n=12544, percent_on=0.095, boost_strength=1.5, boost_strength_factor=0.9, k_inference_factor=1.5, duty_cycle_period=1000)
  (4): DSConv2d(
    64, 64, kernel_size=(5, 5), stride=(1, 1)
    (grouped_conv): _NullConv(102400, 1600, kernel_size=(5, 5), stride=(1, 1), groups=1600, bias=False)
  )
  (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
  (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (7): KWinners2d(channels=64, n=1600, percent_on=0.125, boost_strength=1.5, boost_strength_factor=0.9, k_inference_factor=1.5, duty_cycle_period=1000)
)
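
The n reported by each KWinners2d is just the flattened activation size at that point (channels * height * width), which lines up with the shapes in the summary:

In [ ]:
print(64 * 14 * 14)  # 12544, after the first 2x2 max-pool (28 -> 14)
print(64 * 5 * 5)    # 1600, after the second 2x2 max-pool (10 -> 5)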

In [7]:
network.classifier


Out[7]:
Sequential(
  (0): Flatten()
  (1): Linear(in_features=1600, out_features=1000, bias=True)
  (2): BatchNorm1d(1000, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
  (3): KWinners(n=1000, percent_on=0.1, boost_strength=1.5, boost_strength_factor=0.9, k_inference_factor=1.5, duty_cycle_period=1000)
  (4): Linear(in_features=1000, out_features=12, bias=True)
)
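
With percent_on=0.1, the classifier's KWinners layer should leave about 100 of its 1000 units nonzero per sample in training mode (k_inference_factor=1.5 raises that toward 150 in eval mode). A minimal sketch, assuming KWinners zeroes every non-winning activation:

In [ ]:
kw = network.classifier[3]
out = kw(torch.randn(2, 1000))  # module is in training mode here
print((out != 0).sum(dim=1))    # expect ~100 nonzero units per row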

In [17]:
network.named_modules


Out[17]:
<bound method Module.named_modules of GSCHeb(
  (features): Sequential(
    (0): DSConv2d(
      1, 64, kernel_size=(5, 5), stride=(1, 1)
      (grouped_conv): _NullConv(25, 25, kernel_size=(5, 5), stride=(1, 1), groups=25, bias=False)
    )
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): KWinners2d(channels=64, n=12544, percent_on=0.095, boost_strength=1.5, boost_strength_factor=0.9, k_inference_factor=1.5, duty_cycle_period=1000)
    (4): DSConv2d(
      64, 64, kernel_size=(5, 5), stride=(1, 1)
      (grouped_conv): _NullConv(102400, 1600, kernel_size=(5, 5), stride=(1, 1), groups=1600, bias=False)
    )
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): KWinners2d(channels=64, n=1600, percent_on=0.125, boost_strength=1.5, boost_strength_factor=0.9, k_inference_factor=1.5, duty_cycle_period=1000)
  )
  (classifier): Sequential(
    (0): Flatten()
    (1): Linear(in_features=1600, out_features=1000, bias=True)
    (2): BatchNorm1d(1000, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
    (3): KWinners(n=1000, percent_on=0.1, boost_strength=1.5, boost_strength_factor=0.9, k_inference_factor=1.5, duty_cycle_period=1000)
    (4): Linear(in_features=1000, out_features=12, bias=True)
  )
)>

In [18]:
for name, module in network.named_modules():
    # select only the dynamic sparse conv layers
    if isinstance(module, DSConv2d):
        print(name)
        print(module.__class__)


features.0
<class 'networks_module.layers.DSConv2d'>
features.4
<class 'networks_module.layers.DSConv2d'>
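
The same loop can also report how sparse each dynamic conv layer currently is. A minimal sketch, assuming DSConv2d exposes its weights through the standard nn.Conv2d weight attribute:

In [ ]:
# Fraction of nonzero weights per dynamic sparse conv layer.
for name, module in network.named_modules():
    if isinstance(module, DSConv2d):
        w = module.weight.data
        density = (w != 0).float().mean().item()
        print(f"{name}: {w.numel()} weights, density={density:.3f}")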

In [ ]: