In [1]:
import os

import ray
from ray import tune

import torch  # TODO: remove later; only needed here for the CUDA device check below
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

import models
import networks
from datasets import PreprocessedSpeechDataLoader, VaryingDataLoader
from nupic.research.frameworks.pytorch.image_transforms import RandomNoise

from torchsummary import summary

In [2]:
config = dict(
    device=("cuda" if torch.cuda.device_count() > 0 else "cpu"),
    dataset_name="PreprocessedGSC",
    data_dir="~/nta/datasets/gsc",
    batch_size_train=(4, 16),
    batch_size_test=1000,

    # ----- Network Related ------
    # SE
    # model=tune.grid_search(["BaseModel", "SparseModel", "DSNNMixedHeb", "DSNNConvHeb"]),
    model="DSNNConvHeb",
    network="gsc_conv_heb",

    # ----- Optimizer Related ----
    optim_alg="SGD",
    momentum=0,
    learning_rate=0.01,
    weight_decay=0.01,
    lr_scheduler="StepLR",
    lr_gamma=0.90,
    use_kwinners=True,
    # use_kwinners=tune.grid_search([True, False]),

    # ----- Dynamic-Sparse Related - FC Layers -----
    epsilon=184.61538 / 3,  # gives ~0.1 density in the 1600->1000 linear layer
    sparse_linear_only=True,
    start_sparse=1,
    end_sparse=-1,  # leave the last layer dense
    weight_prune_perc=0.15,
    hebbian_prune_perc=0.60,
    pruning_es=True,
    pruning_es_patience=0,
    pruning_es_window_size=5,
    pruning_es_threshold=0.02,
    pruning_interval=1,

    # ----- Dynamic-Sparse Related - Conv Layers -----
    prune_methods=['dynamic', 'dynamic'],
    hebbian_prune_frac=[0.99, 0.99],
    magnitude_prune_frac=[0.0, 0.0],
    sparsity=[0.98, 0.98],
    update_nsteps=[50, 50],
    prune_dims=tuple(),

    # ----- Additional Validation -----
    test_noise=False,
    noise_level=0.1,

    # ----- Debugging -----
    debug_weights=True,
    debug_sparse=True,
)
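
A quick sanity check on the epsilon value above. This is a sketch assuming the Erdős–Rényi-style density formula density = epsilon * (n_in + n_out) / (n_in * n_out) that is common in the dynamic-sparse literature; the exact formula used by the models module is not shown here.

In [ ]:
# Assumed ER density formula; with n_in=1600, n_out=1000 this epsilon
# should give ~10% density in the 1600->1000 linear layer.
n_in, n_out = 1600, 1000
epsilon = 184.61538 / 3                           # ~61.54
print(epsilon * (n_in + n_out) / (n_in * n_out))  # ~0.1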

In [3]:
network = networks.gsc_conv_heb(config=config)
summary(network, input_size=(1, 32, 32))


['dynamic', 'dynamic']
defining the arg
defining the arg
defining the arg
defining the arg
defining the arg
defining the arg
defining the arg
defining the arg
[{'hebbian_prune_frac': 0.99, 'sparsity': 0.98, 'prune_dims': (), 'update_nsteps': 50}, {'hebbian_prune_frac': 0.99, 'sparsity': 0.98, 'prune_dims': (), 'update_nsteps': 50}]
[('classifier.0', Conv2d(1, 64, kernel_size=(5, 5), stride=(1, 1))), ('classifier.4', Conv2d(64, 64, kernel_size=(5, 5), stride=(1, 1)))]
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1           [-1, 64, 28, 28]           1,664
       BatchNorm2d-2           [-1, 64, 28, 28]               0
         MaxPool2d-3           [-1, 64, 14, 14]               0
        KWinners2d-4           [-1, 64, 14, 14]               0
            Conv2d-5           [-1, 64, 10, 10]         102,464
       BatchNorm2d-6           [-1, 64, 10, 10]               0
         MaxPool2d-7             [-1, 64, 5, 5]               0
        KWinners2d-8             [-1, 64, 5, 5]               0
           Flatten-9                 [-1, 1600]               0
           Linear-10                 [-1, 1000]       1,601,000
      BatchNorm1d-11                 [-1, 1000]               0
         KWinners-12                 [-1, 1000]               0
           Linear-13                   [-1, 12]          12,012
================================================================
Total params: 1,717,140
Trainable params: 1,717,140
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 1.11
Params size (MB): 6.55
Estimated Total Size (MB): 7.67
----------------------------------------------------------------
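
The parameter counts line up with a hand computation from the layer shapes above. The BatchNorm layers report zero parameters because they are constructed with affine=False, as the module printout further below confirms.

In [ ]:
# Conv: (kH*kW*C_in + 1) * C_out; Linear: in*out + out (biases included).
conv1 = (5 * 5 * 1 + 1) * 64        # 1,664
conv5 = (5 * 5 * 64 + 1) * 64       # 102,464
fc10 = 1600 * 1000 + 1000           # 1,601,000
fc13 = 1000 * 12 + 12               # 12,012
print(conv1 + conv5 + fc10 + fc13)  # 1,717,140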

In [7]:
# Despite the variable name, this collects *all* named modules, not just convs.
named_convs = list(network.named_modules())
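
To keep only the conv layers rather than every module, one could filter by type. A minimal sketch using a standard isinstance check (whether DSConv2d subclasses torch.nn.Conv2d is an assumption here, though the wrapper's printout below suggests it does):

In [ ]:
only_convs = [
    (name, layer)
    for name, layer in network.named_modules()
    if isinstance(layer, torch.nn.Conv2d)
]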

In [9]:
named_convs[0]


Out[9]:
('', GSCHeb(
   (classifier): Sequential(
     (0): Conv2d(1, 64, kernel_size=(5, 5), stride=(1, 1))
     (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
     (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
     (3): KWinners2d(channels=64, n=12544, percent_on=0.095, boost_strength=1.5, boost_strength_factor=0.9, k_inference_factor=1.5, duty_cycle_period=1000)
     (4): Conv2d(64, 64, kernel_size=(5, 5), stride=(1, 1))
     (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
     (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
     (7): KWinners2d(channels=64, n=1600, percent_on=0.125, boost_strength=1.5, boost_strength_factor=0.9, k_inference_factor=1.5, duty_cycle_period=1000)
     (8): Flatten()
     (9): Linear(in_features=1600, out_features=1000, bias=True)
     (10): BatchNorm1d(1000, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
     (11): KWinners(n=1000, percent_on=0.1, boost_strength=1.5, boost_strength_factor=0.9, k_inference_factor=1.5, duty_cycle_period=1000)
     (12): Linear(in_features=1000, out_features=12, bias=True)
   )
   (classifier.0): DSConv2d(
     1, 64, kernel_size=(5, 5), stride=(1, 1)
     (grouped_conv): _NullConv(25, 25, kernel_size=(5, 5), stride=(1, 1), groups=25, bias=False)
   )
   (classifier.4): DSConv2d(
     64, 64, kernel_size=(5, 5), stride=(1, 1)
     (grouped_conv): _NullConv(102400, 1600, kernel_size=(5, 5), stride=(1, 1), groups=1600, bias=False)
   )
 ))
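
A side note on the DSConv2d internals: the sizes of the auxiliary grouped _NullConv modules are consistent with one group per (input channel, kernel position) pair. This is an inference from the printed shapes only, not from the DSConv2d source:

In [ ]:
# Inferred pattern: groups = C_in * kH * kW,
# in_channels = C_in * groups, out_channels = groups.
for c_in, k in [(1, 5), (64, 5)]:
    groups = c_in * k * k
    print(c_in * groups, groups)  # -> 25 25, then 102400 1600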

In [10]:
named_convs[1]


Out[10]:
('classifier', Sequential(
   (0): Conv2d(1, 64, kernel_size=(5, 5), stride=(1, 1))
   (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
   (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
   (3): KWinners2d(channels=64, n=12544, percent_on=0.095, boost_strength=1.5, boost_strength_factor=0.9, k_inference_factor=1.5, duty_cycle_period=1000)
   (4): Conv2d(64, 64, kernel_size=(5, 5), stride=(1, 1))
   (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
   (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
   (7): KWinners2d(channels=64, n=1600, percent_on=0.125, boost_strength=1.5, boost_strength_factor=0.9, k_inference_factor=1.5, duty_cycle_period=1000)
   (8): Flatten()
   (9): Linear(in_features=1600, out_features=1000, bias=True)
   (10): BatchNorm1d(1000, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
   (11): KWinners(n=1000, percent_on=0.1, boost_strength=1.5, boost_strength_factor=0.9, k_inference_factor=1.5, duty_cycle_period=1000)
   (12): Linear(in_features=1000, out_features=12, bias=True)
 ))

In [13]:
named_convs[4]


Out[13]:
('classifier.2',
 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False))
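
Why index 4 lands on classifier.2: named_modules() yields the root module first (index 0, name ''), then the classifier Sequential (index 1), so the Sequential's children start at index 2. A quick way to print the full index-to-module mapping:

In [ ]:
for i, (name, layer) in enumerate(named_convs):
    print(i, name or "<root>", type(layer).__name__)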
