This notebook demonstrates that the Intrinsic Dimension idea (Li et al., 2018, "Measuring the Intrinsic Dimension of Objective Landscapes") works for a simple MNIST CNN model.
Of possible interest: this PyTorch version is 'generic', in the sense that it can take
an arbitrary parameterised model and investigate its Intrinsic Dimension, without
writing a specialised Module_IntrinsicDimension class for each module type.
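The core reparameterisation from the paper: keep the random initialisation fixed and train only a small d-dimensional vector, mapped back into the full D-dimensional parameter space through a frozen random matrix:

$$\theta^{(D)} = \theta^{(D)}_0 + P\,\theta^{(d)}$$

Here $P \in \mathbb{R}^{D \times d}$ is random and fixed, and only $\theta^{(d)}$ receives gradients. In the wrapper class below, initial_value holds $\theta^{(D)}_0$, random_matrix holds the rows of $P$, and the parameter V is $\theta^{(d)}$.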
In [ ]:
# Install the required packages (not provided by Google Colab by default)
!pip3 install torch
!pip3 install torchvision
In [ ]:
import torch
import torch.nn.functional as F
from torchvision import datasets, transforms
In [ ]:
n_epochs = 5
batch_size, batch_log_interval, lr = 32, 600, 0.01
seed = 10
try_cuda = True
In [ ]:
use_cuda = try_cuda and torch.cuda.is_available()
torch.manual_seed(seed)
device = torch.device("cuda" if use_cuda else "cpu")
In [ ]:
loader_kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
mnist_transform = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])
train_loader = torch.utils.data.DataLoader(
datasets.MNIST('./data', train=True, download=True, transform=mnist_transform),
batch_size=batch_size, shuffle=True, **loader_kwargs)
test_loader = torch.utils.data.DataLoader(
datasets.MNIST('./data', train=False, transform=mnist_transform),
batch_size=1000, shuffle=False, **loader_kwargs)
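A quick sanity check on one batch (a minimal probe, assuming the loaders above):

In [ ]:
# One training batch: expect images of size [32, 1, 28, 28] and labels of size [32]
images, labels = next(iter(train_loader))
print(images.size(), labels.size())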
In [ ]:
class RegularCNNModel(torch.nn.Module):
def __init__(self):
super(RegularCNNModel, self).__init__()
self.conv1 = torch.nn.Conv2d(1, 16, kernel_size=3)
self.conv2 = torch.nn.Conv2d(16, 32, kernel_size=3)
#self.conv2_drop = torch.nn.Dropout2d()
self.fc1 = torch.nn.Linear(32*5*5, 50)
self.fc2 = torch.nn.Linear(50, 10)
    def forward(self, x):
        x = self.conv1(x)        # (1, 28, 28) -> (16, 26, 26)
        x = F.max_pool2d(x, 2)   # -> (16, 13, 13)
        x = F.relu(x)
        x = self.conv2(x)        # -> (32, 11, 11)
        x = F.max_pool2d(x, 2)   # -> (32, 5, 5)
        x = F.relu(x)
        x = x.view(-1, 32*5*5)   # flatten for the fully-connected layers
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
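To double-check the 32*5*5 flattened size (28 -> 26 -> 13 after conv1 and pooling, then 13 -> 11 -> 5 after conv2 and pooling), here is a minimal shape probe, assuming the class above:

In [ ]:
# A zero image should come out as a batch of 10 log-probabilities
probe = RegularCNNModel()
with torch.no_grad():
    print(probe(torch.zeros(1, 1, 28, 28)).size())  # expect torch.Size([1, 10])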
In [ ]:
def parameter_count(model):
    param_tot = 0
    for name, param in model.named_parameters():
        if param.requires_grad:
            param_size = 1
            for d in list(param.data.size()):
                param_size *= d
            param_tot += param_size
return param_tot
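The same count can be cross-checked with the built-in Tensor.numel():

In [ ]:
# One-line equivalent of parameter_count()
sum(p.numel() for p in RegularCNNModel().parameters() if p.requires_grad)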
In [ ]:
def train(model, optimizer, epoch_num):
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = F.nll_loss(output, target)
loss.backward()
optimizer.step()
if batch_idx % batch_log_interval == 0:
print('Train Epoch: {} [{: 6d}/{: 6d} ({:2.0f}%)]\tLoss: {:.4f}'.format(
epoch_num, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item()))
def test(model):
model.eval()
test_loss, correct = 0., 0
with torch.no_grad():
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss ('size_average' is deprecated)
pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
pct_correct = 100. * correct / len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.1f}%)\n'.format(
test_loss, correct, len(test_loader.dataset), pct_correct))
return pct_correct
In [ ]:
def use_model(model): # , **kwargs
_model = model.to(device)
#optimizer = optim.SGD(_model.parameters(), lr=args.lr, momentum=momentum)
optimizer = torch.optim.Adam(_model.parameters(), lr=lr)
return _model, optimizer
def get_stats_for(model, n_epochs=n_epochs):
print("Total model parameters : %d" % (parameter_count(model),) )
_model, optimizer = use_model(model)
for epoch in range(1, n_epochs + 1):
train(_model, optimizer, epoch)
pct_correct = test(_model)
return pct_correct
get_stats_for(RegularCNNModel())
In [ ]:
class IntrinsicDimensionWrapper(torch.nn.Module):
def __init__(self, module, intrinsic_dimension):
super(IntrinsicDimensionWrapper, self).__init__()
self.m = [module] # Hide this from inspection by get_parameters()
self.name_base_localname = []
self.initial_value = dict()
self.random_matrix = dict()
V = torch.nn.Parameter( torch.zeros( (intrinsic_dimension,1) ).to(device) )
self.register_parameter('V', V)
v_size = (intrinsic_dimension, )
        for name, param in module.named_parameters():
            if param.requires_grad:
                print(name, param.data.size(), v_size)
                # Save the current param.data (the initial values are the 'origin' of the search)
                self.initial_value[name] = v0 = param.data.clone().detach().to(device)
                # Fixed random projection for this parameter: one d-vector per native weight,
                # scaled by 1/sqrt(d) so each projection vector has roughly unit norm
                matrix_size = v0.size() + v_size
                self.random_matrix[name] = (
                    torch.randn(matrix_size, requires_grad=False).to(device)
                    / intrinsic_dimension**0.5
                )
                # Walk down dotted names (e.g. 'fc1.weight') to find the owning submodule
                base, localname = module, name
                while '.' in localname:
                    prefix, localname = localname.split('.', 1)
                    base = getattr(base, prefix)
                self.name_base_localname.append( (name, base, localname) )
        # Remove the original Parameters, so that only V is visible to optimizers;
        # forward() re-attaches the weights as plain computed tensors on every call
        for name, base, localname in self.name_base_localname:
            delattr(base, localname)
    def forward(self, x):
        # Reconstruct every native parameter as theta_0 + P.V, then run the wrapped module
        for name, base, localname in self.name_base_localname:
            ray = torch.matmul(self.random_matrix[name], self.V)
            param = self.initial_value[name] + torch.squeeze(ray, -1)
            setattr(base, localname, param)
        module = self.m[0]
        x = module(x)
        return x
model_single = IntrinsicDimensionWrapper( torch.nn.Linear(3, 4), 10 )
[ (p.view(-1), p.requires_grad) for p in model_single.parameters() ]  # only the single Parameter V should appear
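Since the wrapped module's weights were removed as Parameters, only V should be trainable. A quick check, reusing parameter_count() from above:

In [ ]:
# Expect 10: the wrapper exposes only the 10-dimensional V to optimizers
parameter_count(model_single)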
In [ ]:
x = torch.tensor( [1., 5., -1.25] ).to(device)
model_single(x)
In [ ]:
model_single.V.data[3] += .005   # nudge one coordinate of V: the output should shift slightly
model_single(x)
In [ ]:
opt_test = torch.optim.Adam(model_single.parameters(), lr=lr)
model_single.train()
#data, target = data.to(device), target.to(device)
x_batch = torch.unsqueeze(x,0)
data, target = x_batch, torch.tensor( [1,] ).to(device)
opt_test.zero_grad()
output = model_single(data)
loss = F.nll_loss(output, target)  # toy loss (the Linear output is not log-softmaxed); it just drives a gradient through V
print(loss)
loss.backward()
opt_test.step()
model_single.V.requires_grad  # True: V is the only trainable Parameter
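After the backward pass, the gradient should live entirely on V. A minimal check, assuming the cell above has run:

In [ ]:
# V accumulated a gradient of its own shape; the wrapped Linear has no Parameters left
print(model_single.V.grad.size())  # expect torch.Size([10, 1])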
In [ ]:
## Now, let's build the CNN model with Intrinsic Dimension wrapping...
# 5000 is a deliberately generous first guess: for comparison, Li et al. (2018)
# report d_int90 of roughly 290 for a LeNet-style network on MNIST, so 5000
# should comfortably recover most of the baseline accuracy
intrinsic_dimension_guess = 5000
In [ ]:
model_base = RegularCNNModel()
#[name for name,param in model_base.named_parameters()]
model_wrapped = IntrinsicDimensionWrapper( model_base, intrinsic_dimension_guess )
[name for name,param in model_wrapped.named_parameters()]  # expect just ['V']
#[param for param in model_wrapped.parameters()]
In [ ]:
get_stats_for( model_wrapped )
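To actually estimate the intrinsic dimension (the paper's d_int90: the smallest d that reaches 90% of the baseline accuracy), one would sweep over candidate dimensions. A sketch under stated assumptions: the candidate list is illustrative, and baseline_pct should be replaced with the accuracy printed for RegularCNNModel above.

In [ ]:
# Hypothetical sweep to bracket d_int90; the candidate values are illustrative only
baseline_pct = 98.0  # substitute the RegularCNNModel accuracy observed above
target = 0.9 * baseline_pct
for d in [200, 500, 1000, 2000]:
    pct = get_stats_for( IntrinsicDimensionWrapper(RegularCNNModel(), d) )
    print('d=%5d -> %.1f%% (target %.1f%%)' % (d, pct, target))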