In [3]:
-- Build a 5-element tensor, scale it in place by 3 (mul returns the same
-- tensor, so the result can be bound directly), then display it.
a = torch.Tensor({1, 2, 3, 4, 5}):mul(3)
print(a)


Out[3]:
  3
  6
  9
 12
 15
[torch.DoubleTensor of dimension 5]


In [1]:
require 'torch'
require 'paths'

In [2]:
-- Namespace table for the MNIST helpers, together with the locations of the
-- remote archive and of the files it unpacks to.
mnist = {
   path_remote = 'https://s3.amazonaws.com/torch7/data/mnist.t7.tgz',
   path_dataset = 'mnist.t7',
}

-- Both splits live inside the dataset directory.
mnist.path_trainset = paths.concat(mnist.path_dataset, 'train_32x32.t7')
mnist.path_testset = paths.concat(mnist.path_dataset, 'test_32x32.t7')

In [3]:
--- Fetch and unpack the MNIST archive unless both split files already exist.
-- Runs `wget` and `tar` through the shell in the current working directory
-- and removes the downloaded archive afterwards.
-- Raises an error when the download or the extraction fails, instead of
-- silently continuing and failing later when the files are loaded.
function mnist.download()
   if paths.filep(mnist.path_trainset) and paths.filep(mnist.path_testset) then
      return
   end

   local remote = mnist.path_remote
   local tar = paths.basename(remote)

   -- '&&' ensures extraction only runs after a successful download.
   local status = os.execute('wget ' .. remote .. ' && tar xvf ' .. tar)

   -- Always clean up the archive (a partial download would otherwise make
   -- the next wget save under a different name). A missing file is fine.
   os.remove(tar)

   -- os.execute returns an exit code (0 on success) in Lua 5.1 / LuaJIT and
   -- `true` on success in Lua 5.2+; accept both conventions.
   if status ~= 0 and status ~= true then
      error('<mnist> failed to download or extract ' .. remote)
   end
end

In [4]:
--- Load the training split.
-- Forwards its arguments, together with the training-set path, to
-- mnist.loadDataset and returns whatever that call returns.
function mnist.loadTrainSet(maxLoad, geometry)
   local trainFile = mnist.path_trainset
   return mnist.loadDataset(trainFile, maxLoad, geometry)
end

In [5]:
--- Load the test split.
-- Forwards its arguments, together with the test-set path, to
-- mnist.loadDataset and returns whatever that call returns.
function mnist.loadTestSet(maxLoad, geometry)
   local testFile = mnist.path_testset
   return mnist.loadDataset(testFile, maxLoad, geometry)
end

In [6]:
--- Load one MNIST split from a Torch ASCII file and wrap it as a dataset.
--
-- Calls mnist.download() first, so the archive is fetched when the split
-- files are missing.
--
-- @param fileName  path of the serialized split; the loaded object must
--                  provide `data` (indexed here as a 4-D tensor) and `labels`
-- @param maxLoad   optional; when positive and smaller than the number of
--                  stored examples, only the first `maxLoad` are kept
-- @return a table with fields `data` and `labels`, methods `normalize`,
--         `normalizeGlobal` and `size`, and numeric indexing:
--         `dataset[i]` yields `{input, label}` where `label` is a
--         10-element one-hot vector
function mnist.loadDataset(fileName, maxLoad)
   mnist.download()

   local f = torch.load(fileName, 'ascii')
   local data = f.data:type(torch.getdefaulttensortype())
   local labels = f.labels

   local nExample = f.data:size(1)
   if maxLoad and maxLoad > 0 and maxLoad < nExample then
      nExample = maxLoad
      print('<mnist> loading only ' .. nExample .. ' examples')
   end
   data = data[{{1,nExample},{},{},{}}]
   labels = labels[{{1,nExample}}]
   print('<mnist> done')

   local dataset = {}
   dataset.data = data
   dataset.labels = labels

   --- Normalize every feature (pixel position) independently, in place.
   -- @param mean_  optional 1 x nFeatures tensor of per-feature means;
   --               computed from the data when omitted
   -- @param std_   optional 1 x nFeatures tensor of per-feature standard
   --               deviations; computed from the data when omitted
   -- @return the mean and std tensors that were applied
   function dataset:normalize(mean_, std_)
      -- View the examples as rows of a 2-D matrix; the view shares storage
      -- with `data`, so in-place column updates modify the dataset itself.
      local flat = data:view(data:size(1), -1)
      local mean = mean_ or flat:mean(1)
      local std = std_ or flat:std(1, true)
      for j = 1, flat:size(2) do
         local column = flat:select(2, j)
         column:add(-mean[1][j])
         -- Constant features have zero deviation; leave them centered only.
         if std[1][j] > 0 then
            column:mul(1/std[1][j])
         end
      end
      return mean, std
   end

   --- Normalize the whole tensor in place with a single mean and std.
   -- @param mean_  optional scalar mean; computed from the data when omitted
   -- @param std_   optional scalar std; computed from the data when omitted
   -- @return the mean and std that were applied
   function dataset:normalizeGlobal(mean_, std_)
      local std = std_ or data:std()
      local mean = mean_ or data:mean()
      data:add(-mean)
      data:mul(1/std)
      return mean, std
   end

   --- Number of examples kept in this dataset.
   function dataset:size()
      return nExample
   end

   -- Numeric indexing returns one example. Fields stored directly on the
   -- table (data, labels, the methods above) never reach this handler.
   setmetatable(dataset, {__index = function(self, index)
      -- Any other missing key behaves like a plain absent field.
      if type(index) ~= 'number' then
         return nil
      end
      local input = self.data[index]
      local class = self.labels[index]
      -- Fresh vector per access so previously returned labels are not
      -- overwritten by later lookups.
      -- NOTE(review): assumes class labels lie in 1..10 — confirm.
      local label = torch.zeros(10)
      label[class] = 1
      return {input, label}
   end})

   return dataset
end

In [7]:
-- How many examples to load from each split, and the image geometry that is
-- handed to the loaders.
nbTrainingPatches, nbTestingPatches = 60000, 10000
geometry = { 32, 32 }

In [8]:
-- create training set and normalize
-- The right-hand `mean`/`std` are whatever globals already exist (nil when
-- undefined, in which case the statistics are computed from the training
-- data). The values actually applied are captured so the test set can reuse
-- them.
trainData = mnist.loadTrainSet(nbTrainingPatches, geometry)
local mean, std = trainData:normalizeGlobal(mean, std)

-- create test set and normalize it with the same statistics as the training
-- set, so both splits end up on the same scale
testData = mnist.loadTestSet(nbTestingPatches, geometry)
testData:normalizeGlobal(mean, std)


Out[8]:
<mnist> done	
Out[8]:
<mnist> done	

In [18]:
-- Show the dimensions of the loaded test tensor.
local testDims = testData.data:size()
print(testDims)


Out[18]:
 10000
     1
    32
    32
[torch.LongStorage of size 4]


In [19]:
-- Show the dimensions of the loaded training tensor.
local trainDims = trainData.data:size()
print(trainDims)


Out[19]:
 60000
     1
    32
    32
[torch.LongStorage of size 4]