
Commit 20f4bcc

Normalize supports arbitrary dimensions
Assert that dim is integer in Normalize
fmassa committed Apr 13, 2016
1 parent 9cffea5 commit 20f4bcc
Showing 3 changed files with 59 additions and 39 deletions.
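
In short: `nn.Normalize` previously accepted only 1D input or 2D batches and always normalized the last dimension; after this commit it takes a `dim` argument (default `-1`, the last dimension) and works on tensors of any rank. A minimal usage sketch of the new signature (the shapes are illustrative, not taken from the commit):

```lua
require 'nn'

-- normalize along an explicit dimension: here dim 2 of a 4x10 batch,
-- so each of the 4 rows ends up with unit L2 norm
local batch = torch.randn(4, 10)
local out = nn.Normalize(2, 2):forward(batch)

-- any rank works now, e.g. per-pixel channel normalization of an
-- NxCxHxW image batch by normalizing over dimension 2 (the channels)
local images = torch.randn(4, 3, 8, 8)
local perPixel = nn.Normalize(2, 2):forward(images)
```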
Normalize.lua (33 additions & 36 deletions)
@@ -1,34 +1,35 @@
 local Normalize, parent = torch.class('nn.Normalize', 'nn.Module')

-function Normalize:__init(p,eps)
+function Normalize:__init(p, dim, eps)
   parent.__init(self)
   assert(p,'p-norm not provided')
   assert(p > 0, p..'-norm not supported')
   self.p = p
+  self.dim = dim or -1
   self.eps = eps or 1e-10
 end

 function Normalize:updateOutput(input)
-  assert(input:dim() <= 2, 'only 1d layer supported')
-  local input_size = input:size()
-  if input:dim() == 1 then
-    input = input:view(1,-1)
+  self.dim = self.dim or -1
+  assert(math.abs(self.dim) <= input:dim(),
+    'input has less dimensions than the normalization dimension')
+  assert(self.dim % 1 == 0, 'dimension should be an integer')
+  local dim = self.dim
+  if dim < 0 then
+    dim = input:dim() + dim + 1
   end

-  self._output = self._output or input.new()
   self.norm = self.norm or input.new()
   self.buffer = self.buffer or input.new()

-  self._output:resizeAs(input)
-
   if self.p == math.huge then
     -- specialization for the infinity norm
     self._indices = self._indices or
       (torch.type(self.output) == 'torch.CudaTensor' and
        torch.CudaTensor() or torch.LongTensor())

     self.buffer:abs(input)
-    torch.max(self.norm, self._indices, self.buffer, 2)
+    torch.max(self.norm, self._indices, self.buffer, dim)
     self.norm:add(self.eps)
   else
     self.normp = self.normp or input.new()
@@ -37,41 +38,37 @@ function Normalize:updateOutput(input)
     else
       self.buffer:pow(input,self.p)
     end
-    self.normp:sum(self.buffer,2):add(self.eps)
+    self.normp:sum(self.buffer, dim):add(self.eps)
     self.norm:pow(self.normp,1/self.p)
   end
-  self._output:cdiv(input, self.norm:view(-1,1):expandAs(input))
+  self.output:cdiv(input, self.norm:expandAs(input))

-  self.output:view(self._output, input_size)
   return self.output
 end

 function Normalize:updateGradInput(input, gradOutput)
-  assert(input:dim() <= 2, 'only 1d layer supported')
-  assert(gradOutput:dim() <= 2, 'only 1d layer supported')
-
-  local input_size = input:size()
-  if input:dim() == 1 then
-    input = input:view(1,-1)
+  self.dim = self.dim or -1
+  assert(math.abs(self.dim) <= input:dim(),
+    'input has less dimensions than the normalization dimension')
+  assert(self.dim % 1 == 0, 'dimension should be an integer')
+  local dim = self.dim
+  if dim < 0 then
+    dim = input:dim() + dim + 1
   end

-  local n = input:size(1) -- batch size
-  local d = input:size(2) -- dimensionality of vectors
-
-  self._gradInput = self._gradInput or input.new()
   self.cross = self.cross or input.new()
   -- compute diagonal term with gradOutput
-  self._gradInput:resize(n,d)
+  self.gradInput:resizeAs(input)
   if self.p == math.huge then
     -- specialization for the inf case
-    self._gradInput:cmul(self.norm:view(n,1,1):expand(n,d,1),gradOutput)
+    self.gradInput:cmul(self.norm:expandAs(gradOutput),gradOutput)
     self.buffer:resizeAs(input):zero()
-    self.cross:resize(n,1)
-    self.cross:gather(input,2,self._indices)
+    self.cross:resizeAs(self.norm)
+    self.cross:gather(input,dim,self._indices)
     self.cross:cdiv(self.norm)
-    self.buffer:scatter(2,self._indices,self.cross)
+    self.buffer:scatter(dim,self._indices,self.cross)
   else
-    self._gradInput:cmul(self.normp:view(n,1):expand(n,d), gradOutput)
+    self.gradInput:cmul(self.normp:expandAs(gradOutput), gradOutput)
     -- small optimizations for different p
     -- buffer = input*|input|^(p-2)
     if self.p % 2 ~= 0 then
@@ -91,39 +88,39 @@ function Normalize:updateGradInput(input, gradOutput)
     end
   end
   -- compute cross term in two steps
-  self.cross:resize(n,1)
+  self.cross:resizeAs(self.norm)

   -- instead of having a huge temporary matrix (b1*b2),
   -- do the computations as b1*(b2*gradOutput). This avoids redundant
   -- computation and also a huge buffer of size n*d^2
   self.buffer2 = self.buffer2 or input.new() -- nxd
   self.buffer2:cmul(input, gradOutput)
-  self.cross:sum(self.buffer2, 2)
+  self.cross:sum(self.buffer2, dim)

   self.buffer:cmul(self.cross:expandAs(self.buffer))
-  self._gradInput:add(-1, self.buffer)
+  self.gradInput:add(-1, self.buffer)

   -- reuse cross buffer for normalization
   if self.p == math.huge then
     self.cross:cmul(self.norm,self.norm)
   else
     self.cross:cmul(self.normp,self.norm)
   end
-  self._gradInput:cdiv(self.cross:expand(n,d))
+  self.gradInput:cdiv(self.cross:expandAs(gradOutput))

-  self.gradInput:view(self._gradInput, input_size)
   return self.gradInput
 end

 function Normalize:__tostring__()
   local s
   -- different prints if the norm is integer
   if self.p % 1 == 0 then
-    s = '%s(%d)'
+    s = '%s(%d,%d)'
   else
-    s = '%s(%f)'
+    s = '%s(%f,%d)'
   end
-  return string.format(s,torch.type(self),self.p)
+  local dim = self.dim or -1
+  return string.format(s,torch.type(self),self.p, dim)
 end

 function Normalize:type(type, tensorCache)
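
The negative-`dim` convention mirrors Python-style negative indexing: `-1` is the last dimension, `-2` the second-to-last, and so on, resolved by the `dim = input:dim() + dim + 1` line above. A standalone sketch of just that arithmetic (the helper name is ours, not the module's):

```lua
-- resolve a possibly-negative dimension against a tensor rank,
-- exactly as the updated updateOutput/updateGradInput do
local function resolveDim(ndim, dim)
  if dim < 0 then
    return ndim + dim + 1
  end
  return dim
end

assert(resolveDim(4, -1) == 4) -- last dimension of a 4D tensor
assert(resolveDim(4, -3) == 2) -- e.g. the channel dim of NxCxHxW
assert(resolveDim(2, -1) == 2) -- last dim of a batch of vectors
```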
doc/simple.md (11 additions & 3 deletions)
@@ -1149,11 +1149,11 @@ print(B) -- output
 ## Normalize ##

 ```lua
-module = nn.Normalize(p, [eps])
+module = nn.Normalize(p, [dim], [eps])
 ```
-Normalizes the input Tensor to have unit `L_p` norm. The smoothing parameter `eps` prevents division by zero when the input contains all zero elements (default = `1e-10`).
+Normalizes the input Tensor to have unit `L_p` norm over dimension `dim` (default `-1`, i.e., the last dimension). The smoothing parameter `eps` prevents division by zero when the input contains all zero elements (default = `1e-10`).

-Input can be 1D or 2D (in which case it's considered as in batch mode)
+The `dim` parameter can take both positive and negative values (negative values are counted from the end). Negative dimensions are especially useful if one wants to be invariant to batch mode.

 ```lua
 A = torch.randn(3, 5)
@@ -1163,6 +1163,14 @@ B = m:forward(A) -- B is also 3 x 5
 print(torch.norm(B, 2, 2)) -- norms is [1, 1, 1]
 ```

+Here is an example of normalizing the feature maps of an image:
+```lua
+I = torch.randn(2, 3, 2, 2)
+m = nn.Normalize(1, -3) -- the third dimension from the end (here, the channel dimension)
+B = m:forward(I)
+print(torch.norm(B, 1, 2)) -- all ones
+```
+
 `Normalize` has a specialized implementation for the `inf` norm, which corresponds to the maximum norm.
 ```lua
 A = torch.randn(3,5)
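
To make the batch-invariance remark in the documentation concrete, here is a sketch (ours, not part of the commit) showing that a negative `dim` selects the same semantic axis with and without a leading batch dimension:

```lua
require 'nn'

local m = nn.Normalize(2, -1) -- always the last dimension

local v = m:forward(torch.randn(5))    -- single vector: unit L2 norm
local B = m:forward(torch.randn(3, 5)) -- batch: each row has unit L2 norm
print(torch.norm(B, 2, 2))             -- prints [1, 1, 1]
```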
test.lua (15 additions & 0 deletions)
@@ -624,6 +624,21 @@ function nntest.Normalize()
     mytester:assertlt(err, precision, 'error norm '..p..' on state ')
   end

+  -- test on different dimensions
+  for _,p in pairs({1,2,3,4,torch.uniform()*math.random(1,10),math.huge}) do
+    local ini = math.random(3,5)
+    local inj = math.random(3,5)
+    local ink = math.random(3,5)
+    local inl = math.random(3,5)
+    local dim = math.random(1,4)
+    local input = torch.Tensor(inl, ink, inj, ini):zero()
+
+    local module = nn.Normalize(p, dim)
+
+    local err = jac.testJacobian(module, input, -2, 2)
+    mytester:assertlt(err, precision, 'error norm '..p..' on state ')
+  end
+
   -- test IO correctness
   local ini = math.random(3,5)
   local inj = math.random(3,5)
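
Beyond the Jacobian test, a quick hand check (ours, not from the commit) that the forward pass actually produces unit norms along the requested dimension:

```lua
require 'nn'

local p, dim = 2, 3
local input = torch.randn(4, 3, 5, 6)
local output = nn.Normalize(p, dim):forward(input)

-- norms along `dim` should all be (approximately) 1;
-- the tiny deviation comes from the eps smoothing term
local norms = torch.norm(output, p, dim)
assert((norms - 1):abs():max() < 1e-6)
```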
