diff --git a/VolumetricDilatedMaxPooling.lua b/VolumetricDilatedMaxPooling.lua
new file mode 100644
index 000000000..050e2c917
--- /dev/null
+++ b/VolumetricDilatedMaxPooling.lua
@@ -0,0 +1,64 @@
+local THNN = require 'nn.THNN'
+local VolumetricDilatedMaxPooling, parent = torch.class('nn.VolumetricDilatedMaxPooling', 'nn.VolumetricMaxPooling')
+
+function VolumetricDilatedMaxPooling:__init(kT, kW, kH, dT, dW, dH, padT, padW, padH, dilationT, dilationW, dilationH)
+   -- Kernel size, stride and padding are forwarded unchanged to the parent constructor.
+   parent.__init(self, kT, kW, kH, dT, dW, dH, padT, padW, padH)
+
+   -- Dilation factors are optional; an omitted (nil) factor defaults to 1.
+   self.dilationT, self.dilationW, self.dilationH =
+      dilationT or 1, dilationW or 1, dilationH or 1
+end
+
+function VolumetricDilatedMaxPooling:updateOutput(input)
+   -- Remember the sizes of the last three input dimensions (time, height, width).
+   local nDim = input:dim()
+   self.itime, self.iheight, self.iwidth =
+      input:size(nDim - 2), input:size(nDim - 1), input:size(nDim)
+
+   -- The indices tensor is created lazily through input.new() on first use.
+   if not self.indices then self.indices = input.new() end
+   local output, indices = self.output, self.indices
+   input.THNN.VolumetricDilatedMaxPooling_updateOutput(
+      input:cdata(), output:cdata(), indices:cdata(),
+      self.kT, self.kW, self.kH,
+      self.dT, self.dW, self.dH,
+      self.padT, self.padW, self.padH,
+      self.dilationT, self.dilationW, self.dilationH,
+      self.ceil_mode
+   )
+   return output
+end
+
+function VolumetricDilatedMaxPooling:updateGradInput(input, gradOutput)
+   -- Backward pass; reads self.indices, which updateOutput hands to the forward kernel.
+   local gradInput, indices = self.gradInput, self.indices
+   input.THNN.VolumetricDilatedMaxPooling_updateGradInput(
+      input:cdata(), gradOutput:cdata(),
+      gradInput:cdata(), indices:cdata(),
+      self.dT, self.dW, self.dH,
+      self.padT, self.padW, self.padH,
+      self.dilationT, self.dilationW, self.dilationH
+   )
+   return gradInput
+end
+
+function VolumetricDilatedMaxPooling:clearState()
+   -- Reset the indices tensor (if one was ever created) with an argument-less :set(),
+   -- then let the parent class clear the rest of the state.
+   local indices = self.indices
+   if indices then indices:set() end
+   return parent.clearState(self)
+end
+
+function VolumetricDilatedMaxPooling:__tostring__()
+   -- Layout: Type(kTxkWxkH, dT,dW,dH[, padT,padW,padH], dilationT,dilationW,dilationH)
+   local parts = {string.format('%s(%dx%dx%d, %d,%d,%d', torch.type(self),
+      self.kT, self.kW, self.kH, self.dT, self.dW, self.dH)}
+   local anyPad = self.padT or self.padW or self.padH
+   if anyPad and (self.padT ~= 0 or self.padW ~= 0 or self.padH ~= 0) then
+      -- The padding triple is printed only when at least one pad is non-zero.
+      parts[#parts + 1] = ', ' .. self.padT .. ',' .. self.padW .. ',' .. self.padH
+   end
+   parts[#parts + 1] = ', ' .. self.dilationT .. ',' .. self.dilationW .. ',' .. self.dilationH
+   return table.concat(parts) .. ')'
+end
diff --git a/doc/convolution.md b/doc/convolution.md
index 96d92d9b3..b1a0d4c7c 100644
--- a/doc/convolution.md
+++ b/doc/convolution.md
@@ -37,6 +37,7 @@ a kernel for computing the weighted average in a neighborhood ;
* [VolumetricFullConvolution](#nn.VolumetricFullConvolution) : a 3D full convolution over an input video (a sequence of images) ;
* [VolumetricDilatedConvolution](#nn.VolumetricDilatedConvolution) : a 3D dilated convolution over an input image ;
* [VolumetricMaxPooling](#nn.VolumetricMaxPooling) : a 3D max-pooling operation over an input video.
+ * [VolumetricDilatedMaxPooling](#nn.VolumetricDilatedMaxPooling) : a 3D dilated max-pooling operation over an input video ;
* [VolumetricAveragePooling](#nn.VolumetricAveragePooling) : a 3D average-pooling operation over an input video.
* [VolumetricMaxUnpooling](#nn.VolumetricMaxUnpooling) : a 3D max-unpooling operation.
* [VolumetricReplicationPadding](#nn.VolumetricReplicationPadding) : Pads a volumetric feature map with the value at the edge of the input borders. ;
@@ -1022,6 +1023,30 @@ Applies 3D max-pooling operation in `kTxkWxkH` regions by step size
`dTxdWxdH` steps. The number of output features is equal to the number of
input planes / dT. The input can optionally be padded with zeros. Padding should be smaller than half of kernel size. That is, `padT < kT/2`, `padW < kW/2` and `padH < kH/2`.
+
+### VolumetricDilatedMaxPooling ###
+
+```lua
+module = nn.VolumetricDilatedMaxPooling(kT, kW, kH [, dT, dW, dH, padT, padW, padH, dilationT, dilationW, dilationH])
+```
+
+Also sometimes referred to as **atrous pooling**.
+Applies a 3D dilated max-pooling operation over `kTxkWxkH` regions with a step size of
+`dTxdWxdH`. The number of output features is equal to the number of
+input planes. If `dilationT`, `dilationW` and `dilationH` are not provided, they default to 1 and this is equivalent to performing normal `nn.VolumetricMaxPooling`.
+
+If the input image is a 4D tensor `nInputPlane x depth x height x width`, the output
+image size will be `nInputPlane x otime x oheight x owidth` where
+
+```lua
+otime = op((depth - (dilationT * (kT - 1) + 1) + 2*padT) / dT + 1)
+owidth = op((width - (dilationW * (kW - 1) + 1) + 2*padW) / dW + 1)
+oheight = op((height - (dilationH * (kH - 1) + 1) + 2*padH) / dH + 1)
+```
+
+`op` is a rounding operator. By default, it is `floor`. It can be changed
+by calling `:ceil()` or `:floor()` methods.
+
### VolumetricAveragePooling ###
diff --git a/init.lua b/init.lua
index 98edfc532..70027a18c 100644
--- a/init.lua
+++ b/init.lua
@@ -128,6 +128,7 @@ require('nn.VolumetricConvolution')
require('nn.VolumetricFullConvolution')
require('nn.VolumetricDilatedConvolution')
require('nn.VolumetricMaxPooling')
+require('nn.VolumetricDilatedMaxPooling')
require('nn.VolumetricMaxUnpooling')
require('nn.VolumetricAveragePooling')
require('nn.VolumetricBatchNormalization')
diff --git a/lib/THNN/generic/THNN.h b/lib/THNN/generic/THNN.h
index 319ffc798..d7ecb13e8 100644
--- a/lib/THNN/generic/THNN.h
+++ b/lib/THNN/generic/THNN.h
@@ -1122,6 +1122,26 @@ TH_API void THNN_(VolumetricMaxPooling_updateGradInput)(
int dT, int dW, int dH,
int pT, int pW, int pH);
+TH_API void THNN_(VolumetricDilatedMaxPooling_updateOutput)(
+ THNNState *state,
+ THTensor *input, // 4D tensor, or 5D in batch mode
+ THTensor *output, // [OUT] resized by the call; receives the pooled maxima
+ THTensor *indices, // [OUT] resized like output; records the argmax kernel offsets used by updateGradInput
+ int kT, int kW, int kH, // kernel size
+ int dT, int dW, int dH, // stride
+ int pT, int pW, int pH, // padding; each must not exceed half of the matching kernel size
+ int dilationT, int dilationW, int dilationH, // spacing between kernel taps; VolumetricMaxPooling passes 1
+ bool ceilMode); // round the output size up (ceil) instead of down (floor)
+TH_API void THNN_(VolumetricDilatedMaxPooling_updateGradInput)(
+ THNNState *state,
+ THTensor *input, // 4D tensor, or 5D in batch mode
+ THTensor *gradOutput, // gradient w.r.t. the module output
+ THTensor *gradInput, // [OUT] resized like input and zeroed, then accumulated into
+ THTensor *indices, // argmax kernel offsets written by updateOutput
+ int dT, int dW, int dH, // stride
+ int pT, int pW, int pH, // padding
+ int dilationT, int dilationW, int dilationH); // spacing between kernel taps
+
TH_API void THNN_(VolumetricMaxUnpooling_updateOutput)(
THNNState *state,
THTensor *input,
diff --git a/lib/THNN/generic/VolumetricDilatedMaxPooling.c b/lib/THNN/generic/VolumetricDilatedMaxPooling.c
new file mode 100644
index 000000000..0db41aef3
--- /dev/null
+++ b/lib/THNN/generic/VolumetricDilatedMaxPooling.c
@@ -0,0 +1,415 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/VolumetricDilatedMaxPooling.c"
+#else
+
+static void THNN_(VolumetricDilatedMaxPooling_updateOutput_frame)(
+  real *input_p,    /* contiguous input frame, read as [slice][t][h][w] */
+  real *output_p,   /* output frame, written as [slice][ti][i][j] */
+  real *indz_p,     /* same layout as output_p; bytes 0..2 of each element get the argmax kernel offsets (t, h, w) */
+  long nslices,
+  long itime,
+  long iwidth,
+  long iheight,
+  long otime,
+  long owidth,
+  long oheight,
+  int kT,
+  int kW,
+  int kH,
+  int dT,
+  int dW,
+  int dH,
+  int pT,
+  int pW,
+  int pH,
+  int dilationT,
+  int dilationW,
+  int dilationH)
+{
+  long k;
+#pragma omp parallel for private(k)
+  for (k = 0; k < nslices; k++)
+  {
+    /* loop over output */
+    long i, j, ti;
+    for (ti = 0; ti < otime; ti++)
+    {
+      for (i = 0; i < oheight; i++)
+      {
+        for (j = 0; j < owidth; j++)
+        {
+          /* window origin in input coordinates; negative while it is */
+          /* still inside the leading padding */
+          long start_t = ti * dT - pT;
+          long start_h = i * dH - pH;
+          long start_w = j * dW - pW;
+          /* taps sit `dilation` apart, so ceil(-start / dilation) of them lie before index 0 */
+          long skip_t = (start_t < 0) ? (-start_t + dilationT - 1) / dilationT : 0;
+          long skip_h = (start_h < 0) ? (-start_h + dilationH - 1) / dilationH : 0;
+          long skip_w = (start_w < 0) ? (-start_w + dilationW - 1) / dilationW : 0;
+
+          long kernel_t = kT - skip_t;
+          long kernel_h = kH - skip_h;
+          long kernel_w = kW - skip_w;
+          start_t += skip_t * dilationT;
+          start_h += skip_h * dilationH;
+          start_w += skip_w * dilationW;
+
+          real *ip = input_p + k * itime * iwidth * iheight
+            + start_t * iwidth * iheight + start_h * iwidth + start_w;
+          real *op = output_p + k * otime * owidth * oheight
+            + ti * owidth * oheight + i * owidth + j;
+          real *indzp = indz_p + k * otime * owidth * oheight
+            + ti * owidth * oheight + i * owidth + j;
+
+          /* compute local max: */
+          real maxval = -THInf;
+          int x,y,z;
+          int mx = (int)skip_w, my = (int)skip_h, mz = (int)skip_t; /* defined even if no tap compares greater than -THInf */
+
+          for (z = 0; z < kernel_t; z++)
+          {
+            for (y = 0; y < kernel_h; y++)
+            {
+              for (x = 0; x < kernel_w; x++)
+              {
+                if ((start_t + z * dilationT < itime) && (start_h + y * dilationH < iheight) && (start_w + x * dilationW < iwidth))
+                {
+                  real val = *(ip + z * dilationT * iwidth * iheight + y * dilationH * iwidth + x * dilationW);
+                  if (val > maxval)
+                  {
+                    maxval = val;
+                    // Store indices w.r.t the kernel dimension
+                    mz = z + (kT - kernel_t);
+                    my = y + (kH - kernel_h);
+                    mx = x + (kW - kernel_w);
+                  }
+                }
+              }
+            }
+          }
+
+          // set max values
+          ((unsigned char*)(indzp))[0] = mz;
+          ((unsigned char*)(indzp))[1] = my;
+          ((unsigned char*)(indzp))[2] = mx;
+          ((unsigned char*)(indzp))[3] = 0;
+
+          /* set output to local max */
+          *op = maxval;
+        }
+      }
+    }
+  }
+}
+
+void THNN_(VolumetricDilatedMaxPooling_updateOutput)(
+  THNNState *state,
+  THTensor *input,    /* 4D, or 5D in batch mode */
+  THTensor *output,   /* resized here; receives the pooled maxima */
+  THTensor *indices,  /* resized like output; receives the packed argmax offsets */
+  int kT,
+  int kW,
+  int kH,
+  int dT,
+  int dW,
+  int dH,
+  int pT,
+  int pW,
+  int pH,
+  int dilationT,
+  int dilationW,
+  int dilationH,
+  bool ceilMode)      /* round the output extent up instead of down */
+{
+  /* Forward pass: validates the arguments, derives the output extent from
+   * the dilated kernel extent dilation * (k - 1) + 1, resizes output and
+   * indices, then pools the single frame (4D input) or every batch element
+   * (5D input). */
+
+  long nslices, itime, iheight, iwidth;
+  long otime, oheight, owidth;
+  real *input_data;
+  real *output_data;
+  real *indices_data;
+
+  THArgCheck(input->nDimension == 4 || input->nDimension == 5, 2,
+    "4D or 5D (batch-mode) tensor expected");
+  /* stride is a divisor below and dilation is the tap spacing: both must be positive */
+  THArgCheck(dT > 0 && dW > 0 && dH > 0, 8, "stride should be greater than zero");
+  THArgCheck(dilationT > 0 && dilationW > 0 && dilationH > 0, 14, "dilation should be greater than zero");
+
+  int dimN = 0;
+  int dimt = 1;
+  int dimh = 2;
+  int dimw = 3;
+
+  if (input->nDimension == 5)
+  {
+    dimN++;
+    dimt++;
+    dimh++;
+    dimw++;
+  }
+
+  THArgCheck(input->size[dimw] >= kW && input->size[dimh] >= kH && input->size[dimt] >= kT, 2,
+    "input image smaller than kernel size");
+
+  THArgCheck(kT/2 >= pT && kW/2 >= pW && kH/2 >= pH, 2,
+    "pad should be smaller than half of kernel size");
+
+  /* sizes */
+  nslices = input->size[dimN];
+  itime = input->size[dimt];
+  iheight = input->size[dimh];
+  iwidth = input->size[dimw];
+  if (ceilMode)
+  {
+    otime = (int)(ceil((float)(itime - (dilationT * (kT - 1) + 1) + 2*pT) / dT)) + 1;
+    oheight = (int)(ceil((float)(iheight - (dilationH * (kH - 1) + 1) + 2*pH) / dH)) + 1;
+    owidth = (int)(ceil((float)(iwidth - (dilationW * (kW - 1) + 1) + 2*pW) / dW)) + 1;
+  }
+  else
+  {
+    otime = (int)(floor((float)(itime - (dilationT * (kT - 1) + 1) + 2*pT) / dT)) + 1;
+    oheight = (int)(floor((float)(iheight - (dilationH * (kH - 1) + 1) + 2*pH) / dH)) + 1;
+    owidth = (int)(floor((float)(iwidth - (dilationW * (kW - 1) + 1) + 2*pW) / dW)) + 1;
+  }
+
+  if (otime < 1 || owidth < 1 || oheight < 1)
+    THError("Given input size: (%ldx%ldx%ldx%ld). Calculated output size: (%ldx%ldx%ldx%ld). Output size is too small",
+      nslices,itime,iheight,iwidth,nslices,otime,oheight,owidth);
+
+  if (pT || pW || pH)
+  {
+    // ensure that the last pooling starts inside the image
+    if ((otime - 1)*dT >= itime + pT)
+      --otime;
+    if ((oheight - 1)*dH >= iheight + pH)
+      --oheight;
+    if ((owidth - 1)*dW >= iwidth + pW)
+      --owidth;
+  }
+
+  /* get contiguous input */
+  input = THTensor_(newContiguous)(input);
+
+  if (input->nDimension == 4) /* non-batch mode */
+  {
+    /* resize output */
+    THTensor_(resize4d)(output, nslices, otime, oheight, owidth);
+    /* indices will contain ti,i,j uchar locations packed into float/double */
+    THTensor_(resize4d)(indices, nslices, otime, oheight, owidth);
+
+    input_data = THTensor_(data)(input);
+    output_data = THTensor_(data)(output);
+    indices_data = THTensor_(data)(indices);
+
+    THNN_(VolumetricDilatedMaxPooling_updateOutput_frame)(
+      input_data, output_data,
+      indices_data,
+      nslices,
+      itime, iwidth, iheight,
+      otime, owidth, oheight,
+      kT, kW, kH,
+      dT, dW, dH,
+      pT, pW, pH,
+      dilationT, dilationW, dilationH
+    );
+  }
+  else /* batch mode */
+  {
+    long p;
+    long nBatch = input->size[0];
+
+    long istride = nslices * itime * iwidth * iheight;
+    long ostride = nslices * otime * owidth * oheight;
+
+    /* resize output */
+    THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth);
+    /* indices will contain ti,i,j locations for each output point */
+    THTensor_(resize5d)(indices, nBatch, nslices, otime, oheight, owidth);
+
+    input_data = THTensor_(data)(input);
+    output_data = THTensor_(data)(output);
+    indices_data = THTensor_(data)(indices);
+
+#pragma omp parallel for private(p)
+    for (p=0; p < nBatch; p++)
+    {
+      THNN_(VolumetricDilatedMaxPooling_updateOutput_frame)(
+        input_data + p * istride,
+        output_data + p * ostride,
+        indices_data + p * ostride,
+        nslices,
+        itime, iwidth, iheight,
+        otime, owidth, oheight,
+        kT, kW, kH,
+        dT, dW, dH,
+        pT, pW, pH,
+        dilationT, dilationW, dilationH
+      );
+    }
+  }
+
+  /* cleanup */
+  THTensor_(free)(input);
+}
+
+static void THNN_(VolumetricDilatedMaxPooling_updateGradInput_frame)(
+  real *gradInput_p,   /* accumulated into, indexed [slice][t][h][w] */
+  real *gradOutput_p,  /* read as [slice][ti][i][j] */
+  real *indz_p,        /* same layout as gradOutput_p; bytes 0..2 of each element are kernel offsets (t, h, w) */
+  long nslices,
+  long itime,
+  long iwidth,
+  long iheight,
+  long otime,
+  long owidth,
+  long oheight,
+  int dT,
+  int dW,
+  int dH,
+  int pT,
+  int pW,
+  int pH,
+  int dilationT,
+  int dilationW,
+  int dilationH)
+{
+  long k;
+#pragma omp parallel for private(k)
+  for (k = 0; k < nslices; k++)
+  {
+    /* base pointers of slice k */
+    real *gin = gradInput_p + k * itime * iwidth * iheight;
+    real *gout = gradOutput_p + k * otime * owidth * oheight;
+    real *idx = indz_p + k * otime * owidth * oheight;
+
+    long ti, i, j;
+    for (ti = 0; ti < otime; ti++)
+    {
+      for (i = 0; i < oheight; i++)
+      {
+        for (j = 0; j < owidth; j++)
+        {
+          /* flat offset of this output element inside the slice */
+          long o = ti * oheight * owidth + i * owidth + j;
+          /* kernel offset times dilation, plus the window origin, gives the input coordinate */
+          unsigned char *tap = (unsigned char*)(idx + o);
+          long maxti = tap[0] * dilationT + ti * dT - pT;
+          long maxi = tap[1] * dilationH + i * dH - pH;
+          long maxj = tap[2] * dilationW + j * dW - pW;
+
+          gin[maxti * iheight * iwidth + maxi * iwidth + maxj] += gout[o];
+        }
+      }
+    }
+  }
+}
+
+void THNN_(VolumetricDilatedMaxPooling_updateGradInput)(
+  THNNState *state,
+  THTensor *input,       /* 4D, or 5D in batch mode */
+  THTensor *gradOutput,  /* gradient w.r.t. the output; a contiguous copy is used */
+  THTensor *gradInput,   /* resized like input and zeroed before accumulation */
+  THTensor *indices,     /* packed argmax offsets; its data is read as-is */
+  int dT,
+  int dW,
+  int dH,
+  int pT,
+  int pW,
+  int pH,
+  int dilationT,
+  int dilationW,
+  int dilationH)
+{
+  /* Backward pass: zeroes gradInput (resized like input), then adds every
+   * gradOutput element to the input position selected by indices.
+   * Sizes are kept in long, like in updateOutput and in the frame helper's
+   * parameters, so that large extents are not narrowed and the stride
+   * products below are not evaluated in int arithmetic. */
+  long nslices, itime, iheight, iwidth;
+  long otime, oheight, owidth;
+  real *gradInput_data;
+  real *gradOutput_data;
+  real *indices_data;
+
+  int dimN = 0;
+  int dimt = 1;
+  int dimh = 2;
+  int dimw = 3;
+
+  /* get contiguous gradOutput */
+  gradOutput = THTensor_(newContiguous)(gradOutput);
+
+  /* resize */
+  THTensor_(resizeAs)(gradInput, input);
+  THTensor_(zero)(gradInput);
+
+  if (input->nDimension == 5)
+  {
+    dimN++;
+    dimt++;
+    dimh++;
+    dimw++;
+  }
+
+  /* sizes */
+  nslices = input->size[dimN];
+  itime = input->size[dimt];
+  iheight = input->size[dimh];
+  iwidth = input->size[dimw];
+  otime = gradOutput->size[dimt];
+  oheight = gradOutput->size[dimh];
+  owidth = gradOutput->size[dimw];
+
+  /* get raw pointers */
+  gradInput_data = THTensor_(data)(gradInput);
+  gradOutput_data = THTensor_(data)(gradOutput);
+  indices_data = THTensor_(data)(indices);
+
+  /* backprop */
+  if (input->nDimension == 4) /* non-batch mode*/
+  {
+    THNN_(VolumetricDilatedMaxPooling_updateGradInput_frame)(
+      gradInput_data, gradOutput_data,
+      indices_data,
+      nslices,
+      itime, iwidth, iheight,
+      otime, owidth, oheight,
+      dT, dW, dH,
+      pT, pW, pH,
+      dilationT, dilationW, dilationH
+    );
+  }
+  else /* batch mode */
+  {
+    long p;
+    long nBatch = input->size[0];
+
+    long istride = nslices * itime * iwidth * iheight;
+    long ostride = nslices * otime * owidth * oheight;
+
+#pragma omp parallel for private(p)
+    for (p = 0; p < nBatch; p++)
+    {
+      THNN_(VolumetricDilatedMaxPooling_updateGradInput_frame)(
+        gradInput_data + p * istride,
+        gradOutput_data + p * ostride,
+        indices_data + p * ostride,
+        nslices,
+        itime, iwidth, iheight,
+        otime, owidth, oheight,
+        dT, dW, dH,
+        pT, pW, pH,
+        dilationT, dilationW, dilationH
+      );
+    }
+  }
+
+  /* cleanup */
+  THTensor_(free)(gradOutput);
+}
+
+#endif
diff --git a/lib/THNN/generic/VolumetricMaxPooling.c b/lib/THNN/generic/VolumetricMaxPooling.c
index 053c02c02..dc376e6a7 100644
--- a/lib/THNN/generic/VolumetricMaxPooling.c
+++ b/lib/THNN/generic/VolumetricMaxPooling.c
@@ -2,101 +2,6 @@
#define TH_GENERIC_FILE "generic/VolumetricMaxPooling.c"
#else
-static void THNN_(VolumetricMaxPooling_updateOutput_frame)(
- real *input_p,
- real *output_p,
- real *indz_p,
- long nslices,
- long itime,
- long iwidth,
- long iheight,
- long otime,
- long owidth,
- long oheight,
- int kT,
- int kW,
- int kH,
- int dT,
- int dW,
- int dH,
- int pT,
- int pW,
- int pH)
-{
- long k;
-#pragma omp parallel for private(k)
- for (k = 0; k < nslices; k++)
- {
- /* loop over output */
- long i, j, ti;
- for (ti = 0; ti < otime; ti++)
- {
- for (i = 0; i < oheight; i++)
- {
- for (j = 0; j < owidth; j++)
- {
- /* local pointers */
-
- long start_t = ti * dT - pT;
- long start_h = i * dH - pH;
- long start_w = j * dW - pW;
-
- long kernel_t = fminf(kT, kT + start_t);
- long kernel_h = fminf(kH, kH + start_h);
- long kernel_w = fminf(kW, kW + start_w);
-
- start_t = fmaxf(start_t, 0);
- start_h = fmaxf(start_h, 0);
- start_w = fmaxf(start_w, 0);
-
- real *ip = input_p + k * itime * iwidth * iheight
- + start_t * iwidth * iheight + start_h * iwidth + start_w;
- real *op = output_p + k * otime * owidth * oheight
- + ti * owidth * oheight + i * owidth + j;
- real *indzp = indz_p + k * otime * owidth * oheight
- + ti * owidth * oheight + i * owidth + j;
-
- /* compute local max: */
- real maxval = -THInf;
- int x,y,z;
- int mx, my, mz;
-
- for (z = 0; z < kernel_t; z++)
- {
- for (y = 0; y < kernel_h; y++)
- {
- for (x = 0; x < kernel_w; x++)
- {
- if ((start_t + z < itime) && (start_h + y < iheight) && (start_w + x < iwidth))
- {
- real val = *(ip + z * iwidth * iheight + y * iwidth + x);
- if (val > maxval)
- {
- maxval = val;
- // Store indices w.r.t the kernel dimension
- mz = z + (kT - kernel_t);
- my = y + (kH - kernel_h);
- mx = x + (kW - kernel_w);
- }
- }
- }
- }
- }
-
- // set max values
- ((unsigned char*)(indzp))[0] = mz;
- ((unsigned char*)(indzp))[1] = my;
- ((unsigned char*)(indzp))[2] = mx;
- ((unsigned char*)(indzp))[3] = 0;
-
- /* set output to local max */
- *op = maxval;
- }
- }
- }
- }
-}
-
void THNN_(VolumetricMaxPooling_updateOutput)(
THNNState *state,
THTensor *input,
@@ -113,181 +18,10 @@ void THNN_(VolumetricMaxPooling_updateOutput)(
int pH,
bool ceilMode)
{
- long nslices;
- long itime;
- long iheight;
- long iwidth;
- long otime;
- long oheight;
- long owidth;
- real *input_data;
- real *output_data;
- real *indices_data;
-
- THArgCheck(input->nDimension == 4 || input->nDimension == 5, 2,
- "4D or 5D (batch-mode) tensor expected"
- );
-
- int dimN = 0;
- int dimt = 1;
- int dimh = 2;
- int dimw = 3;
-
- if (input->nDimension == 5)
- {
- dimN++;
- dimt++;
- dimh++;
- dimw++;
- }
-
- THArgCheck(input->size[dimw] >= kW && input->size[dimh] >= kH && input->size[dimt] >= kT, 2,
- "input image smaller than kernel size"
- );
-
- THArgCheck(kT/2 >= pT && kW/2 >= pW && kH/2 >= pH, 2,
- "pad should be smaller than half of kernel size"
- );
-
- /* sizes */
- nslices = input->size[dimN];
- itime = input->size[dimt];
- iheight = input->size[dimh];
- iwidth = input->size[dimw];
- if (ceilMode)
- {
- otime = (int)(ceil((float)(itime - kT + 2 * pT) / dT) + 1);
- oheight = (int)(ceil((float)(iheight - kH + 2 * pH) / dH) + 1);
- owidth = (int)(ceil((float)(iwidth - kW + 2 * pW) / dW) + 1);
- }
- else
- {
- otime = (int)(floor((float)(itime - kT + 2 * pT) / dT) + 1);
- oheight = (int)(floor((float)(iheight - kH + 2 * pH) / dH) + 1);
- owidth = (int)(floor((float)(iwidth - kW + 2 * pW) / dW) + 1);
- }
-
- if (pT || pW || pH)
- {
- // ensure that the last pooling starts inside the image
- if ((otime - 1)*dT >= itime + pT)
- --otime;
- if ((oheight - 1)*dH >= iheight + pH)
- --oheight;
- if ((owidth - 1)*dW >= iwidth + pW)
- --owidth;
- }
-
- /* get contiguous input */
- input = THTensor_(newContiguous)(input);
-
- if (input->nDimension == 4) /* non-batch mode */
- {
- /* resize output */
- THTensor_(resize4d)(output, nslices, otime, oheight, owidth);
- /* indices will contain ti,i,j uchar locations packed into float/double */
- THTensor_(resize4d)(indices, nslices, otime, oheight, owidth);
-
- input_data = THTensor_(data)(input);
- output_data = THTensor_(data)(output);
- indices_data = THTensor_(data)(indices);
-
- THNN_(VolumetricMaxPooling_updateOutput_frame)(
- input_data, output_data,
- indices_data,
- nslices,
- itime, iwidth, iheight,
- otime, owidth, oheight,
- kT, kW, kH,
- dT, dW, dH,
- pT, pW, pH
- );
- }
- else /* batch mode */
- {
- long p;
- long nBatch = input->size[0];
-
- long istride = nslices * itime * iwidth * iheight;
- long ostride = nslices * otime * owidth * oheight;
-
- /* resize output */
- THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth);
- /* indices will contain ti,i,j locations for each output point */
- THTensor_(resize5d)(indices, nBatch, nslices, otime, oheight, owidth);
-
- input_data = THTensor_(data)(input);
- output_data = THTensor_(data)(output);
- indices_data = THTensor_(data)(indices);
-
-#pragma omp parallel for private(p)
- for (p=0; p < nBatch; p++)
- {
- THNN_(VolumetricMaxPooling_updateOutput_frame)(
- input_data + p * istride,
- output_data + p * ostride,
- indices_data + p * ostride,
- nslices,
- itime, iwidth, iheight,
- otime, owidth, oheight,
- kT, kW, kH,
- dT, dW, dH,
- pT, pW, pH
- );
- }
- }
-
- /* cleanup */
- THTensor_(free)(input);
-}
-
-static void THNN_(VolumetricMaxPooling_updateGradInput_frame)(
- real *gradInput_p,
- real *gradOutput_p,
- real *indz_p,
- long nslices,
- long itime,
- long iwidth,
- long iheight,
- long otime,
- long owidth,
- long oheight,
- int dT,
- int dW,
- int dH,
- int pT,
- int pW,
- int pH)
-{
- long k;
-#pragma omp parallel for private(k)
- for (k = 0; k < nslices; k++)
- {
- real *gradInput_p_k = gradInput_p + k * itime * iwidth * iheight;
- real *gradOutput_p_k = gradOutput_p + k * otime * owidth * oheight;
- real *indz_p_k = indz_p + k * otime * owidth * oheight;
-
- /* calculate max points */
- long ti, i, j;
- for (ti = 0; ti < otime; ti++)
- {
- for (i = 0; i < oheight; i++)
- {
- for (j = 0; j < owidth; j++)
- {
- /* retrieve position of max */
- real * indzp = &indz_p_k[ti * oheight * owidth + i * owidth + j];
- long maxti = ((unsigned char*)(indzp))[0] + ti * dT - pT;
- long maxi = ((unsigned char*)(indzp))[1] + i * dH - pH;
- long maxj = ((unsigned char*)(indzp))[2] + j * dW - pW;
-
- /* update gradient */
- gradInput_p_k[maxti * iheight * iwidth + maxi * iwidth + maxj] +=
- gradOutput_p_k[ti * oheight * owidth + i * owidth + j];
- }
- }
- }
- }
+ THNN_(VolumetricDilatedMaxPooling_updateOutput)(
+ state, input, output, indices,
+ kT, kW, kH, dT, dW, dH,
+ pT, pW, pH, 1, 1, 1, ceilMode);
}
void THNN_(VolumetricMaxPooling_updateGradInput)(
@@ -303,90 +37,9 @@ void THNN_(VolumetricMaxPooling_updateGradInput)(
int pW,
int pH)
{
- int nslices;
- int itime;
- int iheight;
- int iwidth;
- int otime;
- int oheight;
- int owidth;
- real *gradInput_data;
- real *gradOutput_data;
- real *indices_data;
-
- int dimN = 0;
- int dimt = 1;
- int dimh = 2;
- int dimw = 3;
-
- /* get contiguous gradOutput */
- gradOutput = THTensor_(newContiguous)(gradOutput);
-
- /* resize */
- THTensor_(resizeAs)(gradInput, input);
- THTensor_(zero)(gradInput);
-
- if (input->nDimension == 5)
- {
- dimN++;
- dimt++;
- dimh++;
- dimw++;
- }
-
- /* sizes */
- nslices = input->size[dimN];
- itime = input->size[dimt];
- iheight = input->size[dimh];
- iwidth = input->size[dimw];
- otime = gradOutput->size[dimt];
- oheight = gradOutput->size[dimh];
- owidth = gradOutput->size[dimw];
-
- /* get raw pointers */
- gradInput_data = THTensor_(data)(gradInput);
- gradOutput_data = THTensor_(data)(gradOutput);
- indices_data = THTensor_(data)(indices);
-
- /* backprop */
- if (input->nDimension == 4) /* non-batch mode*/
- {
- THNN_(VolumetricMaxPooling_updateGradInput_frame)(
- gradInput_data, gradOutput_data,
- indices_data,
- nslices,
- itime, iwidth, iheight,
- otime, owidth, oheight,
- dT, dW, dH,
- pT, pW, pH
- );
- }
- else /* batch mode */
- {
- long p;
- long nBatch = input->size[0];
-
- long istride = nslices * itime * iwidth * iheight;
- long ostride = nslices * otime * owidth * oheight;
-
-#pragma omp parallel for private(p)
- for (p = 0; p < nBatch; p++)
- {
- THNN_(VolumetricMaxPooling_updateGradInput_frame)(
- gradInput_data + p * istride,
- gradOutput_data + p * ostride,
- indices_data + p * ostride,
- nslices,
- itime, iwidth, iheight,
- otime, owidth, oheight,
- dT, dW, dH,
- pT, pW, pH
- );
- }
- }
-
- /* cleanup */
- THTensor_(free)(gradOutput);
+ THNN_(VolumetricDilatedMaxPooling_updateGradInput)(
+ state, input, gradOutput, gradInput, indices,
+ dT, dW, dH, pT, pW, pH, 1, 1, 1);
}
#endif
diff --git a/lib/THNN/init.c b/lib/THNN/init.c
index d26c509d6..c75fa8871 100644
--- a/lib/THNN/init.c
+++ b/lib/THNN/init.c
@@ -172,6 +172,9 @@
#include "generic/VolumetricMaxPooling.c"
#include "THGenerateFloatTypes.h"
+#include "generic/VolumetricDilatedMaxPooling.c"
+#include "THGenerateFloatTypes.h"
+
#include "generic/VolumetricMaxUnpooling.c"
#include "THGenerateFloatTypes.h"
diff --git a/test.lua b/test.lua
index 6673fed1d..0b57626a8 100644
--- a/test.lua
+++ b/test.lua
@@ -4223,6 +4223,55 @@ function nntest.VolumetricMaxPooling()
mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err (Batch) ')
end
+function nntest.VolumetricDilatedMaxPooling()
+   -- Runs the Jacobian and i/o checks for `input` on a module obtained from `build`.
+   local function check(build, input, mode, stateTag, ioTag)
+      local module = build()
+      local err = jac.testJacobian(module, input)
+      mytester:assertlt(err, precision, 'error '..mode..' mode on state '..stateTag)
+
+      local ferr, berr = jac.testIO(module, input)
+      mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err '..ioTag)
+      mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err '..ioTag)
+   end
+
+   for _,ceil_mode in pairs({true,false}) do
+      local from = math.random(2,3)
+      local kt = math.random(3,4)
+      local ki = math.random(3,4)
+      local kj = math.random(3,4)
+      local st = math.random(2,3)
+      local si = math.random(2,3)
+      local sj = math.random(2,3)
+      local outt = math.random(3,4)
+      local outi = math.random(3,4)
+      local outj = math.random(3,4)
+      local padT = math.min(math.random(0,1),math.floor(kt/2))
+      local padW = math.min(math.random(0,1),math.floor(ki/2))
+      local padH = math.min(math.random(0,1),math.floor(kj/2))
+      local dilationT = math.random(1,3)
+      local dilationW = math.random(1,3)
+      local dilationH = math.random(1,3)
+      -- input extents derived by inverting the output-size formula for outt, outi, outj
+      local int = (outt-1)*st+(dilationT*(kt-1)+1)-2*padT
+      local ini = (outi-1)*si+(dilationW*(ki-1)+1)-2*padW
+      local inj = (outj-1)*sj+(dilationH*(kj-1)+1)-2*padH
+
+      local mode = ceil_mode and 'ceil' or 'floor'
+      local function build()
+         local m = nn.VolumetricDilatedMaxPooling(kt,ki,kj,st,si,sj,padT,padW,padH,dilationT,dilationW,dilationH)
+         if ceil_mode then m:ceil() else m:floor() end
+         return m
+      end
+
+      -- single sample
+      check(build, torch.rand(from,int,inj,ini), mode, '', '')
+      -- batch
+      local nbatch = math.random(2,5)
+      check(build, torch.rand(nbatch,from,int,inj,ini), mode, '(Batch)', '(Batch) ')
+   end
+end
+
function nntest.VolumetricMaxUnpooling()
local from = math.random(2,3)
local kt = math.random(3,4)