diff --git a/VolumetricDilatedMaxPooling.lua b/VolumetricDilatedMaxPooling.lua
new file mode 100644
index 000000000..050e2c917
--- /dev/null
+++ b/VolumetricDilatedMaxPooling.lua
@@ -0,0 +1,80 @@
+local THNN = require 'nn.THNN'
+local VolumetricDilatedMaxPooling, parent = torch.class('nn.VolumetricDilatedMaxPooling', 'nn.VolumetricMaxPooling')
+
+-- Max pooling over 3D (time x height x width) windows whose taps are spaced
+-- by a per-axis dilation factor. Kernel size, stride and padding go to the
+-- parent constructor unchanged; an omitted dilation factor becomes 1.
+function VolumetricDilatedMaxPooling:__init(kT, kW, kH, dT, dW, dH, padT, padW, padH, dilationT, dilationW, dilationH)
+   parent.__init(self, kT, kW, kH, dT, dW, dH, padT, padW, padH)
+
+   self.dilationT = dilationT or 1
+   self.dilationW = dilationW or 1
+   self.dilationH = dilationH or 1
+end
+
+-- Forward pass. Caches the last three input sizes on the module, creates the
+-- indices tensor on first use, then lets the THNN backend fill self.output
+-- and self.indices.
+function VolumetricDilatedMaxPooling:updateOutput(input)
+   local nDim = input:dim()
+   self.itime = input:size(nDim - 2)
+   self.iheight = input:size(nDim - 1)
+   self.iwidth = input:size(nDim)
+
+   if not self.indices then
+      self.indices = input.new()
+   end
+
+   local backend = input.THNN
+   backend.VolumetricDilatedMaxPooling_updateOutput(
+      input:cdata(),
+      self.output:cdata(),
+      self.indices:cdata(),
+      self.kT, self.kW, self.kH,
+      self.dT, self.dW, self.dH,
+      self.padT, self.padW, self.padH,
+      self.dilationT, self.dilationW, self.dilationH,
+      self.ceil_mode
+   )
+   return self.output
+end
+
+-- Backward pass. Hands the indices recorded by updateOutput to the THNN
+-- backend, which writes the result into self.gradInput.
+function VolumetricDilatedMaxPooling:updateGradInput(input, gradOutput)
+   local backend = input.THNN
+   backend.VolumetricDilatedMaxPooling_updateGradInput(
+      input:cdata(),
+      gradOutput:cdata(),
+      self.gradInput:cdata(),
+      self.indices:cdata(),
+      self.dT, self.dW, self.dH,
+      self.padT, self.padW, self.padH,
+      self.dilationT, self.dilationW, self.dilationH
+   )
+   return self.gradInput
+end
+
+-- Empties the indices tensor (when present) and defers to the parent.
+function VolumetricDilatedMaxPooling:clearState()
+   local indices = self.indices
+   if indices then
+      indices:set()
+   end
+   return parent.clearState(self)
+end
+
+-- Renders "Type(kTxkWxkH, dT,dW,dH[, padT,padW,padH], dilationT,dilationW,dilationH)";
+-- the padding triple is only included when some pad is non-zero.
+function VolumetricDilatedMaxPooling:__tostring__()
+   local desc = string.format('%s(%dx%dx%d, %d,%d,%d', torch.type(self),
+                              self.kT, self.kW, self.kH, self.dT, self.dW, self.dH)
+
+   local anyPadSet = self.padT or self.padW or self.padH
+   local allPadsZero = self.padT == 0 and self.padW == 0 and self.padH == 0
+   if anyPadSet and not allPadsZero then
+      desc = desc .. ', ' .. self.padT .. ',' .. self.padW .. ',' .. self.padH
+   end
+
+   return desc .. ', ' .. self.dilationT .. ',' .. self.dilationW .. ',' .. self.dilationH .. ')'
+end
diff --git a/doc/convolution.md b/doc/convolution.md
index 96d92d9b3..b1a0d4c7c 100644
--- a/doc/convolution.md
+++ b/doc/convolution.md
@@ -37,6 +37,7 @@ a kernel for computing the weighted average in a neighborhood ;
     * [VolumetricFullConvolution](#nn.VolumetricFullConvolution) : a 3D full convolution over an input video (a sequence of images) ;
     * [VolumetricDilatedConvolution](#nn.VolumetricDilatedConvolution) : a 3D dilated convolution over an input image ;
     * [VolumetricMaxPooling](#nn.VolumetricMaxPooling) : a 3D max-pooling operation over an input video.
+    * [VolumetricDilatedMaxPooling](#nn.VolumetricDilatedMaxPooling) : a 3D dilated max-pooling operation over an input video ;
     * [VolumetricAveragePooling](#nn.VolumetricAveragePooling) : a 3D average-pooling operation over an input video.
     * [VolumetricMaxUnpooling](#nn.VolumetricMaxUnpooling) : a 3D max-unpooling operation.
     * [VolumetricReplicationPadding](#nn.VolumetricReplicationPadding) : Pads a volumetric feature map with the value at the edge of the input borders. ;
@@ -1022,6 +1023,30 @@ Applies 3D max-pooling operation in `kTxkWxkH` regions by step size
 `dTxdWxdH` steps. The number of output features is equal to the number of
 input planes / dT. The input can optionally be padded with zeros. Padding should be smaller than half of kernel size. That is, `padT < kT/2`, `padW < kW/2` and `padH < kH/2`.
 
+
+### VolumetricDilatedMaxPooling ###
+
+```lua
+module = nn.VolumetricDilatedMaxPooling(kT, kW, kH [, dT, dW, dH, padT, padW, padH, dilationT, dilationW, dilationH])
+```
+
+Also sometimes referred to as **atrous pooling**.
+Applies 3D dilated max-pooling operation in `kTxkWxkH` regions by step size
+`dTxdWxdH` steps. The number of output features is equal to the number of
+input planes. 
If `dilationT`, `dilationW` and `dilationH` are not provided, this is equivalent to performing normal `nn.VolumetricMaxPooling`. + +If the input image is a 4D tensor `nInputPlane x depth x height x width`, the output +image size will be `nOutputPlane x otime x oheight x owidth` where + +```lua +otime = op((depth - (dilationT * (kT - 1) + 1) + 2*padT) / dT + 1) +owidth = op((width - (dilationW * (kW - 1) + 1) + 2*padW) / dW + 1) +oheight = op((height - (dilationH * (kH - 1) + 1) + 2*padH) / dH + 1) +``` + +`op` is a rounding operator. By default, it is `floor`. It can be changed +by calling `:ceil()` or `:floor()` methods. + ### VolumetricAveragePooling ### diff --git a/init.lua b/init.lua index 98edfc532..70027a18c 100644 --- a/init.lua +++ b/init.lua @@ -128,6 +128,7 @@ require('nn.VolumetricConvolution') require('nn.VolumetricFullConvolution') require('nn.VolumetricDilatedConvolution') require('nn.VolumetricMaxPooling') +require('nn.VolumetricDilatedMaxPooling') require('nn.VolumetricMaxUnpooling') require('nn.VolumetricAveragePooling') require('nn.VolumetricBatchNormalization') diff --git a/lib/THNN/generic/THNN.h b/lib/THNN/generic/THNN.h index 319ffc798..d7ecb13e8 100644 --- a/lib/THNN/generic/THNN.h +++ b/lib/THNN/generic/THNN.h @@ -1122,6 +1122,26 @@ TH_API void THNN_(VolumetricMaxPooling_updateGradInput)( int dT, int dW, int dH, int pT, int pW, int pH); +TH_API void THNN_(VolumetricDilatedMaxPooling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *indices, + int kT, int kW, int kH, + int dT, int dW, int dH, + int pT, int pW, int pH, + int dilationT, int dilationW, int dilationH, + bool ceilMode); +TH_API void THNN_(VolumetricDilatedMaxPooling_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *indices, + int dT, int dW, int dH, + int pT, int pW, int pH, + int dilationT, int dilationW, int dilationH); + TH_API void THNN_(VolumetricMaxUnpooling_updateOutput)( 
THNNState *state,
           THTensor *input,
diff --git a/lib/THNN/generic/VolumetricDilatedMaxPooling.c b/lib/THNN/generic/VolumetricDilatedMaxPooling.c
new file mode 100644
index 000000000..0db41aef3
--- /dev/null
+++ b/lib/THNN/generic/VolumetricDilatedMaxPooling.c
@@ -0,0 +1,474 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/VolumetricDilatedMaxPooling.c"
+#else
+
+/*
+ * Forward pass for a single sample. For every slice and output location the
+ * dilated kT x kH x kW window is scanned and its maximum written to output_p.
+ * The kernel-relative (t, h, w) offset of that maximum is stored as three
+ * unsigned chars in the first bytes of the matching indz_p element, so a
+ * kernel extent above 255 cannot be represented.
+ */
+static void THNN_(VolumetricDilatedMaxPooling_updateOutput_frame)(
+  real *input_p,
+  real *output_p,
+  real *indz_p,
+  long nslices,
+  long itime,
+  long iwidth,
+  long iheight,
+  long otime,
+  long owidth,
+  long oheight,
+  int kT,
+  int kW,
+  int kH,
+  int dT,
+  int dW,
+  int dH,
+  int pT,
+  int pW,
+  int pH,
+  int dilationT,
+  int dilationW,
+  int dilationH)
+{
+  long k;
+#pragma omp parallel for private(k)
+  for (k = 0; k < nslices; k++)
+  {
+    /* loop over output */
+    long i, j, ti;
+    for (ti = 0; ti < otime; ti++)
+    {
+      for (i = 0; i < oheight; i++)
+      {
+        for (j = 0; j < owidth; j++)
+        {
+          /* first tap of the window; negative while it sits in the padding */
+          long start_t = ti * dT - pT;
+          long start_h = i * dH - pH;
+          long start_w = j * dW - pW;
+
+          /*
+           * Move each start to the first tap at a non-negative position and
+           * count the taps skipped on the way. Taps are `dilation` elements
+           * apart, so that count is ceil(-start / dilation) rather than
+           * -start; the two only agree for dilation 1 or a single padded
+           * element.
+           */
+          int skip_t = 0;
+          int skip_h = 0;
+          int skip_w = 0;
+
+          while(start_t < 0)
+          {
+            start_t += dilationT;
+            skip_t++;
+          }
+          while(start_h < 0)
+          {
+            start_h += dilationH;
+            skip_h++;
+          }
+          while(start_w < 0)
+          {
+            start_w += dilationW;
+            skip_w++;
+          }
+
+          /* taps still to visit along each axis */
+          long kernel_t = kT - skip_t;
+          long kernel_h = kH - skip_h;
+          long kernel_w = kW - skip_w;
+
+          real *ip = input_p + k * itime * iwidth * iheight
+            + start_t * iwidth * iheight + start_h * iwidth + start_w;
+          real *op = output_p + k * otime * owidth * oheight
+            + ti * owidth * oheight + i * owidth + j;
+          real *indzp = indz_p + k * otime * owidth * oheight
+            + ti * owidth * oheight + i * owidth + j;
+
+          /* compute local max: */
+          real maxval = -THInf;
+          int x,y,z;
+          /*
+           * Start from the first visited tap so the stored offset is defined
+           * even when no value compares greater than -inf (NaN input, or a
+           * window that only covers padding).
+           */
+          int mx = skip_w;
+          int my = skip_h;
+          int mz = skip_t;
+
+          for (z = 0; z < kernel_t; z++)
+          {
+            for (y = 0; y < kernel_h; y++)
+            {
+              for (x = 0; x < kernel_w; x++)
+              {
+                if ((start_t + z * dilationT < itime) && (start_h + y * dilationH < iheight) && (start_w + x * dilationW < iwidth))
+                {
+                  real val = *(ip + z * dilationT * iwidth * iheight + y * dilationH * iwidth + x * dilationW);
+                  if (val > maxval)
+                  {
+                    maxval = val;
+                    // Store indices w.r.t the kernel dimension
+                    mz = z + skip_t;
+                    my = y + skip_h;
+                    mx = x + skip_w;
+                  }
+                }
+              }
+            }
+          }
+
+          // set max values
+          ((unsigned char*)(indzp))[0] = mz;
+          ((unsigned char*)(indzp))[1] = my;
+          ((unsigned char*)(indzp))[2] = mx;
+          ((unsigned char*)(indzp))[3] = 0;
+
+          /* set output to local max */
+          *op = maxval;
+        }
+      }
+    }
+  }
+}
+
+/*
+ * Dilated 3D max pooling over a 4D (slices x time x height x width) or 5D
+ * (batch first) input. Resizes `output` and `indices` to the computed output
+ * size and fills them through the per-sample frame routine. ceilMode picks
+ * ceil instead of floor when dividing by the stride.
+ */
+void THNN_(VolumetricDilatedMaxPooling_updateOutput)(
+          THNNState *state,
+          THTensor *input,
+          THTensor *output,
+          THTensor *indices,
+          int kT,
+          int kW,
+          int kH,
+          int dT,
+          int dW,
+          int dH,
+          int pT,
+          int pW,
+          int pH,
+          int dilationT,
+          int dilationW,
+          int dilationH,
+          bool ceilMode)
+{
+  long nslices;
+  long itime;
+  long iheight;
+  long iwidth;
+  long otime;
+  long oheight;
+  long owidth;
+  real *input_data;
+  real *output_data;
+  real *indices_data;
+
+  THArgCheck(input->nDimension == 4 || input->nDimension == 5, 2,
+    "4D or 5D (batch-mode) tensor expected"
+  );
+
+  int dimN = 0;
+  int dimt = 1;
+  int dimh = 2;
+  int dimw = 3;
+
+  if (input->nDimension == 5)
+  {
+    dimN++;
+    dimt++;
+    dimh++;
+    dimw++;
+  }
+
+  THArgCheck(input->size[dimw] >= kW && input->size[dimh] >= kH && input->size[dimt] >= kT, 2,
+    "input image smaller than kernel size"
+  );
+
+  THArgCheck(kT/2 >= pT && kW/2 >= pW && kH/2 >= pH, 2,
+    "pad should be smaller than half of kernel size"
+  );
+
+  /* sizes */
+  nslices = input->size[dimN];
+  itime   = input->size[dimt];
+  iheight = input->size[dimh];
+  iwidth  = input->size[dimw];
+  if (ceilMode)
+  {
+    otime = (int)(ceil((float)(itime - (dilationT * (kT - 1) + 1) + 2*pT) / dT)) + 1;
+    oheight = (int)(ceil((float)(iheight - (dilationH * (kH - 1) + 1) + 2*pH) / dH)) + 1;
+    owidth = (int)(ceil((float)(iwidth - (dilationW * (kW - 1) + 1) + 2*pW) / dW)) + 1;
+  }
+  else
+  {
+    otime = (int)(floor((float)(itime - (dilationT * (kT - 1) + 1) + 2*pT) / dT)) + 1;
+    oheight = (int)(floor((float)(iheight - (dilationH * (kH - 1) + 1) + 2*pH) / dH)) + 1;
+    owidth = (int)(floor((float)(iwidth - (dilationW * (kW - 1) + 1) + 2*pW) / dW)) + 1;
+  }
+
+  /* the sizes are longs, so they must be printed with %ld */
+  if (otime < 1 || owidth < 1 || oheight < 1)
+    THError("Given input size: (%ldx%ldx%ldx%ld). Calculated output size: (%ldx%ldx%ldx%ld). Output size is too small",
+            nslices,itime,iheight,iwidth,nslices,otime,oheight,owidth);
+
+  if (pT || pW || pH)
+  {
+    // ensure that the last pooling starts inside the image
+    if ((otime - 1)*dT >= itime + pT)
+      --otime;
+    if ((oheight - 1)*dH >= iheight + pH)
+      --oheight;
+    if ((owidth - 1)*dW >= iwidth + pW)
+      --owidth;
+  }
+
+  /* get contiguous input */
+  input = THTensor_(newContiguous)(input);
+
+  if (input->nDimension == 4) /* non-batch mode */
+  {
+    /* resize output */
+    THTensor_(resize4d)(output, nslices, otime, oheight, owidth);
+    /* indices will contain ti,i,j uchar locations packed into float/double */
+    THTensor_(resize4d)(indices, nslices, otime, oheight, owidth);
+
+    input_data = THTensor_(data)(input);
+    output_data = THTensor_(data)(output);
+    indices_data = THTensor_(data)(indices);
+
+    THNN_(VolumetricDilatedMaxPooling_updateOutput_frame)(
+      input_data, output_data,
+      indices_data,
+      nslices,
+      itime, iwidth, iheight,
+      otime, owidth, oheight,
+      kT, kW, kH,
+      dT, dW, dH,
+      pT, pW, pH,
+      dilationT, dilationW, dilationH
+    );
+  }
+  else /* batch mode */
+  {
+    long p;
+    long nBatch = input->size[0];
+
+    long istride = nslices * itime * iwidth * iheight;
+    long ostride = nslices * otime * owidth * oheight;
+
+    /* resize output */
+    THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth);
+    /* indices will contain ti,i,j locations for each output point */
+    THTensor_(resize5d)(indices, nBatch, nslices, otime, oheight, owidth);
+
+    input_data = THTensor_(data)(input);
+    output_data = THTensor_(data)(output);
+    indices_data = THTensor_(data)(indices);
+
+#pragma omp parallel for private(p)
+    for (p=0; p < nBatch; p++)
+    {
+      THNN_(VolumetricDilatedMaxPooling_updateOutput_frame)(
+        input_data   + p * istride,
+        output_data  + p * ostride,
+        indices_data + p * ostride,
+        nslices,
+        itime, iwidth, iheight,
+        otime, owidth, oheight,
+        kT, kW, kH,
+        dT, dW, dH,
+        pT, pW, pH,
+        dilationT, dilationW, dilationH
+      );
+    }
+  }
+
+  /* cleanup */
+  THTensor_(free)(input);
+}
+
+/*
+ * Backward pass for a single sample: every gradOutput element is added to
+ * the gradInput element whose position is rebuilt from the kernel-relative
+ * offset stored in indz_p by the forward pass.
+ */
+static void THNN_(VolumetricDilatedMaxPooling_updateGradInput_frame)(
+  real *gradInput_p,
+  real *gradOutput_p,
+  real *indz_p,
+  long nslices,
+  long itime,
+  long iwidth,
+  long iheight,
+  long otime,
+  long owidth,
+  long oheight,
+  int dT,
+  int dW,
+  int dH,
+  int pT,
+  int pW,
+  int pH,
+  int dilationT,
+  int dilationW,
+  int dilationH)
+{
+  long k;
+#pragma omp parallel for private(k)
+  for (k = 0; k < nslices; k++)
+  {
+    real *gradInput_p_k  = gradInput_p  + k * itime * iwidth * iheight;
+    real *gradOutput_p_k = gradOutput_p + k * otime * owidth * oheight;
+    real *indz_p_k = indz_p + k * otime * owidth * oheight;
+
+    /* calculate max points */
+    long ti, i, j;
+    for (ti = 0; ti < otime; ti++)
+    {
+      for (i = 0; i < oheight; i++)
+      {
+        for (j = 0; j < owidth; j++)
+        {
+          /* retrieve position of max */
+          real * indzp = &indz_p_k[ti * oheight * owidth + i * owidth + j];
+          long maxti = ((unsigned char*)(indzp))[0] * dilationT + ti * dT - pT;
+          long maxi  = ((unsigned char*)(indzp))[1] * dilationH + i * dH - pH;
+          long maxj  = ((unsigned char*)(indzp))[2] * dilationW + j * dW - pW;
+
+          /*
+           * A window that only covered padding has no input element to
+           * receive the gradient; skip it instead of writing out of bounds.
+           */
+          if (maxti < 0 || maxti >= itime ||
+              maxi < 0 || maxi >= iheight ||
+              maxj < 0 || maxj >= iwidth)
+            continue;
+
+          /* update gradient */
+          gradInput_p_k[maxti * iheight * iwidth + maxi * iwidth + maxj] +=
+            gradOutput_p_k[ti * oheight * owidth + i * owidth + j];
+        }
+      }
+    }
+  }
+}
+
+/*
+ * Backward entry point. Resizes gradInput like input, zeroes it, and
+ * scatters gradOutput into it with the indices produced by updateOutput.
+ */
+void THNN_(VolumetricDilatedMaxPooling_updateGradInput)(
+          THNNState *state,
+          THTensor *input,
+          THTensor *gradOutput,
+          THTensor *gradInput,
+          THTensor *indices,
+          int dT,
+          int dW,
+          int dH,
+          int pT,
+          int pW,
+          int pH,
+          int dilationT,
+          int dilationW,
+          int dilationH)
+{
+  int nslices;
+  int itime;
+  int iheight;
+  int iwidth;
+  int otime;
+  int oheight;
+  int owidth;
+  real *gradInput_data;
+  real *gradOutput_data;
+  real *indices_data;
+
+  int dimN = 0;
+  int dimt = 1;
+  int dimh = 2;
+  int dimw = 3;
+
+  /* get contiguous gradOutput */
+  gradOutput = THTensor_(newContiguous)(gradOutput);
+
+  /* resize */
+  THTensor_(resizeAs)(gradInput, input);
+  THTensor_(zero)(gradInput);
+
+  if (input->nDimension == 5)
+  {
+    dimN++;
+    dimt++;
+    dimh++;
+    dimw++;
+  }
+
+  /* sizes */
+  nslices = input->size[dimN];
+  itime = input->size[dimt];
+  iheight = input->size[dimh];
+  iwidth = input->size[dimw];
+  otime = gradOutput->size[dimt];
+  oheight = gradOutput->size[dimh];
+  owidth = gradOutput->size[dimw];
+
+  /* get raw pointers */
+  gradInput_data = THTensor_(data)(gradInput);
+  gradOutput_data = THTensor_(data)(gradOutput);
+  indices_data = THTensor_(data)(indices);
+
+  /* backprop */
+  if (input->nDimension == 4) /* non-batch mode*/
+  {
+    THNN_(VolumetricDilatedMaxPooling_updateGradInput_frame)(
+      gradInput_data, gradOutput_data,
+      indices_data,
+      nslices,
+      itime, iwidth, iheight,
+      otime, owidth, oheight,
+      dT, dW, dH,
+      pT, pW, pH,
+      dilationT, dilationW, dilationH
+    );
+  }
+  else /* batch mode */
+  {
+    long p;
+    long nBatch = input->size[0];
+
+    long istride = nslices * itime * iwidth * iheight;
+    long ostride = nslices * otime * owidth * oheight;
+
+#pragma omp parallel for private(p)
+    for (p = 0; p < nBatch; p++)
+    {
+      THNN_(VolumetricDilatedMaxPooling_updateGradInput_frame)(
+        gradInput_data + p * istride,
+        gradOutput_data + p * ostride,
+        indices_data + p * ostride,
+        nslices,
+        itime, iwidth, iheight,
+        otime, owidth, oheight,
+        dT, dW, dH,
+        pT, pW, pH,
+        dilationT, dilationW, dilationH
+      );
+    }
+  }
+
+  /* cleanup */
+  THTensor_(free)(gradOutput);
+}
+
+#endif
diff --git a/lib/THNN/generic/VolumetricMaxPooling.c b/lib/THNN/generic/VolumetricMaxPooling.c
index 053c02c02..dc376e6a7 100644
--- a/lib/THNN/generic/VolumetricMaxPooling.c
+++ b/lib/THNN/generic/VolumetricMaxPooling.c
@@ -2,101 +2,6 @@
 #define TH_GENERIC_FILE
"generic/VolumetricMaxPooling.c" #else -static void THNN_(VolumetricMaxPooling_updateOutput_frame)( - real *input_p, - real *output_p, - real *indz_p, - long nslices, - long itime, - long iwidth, - long iheight, - long otime, - long owidth, - long oheight, - int kT, - int kW, - int kH, - int dT, - int dW, - int dH, - int pT, - int pW, - int pH) -{ - long k; -#pragma omp parallel for private(k) - for (k = 0; k < nslices; k++) - { - /* loop over output */ - long i, j, ti; - for (ti = 0; ti < otime; ti++) - { - for (i = 0; i < oheight; i++) - { - for (j = 0; j < owidth; j++) - { - /* local pointers */ - - long start_t = ti * dT - pT; - long start_h = i * dH - pH; - long start_w = j * dW - pW; - - long kernel_t = fminf(kT, kT + start_t); - long kernel_h = fminf(kH, kH + start_h); - long kernel_w = fminf(kW, kW + start_w); - - start_t = fmaxf(start_t, 0); - start_h = fmaxf(start_h, 0); - start_w = fmaxf(start_w, 0); - - real *ip = input_p + k * itime * iwidth * iheight - + start_t * iwidth * iheight + start_h * iwidth + start_w; - real *op = output_p + k * otime * owidth * oheight - + ti * owidth * oheight + i * owidth + j; - real *indzp = indz_p + k * otime * owidth * oheight - + ti * owidth * oheight + i * owidth + j; - - /* compute local max: */ - real maxval = -THInf; - int x,y,z; - int mx, my, mz; - - for (z = 0; z < kernel_t; z++) - { - for (y = 0; y < kernel_h; y++) - { - for (x = 0; x < kernel_w; x++) - { - if ((start_t + z < itime) && (start_h + y < iheight) && (start_w + x < iwidth)) - { - real val = *(ip + z * iwidth * iheight + y * iwidth + x); - if (val > maxval) - { - maxval = val; - // Store indices w.r.t the kernel dimension - mz = z + (kT - kernel_t); - my = y + (kH - kernel_h); - mx = x + (kW - kernel_w); - } - } - } - } - } - - // set max values - ((unsigned char*)(indzp))[0] = mz; - ((unsigned char*)(indzp))[1] = my; - ((unsigned char*)(indzp))[2] = mx; - ((unsigned char*)(indzp))[3] = 0; - - /* set output to local max */ - *op = maxval; - } - } - } 
- } -} - void THNN_(VolumetricMaxPooling_updateOutput)( THNNState *state, THTensor *input, @@ -113,181 +18,10 @@ void THNN_(VolumetricMaxPooling_updateOutput)( int pH, bool ceilMode) { - long nslices; - long itime; - long iheight; - long iwidth; - long otime; - long oheight; - long owidth; - real *input_data; - real *output_data; - real *indices_data; - - THArgCheck(input->nDimension == 4 || input->nDimension == 5, 2, - "4D or 5D (batch-mode) tensor expected" - ); - - int dimN = 0; - int dimt = 1; - int dimh = 2; - int dimw = 3; - - if (input->nDimension == 5) - { - dimN++; - dimt++; - dimh++; - dimw++; - } - - THArgCheck(input->size[dimw] >= kW && input->size[dimh] >= kH && input->size[dimt] >= kT, 2, - "input image smaller than kernel size" - ); - - THArgCheck(kT/2 >= pT && kW/2 >= pW && kH/2 >= pH, 2, - "pad should be smaller than half of kernel size" - ); - - /* sizes */ - nslices = input->size[dimN]; - itime = input->size[dimt]; - iheight = input->size[dimh]; - iwidth = input->size[dimw]; - if (ceilMode) - { - otime = (int)(ceil((float)(itime - kT + 2 * pT) / dT) + 1); - oheight = (int)(ceil((float)(iheight - kH + 2 * pH) / dH) + 1); - owidth = (int)(ceil((float)(iwidth - kW + 2 * pW) / dW) + 1); - } - else - { - otime = (int)(floor((float)(itime - kT + 2 * pT) / dT) + 1); - oheight = (int)(floor((float)(iheight - kH + 2 * pH) / dH) + 1); - owidth = (int)(floor((float)(iwidth - kW + 2 * pW) / dW) + 1); - } - - if (pT || pW || pH) - { - // ensure that the last pooling starts inside the image - if ((otime - 1)*dT >= itime + pT) - --otime; - if ((oheight - 1)*dH >= iheight + pH) - --oheight; - if ((owidth - 1)*dW >= iwidth + pW) - --owidth; - } - - /* get contiguous input */ - input = THTensor_(newContiguous)(input); - - if (input->nDimension == 4) /* non-batch mode */ - { - /* resize output */ - THTensor_(resize4d)(output, nslices, otime, oheight, owidth); - /* indices will contain ti,i,j uchar locations packed into float/double */ - THTensor_(resize4d)(indices, 
nslices, otime, oheight, owidth); - - input_data = THTensor_(data)(input); - output_data = THTensor_(data)(output); - indices_data = THTensor_(data)(indices); - - THNN_(VolumetricMaxPooling_updateOutput_frame)( - input_data, output_data, - indices_data, - nslices, - itime, iwidth, iheight, - otime, owidth, oheight, - kT, kW, kH, - dT, dW, dH, - pT, pW, pH - ); - } - else /* batch mode */ - { - long p; - long nBatch = input->size[0]; - - long istride = nslices * itime * iwidth * iheight; - long ostride = nslices * otime * owidth * oheight; - - /* resize output */ - THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth); - /* indices will contain ti,i,j locations for each output point */ - THTensor_(resize5d)(indices, nBatch, nslices, otime, oheight, owidth); - - input_data = THTensor_(data)(input); - output_data = THTensor_(data)(output); - indices_data = THTensor_(data)(indices); - -#pragma omp parallel for private(p) - for (p=0; p < nBatch; p++) - { - THNN_(VolumetricMaxPooling_updateOutput_frame)( - input_data + p * istride, - output_data + p * ostride, - indices_data + p * ostride, - nslices, - itime, iwidth, iheight, - otime, owidth, oheight, - kT, kW, kH, - dT, dW, dH, - pT, pW, pH - ); - } - } - - /* cleanup */ - THTensor_(free)(input); -} - -static void THNN_(VolumetricMaxPooling_updateGradInput_frame)( - real *gradInput_p, - real *gradOutput_p, - real *indz_p, - long nslices, - long itime, - long iwidth, - long iheight, - long otime, - long owidth, - long oheight, - int dT, - int dW, - int dH, - int pT, - int pW, - int pH) -{ - long k; -#pragma omp parallel for private(k) - for (k = 0; k < nslices; k++) - { - real *gradInput_p_k = gradInput_p + k * itime * iwidth * iheight; - real *gradOutput_p_k = gradOutput_p + k * otime * owidth * oheight; - real *indz_p_k = indz_p + k * otime * owidth * oheight; - - /* calculate max points */ - long ti, i, j; - for (ti = 0; ti < otime; ti++) - { - for (i = 0; i < oheight; i++) - { - for (j = 0; j < owidth; 
j++) - { - /* retrieve position of max */ - real * indzp = &indz_p_k[ti * oheight * owidth + i * owidth + j]; - long maxti = ((unsigned char*)(indzp))[0] + ti * dT - pT; - long maxi = ((unsigned char*)(indzp))[1] + i * dH - pH; - long maxj = ((unsigned char*)(indzp))[2] + j * dW - pW; - - /* update gradient */ - gradInput_p_k[maxti * iheight * iwidth + maxi * iwidth + maxj] += - gradOutput_p_k[ti * oheight * owidth + i * owidth + j]; - } - } - } - } + THNN_(VolumetricDilatedMaxPooling_updateOutput)( + state, input, output, indices, + kT, kW, kH, dT, dW, dH, + pT, pW, pH, 1, 1, 1, ceilMode); } void THNN_(VolumetricMaxPooling_updateGradInput)( @@ -303,90 +37,9 @@ void THNN_(VolumetricMaxPooling_updateGradInput)( int pW, int pH) { - int nslices; - int itime; - int iheight; - int iwidth; - int otime; - int oheight; - int owidth; - real *gradInput_data; - real *gradOutput_data; - real *indices_data; - - int dimN = 0; - int dimt = 1; - int dimh = 2; - int dimw = 3; - - /* get contiguous gradOutput */ - gradOutput = THTensor_(newContiguous)(gradOutput); - - /* resize */ - THTensor_(resizeAs)(gradInput, input); - THTensor_(zero)(gradInput); - - if (input->nDimension == 5) - { - dimN++; - dimt++; - dimh++; - dimw++; - } - - /* sizes */ - nslices = input->size[dimN]; - itime = input->size[dimt]; - iheight = input->size[dimh]; - iwidth = input->size[dimw]; - otime = gradOutput->size[dimt]; - oheight = gradOutput->size[dimh]; - owidth = gradOutput->size[dimw]; - - /* get raw pointers */ - gradInput_data = THTensor_(data)(gradInput); - gradOutput_data = THTensor_(data)(gradOutput); - indices_data = THTensor_(data)(indices); - - /* backprop */ - if (input->nDimension == 4) /* non-batch mode*/ - { - THNN_(VolumetricMaxPooling_updateGradInput_frame)( - gradInput_data, gradOutput_data, - indices_data, - nslices, - itime, iwidth, iheight, - otime, owidth, oheight, - dT, dW, dH, - pT, pW, pH - ); - } - else /* batch mode */ - { - long p; - long nBatch = input->size[0]; - - long 
istride = nslices * itime * iwidth * iheight;
-    long ostride = nslices * otime * owidth * oheight;
-
-#pragma omp parallel for private(p)
-    for (p = 0; p < nBatch; p++)
-    {
-      THNN_(VolumetricMaxPooling_updateGradInput_frame)(
-        gradInput_data + p * istride,
-        gradOutput_data + p * ostride,
-        indices_data + p * ostride,
-        nslices,
-        itime, iwidth, iheight,
-        otime, owidth, oheight,
-        dT, dW, dH,
-        pT, pW, pH
-      );
-    }
-  }
-
-  /* cleanup */
-  THTensor_(free)(gradOutput);
+  THNN_(VolumetricDilatedMaxPooling_updateGradInput)(
+    state, input, gradOutput, gradInput, indices,
+    dT, dW, dH, pT, pW, pH, 1, 1, 1);
 }
 
 #endif
diff --git a/lib/THNN/init.c b/lib/THNN/init.c
index d26c509d6..c75fa8871 100644
--- a/lib/THNN/init.c
+++ b/lib/THNN/init.c
@@ -172,6 +172,9 @@
 #include "generic/VolumetricMaxPooling.c"
 #include "THGenerateFloatTypes.h"
 
+#include "generic/VolumetricDilatedMaxPooling.c"
+#include "THGenerateFloatTypes.h"
+
 #include "generic/VolumetricMaxUnpooling.c"
 #include "THGenerateFloatTypes.h"
 
diff --git a/test.lua b/test.lua
index 6673fed1d..0b57626a8 100644
--- a/test.lua
+++ b/test.lua
@@ -4223,6 +4223,57 @@ function nntest.VolumetricMaxPooling()
    mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err (Batch) ')
 end
 
+function nntest.VolumetricDilatedMaxPooling()
+   -- Input extent along one axis, derived from the wanted output size.
+   local function inputExtent(nOut, k, stride, pad, dilation)
+      return (nOut-1)*stride+(dilation*(k-1)+1)-2*pad
+   end
+
+   for _,ceil_mode in pairs({true,false}) do
+      local nPlane = math.random(2,3)
+      local kT = math.random(3,4)
+      local kW = math.random(3,4)
+      local kH = math.random(3,4)
+      local dT = math.random(2,3)
+      local dW = math.random(2,3)
+      local dH = math.random(2,3)
+      local oT = math.random(3,4)
+      local oW = math.random(3,4)
+      local oH = math.random(3,4)
+      local padT = math.min(math.random(0,1),math.floor(kT/2))
+      local padW = math.min(math.random(0,1),math.floor(kW/2))
+      local padH = math.min(math.random(0,1),math.floor(kH/2))
+      local dilationT = math.random(1,3)
+      local dilationW = math.random(1,3)
+      local dilationH = math.random(1,3)
+      local iT = inputExtent(oT, kT, dT, padT, dilationT)
+      local iW = inputExtent(oW, kW, dW, padW, dilationW)
+      local iH = inputExtent(oH, kH, dH, padH, dilationH)
+
+      local modeName = ceil_mode and 'ceil' or 'floor'
+
+      -- Builds a fresh module in the current rounding mode and runs the
+      -- Jacobian and i/o checks on `input`.
+      local function check(input, jacTag, ioTag)
+         local module = nn.VolumetricDilatedMaxPooling(kT,kW,kH,dT,dW,dH,padT,padW,padH,dilationT,dilationW,dilationH)
+         if ceil_mode then module:ceil() else module:floor() end
+
+         local err = jac.testJacobian(module, input)
+         mytester:assertlt(err, precision, 'error '..modeName..' mode on state '..jacTag)
+
+         local ferr, berr = jac.testIO(module, input)
+         mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err '..ioTag)
+         mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err '..ioTag)
+      end
+
+      check(torch.rand(nPlane,iT,iH,iW), '', '')
+
+      -- batch
+      local nbatch = math.random(2,5)
+      check(torch.rand(nbatch,nPlane,iT,iH,iW), '(Batch)', '(Batch) ')
+   end
+end
+
 function nntest.VolumetricMaxUnpooling()
    local from = math.random(2,3)
    local kt = math.random(3,4)