add NNPACK support for high convolution inference perf (apache#3666)
* add NNPACK support for high convolution inference perf

* set USE_NNPACK to 0

* Fix header declaration

* Fix input_size init value

1. data's shape is BxCxHxW, while NNPACK's input_size is {width, height}
2. improve the algorithm selection policy

* Fix lint error
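
For context on the input_size fix: data is laid out as BxCxHxW while NNPACK's nnp_size is ordered {width, height}, so the last two dimensions must be swapped when building input_size, exactly as done in nnpack_convolution-inl.h below:

  nnp_size input_size = {data.shape_[3], data.shape_[2]};  // {W, H}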
clcarwin authored and piiswrong committed Nov 10, 2016
1 parent 0071324 commit 251634f
Showing 4 changed files with 142 additions and 0 deletions.
6 changes: 6 additions & 0 deletions Makefile
@@ -58,6 +58,12 @@ ifeq ($(USE_OPENMP), 1)
CFLAGS += -fopenmp
endif

ifeq ($(USE_NNPACK), 1)
CFLAGS += -DMXNET_USE_NNPACK=1
CFLAGS += -DMXNET_USE_NNPACK_NUM_THREADS=$(USE_NNPACK_NUM_THREADS)
LDFLAGS += -lnnpack
endif

ifeq ($(USE_MKL2017), 1)
CFLAGS += -DMXNET_USE_MKL2017=1
CFLAGS += -DUSE_MKL=1
4 changes: 4 additions & 0 deletions make/config.mk
@@ -65,6 +65,10 @@ USE_MKL2017 = 0
# whether use MKL2017 experimental feature for high performance
USE_MKL2017_EXPERIMENTAL = 0

# whether use NNPACK library
USE_NNPACK = 0
USE_NNPACK_NUM_THREADS = 4

# choose the version of blas you want to use
# can be: mkl, blas, atlas, openblas
# in default use atlas for linux while apple for osx
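
With these defaults in place, NNPACK support can be switched on either by editing make/config.mk or by overriding the variables on the command line. A minimal invocation sketch, assuming libnnpack and its pthreadpool dependency are already built and visible to the compiler and linker:

  make USE_NNPACK=1 USE_NNPACK_NUM_THREADS=4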
15 changes: 15 additions & 0 deletions src/operator/convolution.cc
@@ -11,6 +11,9 @@
#include "./mkl/mkl_memory-inl.h"
#include "./mkl/mkl_convolution-inl.h"
#endif // MXNET_USE_MKL2017
#if MXNET_USE_NNPACK == 1
#include "./nnpack/nnpack_convolution-inl.h"
#endif // MXNET_USE_NNPACK

namespace mxnet {
namespace op {
@@ -32,6 +35,18 @@ Operator* CreateOp<cpu>(ConvolutionParam param, int dtype,
break;
}
}
#endif
#if MXNET_USE_NNPACK == 1
if ((param.dilate[0] == 1 && param.dilate[1] == 1)
&& param.kernel.ndim() == 2 && (!param.no_bias)
&& param.num_group == 1) {
switch (dtype) {
case mshadow::kFloat32:
return new NNPACKConvolutionOp<cpu, float>(param);
default:
break;
}
}
#endif
MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
op = new ConvolutionOp<cpu, DType>(param);
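
The conditions gating the NNPACK path in CreateOp<cpu> above can be read as a single predicate: NNPACK is tried only for fp32, 2-D, undilated, single-group convolutions that have a bias term; everything else falls through to the default ConvolutionOp. An illustrative restatement (SupportsNNPACKConvolution is a hypothetical helper, not part of this commit):

  // Hypothetical summary of the dispatch rule in CreateOp<cpu>.
  inline bool SupportsNNPACKConvolution(const ConvolutionParam &param, int dtype) {
    return dtype == mshadow::kFloat32 &&
           param.kernel.ndim() == 2 &&
           param.dilate[0] == 1 && param.dilate[1] == 1 &&
           param.num_group == 1 &&
           !param.no_bias;
  }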
117 changes: 117 additions & 0 deletions src/operator/nnpack/nnpack_convolution-inl.h
@@ -0,0 +1,117 @@
/*!
* Copyright (c) 2016 by Contributors
* \file nnpack_convolution-inl.h
* \brief NNPACK convolution operator for fast CPU inference
* \author Carwin
*/
#ifndef MXNET_OPERATOR_NNPACK_NNPACK_CONVOLUTION_INL_H_
#define MXNET_OPERATOR_NNPACK_NNPACK_CONVOLUTION_INL_H_

#include <dmlc/logging.h>
#include <dmlc/parameter.h>
#include <mxnet/operator.h>
#include <algorithm>
#include <map>
#include <vector>
#include <string>
#include <utility>
#include "../convolution-inl.h"
#include "nnpack.h"

namespace mxnet {
namespace op {

class NNPACKInitialize {
public:
pthreadpool_t threadpool;

NNPACKInitialize() {
nnp_status status = nnp_initialize();
if (nnp_status_success != status) {
LOG(FATAL) << "nnp_initialize failed status=" << status;
}
int num_threads = MXNET_USE_NNPACK_NUM_THREADS;
this->threadpool = pthreadpool_create(num_threads);
}
virtual ~NNPACKInitialize() {
nnp_status status = nnp_deinitialize();
if (nnp_status_success != status) {
LOG(FATAL) << "nnp_deinitialize failed status=" << status;
}
pthreadpool_destroy(threadpool);
}
};

static NNPACKInitialize nnpackinitialize;

template <typename xpu, typename DType>
class NNPACKConvolutionOp : public ConvolutionOp<xpu, DType> {
private:
ConvolutionParam param_;

public:
explicit NNPACKConvolutionOp(ConvolutionParam p)
: ConvolutionOp<xpu, DType>(p) {
this->param_ = p;
}

virtual void Forward(const OpContext &ctx, const std::vector<TBlob> &in_data,
const std::vector<OpReqType> &req,
const std::vector<TBlob> &out_data,
const std::vector<TBlob> &aux_args) {
using namespace mshadow;
using namespace mshadow::expr;
Stream<xpu> *s = ctx.get_stream<xpu>();
Tensor<xpu, 4, DType> data = in_data[conv::kData].get<xpu, 4, DType>(s);
Shape<3> wmat_shape =
Shape3(param_.num_group, param_.num_filter / param_.num_group,
data.shape_[1] / param_.num_group * param_.kernel[0] *
param_.kernel[1]);
Tensor<xpu, 3, DType> wmat =
in_data[conv::kWeight].get_with_shape<xpu, 3, DType>(wmat_shape, s);
Tensor<xpu, 4, DType> out = out_data[conv::kOut].get<xpu, 4, DType>(s);

// nnp_convolution_inference is optimized for batch_size == 1;
// when W or H is less than 16, the default ConvolutionOp is faster
// than NNPACK's convolution, so fall back in those cases
if ((data.shape_[0] != 1) || (data.shape_[2] < 16) ||
(data.shape_[3] < 16)) {
ConvolutionOp<xpu, DType>::Forward(ctx, in_data, req, out_data, aux_args);
} else {
nnp_size input_size = {data.shape_[3], data.shape_[2]};  // {width, height}
nnp_padding input_padding = {param_.pad[0], param_.pad[1], param_.pad[0],
param_.pad[1]};  // {top, right, bottom, left}
nnp_size kernel_size = {param_.kernel[1], param_.kernel[0]};
nnp_size output_subsampling = {param_.stride[1], param_.stride[0]};
Tensor<xpu, 1, DType> bias = in_data[conv::kBias].get<xpu, 1, DType>(s);

nnp_convolution_algorithm algorithm = nnp_convolution_algorithm_auto;
if ((data.shape_[2] < 32) || (data.shape_[3] < 32)) {
algorithm = nnp_convolution_algorithm_implicit_gemm;
}

nnp_status status = nnp_convolution_inference(
algorithm, // enum nnp_convolution_algorithm algorithm,
nnp_convolution_transform_strategy_tuple_based,
data.shape_[1], // size_t input_channels,
param_.num_filter, // size_t output_channels,
input_size, // struct nnp_size input_size,
input_padding, // struct nnp_padding input_padding,
kernel_size, // struct nnp_size kernel_size,
output_subsampling, // struct nnp_size output_subsampling,
data.dptr_, // const float input[],
wmat.dptr_, // const float kernel[],
bias.dptr_, // const float bias[],
out.dptr_, // float output[],
nnpackinitialize.threadpool, // pthreadpool_t threadpool,
nullptr);  // struct nnp_profile *profile
if (nnp_status_success != status) {
LOG(FATAL) << "nnp_convolution_inference failed status=" << status;
}
}
}
}; // class NNPACKConvolutionOp
} // namespace op
} // namespace mxnet
#endif // MXNET_OPERATOR_NNPACK_NNPACK_CONVOLUTION_INL_H_
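
The runtime policy in Forward can likewise be summarized as a small decision function (a sketch for illustration; ConvPath and ChooseConvPath are hypothetical names, not part of this commit): any batch size other than 1, or a spatial dimension under 16, falls back to the default ConvolutionOp; spatial dimensions under 32 pin NNPACK to implicit GEMM; otherwise NNPACK chooses the algorithm automatically.

  // Hypothetical summary of the fallback/algorithm policy in Forward.
  enum class ConvPath { kFallbackDefault, kNNPACKImplicitGemm, kNNPACKAuto };

  inline ConvPath ChooseConvPath(size_t batch, size_t height, size_t width) {
    if (batch != 1 || height < 16 || width < 16) return ConvPath::kFallbackDefault;
    if (height < 32 || width < 32) return ConvPath::kNNPACKImplicitGemm;
    return ConvPath::kNNPACKAuto;
  }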
