From 428c01fece6766c01e6d14ae45295ac67d51a4fe Mon Sep 17 00:00:00 2001
From: lichao18
Date: Wed, 11 Jul 2018 15:14:15 +0800
Subject: [PATCH] Update README.md

---
 README.md                                   | 123 +++++++++++++-------
 aibench/executors/base_executor.h           |   7 ++
 aibench/executors/tflite/tflite_executor.cc |   4 +-
 aibench/executors/tflite/tflite_executor.h  |   1 +
 4 files changed, 92 insertions(+), 43 deletions(-)

diff --git a/README.md b/README.md
index a639d21..f2269f5 100644
--- a/README.md
+++ b/README.md
@@ -3,61 +3,92 @@ MobileAIBench
 [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE)
 [![build status](http://v9.git.n.xiaomi.com/deep-computing/mobile-ai-bench/badges/master/build.svg)](http://v9.git.n.xiaomi.com/deep-computing/mobile-ai-bench/commits/master)
 
-In recent years, the on device deep learning applications are getting more and
+In recent years, on-device deep learning applications have become more and
 more popular. It's a challenging task for application developers to deploy their
-deep learning models in their applications. They need to choose proper
+deep learning models in their applications. They need to choose a proper
 inference framework, optionally utilizing quantization or compression
-techniques regarding to the precision-performance trade-off, and finally
-run the model on one or more of heterogeneous compute devices. How to make an
-appropriate decision among these choices is a tedious and time consuming task.
+techniques regarding the precision-performance trade-off, and finally
+run the model on one or more heterogeneous computing devices. How to make an
+appropriate decision among these choices is a tedious and time-consuming task.
 
-The puropse of this project is to provide an end-to-end neural network benchmark
-on mobile devices, which hopefully can provide insights for the developers.
+**Mobile AI Benchmark** (or **MobileAIBench** for short) is an end-to-end
+benchmark tool that measures how models run in different neural network
+frameworks on mobile devices, which hopefully can provide insights for
+developers.
+
+## Environment requirements
+
+MobileAIBench currently supports four frameworks (MACE, SNPE, NCNN and
+TensorFlow Lite), which may require the following dependencies:
+
+| Software    | Installation command | Tested version |
+| :---------: | :------------------: | :------------: |
+| Python      |                      | 2.7            |
+| ADB         | apt-get install android-tools-adb | Required by Android run, >= 1.0.32 |
+| Android NDK | [NDK installation guide](https://developer.android.com/ndk/guides/setup#install) | Required by Android build, r15c |
+| Bazel       | [bazel installation guide](https://docs.bazel.build/versions/master/install.html) | 0.13.0 |
+| CMake       | apt-get install cmake | >= 3.11.3 |
+| FileLock    | pip install -I filelock==3.0.0 | Required by Android run |
+| PyYaml      | pip install -I pyyaml==3.12 | 3.12.0 |
+| sh          | pip install -I sh==1.12.14 | 1.12.14 |
 
 ## How to use
+
+We provide a Python script to run the benchmark:
 ```
 python tools/benchmark.py --output_dir=output --frameworks=all \
-    --runtimes=all --model_names=all
+    --runtimes=all --model_names=all \
+    --target_abis=armeabi-v7a,arm64-v8a
 ```
 
 The whole benchmark may take several hours, and continuous benchmarking may heat
-the device very quickly, so you may set following arguments according to your
+the device very quickly, so you may set the following arguments according to your
 interests.
 
-| option        | type | default     | explanation |
-| ------------- | ---: | -----------:| ------------|
-| --frameworks  | str  | all         | Frameworks(MACE/SNPE/NCNN/TENSORFLOW_LITE), comma seperated list or all. |
-| --runtimes    | str  | all         | Runtimes(CPU/GPU/DSP), comma seperated list or all. |
-| --target_abis | str  | armeabi-v7a | Target ABIs(armeabi-v7a/arm64-v8a), comma separated list. |
-| --model_names | str  | all         | Model names, comma seperated list or all. |
+| option         | type | default     | explanation |
+| :------------: | :--: | :---------: | ----------- |
+| --output_dir   | str  | output      | Benchmark output directory. |
+| --frameworks   | str  | all         | Frameworks (MACE/SNPE/NCNN/TFLITE), comma-separated list or all. |
+| --runtimes     | str  | all         | Runtimes (CPU/GPU/DSP), comma-separated list or all. |
+| --target_abis  | str  | armeabi-v7a | Target ABIs (armeabi-v7a, arm64-v8a), comma-separated list. |
+| --model_names  | str  | all         | Model names (InceptionV3, MobileNetV1, ...), comma-separated list or all. |
+| --run_interval | int  | 10          | Interval between two benchmark runs, in seconds. |
+| --num_threads  | int  | 4           | Number of threads used for inference. |
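+
+For example, a narrower run might benchmark only MACE and NCNN on CPU with
+MobileNetV1 on a 64-bit device (all values below come from the tables above):
+
+```
+python tools/benchmark.py --output_dir=output --frameworks=MACE,NCNN \
+    --runtimes=CPU --model_names=MobileNetV1 \
+    --target_abis=arm64-v8a
+```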
 
 ## Architecture
 ```
-+---------------+          +------------------+       +------------------+
-| Benchmark     |          | BaseExecutor     | <---  | MaceGpuExecutor  |
-+---------------+          +------------------+       +------------------+
-| - executor    | -------> | - framework      |
-| - model_name  |          | - runtime        |       +------------------+
-| - model_file  |          |                  | <---  | SnpeGpuExecutor  |
-| - input_names |          +------------------+       +------------------+
-| - input_files |          | + Prepare()      |
-| - input_shapes|          | + Run()          |       +------------------+
-+---------------+          | + Finish()       | <---  | TfLiteExecutor   |
-| - Register()  |          +------------------+       +------------------+
-| - Run()       |                   .
-+---------------+                   .
-                                    .
++-----------------+          +------------------+       +----------------+
+| Benchmark       |          | BaseExecutor     | <---  | MaceExecutor   |
++-----------------+          +------------------+       +----------------+
+| - executor      | -------> | - framework      |
+| - model_name    |          | - runtime        |       +----------------+
+| - model_file    |          |                  | <---  | SnpeExecutor   |
+| - input_names   |          +------------------+       +----------------+
+| - input_files   |          | + Init()         |
+| - input_shapes  |          | + Prepare()      |       +----------------+
+| - output_names  |          | + Run()          | <---  | NcnnExecutor   |
+| - output_shapes |          | + Finish()       |       +----------------+
++-----------------+          +------------------+
+| - Register()    |                                     +----------------+
+| - Run()         |                               <---  | TfLiteExecutor |
++-----------------+                                     +----------------+
+
 ```
 
-## Adding a new NN framework
+## Adding your own AI framework
 
-1. Add dependencies in `third_party/new_framework` and WORKSPACE.
+1. Add dependencies in `third_party/your_framework`, `aibench/benchmark/BUILD` and WORKSPACE.
 
 2. Define executor and implement the interfaces:
 
    ```c++
-   class NewFrameworkExecutor : public BaseExecutor {
+   class YourFrameworkExecutor : public BaseExecutor {
     public:
-     NewFrameworkExecutor() : BaseExecutor(FRAMEWORK_NAME, CPU) {}
+     YourFrameworkExecutor() : BaseExecutor(FRAMEWORK_NAME, CPU) {}
+
+     // If your framework needs to initialize something other than loading
+     // the model or creating an engine, you can put it here; e.g., MACE
+     // needs to compile its OpenCL kernels once per target.
+     virtual Status Init(const char *model_name, int num_threads);
 
      // Load model and prepare to run
      virtual Status Prepare(const char *model_name);
@@ -70,17 +101,27 @@ interests.
    };
    ```
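+
+   A minimal sketch of how `Run` might be implemented; the `engine_` member
+   and its `SetInput`/`Forward`/`GetOutput` calls below are hypothetical
+   placeholders for your framework's real API:
+
+   ```c++
+   Status YourFrameworkExecutor::Run(
+       const std::map<std::string, BaseTensor> &inputs,
+       std::map<std::string, BaseTensor> *outputs) {
+     // Feed every named input tensor to the framework's engine.
+     for (const auto &input : inputs) {
+       engine_->SetInput(input.first, input.second);      // hypothetical
+     }
+     // Execute the model once; the benchmark harness measures the time.
+     if (!engine_->Forward()) {                           // hypothetical
+       return Status::RUNTIME_ERROR;
+     }
+     // Copy results back so the harness can verify the outputs.
+     for (auto &output : *outputs) {
+       engine_->GetOutput(output.first, &output.second);  // hypothetical
+     }
+     return Status::SUCCESS;
+   }
+   ```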
 
-3. Register a new Benchmark in `aibench/benchmark/benchmark_main.cc`
+3. Register your Benchmark in `aibench/benchmark/benchmark_main.cc`:
+
+   ```c++
+   #ifdef AIBENCH_ENABLE_YOUR_FRAMEWORK
+   #include "aibench/executors/your_framework/your_framework_executor.h"
+   #endif
+   ```
 
    ```c++
-   std::unique_ptr<aibench::NewFrameworkExecutor>
-       new_framework_executor(new aibench::NewFrameworkExecutor());
-   AIBENCH_BENCHMARK(new_framework_executor.get(), MODEL_NAME, FRAMEWORK_NAME, CPU,
-                     MODEL_FILE, (std::vector<std::string>{INPUT_NAMES}),
-                     (std::vector<std::string>{INPUT_FILES}),
-                     (std::vector<std::vector<int64_t>>{INPUT_SHAPES}));
+   #ifdef AIBENCH_ENABLE_YOUR_FRAMEWORK
+   std::unique_ptr<aibench::YourFrameworkExecutor>
+       your_framework_executor(new aibench::YourFrameworkExecutor());
+   AIBENCH_BENCHMARK(your_framework_executor.get(), MODEL_NAME, FRAMEWORK_NAME, CPU,
+                     MODEL_FILE, (std::vector<std::string>{INPUT_NAME}),
+                     (std::vector<std::string>{INPUT_FILE}),
+                     (std::vector<std::vector<int64_t>>{INPUT_SHAPE}),
+                     (std::vector<std::string>{OUTPUT_NAME}),
+                     (std::vector<std::vector<int64_t>>{OUTPUT_SHAPE}));
+   #endif
    ```
 
-   MODEL_FILE and INPUT_FILES can be configured in `tools/model_and_input.yml`
+   MODEL_FILE and INPUT_FILE can be configured in `tools/model_and_input.yml`.
 
 ## License
 
diff --git a/aibench/executors/base_executor.h b/aibench/executors/base_executor.h
index 48f376e..36367b1 100644
--- a/aibench/executors/base_executor.h
+++ b/aibench/executors/base_executor.h
@@ -73,16 +73,23 @@ class BaseExecutor {
 
   virtual ~BaseExecutor() = default;
 
+  // If your framework needs to initialize something other than loading
+  // the model or creating an engine, you can put it here; e.g., MACE
+  // needs to compile its OpenCL kernels once per target.
   virtual Status Init(const char *model_name, int num_threads) = 0;
 
+  // Load model and prepare to run.
   virtual Status Prepare(const char *model_name) = 0;
 
+  // Run the model.
   virtual Status Run(const std::map<std::string, BaseTensor> &inputs,
                      std::map<std::string, BaseTensor> *outputs) = 0;
 
+  // Unload model and free the memory after running the model.
   virtual void Finish() = 0;
 
   Framework GetFramework() {return framework_;}
   Runtime GetRuntime() {return runtime_;}
+
  private:
   Framework framework_;
   Runtime runtime_;
diff --git a/aibench/executors/tflite/tflite_executor.cc b/aibench/executors/tflite/tflite_executor.cc
index 20f4d76..81a20c9 100644
--- a/aibench/executors/tflite/tflite_executor.cc
+++ b/aibench/executors/tflite/tflite_executor.cc
@@ -20,7 +20,7 @@ namespace aibench {
 
 Status TfLiteExecutor::Init(const char *model_name, int num_threads) {
   (void)model_name;
-  (void)num_threads;
+  num_threads_ = num_threads;
   return Status::SUCCESS;
 }
 
@@ -37,7 +37,7 @@ Status TfLiteExecutor::Prepare(const char *model_name) {
     std::cout << "Failed to construct interpreter" << std::endl;
     return Status::RUNTIME_ERROR;
   }
-  interpreter_->SetNumThreads(4);
+  interpreter_->SetNumThreads(num_threads_);
   interpreter_->UseNNAPI(false);
   if (interpreter_->AllocateTensors() != kTfLiteOk) {
     std::cout << "Failed to allocate tensors!" << std::endl;
diff --git a/aibench/executors/tflite/tflite_executor.h b/aibench/executors/tflite/tflite_executor.h
index ae9da5c..c043a4c 100644
--- a/aibench/executors/tflite/tflite_executor.h
+++ b/aibench/executors/tflite/tflite_executor.h
@@ -44,6 +44,7 @@ class TfLiteExecutor : public BaseExecutor {
  private:
   std::unique_ptr<tflite::Interpreter> interpreter_;
   std::unique_ptr<tflite::FlatBufferModel> model_;
+  int num_threads_;
 };
 
 } // namespace aibench