diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..b712427 --- /dev/null +++ b/LICENSE @@ -0,0 +1,203 @@ +Copyright 2018-2020 Open-MMLab. All rights reserved. + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2018-2020 Open-MMLab. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..f8c7d36 --- /dev/null +++ b/README.md @@ -0,0 +1,286 @@ +# Lite-HRNet: A Lightweight High-Resolution Network + +## Introduction +This is an official PyTorch implementation of [Lite-HRNet: A Lightweight High-Resolution Network](). In this work, we present an efficient high-resolution network, Lite-HRNet, for human pose estimation. We start by simply applying the efficient shuffle block in ShuffleNet to HRNet (high-resolution network), yielding stronger performance than popular lightweight networks such as MobileNet, ShuffleNet, and Small HRNet. We find that the heavily used pointwise (1x1) convolutions in shuffle blocks become the computational bottleneck. We introduce a lightweight unit, conditional channel weighting, to replace the costly pointwise (1x1) convolutions in shuffle blocks. The complexity of channel weighting is linear w.r.t. the number of channels and lower than the quadratic time complexity of pointwise convolutions. Our solution learns the weights from all the channels and over the multiple resolutions that are readily available in the parallel branches of HRNet. It uses the weights as a bridge to exchange information across channels and resolutions, compensating for the role played by the pointwise (1x1) convolution. Lite-HRNet demonstrates superior results on human pose estimation over popular lightweight networks. Moreover, Lite-HRNet can be easily applied to semantic segmentation tasks in the same lightweight manner.
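+The conditional channel weighting described above can be sketched in a few lines of PyTorch. The snippet below is only an illustration of the idea, not the implementation shipped in `models/backbones/litehrnet.py` (see the `CrossResolutionWeighting` and `ConditionalChannelWeighting` modules there); the helper name and the random branch tensors are made up for the example.
+
+```python
+import torch
+import torch.nn.functional as F
+
+
+def cross_resolution_weighting_sketch(features, reduction=8):
+    """Toy version of conditional channel weighting.
+
+    `features` holds the tensors of the parallel branches, highest resolution
+    first. Channel weights are computed jointly from all branches (pooled to
+    the smallest resolution) and then applied per branch, so information is
+    exchanged across channels and resolutions without a full pointwise
+    convolution on every branch.
+    """
+    smallest = features[-1].shape[-2:]
+    pooled = torch.cat([F.adaptive_avg_pool2d(f, smallest) for f in features], dim=1)
+    total_channels = pooled.shape[1]
+    # Two 1x1 convolutions stand in for the learned weighting function;
+    # fixed random weights keep the sketch runnable without training.
+    squeeze = torch.nn.Conv2d(total_channels, total_channels // reduction, 1)
+    excite = torch.nn.Conv2d(total_channels // reduction, total_channels, 1)
+    weights = torch.sigmoid(excite(torch.relu(squeeze(pooled))))
+    # Split the joint weights back per branch and upsample to each resolution.
+    per_branch = torch.split(weights, [f.shape[1] for f in features], dim=1)
+    return [f * F.interpolate(w, size=f.shape[-2:], mode='nearest')
+            for f, w in zip(features, per_branch)]
+
+
+# Three hypothetical branches at decreasing resolution (e.g. for a 256x192 input).
+feats = [torch.randn(1, 40, 64, 48),
+         torch.randn(1, 80, 32, 24),
+         torch.randn(1, 160, 16, 12)]
+print([o.shape for o in cross_resolution_weighting_sketch(feats)])
+```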
+ + + +## Results and models + +### Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | #Params | FLOPs | AP | AP50 | AP75 | AR | AR50 | ckpt | +| :----------------- | :-----------: | :------: | :-----------: | :------: |:------: | :------: | :------: | :------: | :------: | +| [Lite-HRNet-18](/configs/top_down/lite_hrnet/coco/litehrnet_18_coco_256x192.py) | 256x192 | 1.1M | 205.2M | 0.648 | 0.867 | 0.730 | 0.712 | 0.911 | [GoogleDrive](https://drive.google.com/file/d/1_bLVpm8cVqQF6unmxOQ3ObEr4hpoLuJn/view?usp=sharing) or [OneDrive](https://1drv.ms/u/s!AvreNzlRJaHnc6CkDJVt0dOSu4k?e=xLEMRb) | +| [Lite-HRNet-18](/configs/top_down/lite_hrnet/coco/litehrnet_18_coco_384x288.py) | 384x288 | 1.1M | 461.6M | 0.676 | 0.878 | 0.750 | 0.737 | 0.921 | [GoogleDrive](https://drive.google.com/file/d/1djewivNYHKTeaYLL9x7b3wkxHTCtVbJc/view?usp=sharing) or [OneDrive](https://1drv.ms/u/s!AvreNzlRJaHnddnyeYxmLgSymMM?e=AIofxW) | +| [Lite-HRNet-30](/configs/top_down/lite_hrnet/coco/litehrnet_30_coco_256x192.py) | 256x192 | 1.8M | 319.2M | 0.672 | 0.880 | 0.750 | 0.733 | 0.922 | [GoogleDrive](https://drive.google.com/file/d/1QczOxBm6rKqDeSlq9SFCma5oqQncZ6ng/view?usp=sharing) or [OneDrive](https://1drv.ms/u/s!AvreNzlRJaHneaSASfryxo9eeGI?e=dVtbiH) | +| [Lite-HRNet-30](/configs/top_down/lite_hrnet/coco/litehrnet_30_coco_384x288.py) | 384x288 | 1.8M | 717.8M | 0.704 | 0.887 | 0.777 | 0.762 | 0.928 | [GoogleDrive](https://drive.google.com/file/d/1nx7AT1DMJRuiYNPcI7EqBp6yJN63ewE1/view?usp=sharing) or [OneDrive](https://1drv.ms/u/s!AvreNzlRJaHneJJYbKPmep86U3o?e=pbBP5B) | + + +### Results on MPII val set + +| Arch | Input Size | #Params | FLOPs | Mean | Mean@0.1 | ckpt | +| :--- | :--------: | :------: | :--------: | :------: | :------: | :------: | +| [Lite-HRNet-18](/configs/top_down/lite_hrnet/mpii/litehrnet_18_mpii_256x256.py) | 256x256 | 1.1M | 273.4M | 0.854 | 0.295 | [GoogleDrive](https://drive.google.com/file/d/1Bw4shzVJoaaQWngkL4qnlnaPE87uiJtW/view?usp=sharing) or [OneDrive](https://1drv.ms/u/s!AvreNzlRJaHndPtOeefYphHCwpc?e=AccP58) | +| [Lite-HRNet-30](/configs/top_down/lite_hrnet/mpii/litehrnet_30_mpii_256x256.py) | 256x256 | 1.8M | 425.3M | 0.870 | 0.313 | [GoogleDrive](https://drive.google.com/file/d/1bVOahaHY61bhAPFYYfkgkJviyV-3fLKE/view?usp=sharing) or [OneDrive](https://1drv.ms/u/s!AvreNzlRJaHnd4Zt8pBVuo-Eimo?e=6XtAvE) | + + +## Environment +The code is developed using Python 3.6 on Ubuntu 16.04. NVIDIA GPUs are needed. The code is developed and tested using 8 NVIDIA V100 GPU cards. Other platforms or GPU cards are not fully tested. +## Quick Start + +### Requirements + +- Linux (Windows is not officially supported) +- Python 3.6+ +- PyTorch 1.3+ +- CUDA 9.2+ (If you build PyTorch from source, CUDA 9.0 is also compatible) +- GCC 5+ +- [mmcv](https://github.com/open-mmlab/mmcv) (Please install the latest version of mmcv-full) +- Numpy +- cv2 +- json_tricks +- [xtcocotools](https://github.com/jin-s13/xtcocoapi) + + +### Installation + + +a. Install mmcv. We recommend installing the pre-built mmcv-full as below. + +```shell +pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html +``` + +Please replace ``{cu_version}`` and ``{torch_version}`` in the URL with your desired versions.
For example, to install the latest ``mmcv-full`` with ``CUDA 11`` and ``PyTorch 1.7.0``, use the following command: + +```shell +pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu110/torch1.7.0/index.html +``` + +If it compiles during installation, then please check that the CUDA version and PyTorch version **exactly** match the versions in the mmcv-full installation command. For example, PyTorch 1.7.0 and 1.7.1 are treated differently. +See [here](https://github.com/open-mmlab/mmcv#installation) for the versions of MMCV compatible with different PyTorch and CUDA versions. + +Optionally, you can compile mmcv from source with the following commands: + +```shell +git clone https://github.com/open-mmlab/mmcv.git +cd mmcv +MMCV_WITH_OPS=1 pip install -e . # package mmcv-full, which contains cuda ops, will be installed after this step +# OR pip install -e . # package mmcv, which contains no cuda ops, will be installed after this step +cd .. +``` + +Or directly run + +```shell +pip install mmcv-full +# alternative: pip install mmcv +``` + +**Important:** You need to run `pip uninstall mmcv` first if you have mmcv installed. If mmcv and mmcv-full are both installed, there will be a `ModuleNotFoundError`. + +b. Install the build requirements + +```shell +pip install -r requirements.txt +``` + +### Prepare datasets + +It is recommended to symlink the dataset root to `$LITE_HRNET/data`. +If your folder structure is different, you may need to change the corresponding paths in config files. + +**For COCO data**, please download from [COCO download](http://cocodataset.org/#download); 2017 Train/Val is needed for COCO keypoints training and validation. [HRNet-Human-Pose-Estimation](https://github.com/HRNet/HRNet-Human-Pose-Estimation) provides person detection results on COCO val2017 to reproduce our multi-person pose estimation results. Please download them from [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blWzzDXoz5BeFl8sWM-). +Download and extract them under `$LITE_HRNET/data`, and make them look like this: + +``` +lite_hrnet +├── configs +├── models +├── tools +`── data + │── coco + │-- annotations + │ │-- person_keypoints_train2017.json + │ |-- person_keypoints_val2017.json + |-- person_detection_results + | |-- COCO_val2017_detections_AP_H_56_person.json + │-- train2017 + │ │-- 000000000009.jpg + │ │-- 000000000025.jpg + │ │-- 000000000030.jpg + │ │-- ... + `-- val2017 + │-- 000000000139.jpg + │-- 000000000285.jpg + │-- 000000000632.jpg + │-- ... + +``` + +**For MPII data**, please download from [MPII Human Pose Dataset](http://human-pose.mpi-inf.mpg.de/). +We have converted the original annotation files into JSON format; please download them from [mpii_annotations](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/datasets/mpii_annotations.tar). +Extract them under `$LITE_HRNET/data`, and make them look like this: + +``` +lite_hrnet +├── configs +├── models +├── tools +`── data + │── mpii + |── annotations + | |── mpii_gt_val.mat + | |── mpii_test.json + | |── mpii_train.json + | |── mpii_trainval.json + | `── mpii_val.json + `── images + |── 000001163.jpg + |── 000003072.jpg + +``` + +## Training and Testing +All outputs (log files and checkpoints) will be saved to the working directory, +which is specified by `work_dir` in the config file.
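+For example, a config can set this directory explicitly; the path below is only illustrative, not one used by the provided configs:
+
+```python
+work_dir = './work_dirs/litehrnet_18_coco_256x192'  # where logs and checkpoints are written
+```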
+ +By default, we evaluate the model on the validation set after each epoch. You can change the evaluation interval by modifying the `interval` argument in the training config: + +```python +evaluation = dict(interval=5)  # This evaluates the model every 5 epochs. +``` + +According to the [Linear Scaling Rule](https://arxiv.org/abs/1706.02677), you need to set the learning rate proportional to the batch size if you use a different number of GPUs or samples per GPU, e.g., lr=0.01 for 4 GPUs x 2 samples/gpu and lr=0.08 for 16 GPUs x 4 samples/gpu. + +### Training + +```shell +# train with a single GPU +python tools/train.py ${CONFIG_FILE} [optional arguments] + +# train with multiple GPUs +./tools/dist_train.sh ${CONFIG_FILE} ${GPU_NUM} [optional arguments] +``` + +Optional arguments are: + +- `--validate` (**strongly recommended**): Perform evaluation every k epochs (the default value is 5) during training. +- `--work-dir ${WORK_DIR}`: Override the working directory specified in the config file. +- `--resume-from ${CHECKPOINT_FILE}`: Resume from a previous checkpoint file. +- `--gpus ${GPU_NUM}`: Number of GPUs to use, which is only applicable to non-distributed training. +- `--seed ${SEED}`: Seed for the random state in Python, NumPy and PyTorch. +- `--deterministic`: If specified, it will set deterministic options for the CUDNN backend. +- `JOB_LAUNCHER`: Launcher for distributed job initialization. Allowed choices are `none`, `pytorch`, `slurm`, `mpi`. In particular, if set to `none`, it will run in a non-distributed mode. +- `LOCAL_RANK`: ID for the local rank. If not specified, it will be set to 0. +- `--autoscale-lr`: If specified, it will automatically scale the learning rate with the number of GPUs according to the [Linear Scaling Rule](https://arxiv.org/abs/1706.02677). + +Difference between `resume-from` and `load-from`: +`resume-from` loads both the model weights and the optimizer state, and the epoch is also inherited from the specified checkpoint. It is usually used to resume a training process that was interrupted accidentally. +`load-from` only loads the model weights, and the training epoch starts from 0. It is usually used for fine-tuning. + +Examples: + +#### Training on COCO train2017 dataset +```shell +./tools/dist_train.sh configs/top_down/lite_hrnet/coco/litehrnet_18_coco_256x192.py 8 +``` + +#### Training on MPII dataset + +```shell +./tools/dist_train.sh configs/top_down/lite_hrnet/mpii/litehrnet_18_mpii_256x256.py 8 +``` + +### Testing +You can use the following commands to test a dataset. + +```shell +# single-gpu testing +python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--out ${RESULT_FILE}] [--eval ${EVAL_METRIC}] \ + [--proc_per_gpu ${NUM_PROC_PER_GPU}] [--gpu_collect] [--tmpdir ${TMPDIR}] [--average_clips ${AVG_TYPE}] \ + [--launcher ${JOB_LAUNCHER}] [--local_rank ${LOCAL_RANK}] + +# multiple-gpu testing +./tools/dist_test.sh ${CONFIG_FILE} ${CHECKPOINT_FILE} ${GPU_NUM} [--out ${RESULT_FILE}] [--eval ${EVAL_METRIC}] \ + [--proc_per_gpu ${NUM_PROC_PER_GPU}] [--gpu_collect] [--tmpdir ${TMPDIR}] [--average_clips ${AVG_TYPE}] \ + [--launcher ${JOB_LAUNCHER}] [--local_rank ${LOCAL_RANK}] +``` + +Optional arguments: + +- `RESULT_FILE`: Filename of the output results. If not specified, the results will not be saved to a file. +- `EVAL_METRIC`: Items to be evaluated on the results. Allowed values depend on the dataset. +- `NUM_PROC_PER_GPU`: Number of processes per GPU. If not specified, only one process will be assigned to a single GPU.
+- `--gpu_collect`: If specified, results will be collected using GPU communication. Otherwise, the results on different GPUs will be saved to `TMPDIR` and collected by the rank 0 worker. +- `TMPDIR`: Temporary directory used for collecting results from multiple workers, available when `--gpu_collect` is not specified. +- `AVG_TYPE`: How to average multiple test clips. If set to `prob`, softmax is applied before averaging the clip scores. Otherwise, the clip scores are averaged directly. +- `JOB_LAUNCHER`: Launcher for distributed job initialization. Allowed choices are `none`, `pytorch`, `slurm`, `mpi`. In particular, if set to `none`, it will test in a non-distributed mode. +- `LOCAL_RANK`: ID for the local rank. If not specified, it will be set to 0. + +Examples: +#### Test LiteHRNet-18 on COCO with 8 GPUs, and evaluate mAP. + +```shell +./tools/dist_test.sh configs/top_down/lite_hrnet/coco/litehrnet_18_coco_256x192.py \ + checkpoints/SOME_CHECKPOINT.pth 8 \ + --eval mAP +``` + +### Get the computational complexity +You can use the following commands to compute the complexity of a model. +```shell +python tools/summary_network.py ${CONFIG_FILE} --shape ${SHAPE} [--with-head] +``` + +Arguments: + +- `SHAPE`: Input size. +- `--with-head`: If specified, the computed complexity includes the complexity of the pose head. + +Examples: + +#### Test the complexity of LiteHRNet-18 with 256x256 resolution input. + +```shell +python tools/summary_network.py configs/top_down/lite_hrnet/coco/litehrnet_18_coco_256x192.py \ + --shape 256 256 \ + --with-head +``` + +## Acknowledgement + +Thanks to: + +- [MMPose](https://github.com/open-mmlab/mmpose) +- [HRNet](https://github.com/HRNet/deep-high-resolution-net.pytorch) + +## Citation +If you use our code or models in your research, please cite: +``` +@inproceedings{Yulitehrnet21, + title={Lite-HRNet: A Lightweight High-Resolution Network}, + author={Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong}, + booktitle={CVPR}, + year={2021} +} + +@inproceedings{SunXLW19, + title={Deep High-Resolution Representation Learning for Human Pose Estimation}, + author={Ke Sun and Bin Xiao and Dong Liu and Jingdong Wang}, + booktitle={CVPR}, + year={2019} +} + +@article{WangSCJDZLMTWLX19, + title={Deep High-Resolution Representation Learning for Visual Recognition}, + author={Jingdong Wang and Ke Sun and Tianheng Cheng and + Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and + Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao}, + journal={TPAMI}, + year={2019} +} + +``` diff --git a/configs/top_down/lite_hrnet/README.md b/configs/top_down/lite_hrnet/README.md new file mode 100644 index 0000000..ebc9bda --- /dev/null +++ b/configs/top_down/lite_hrnet/README.md @@ -0,0 +1,29 @@ +# Lite-HRNet: A Lightweight High-Resolution Network + +## Introduction +``` +@inproceedings{Yulitehrnet21, + title={Lite-HRNet: A Lightweight High-Resolution Network}, + author={Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + year={2021} +} +``` + +## Results and models +### Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | #Params | FLOPs | AP | AP50 | AP75 | AR | AR50 | +| :----------------- | :-----------: | :------: | :-----------: | :------: |:------: | :------: |
:------: | :------: | +| [Lite-HRNet-18](/configs/top_down/lite_hrnet/coco/litehrnet_18_coco_256x192.py) | 256x192 | 1.1M | 205.2M |0.648 | 0.867 | 0.730 | 0.712 | 0.911 | +| [Lite-HRNet-18](/configs/top_down/lite_hrnet/coco/litehrnet_18_coco_384x288.py) | 384x288 | 1.1M | 461.6M | 0.676 | 0.878 | 0.750 | 0.737 | 0.921 | +| [Lite-HRNet-30](/configs/top_down/lite_hrnet/coco/litehrnet_30_coco_256x192.py) | 256x192 | 1.8M | 319.2M | 0.672 | 0.880 | 0.750 | 0.733 | 0.922 | +| [Lite-HRNet-30](/configs/top_down/lite_hrnet/coco/litehrnet_30_coco_384x288.py) | 384x288 | 1.8M | 717.8M | 0.704 | 0.887 | 0.777 | 0.762 | 0.928 | + + +### Results on MPII val set. + +| Arch | Input Size | #Params | FLOPs | Mean | Mean@0.1 | +| :--- | :--------: | :------: | :--------: | :------: | :------: | +| [Lite-HRNet-18](/configs/top_down/lite_hrnet/mpii/litehrnet_18_mpii_256x256.py) | 256x256 | 1.1M | 273.4M | 0.854 | 0.295 | +| [Lite-HRNet-30](/configs/top_down/lite_hrnet/mpii/litehrnet_30_mpii_256x256.py) | 256x256 | 1.8M | 425.3M | 0.870 | 0.313 | diff --git a/configs/top_down/lite_hrnet/coco/litehrnet_18_coco_256x192.py b/configs/top_down/lite_hrnet/coco/litehrnet_18_coco_256x192.py new file mode 100644 index 0000000..2a3b54b --- /dev/null +++ b/configs/top_down/lite_hrnet/coco/litehrnet_18_coco_256x192.py @@ -0,0 +1,181 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=10) +evaluation = dict(interval=10, metric='mAP') + +optimizer = dict( + type='Adam', + lr=2e-3, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + # warmup=None, + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[170, 200]) +total_epochs = 210 +log_config = dict( + interval=50, + hooks=[dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook')]) + +channel_cfg = dict( + num_output_channels=17, + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +# model settings +model = dict( + type='TopDown', + pretrained=None, + backbone=dict( + type='LiteHRNet', + in_channels=3, + extra=dict( + stem=dict(stem_channels=32, out_channels=32, expand_ratio=1), + num_stages=3, + stages_spec=dict( + num_modules=(2, 4, 2), + num_branches=(2, 3, 4), + num_blocks=(2, 2, 2), + module_type=('LITE', 'LITE', 'LITE'), + with_fuse=(True, True, True), + reduce_ratios=(8, 8, 8), + num_channels=( + (40, 80), + (40, 80, 160), + (40, 80, 160, 320), + )), + with_head=True, + )), + keypoint_head=dict( + type='TopDownSimpleHead', + in_channels=40, + out_channels=channel_cfg['num_output_channels'], + num_deconv_layers=0, + extra=dict(final_conv_kernel=1, ), + ), + train_cfg=dict(), + test_cfg=dict( + flip_test=True, + post_process=True, + shift_heatmap=True, + unbiased_decoding=False, + modulate_kernel=11), + loss_pose=dict(type='JointsMSELoss', use_target_weight=True)) + +data_cfg = dict( + image_size=[192, 256], + heatmap_size=[48, 64], + num_output_channels=channel_cfg['num_output_channels'], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + soft_nms=False, + nms_thr=1.0, + oks_thr=0.9, + vis_thr=0.2, + bbox_thr=1.0, + use_gt_bbox=False, + image_thr=0.0, + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', +) + 
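+# The validation config below mirrors data_cfg and differs only in
+# use_gt_bbox=True, i.e. validation uses ground-truth person boxes instead of
+# the detector results listed in bbox_file.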
+val_data_cfg = dict( + image_size=[192, 256], + heatmap_size=[48, 64], + num_output_channels=channel_cfg['num_output_channels'], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + soft_nms=False, + nms_thr=1.0, + oks_thr=0.9, + vis_thr=0.2, + bbox_thr=1.0, + use_gt_bbox=True, + image_thr=0.0, + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', +) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownRandomFlip', flip_prob=0.5), + dict( + type='TopDownHalfBodyTransform', + num_joints_half_body=8, + prob_half_body=0.3), + dict( + type='TopDownGetRandomScaleRotation', rot_factor=30, + scale_factor=0.25), + dict(type='TopDownAffine'), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict(type='TopDownGenerateTarget', sigma=2), + dict( + type='Collect', + keys=['img', 'target', 'target_weight'], + meta_keys=[ + 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', + 'rotation', 'bbox_score', 'flip_pairs' + ]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownAffine'), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='Collect', + keys=[ + 'img', + ], + meta_keys=[ + 'image_file', 'center', 'scale', 'rotation', 'bbox_score', + 'flip_pairs' + ]), +] +test_pipeline = val_pipeline +data_root = 'data/coco' +data = dict( + samples_per_gpu=64, + workers_per_gpu=4, + train=dict( + type='TopDownCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='TopDownCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=val_data_cfg, + pipeline=val_pipeline), + test=dict( + type='TopDownCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) \ No newline at end of file diff --git a/configs/top_down/lite_hrnet/coco/litehrnet_18_coco_384x288.py b/configs/top_down/lite_hrnet/coco/litehrnet_18_coco_384x288.py new file mode 100644 index 0000000..9e802c3 --- /dev/null +++ b/configs/top_down/lite_hrnet/coco/litehrnet_18_coco_384x288.py @@ -0,0 +1,184 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=10) +evaluation = dict(interval=10, metric='mAP') + +optimizer = dict( + type='Adam', + lr=2e-3, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + # warmup=None, + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[170, 200]) +total_epochs = 210 +log_config = dict( + interval=50, hooks=[ + dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + num_output_channels=17, + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +# model settings +model = dict( + type='TopDown', + pretrained=None, + backbone=dict( + type='LiteHRNet', + in_channels=3, + extra=dict( + stem=dict( + stem_channels=32, + 
out_channels=32, + expand_ratio=1), + num_stages=3, + stages_spec=dict( + num_modules=(2, 4, 2), + num_branches=(2, 3, 4), + num_blocks=(2, 2, 2), + module_type=('LITE', 'LITE', 'LITE'), + with_fuse=(True, True, True), + reduce_ratios=(8, 8, 8), + num_channels=( + (40, 80), + (40, 80, 160), + (40, 80, 160, 320), + )), + with_head=True, + )), + keypoint_head=dict( + type='TopDownSimpleHead', + in_channels=40, + out_channels=channel_cfg['num_output_channels'], + num_deconv_layers=0, + extra=dict(final_conv_kernel=1, ), + ), + train_cfg=dict(), + test_cfg=dict( + flip_test=True, + post_process=True, + shift_heatmap=True, + unbiased_decoding=False, + modulate_kernel=11), + loss_pose=dict(type='JointsMSELoss', use_target_weight=True)) + +data_cfg = dict( + image_size=[288, 384], + heatmap_size=[72, 96], + num_output_channels=channel_cfg['num_output_channels'], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + soft_nms=False, + nms_thr=1.0, + oks_thr=0.9, + vis_thr=0.2, + bbox_thr=1.0, + use_gt_bbox=False, + image_thr=0.0, + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', +) + +val_data_cfg = dict( + image_size=[288, 384], + heatmap_size=[72, 96], + num_output_channels=channel_cfg['num_output_channels'], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + soft_nms=False, + nms_thr=1.0, + oks_thr=0.9, + vis_thr=0.2, + bbox_thr=1.0, + use_gt_bbox=True, + image_thr=0.0, + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', +) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownRandomFlip', flip_prob=0.5), + dict( + type='TopDownHalfBodyTransform', + num_joints_half_body=8, + prob_half_body=0.3), + dict( + type='TopDownGetRandomScaleRotation', rot_factor=30, scale_factor=0.25), + dict(type='TopDownAffine'), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict(type='TopDownGenerateTarget', sigma=3), + dict( + type='Collect', + keys=['img', 'target', 'target_weight'], + meta_keys=[ + 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', + 'rotation', 'bbox_score', 'flip_pairs' + ]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownAffine'), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='Collect', + keys=[ + 'img', + ], + meta_keys=[ + 'image_file', 'center', 'scale', 'rotation', 'bbox_score', + 'flip_pairs' + ]), +] +test_pipeline = val_pipeline +data_root = 'data/coco' +data = dict( + samples_per_gpu=32, + workers_per_gpu=4, + train=dict( + type='TopDownCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='TopDownCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=val_data_cfg, + pipeline=val_pipeline), + test=dict( + type='TopDownCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) \ No newline at end of file diff --git 
a/configs/top_down/lite_hrnet/coco/litehrnet_30_coco_256x192.py b/configs/top_down/lite_hrnet/coco/litehrnet_30_coco_256x192.py new file mode 100644 index 0000000..a39b504 --- /dev/null +++ b/configs/top_down/lite_hrnet/coco/litehrnet_30_coco_256x192.py @@ -0,0 +1,184 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=10) +evaluation = dict(interval=10, metric='mAP') + +optimizer = dict( + type='Adam', + lr=2e-3, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + # warmup=None, + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[170, 200]) +total_epochs = 210 +log_config = dict( + interval=50, hooks=[ + dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + num_output_channels=17, + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +# model settings +model = dict( + type='TopDown', + pretrained=None, + backbone=dict( + type='LiteHRNet', + in_channels=3, + extra=dict( + stem=dict( + stem_channels=32, + out_channels=32, + expand_ratio=1), + num_stages=3, + stages_spec=dict( + num_modules=(3, 8, 3), + num_branches=(2, 3, 4), + num_blocks=(2, 2, 2), + module_type=('LITE', 'LITE', 'LITE'), + with_fuse=(True, True, True), + reduce_ratios=(8, 8, 8), + num_channels=( + (40, 80), + (40, 80, 160), + (40, 80, 160, 320), + )), + with_head=True, + )), + keypoint_head=dict( + type='TopDownSimpleHead', + in_channels=40, + out_channels=channel_cfg['num_output_channels'], + num_deconv_layers=0, + extra=dict(final_conv_kernel=1, ), + ), + train_cfg=dict(), + test_cfg=dict( + flip_test=True, + post_process=True, + shift_heatmap=True, + unbiased_decoding=False, + modulate_kernel=11), + loss_pose=dict(type='JointsMSELoss', use_target_weight=True)) + +data_cfg = dict( + image_size=[192, 256], + heatmap_size=[48, 64], + num_output_channels=channel_cfg['num_output_channels'], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + soft_nms=False, + nms_thr=1.0, + oks_thr=0.9, + vis_thr=0.2, + bbox_thr=1.0, + use_gt_bbox=False, + image_thr=0.0, + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', +) + +val_data_cfg = dict( + image_size=[192, 256], + heatmap_size=[48, 64], + num_output_channels=channel_cfg['num_output_channels'], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + soft_nms=False, + nms_thr=1.0, + oks_thr=0.9, + vis_thr=0.2, + bbox_thr=1.0, + use_gt_bbox=True, + image_thr=0.0, + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', +) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownRandomFlip', flip_prob=0.5), + dict( + type='TopDownHalfBodyTransform', + num_joints_half_body=8, + prob_half_body=0.3), + dict( + type='TopDownGetRandomScaleRotation', rot_factor=30, scale_factor=0.25), + dict(type='TopDownAffine'), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict(type='TopDownGenerateTarget', sigma=2), + dict( + type='Collect', + keys=['img', 'target', 'target_weight'], + 
meta_keys=[ + 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', + 'rotation', 'bbox_score', 'flip_pairs' + ]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownAffine'), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='Collect', + keys=[ + 'img', + ], + meta_keys=[ + 'image_file', 'center', 'scale', 'rotation', 'bbox_score', + 'flip_pairs' + ]), +] +test_pipeline = val_pipeline +data_root = 'data/coco' +data = dict( + samples_per_gpu=64, + workers_per_gpu=4, + train=dict( + type='TopDownCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='TopDownCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=val_data_cfg, + pipeline=val_pipeline), + test=dict( + type='TopDownCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) \ No newline at end of file diff --git a/configs/top_down/lite_hrnet/coco/litehrnet_30_coco_384x288.py b/configs/top_down/lite_hrnet/coco/litehrnet_30_coco_384x288.py new file mode 100644 index 0000000..259f383 --- /dev/null +++ b/configs/top_down/lite_hrnet/coco/litehrnet_30_coco_384x288.py @@ -0,0 +1,184 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=10) +evaluation = dict(interval=10, metric='mAP') + +optimizer = dict( + type='Adam', + lr=2e-3, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + # warmup=None, + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[170, 200]) +total_epochs = 210 +log_config = dict( + interval=50, hooks=[ + dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + num_output_channels=17, + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +# model settings +model = dict( + type='TopDown', + pretrained=None, + backbone=dict( + type='LiteHRNet', + in_channels=3, + extra=dict( + stem=dict( + stem_channels=32, + out_channels=32, + expand_ratio=1), + num_stages=3, + stages_spec=dict( + num_modules=(3, 8, 3), + num_branches=(2, 3, 4), + num_blocks=(2, 2, 2), + module_type=('LITE', 'LITE', 'LITE'), + with_fuse=(True, True, True), + reduce_ratios=(8, 8, 8), + num_channels=( + (40, 80), + (40, 80, 160), + (40, 80, 160, 320), + )), + with_head=True, + )), + keypoint_head=dict( + type='TopDownSimpleHead', + in_channels=40, + out_channels=channel_cfg['num_output_channels'], + num_deconv_layers=0, + extra=dict(final_conv_kernel=1, ), + ), + train_cfg=dict(), + test_cfg=dict( + flip_test=True, + post_process=True, + shift_heatmap=True, + unbiased_decoding=False, + modulate_kernel=11), + loss_pose=dict(type='JointsMSELoss', use_target_weight=True)) + +data_cfg = dict( + image_size=[288, 384], + heatmap_size=[72, 96], + num_output_channels=channel_cfg['num_output_channels'], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + soft_nms=False, + 
nms_thr=1.0, + oks_thr=0.9, + vis_thr=0.2, + bbox_thr=1.0, + use_gt_bbox=False, + image_thr=0.0, + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', +) + +val_data_cfg = dict( + image_size=[288, 384], + heatmap_size=[72, 96], + num_output_channels=channel_cfg['num_output_channels'], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + soft_nms=False, + nms_thr=1.0, + oks_thr=0.9, + vis_thr=0.2, + bbox_thr=1.0, + use_gt_bbox=True, + image_thr=0.0, + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', +) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownRandomFlip', flip_prob=0.5), + dict( + type='TopDownHalfBodyTransform', + num_joints_half_body=8, + prob_half_body=0.3), + dict( + type='TopDownGetRandomScaleRotation', rot_factor=30, scale_factor=0.25), + dict(type='TopDownAffine'), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict(type='TopDownGenerateTarget', sigma=3), + dict( + type='Collect', + keys=['img', 'target', 'target_weight'], + meta_keys=[ + 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', + 'rotation', 'bbox_score', 'flip_pairs' + ]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownAffine'), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='Collect', + keys=[ + 'img', + ], + meta_keys=[ + 'image_file', 'center', 'scale', 'rotation', 'bbox_score', + 'flip_pairs' + ]), +] +test_pipeline = val_pipeline +data_root = 'data/coco' +data = dict( + samples_per_gpu=32, + workers_per_gpu=4, + train=dict( + type='TopDownCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='TopDownCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=val_data_cfg, + pipeline=val_pipeline), + test=dict( + type='TopDownCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) \ No newline at end of file diff --git a/configs/top_down/lite_hrnet/mpii/litehrnet_18_mpii_256x256.py b/configs/top_down/lite_hrnet/mpii/litehrnet_18_mpii_256x256.py new file mode 100644 index 0000000..96bc07c --- /dev/null +++ b/configs/top_down/lite_hrnet/mpii/litehrnet_18_mpii_256x256.py @@ -0,0 +1,148 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=10) +evaluation = dict(interval=5, metric='mAP') + +optimizer = dict( + type='Adam', + lr=2e-3, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[170, 200]) +total_epochs = 210 +log_config = dict( + interval=50, hooks=[ + dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + num_output_channels=16, + dataset_joints=16, + dataset_channel=list(range(16)), + inference_channel=list(range(16))) + +# model settings +model = dict( + type='TopDown', + pretrained=None, + backbone=dict( + 
type='LiteHRNet', + in_channels=3, + extra=dict( + stem=dict( + stem_channels=32, + out_channels=32, + expand_ratio=1), + num_stages=3, + stages_spec=dict( + num_modules=(2, 4, 2), + num_branches=(2, 3, 4), + num_blocks=(2, 2, 2), + module_type=('LITE', 'LITE', 'LITE'), + with_fuse=(True, True, True), + reduce_ratios=(8, 8, 8), + num_channels=( + (40, 80), + (40, 80, 160), + (40, 80, 160, 320), + )), + with_head=True, + )), + keypoint_head=dict( + type='TopDownSimpleHead', + in_channels=40, + out_channels=channel_cfg['num_output_channels'], + num_deconv_layers=0, + extra=dict(final_conv_kernel=1, ), + ), + train_cfg=dict(), + test_cfg=dict( + flip_test=True, + post_process=True, + shift_heatmap=True, + unbiased_decoding=False, + modulate_kernel=11), + loss_pose=dict(type='JointsMSELoss', use_target_weight=True)) + +data_cfg = dict( + image_size=[256, 256], + heatmap_size=[64, 64], + num_output_channels=channel_cfg['num_output_channels'], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + use_gt_bbox=True, + bbox_file=None, +) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownRandomFlip', flip_prob=0.5), + dict( + type='TopDownGetRandomScaleRotation', rot_factor=30, + scale_factor=0.25), + dict(type='TopDownAffine'), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict(type='TopDownGenerateTarget', sigma=2), + dict( + type='Collect', + keys=['img', 'target', 'target_weight'], + meta_keys=[ + 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', + 'rotation', 'flip_pairs' + ]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownAffine'), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='Collect', + keys=[ + 'img', + ], + meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']), +] + +data_root = 'data/mpii' +data = dict( + samples_per_gpu=32, + workers_per_gpu=2, + train=dict( + type='TopDownMpiiDataset', + ann_file=f'{data_root}/annotations/mpii_train.json', + img_prefix=f'{data_root}/images/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='TopDownMpiiDataset', + ann_file=f'{data_root}/annotations/mpii_val.json', + img_prefix=f'{data_root}/images/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='TopDownMpiiDataset', + ann_file=f'{data_root}/annotations/mpii_val.json', + img_prefix=f'{data_root}/images/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/top_down/lite_hrnet/mpii/litehrnet_30_mpii_256x256.py b/configs/top_down/lite_hrnet/mpii/litehrnet_30_mpii_256x256.py new file mode 100644 index 0000000..616fac5 --- /dev/null +++ b/configs/top_down/lite_hrnet/mpii/litehrnet_30_mpii_256x256.py @@ -0,0 +1,148 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=10) +evaluation = dict(interval=5, metric='mAP') + +optimizer = dict( + type='Adam', + lr=2e-3, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[170, 200]) +total_epochs = 210 +log_config = dict( + interval=50, hooks=[ + dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + 
num_output_channels=16, + dataset_joints=16, + dataset_channel=list(range(16)), + inference_channel=list(range(16))) + +# model settings +model = dict( + type='TopDown', + pretrained=None, + backbone=dict( + type='LiteHRNet', + in_channels=3, + extra=dict( + stem=dict( + stem_channels=32, + out_channels=32, + expand_ratio=1), + num_stages=3, + stages_spec=dict( + num_modules=(3, 8, 3), + num_branches=(2, 3, 4), + num_blocks=(2, 2, 2), + module_type=('LITE', 'LITE', 'LITE'), + with_fuse=(True, True, True), + reduce_ratios=(8, 8, 8), + num_channels=( + (40, 80), + (40, 80, 160), + (40, 80, 160, 320), + )), + with_head=True, + )), + keypoint_head=dict( + type='TopDownSimpleHead', + in_channels=40, + out_channels=channel_cfg['num_output_channels'], + num_deconv_layers=0, + extra=dict(final_conv_kernel=1, ), + ), + train_cfg=dict(), + test_cfg=dict( + flip_test=True, + post_process=True, + shift_heatmap=True, + unbiased_decoding=False, + modulate_kernel=11), + loss_pose=dict(type='JointsMSELoss', use_target_weight=True)) + +data_cfg = dict( + image_size=[256, 256], + heatmap_size=[64, 64], + num_output_channels=channel_cfg['num_output_channels'], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + use_gt_bbox=True, + bbox_file=None, +) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownRandomFlip', flip_prob=0.5), + dict( + type='TopDownGetRandomScaleRotation', rot_factor=30, + scale_factor=0.25), + dict(type='TopDownAffine'), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict(type='TopDownGenerateTarget', sigma=2), + dict( + type='Collect', + keys=['img', 'target', 'target_weight'], + meta_keys=[ + 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', + 'rotation', 'flip_pairs' + ]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownAffine'), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='Collect', + keys=[ + 'img', + ], + meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']), +] + +data_root = 'data/mpii' +data = dict( + samples_per_gpu=32, + workers_per_gpu=2, + train=dict( + type='TopDownMpiiDataset', + ann_file=f'{data_root}/annotations/mpii_train.json', + img_prefix=f'{data_root}/images/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='TopDownMpiiDataset', + ann_file=f'{data_root}/annotations/mpii_val.json', + img_prefix=f'{data_root}/images/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='TopDownMpiiDataset', + ann_file=f'{data_root}/annotations/mpii_val.json', + img_prefix=f'{data_root}/images/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 0000000..592345c --- /dev/null +++ b/models/__init__.py @@ -0,0 +1,8 @@ +from .backbones import * # noqa +from .builder import (build_backbone, build_head, build_loss, build_neck, + build_posenet) + +__all__ = [ + 'build_backbone', 'build_head', + 'build_loss', 'build_posenet', 'build_neck' +] diff --git a/models/backbones/__init__.py b/models/backbones/__init__.py new file mode 100644 index 0000000..25aad73 --- /dev/null +++ b/models/backbones/__init__.py @@ -0,0 +1,5 @@ +from .litehrnet import LiteHRNet + +__all__ = [ + 'LiteHRNet' +] diff --git a/models/backbones/litehrnet.py 
b/models/backbones/litehrnet.py new file mode 100644 index 0000000..1cd5025 --- /dev/null +++ b/models/backbones/litehrnet.py @@ -0,0 +1,915 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import (ConvModule, DepthwiseSeparableConvModule, + build_conv_layer, build_norm_layer, constant_init, + normal_init) +from torch.nn.modules.batchnorm import _BatchNorm +import torch.utils.checkpoint as cp + +import mmcv +from mmpose.utils import get_root_logger +from mmpose.models.registry import BACKBONES +from mmpose.models.backbones.resnet import BasicBlock, Bottleneck +from mmpose.models.backbones.utils import load_checkpoint, channel_shuffle + + +class SpatialWeighting(nn.Module): + + def __init__(self, + channels, + ratio=16, + conv_cfg=None, + act_cfg=(dict(type='ReLU'), dict(type='Sigmoid'))): + super().__init__() + if isinstance(act_cfg, dict): + act_cfg = (act_cfg, act_cfg) + assert len(act_cfg) == 2 + assert mmcv.is_tuple_of(act_cfg, dict) + self.global_avgpool = nn.AdaptiveAvgPool2d(1) + self.conv1 = ConvModule( + in_channels=channels, + out_channels=int(channels / ratio), + kernel_size=1, + stride=1, + conv_cfg=conv_cfg, + act_cfg=act_cfg[0]) + self.conv2 = ConvModule( + in_channels=int(channels / ratio), + out_channels=channels, + kernel_size=1, + stride=1, + conv_cfg=conv_cfg, + act_cfg=act_cfg[1]) + + def forward(self, x): + out = self.global_avgpool(x) + out = self.conv1(out) + out = self.conv2(out) + return x * out + + +class CrossResolutionWeighting(nn.Module): + + def __init__(self, + channels, + ratio=16, + conv_cfg=None, + norm_cfg=None, + act_cfg=(dict(type='ReLU'), dict(type='Sigmoid'))): + super().__init__() + if isinstance(act_cfg, dict): + act_cfg = (act_cfg, act_cfg) + assert len(act_cfg) == 2 + assert mmcv.is_tuple_of(act_cfg, dict) + self.channels = channels + total_channel = sum(channels) + self.conv1 = ConvModule( + in_channels=total_channel, + out_channels=int(total_channel / ratio), + kernel_size=1, + stride=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg[0]) + self.conv2 = ConvModule( + in_channels=int(total_channel / ratio), + out_channels=total_channel, + kernel_size=1, + stride=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg[1]) + + def forward(self, x): + mini_size = x[-1].size()[-2:] + out = [F.adaptive_avg_pool2d(s, mini_size) for s in x[:-1]] + [x[-1]] + out = torch.cat(out, dim=1) + out = self.conv1(out) + out = self.conv2(out) + out = torch.split(out, self.channels, dim=1) + out = [ + s * F.interpolate(a, size=s.size()[-2:], mode='nearest') + for s, a in zip(x, out) + ] + return out + + +class ConditionalChannelWeighting(nn.Module): + + def __init__(self, + in_channels, + stride, + reduce_ratio, + conv_cfg=None, + norm_cfg=dict(type='BN'), + with_cp=False): + super().__init__() + self.with_cp = with_cp + self.stride = stride + assert stride in [1, 2] + + branch_channels = [channel // 2 for channel in in_channels] + + self.cross_resolution_weighting = CrossResolutionWeighting( + branch_channels, + ratio=reduce_ratio, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg) + + self.depthwise_convs = nn.ModuleList([ + ConvModule( + channel, + channel, + kernel_size=3, + stride=self.stride, + padding=1, + groups=channel, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None) for channel in branch_channels + ]) + + self.spatial_weighting = nn.ModuleList([ + SpatialWeighting(channels=channel, ratio=4) + for channel in branch_channels + ]) + + def forward(self, x): + + def _inner_forward(x): + x = [s.chunk(2, 
dim=1) for s in x] + x1 = [s[0] for s in x] + x2 = [s[1] for s in x] + + x2 = self.cross_resolution_weighting(x2) + x2 = [dw(s) for s, dw in zip(x2, self.depthwise_convs)] + x2 = [sw(s) for s, sw in zip(x2, self.spatial_weighting)] + + out = [torch.cat([s1, s2], dim=1) for s1, s2 in zip(x1, x2)] + out = [channel_shuffle(s, 2) for s in out] + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + return out + + +class Stem(nn.Module): + + def __init__(self, + in_channels, + stem_channels, + out_channels, + expand_ratio, + conv_cfg=None, + norm_cfg=dict(type='BN'), + with_cp=False): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.with_cp = with_cp + + self.conv1 = ConvModule( + in_channels=in_channels, + out_channels=stem_channels, + kernel_size=3, + stride=2, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=dict(type='ReLU')) + + mid_channels = int(round(stem_channels * expand_ratio)) + branch_channels = stem_channels // 2 + if stem_channels == self.out_channels: + inc_channels = self.out_channels - branch_channels + else: + inc_channels = self.out_channels - stem_channels + + self.branch1 = nn.Sequential( + ConvModule( + branch_channels, + branch_channels, + kernel_size=3, + stride=2, + padding=1, + groups=branch_channels, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None), + ConvModule( + branch_channels, + inc_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU')), + ) + + self.expand_conv = ConvModule( + branch_channels, + mid_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU')) + self.depthwise_conv = ConvModule( + mid_channels, + mid_channels, + kernel_size=3, + stride=2, + padding=1, + groups=mid_channels, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None) + self.linear_conv = ConvModule( + mid_channels, + branch_channels + if stem_channels == self.out_channels else stem_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU')) + + def forward(self, x): + + def _inner_forward(x): + x = self.conv1(x) + x1, x2 = x.chunk(2, dim=1) + + x2 = self.expand_conv(x2) + x2 = self.depthwise_conv(x2) + x2 = self.linear_conv(x2) + + out = torch.cat((self.branch1(x1), x2), dim=1) + + out = channel_shuffle(out, 2) + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + return out + + +class IterativeHead(nn.Module): + + def __init__(self, in_channels, conv_cfg=None, norm_cfg=dict(type='BN')): + super().__init__() + projects = [] + num_branchs = len(in_channels) + self.in_channels = in_channels[::-1] + + for i in range(num_branchs): + if i != num_branchs - 1: + projects.append( + DepthwiseSeparableConvModule( + in_channels=self.in_channels[i], + out_channels=self.in_channels[i + 1], + kernel_size=3, + stride=1, + padding=1, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + dw_act_cfg=None, + pw_act_cfg=dict(type='ReLU'))) + else: + projects.append( + DepthwiseSeparableConvModule( + in_channels=self.in_channels[i], + out_channels=self.in_channels[i], + kernel_size=3, + stride=1, + padding=1, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + dw_act_cfg=None, + pw_act_cfg=dict(type='ReLU'))) + self.projects 
= nn.ModuleList(projects) + + def forward(self, x): + x = x[::-1] + + y = [] + last_x = None + for i, s in enumerate(x): + if last_x is not None: + last_x = F.interpolate( + last_x, + size=s.size()[-2:], + mode='bilinear', + align_corners=True) + s = s + last_x + s = self.projects[i](s) + y.append(s) + last_x = s + + return y[::-1] + + +class ShuffleUnit(nn.Module): + """InvertedResidual block for ShuffleNetV2 backbone. + + Args: + in_channels (int): The input channels of the block. + out_channels (int): The output channels of the block. + stride (int): Stride of the 3x3 convolution layer. Default: 1 + conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU'). + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + """ + + def __init__(self, + in_channels, + out_channels, + stride=1, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + with_cp=False): + super().__init__() + self.stride = stride + self.with_cp = with_cp + + branch_features = out_channels // 2 + if self.stride == 1: + assert in_channels == branch_features * 2, ( + f'in_channels ({in_channels}) should equal to ' + f'branch_features * 2 ({branch_features * 2}) ' + 'when stride is 1') + + if in_channels != branch_features * 2: + assert self.stride != 1, ( + f'stride ({self.stride}) should not equal 1 when ' + f'in_channels != branch_features * 2') + + if self.stride > 1: + self.branch1 = nn.Sequential( + ConvModule( + in_channels, + in_channels, + kernel_size=3, + stride=self.stride, + padding=1, + groups=in_channels, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None), + ConvModule( + in_channels, + branch_features, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg), + ) + + self.branch2 = nn.Sequential( + ConvModule( + in_channels if (self.stride > 1) else branch_features, + branch_features, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg), + ConvModule( + branch_features, + branch_features, + kernel_size=3, + stride=self.stride, + padding=1, + groups=branch_features, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None), + ConvModule( + branch_features, + branch_features, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + + def forward(self, x): + + def _inner_forward(x): + if self.stride > 1: + out = torch.cat((self.branch1(x), self.branch2(x)), dim=1) + else: + x1, x2 = x.chunk(2, dim=1) + out = torch.cat((x1, self.branch2(x2)), dim=1) + + out = channel_shuffle(out, 2) + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + return out + + +class LiteHRModule(nn.Module): + + def __init__( + self, + num_branches, + num_blocks, + in_channels, + reduce_ratio, + module_type, + multiscale_output=False, + with_fuse=True, + conv_cfg=None, + norm_cfg=dict(type='BN'), + with_cp=False, + ): + super().__init__() + self._check_branches(num_branches, in_channels) + + self.in_channels = in_channels + self.num_branches = num_branches + + self.module_type = module_type + self.multiscale_output = multiscale_output + self.with_fuse = with_fuse + self.norm_cfg = norm_cfg + self.conv_cfg = 
conv_cfg + self.with_cp = with_cp + + if self.module_type == 'LITE': + self.layers = self._make_weighting_blocks(num_blocks, reduce_ratio) + elif self.module_type == 'NAIVE': + self.layers = self._make_naive_branches(num_branches, num_blocks) + if self.with_fuse: + self.fuse_layers = self._make_fuse_layers() + self.relu = nn.ReLU() + + def _check_branches(self, num_branches, in_channels): + """Check input to avoid ValueError.""" + if num_branches != len(in_channels): + error_msg = f'NUM_BRANCHES({num_branches}) ' \ + f'!= NUM_INCHANNELS({len(in_channels)})' + raise ValueError(error_msg) + + def _make_weighting_blocks(self, num_blocks, reduce_ratio, stride=1): + layers = [] + for i in range(num_blocks): + layers.append( + ConditionalChannelWeighting( + self.in_channels, + stride=stride, + reduce_ratio=reduce_ratio, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + with_cp=self.with_cp)) + + return nn.Sequential(*layers) + + def _make_one_branch(self, branch_index, num_blocks, stride=1): + """Make one branch.""" + layers = [] + layers.append( + ShuffleUnit( + self.in_channels[branch_index], + self.in_channels[branch_index], + stride=stride, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=dict(type='ReLU'), + with_cp=self.with_cp)) + for i in range(1, num_blocks): + layers.append( + ShuffleUnit( + self.in_channels[branch_index], + self.in_channels[branch_index], + stride=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=dict(type='ReLU'), + with_cp=self.with_cp)) + + return nn.Sequential(*layers) + + def _make_naive_branches(self, num_branches, num_blocks): + """Make branches.""" + branches = [] + + for i in range(num_branches): + branches.append(self._make_one_branch(i, num_blocks)) + + return nn.ModuleList(branches) + + def _make_fuse_layers(self): + """Make fuse layer.""" + if self.num_branches == 1: + return None + + num_branches = self.num_branches + in_channels = self.in_channels + fuse_layers = [] + num_out_branches = num_branches if self.multiscale_output else 1 + for i in range(num_out_branches): + fuse_layer = [] + for j in range(num_branches): + if j > i: + fuse_layer.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels[j], + in_channels[i], + kernel_size=1, + stride=1, + padding=0, + bias=False), + build_norm_layer(self.norm_cfg, in_channels[i])[1], + nn.Upsample( + scale_factor=2**(j - i), mode='nearest'))) + elif j == i: + fuse_layer.append(None) + else: + conv_downsamples = [] + for k in range(i - j): + if k == i - j - 1: + conv_downsamples.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels[j], + in_channels[j], + kernel_size=3, + stride=2, + padding=1, + groups=in_channels[j], + bias=False), + build_norm_layer(self.norm_cfg, + in_channels[j])[1], + build_conv_layer( + self.conv_cfg, + in_channels[j], + in_channels[i], + kernel_size=1, + stride=1, + padding=0, + bias=False), + build_norm_layer(self.norm_cfg, + in_channels[i])[1])) + else: + conv_downsamples.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels[j], + in_channels[j], + kernel_size=3, + stride=2, + padding=1, + groups=in_channels[j], + bias=False), + build_norm_layer(self.norm_cfg, + in_channels[j])[1], + build_conv_layer( + self.conv_cfg, + in_channels[j], + in_channels[j], + kernel_size=1, + stride=1, + padding=0, + bias=False), + build_norm_layer(self.norm_cfg, + in_channels[j])[1], + nn.ReLU(inplace=True))) + fuse_layer.append(nn.Sequential(*conv_downsamples)) + fuse_layers.append(nn.ModuleList(fuse_layer)) + 
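+        # fuse_layers[i][j] adapts branch j to branch i: for j > i, a 1x1 conv + BN
+        # followed by 2**(j - i) nearest-neighbour upsampling; for j < i, a stack of
+        # (i - j) stride-2 depthwise 3x3 / pointwise 1x1 conv pairs (the last pair
+        # maps to in_channels[i]); for j == i, None is kept and treated as identity.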
+ return nn.ModuleList(fuse_layers) + + def forward(self, x): + """Forward function.""" + if self.num_branches == 1: + return [self.layers[0](x[0])] + + if self.module_type == 'LITE': + out = self.layers(x) + elif self.module_type == 'NAIVE': + for i in range(self.num_branches): + x[i] = self.layers[i](x[i]) + out = x + + if self.with_fuse: + out_fuse = [] + for i in range(len(self.fuse_layers)): + y = out[0] if i == 0 else self.fuse_layers[i][0](out[0]) + for j in range(self.num_branches): + if i == j: + y += out[j] + else: + y += self.fuse_layers[i][j](out[j]) + out_fuse.append(self.relu(y)) + out = out_fuse + elif not self.multiscale_output: + out = [out[0]] + return out + + +@BACKBONES.register_module() +class LiteHRNet(nn.Module): + """Lite-HRNet backbone. + + `High-Resolution Representations for Labeling Pixels and Regions + `_ + + Args: + extra (dict): detailed configuration for each stage of HRNet. + in_channels (int): Number of input image channels. Default: 3. + conv_cfg (dict): dictionary to construct and config conv layer. + norm_cfg (dict): dictionary to construct and config norm layer. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. + zero_init_residual (bool): whether to use zero init for last norm layer + in resblocks to let them behave as identity. + + Example: + >>> from mmpose.models import HRNet + >>> import torch + >>> extra = dict( + >>> stage1=dict( + >>> num_modules=1, + >>> num_branches=1, + >>> block='BOTTLENECK', + >>> num_blocks=(4, ), + >>> num_channels=(64, )), + >>> stage2=dict( + >>> num_modules=1, + >>> num_branches=2, + >>> block='BASIC', + >>> num_blocks=(4, 4), + >>> num_channels=(32, 64)), + >>> stage3=dict( + >>> num_modules=4, + >>> num_branches=3, + >>> block='BASIC', + >>> num_blocks=(4, 4, 4), + >>> num_channels=(32, 64, 128)), + >>> stage4=dict( + >>> num_modules=3, + >>> num_branches=4, + >>> block='BASIC', + >>> num_blocks=(4, 4, 4, 4), + >>> num_channels=(32, 64, 128, 256))) + >>> self = HRNet(extra, in_channels=1) + >>> self.eval() + >>> inputs = torch.rand(1, 1, 32, 32) + >>> level_outputs = self.forward(inputs) + >>> for level_out in level_outputs: + ... 
print(tuple(level_out.shape)) + (1, 32, 8, 8) + (1, 64, 4, 4) + (1, 128, 2, 2) + (1, 256, 1, 1) + """ + + def __init__(self, + extra, + in_channels=3, + conv_cfg=None, + norm_cfg=dict(type='BN'), + norm_eval=False, + with_cp=False, + zero_init_residual=False): + super().__init__() + self.extra = extra + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.norm_eval = norm_eval + self.with_cp = with_cp + self.zero_init_residual = zero_init_residual + + self.stem = Stem( + in_channels, + stem_channels=self.extra['stem']['stem_channels'], + out_channels=self.extra['stem']['out_channels'], + expand_ratio=self.extra['stem']['expand_ratio'], + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg) + + self.num_stages = self.extra['num_stages'] + self.stages_spec = self.extra['stages_spec'] + + num_channels_last = [ + self.stem.out_channels, + ] + for i in range(self.num_stages): + num_channels = self.stages_spec['num_channels'][i] + num_channels = [num_channels[i] for i in range(len(num_channels))] + setattr( + self, 'transition{}'.format(i), + self._make_transition_layer(num_channels_last, num_channels)) + + stage, num_channels_last = self._make_stage( + self.stages_spec, i, num_channels, multiscale_output=True) + setattr(self, 'stage{}'.format(i), stage) + + self.with_head = self.extra['with_head'] + if self.with_head: + self.head_layer = IterativeHead( + in_channels=num_channels_last, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + ) + + def _make_transition_layer(self, num_channels_pre_layer, + num_channels_cur_layer): + """Make transition layer.""" + num_branches_cur = len(num_channels_cur_layer) + num_branches_pre = len(num_channels_pre_layer) + + transition_layers = [] + for i in range(num_branches_cur): + if i < num_branches_pre: + if num_channels_cur_layer[i] != num_channels_pre_layer[i]: + transition_layers.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + num_channels_pre_layer[i], + num_channels_pre_layer[i], + kernel_size=3, + stride=1, + padding=1, + groups=num_channels_pre_layer[i], + bias=False), + build_norm_layer(self.norm_cfg, + num_channels_pre_layer[i])[1], + build_conv_layer( + self.conv_cfg, + num_channels_pre_layer[i], + num_channels_cur_layer[i], + kernel_size=1, + stride=1, + padding=0, + bias=False), + build_norm_layer(self.norm_cfg, + num_channels_cur_layer[i])[1], + nn.ReLU())) + else: + transition_layers.append(None) + else: + conv_downsamples = [] + for j in range(i + 1 - num_branches_pre): + in_channels = num_channels_pre_layer[-1] + out_channels = num_channels_cur_layer[i] \ + if j == i - num_branches_pre else in_channels + conv_downsamples.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels, + in_channels, + kernel_size=3, + stride=2, + padding=1, + groups=in_channels, + bias=False), + build_norm_layer(self.norm_cfg, in_channels)[1], + build_conv_layer( + self.conv_cfg, + in_channels, + out_channels, + kernel_size=1, + stride=1, + padding=0, + bias=False), + build_norm_layer(self.norm_cfg, out_channels)[1], + nn.ReLU())) + transition_layers.append(nn.Sequential(*conv_downsamples)) + + return nn.ModuleList(transition_layers) + + def _make_stage(self, + stages_spec, + stage_index, + in_channels, + multiscale_output=True): + num_modules = stages_spec['num_modules'][stage_index] + num_branches = stages_spec['num_branches'][stage_index] + num_blocks = stages_spec['num_blocks'][stage_index] + reduce_ratio = stages_spec['reduce_ratios'][stage_index] + with_fuse = stages_spec['with_fuse'][stage_index] + module_type = 
stages_spec['module_type'][stage_index] + + modules = [] + for i in range(num_modules): + # multi_scale_output is only used last module + if not multiscale_output and i == num_modules - 1: + reset_multiscale_output = False + else: + reset_multiscale_output = True + + modules.append( + LiteHRModule( + num_branches, + num_blocks, + in_channels, + reduce_ratio, + module_type, + multiscale_output=reset_multiscale_output, + with_fuse=with_fuse, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + with_cp=self.with_cp)) + in_channels = modules[-1].in_channels + + return nn.Sequential(*modules), in_channels + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. + """ + if isinstance(pretrained, str): + logger = get_root_logger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + normal_init(m, std=0.001) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + + if self.zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + constant_init(m.norm3, 0) + elif isinstance(m, BasicBlock): + constant_init(m.norm2, 0) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + """Forward function.""" + x = self.stem(x) + + y_list = [x] + for i in range(self.num_stages): + x_list = [] + transition = getattr(self, 'transition{}'.format(i)) + for j in range(self.stages_spec['num_branches'][i]): + if transition[j]: + if j >= len(y_list): + x_list.append(transition[j](y_list[-1])) + else: + x_list.append(transition[j](y_list[j])) + else: + x_list.append(y_list[j]) + y_list = getattr(self, 'stage{}'.format(i))(x_list) + + x = y_list + if self.with_head: + x = self.head_layer(x) + + return [x[0]] + + def train(self, mode=True): + """Convert the model into training mode.""" + super().train(mode) + if mode and self.norm_eval: + for m in self.modules(): + if isinstance(m, _BatchNorm): + m.eval() diff --git a/models/builder.py b/models/builder.py new file mode 100644 index 0000000..b3331f2 --- /dev/null +++ b/models/builder.py @@ -0,0 +1,52 @@ +from mmcv.utils import build_from_cfg +from torch import nn + +from mmpose.models.registry import BACKBONES, HEADS, LOSSES, NECKS, POSENETS + + +def build(cfg, registry, default_args=None): + """Build a module. + + Args: + cfg (dict, list[dict]): The config of modules, it is either a dict + or a list of configs. + registry (:obj:`Registry`): A registry the module belongs to. + default_args (dict, optional): Default arguments to build the module. + Defaults to None. + + Returns: + nn.Module: A built nn module. 
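+
+    Example:
+        >>> # A minimal sketch: one dict builds one module, a list of dicts is
+        >>> # built into an ``nn.Sequential``. ``lite_hrnet_extra`` is only a
+        >>> # placeholder for a full stage specification (see ``LiteHRNet``).
+        >>> backbone = build(dict(type='LiteHRNet', extra=lite_hrnet_extra),
+        ...                  BACKBONES)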
+ """ + + if isinstance(cfg, list): + modules = [ + build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg + ] + return nn.Sequential(*modules) + + return build_from_cfg(cfg, registry, default_args) + + +def build_backbone(cfg): + """Build backbone.""" + return build(cfg, BACKBONES) + + +def build_neck(cfg): + """Build neck.""" + return build(cfg, NECKS) + + +def build_head(cfg): + """Build head.""" + return build(cfg, HEADS) + + +def build_loss(cfg): + """Build loss.""" + return build(cfg, LOSSES) + + +def build_posenet(cfg): + """Build posenet.""" + return build(cfg, POSENETS) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..b5b5d97 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +-r requirements/build.txt +-r requirements/runtime.txt +-r requirements/tests.txt +-r requirements/optional.txt diff --git a/requirements/build.txt b/requirements/build.txt new file mode 100644 index 0000000..a956694 --- /dev/null +++ b/requirements/build.txt @@ -0,0 +1,3 @@ +# These must be installed before building mmpose +numpy +torch>=1.3 diff --git a/requirements/docs.txt b/requirements/docs.txt new file mode 100644 index 0000000..89fbf86 --- /dev/null +++ b/requirements/docs.txt @@ -0,0 +1,4 @@ +recommonmark +sphinx +sphinx_markdown_tables +sphinx_rtd_theme diff --git a/requirements/optional.txt b/requirements/optional.txt new file mode 100644 index 0000000..dc0d5f5 --- /dev/null +++ b/requirements/optional.txt @@ -0,0 +1,5 @@ +albumentations>=0.3.2 +onnx +onnxruntime +poseval@git+https://github.com/svenkreiss/poseval.git +smplx diff --git a/requirements/readthedocs.txt b/requirements/readthedocs.txt new file mode 100644 index 0000000..55b295e --- /dev/null +++ b/requirements/readthedocs.txt @@ -0,0 +1,8 @@ +mmcv-full +munkres +poseval@git+https://github.com/svenkreiss/poseval.git +scipy +titlecase +torch +torchvision +xtcocotools>=1.6 diff --git a/requirements/runtime.txt b/requirements/runtime.txt new file mode 100644 index 0000000..bfc4fd8 --- /dev/null +++ b/requirements/runtime.txt @@ -0,0 +1,14 @@ +chumpy +dataclasses; python_version == '3.6' +json_tricks +matplotlib +munkres +numpy +opencv-python +pillow +scipy +torchvision +xtcocotools>=1.6 +future +tensorboard +mmpose diff --git a/requirements/tests.txt b/requirements/tests.txt new file mode 100644 index 0000000..003b25c --- /dev/null +++ b/requirements/tests.txt @@ -0,0 +1,9 @@ +coverage +flake8 +interrogate +isort==4.3.21 +pytest +pytest-runner +smplx +xdoctest >= 0.10.0 +yapf diff --git a/resources/litehrnet_block.png b/resources/litehrnet_block.png new file mode 100644 index 0000000..b8ee8a7 Binary files /dev/null and b/resources/litehrnet_block.png differ diff --git a/tools/dist_test.sh b/tools/dist_test.sh new file mode 100755 index 0000000..3c74ec6 --- /dev/null +++ b/tools/dist_test.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +CONFIG=$1 +CHECKPOINT=$2 +GPUS=$3 +PORT=${PORT:-29500} + +PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ +python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ + $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} diff --git a/tools/dist_train.sh b/tools/dist_train.sh new file mode 100755 index 0000000..5b43fff --- /dev/null +++ b/tools/dist_train.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +CONFIG=$1 +GPUS=$2 +PORT=${PORT:-29500} + +PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ +python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ + $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} diff --git 
a/tools/summary_network.py b/tools/summary_network.py new file mode 100644 index 0000000..f6a9690 --- /dev/null +++ b/tools/summary_network.py @@ -0,0 +1,70 @@ +import argparse +# import tensorwatch as tw + +from mmcv import Config +# from mmcv.cnn import get_model_complexity_info +from tools.torchstat_utils import model_stats + +from mmpose.models import build_posenet + + +def parse_args(): + parser = argparse.ArgumentParser(description='Summarize a pose network') + parser.add_argument('config', help='train config file path') + parser.add_argument( + '--shape', + type=int, + nargs='+', + default=[2048, 1024], + help='input image size') + parser.add_argument( + '--with-head', + action='store_true', + help='whether to compute the complexity of the deconv head.') + parser.add_argument('--out-file', type=str, + help='Output file name') + args = parser.parse_args() + return args + + +def main(): + + args = parse_args() + + if len(args.shape) == 1: + input_shape = (1, 3, args.shape[0], args.shape[0]) + elif len(args.shape) == 2: + input_shape = (1, 3, ) + tuple(args.shape) + else: + raise ValueError('invalid input shape') + + cfg = Config.fromfile(args.config) + model = build_posenet(cfg.model) + model.eval() + + if args.with_head and hasattr(model, 'forward_with_head'): + model.forward = model.forward_with_head + elif not args.with_head and hasattr(model, 'forward_without_head'): + model.forward = model.forward_without_head + else: + raise NotImplementedError( + 'FLOPs counter is currently not supported with {}'. + format(model.__class__.__name__)) + + df = model_stats(model, input_shape) + print(df) + if args.out_file: + df.to_html(args.out_file + '.html') + df.to_csv(args.out_file + '.csv') + + # flops, params = get_model_complexity_info(model, input_shape) + # split_line = '=' * 30 + # print('{0}\nInput shape: {1}\nFlops: {2}\nParams: {3}\n{0}'.format( + # split_line, input_shape, flops, params)) + print('!!!Please be cautious if you use the results in papers. 
' + 'You may need to check if all ops are supported and verify that the ' + 'flops computation is correct.') + + +if __name__ == '__main__': + main() diff --git a/tools/test.py b/tools/test.py new file mode 100644 index 0000000..53fd1bc --- /dev/null +++ b/tools/test.py @@ -0,0 +1,141 @@ +import argparse +import os +import os.path as osp + +import mmcv +import torch +from mmcv import Config, DictAction +from mmcv.cnn import fuse_conv_bn +from mmcv.parallel import MMDataParallel, MMDistributedDataParallel +from mmcv.runner import get_dist_info, init_dist, load_checkpoint + +from mmpose.apis import multi_gpu_test, single_gpu_test +from mmpose.core import wrap_fp16_model +from mmpose.datasets import build_dataloader, build_dataset +from models import build_posenet + + +def parse_args(): + parser = argparse.ArgumentParser(description='mmpose test model') + parser.add_argument('config', help='test config file path') + parser.add_argument('checkpoint', help='checkpoint file') + parser.add_argument('--out', help='output result file') + parser.add_argument( + '--fuse-conv-bn', + action='store_true', + help='Whether to fuse conv and bn, this will slightly increase' + 'the inference speed') + parser.add_argument( + '--eval', + default=None, + nargs='+', + help='evaluation metric, which depends on the dataset,' + ' e.g., "mAP" for MSCOCO') + parser.add_argument( + '--gpu_collect', + action='store_true', + help='whether to use gpu to collect results') + parser.add_argument('--tmpdir', help='tmp dir for writing some results') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + default={}, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. For example, ' + "'--cfg-options model.backbone.depth=18 model.backbone.with_cp=True'") + parser.add_argument( + '--launcher', + choices=['none', 'pytorch', 'slurm', 'mpi'], + default='none', + help='job launcher') + parser.add_argument('--local_rank', type=int, default=0) + args = parser.parse_args() + if 'LOCAL_RANK' not in os.environ: + os.environ['LOCAL_RANK'] = str(args.local_rank) + return args + + +def merge_configs(cfg1, cfg2): + # Merge cfg2 into cfg1 + # Overwrite cfg1 if repeated, ignore if value is None. + cfg1 = {} if cfg1 is None else cfg1.copy() + cfg2 = {} if cfg2 is None else cfg2 + for k, v in cfg2.items(): + if v: + cfg1[k] = v + return cfg1 + + +def main(): + args = parse_args() + + cfg = Config.fromfile(args.config) + + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + + # set cudnn_benchmark + if cfg.get('cudnn_benchmark', False): + torch.backends.cudnn.benchmark = True + cfg.model.pretrained = None + cfg.data.test.test_mode = True + + args.work_dir = osp.join('./work_dirs', + osp.splitext(osp.basename(args.config))[0]) + mmcv.mkdir_or_exist(osp.abspath(args.work_dir)) + + # init distributed env first, since logger depends on the dist info. 
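+    # A launcher of 'none' keeps everything in a single process; any other value
+    # initialises torch.distributed from cfg.dist_params (tools/dist_test.sh goes
+    # through torch.distributed.launch and passes --launcher pytorch).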
+ if args.launcher == 'none': + distributed = False + else: + distributed = True + init_dist(args.launcher, **cfg.dist_params) + + # build the dataloader + dataset = build_dataset(cfg.data.test, dict(test_mode=True)) + dataloader_setting = dict( + samples_per_gpu=1, + workers_per_gpu=cfg.data.get('workers_per_gpu', 1), + dist=distributed, + shuffle=False, + drop_last=False) + dataloader_setting = dict(dataloader_setting, + **cfg.data.get('test_dataloader', {})) + data_loader = build_dataloader(dataset, **dataloader_setting) + + # build the model and load checkpoint + model = build_posenet(cfg.model) + fp16_cfg = cfg.get('fp16', None) + if fp16_cfg is not None: + wrap_fp16_model(model) + load_checkpoint(model, args.checkpoint, map_location='cpu') + + if args.fuse_conv_bn: + model = fuse_conv_bn(model) + + if not distributed: + model = MMDataParallel(model, device_ids=[0]) + outputs = single_gpu_test(model, data_loader) + else: + model = MMDistributedDataParallel( + model.cuda(), + device_ids=[torch.cuda.current_device()], + broadcast_buffers=False) + outputs = multi_gpu_test(model, data_loader, args.tmpdir, + args.gpu_collect) + + rank, _ = get_dist_info() + eval_config = cfg.get('evaluation', {}) + eval_config = merge_configs(eval_config, dict(metric=args.eval)) + + if rank == 0: + if args.out: + print(f'\nwriting results to {args.out}') + mmcv.dump(outputs, args.out) + + print(dataset.evaluate(outputs, args.work_dir, **eval_config)) + + +if __name__ == '__main__': + main() diff --git a/tools/torchstat/README.md b/tools/torchstat/README.md new file mode 100644 index 0000000..6c6a4a9 --- /dev/null +++ b/tools/torchstat/README.md @@ -0,0 +1,7 @@ +# Credits + +Code in this folder is almost as-is from torchstat repository located at https://github.com/Swall0w/torchstat. 
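+
+A minimal usage sketch (the torchvision model and input shape are only examples;
+run it from the repository root so that `tools` is importable), showing how
+`tools/torchstat_utils.py` wraps `analyze()` from this folder:
+
+```python
+import torchvision
+
+from tools.torchstat_utils import model_stats
+
+model = torchvision.models.resnet18()      # any nn.Module works here
+df = model_stats(model, (1, 3, 224, 224))  # per-layer stats as a pandas DataFrame
+print(df)
+```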
+ +Additional merges are from: +- https://github.com/kenshohara/torchstat +- https://github.com/lyakaap/torchstat \ No newline at end of file diff --git a/tools/torchstat/__init__.py b/tools/torchstat/__init__.py new file mode 100644 index 0000000..980f561 --- /dev/null +++ b/tools/torchstat/__init__.py @@ -0,0 +1,5 @@ +from .analyzer import analyze + +__all__ = [ + 'analyze', +] \ No newline at end of file diff --git a/tools/torchstat/analyzer.py b/tools/torchstat/analyzer.py new file mode 100644 index 0000000..5b03310 --- /dev/null +++ b/tools/torchstat/analyzer.py @@ -0,0 +1,176 @@ +import time +from collections import OrderedDict +from typing import Dict, Sequence +import functools +import itertools + +import numpy as np +import torch +import torch.nn as nn + +from .compute_madd import compute_madd +from .compute_flops import compute_flops +from .compute_memory import compute_memory +from .stat_tree import StatTree, StatNode +from .reporter import report_format + + +class ModuleStats: + + def __init__(self, name) -> None: + self.name = name + self.start_time = 0.0 + self.end_time = 0.0 + self.inference_memory = 0 + self.input_shape: Sequence[int] = [] + self.output_shape: Sequence[int] = [] + self.MAdd = 0 + self.duration = 0.0 + self.Flops = 0 + self.Memory = 0, 0 + self.parameter_quantity = 0 + self.done = False + + +def print_report(self, collected_nodes): + report = report_format(self.collected_nodes) + print(report) + + +def analyze(model: nn.Module, input_size, query_granularity: int): + assert isinstance(model, nn.Module) + assert isinstance(input_size, (list, tuple)) + + pre_hooks, post_hooks = [], [] + stats: OrderedDict[str, ModuleStats] = OrderedDict() + + try: + _for_leaf(model, _register_hooks, pre_hooks, post_hooks, stats) + + x = torch.rand(*input_size) # add module duration time + x = x.to(next(model.parameters()).device) + model.eval() + model(x) + + stat_tree = _convert_leaf_modules_to_stat_tree(stats) + + return stat_tree.get_collected_stat_nodes(query_granularity) + + finally: + for stat in stats.values(): + stat.done = True + for hook in itertools.chain(pre_hooks, post_hooks): + hook.remove() + + +def _for_leaf(model, fn, *args): + for name, module in model.named_modules(): + if len(list(module.children())) == 0: + fn(name, module, *args) + + +def _register_hooks(name: str, module: nn.Module, pre_hooks, post_hooks, + stats): + assert isinstance(module, nn.Module) and len(list(module.children())) == 0 + + if name in stats: + return + + module_stats = ModuleStats(name) + stats[name] = module_stats + + post_hook = module.register_forward_hook( + functools.partial(_forward_post_hook, module_stats)) + post_hooks.append(post_hook) + + pre_hook = module.register_forward_pre_hook( + functools.partial(_forward_pre_hook, module_stats)) + pre_hooks.append(pre_hook) + + +def _flatten(x): + """Flattens the tree of tensors to flattened sequence of tensors""" + if isinstance(x, torch.Tensor): + return [x] + if isinstance(x, Sequence): + res = [] + for xi in x: + res += _flatten(xi) + return res + return [] + + +def _forward_pre_hook(module_stats: ModuleStats, module: nn.Module, input): + assert not module_stats.done + module_stats.start_time = time.time() + + +def _forward_post_hook(module_stats: ModuleStats, module: nn.Module, input, + output): + assert not module_stats.done + + module_stats.end_time = time.time() + module_stats.duration = module_stats.end_time - module_stats.start_time + + inputs, outputs = _flatten(input), _flatten(output) + module_stats.input_shape = 
inputs[0].size() + module_stats.output_shape = outputs[0].size() + + parameter_quantity = 0 + # iterate through parameters and count num params + for name, p in module.named_parameters(): + parameter_quantity += (0 if p is None else torch.numel(p.data)) + module_stats.parameter_quantity = parameter_quantity + + inference_memory = 1 + for oi in outputs: + for s in oi.size(): + inference_memory *= s + # memory += parameters_number # exclude parameter memory + inference_memory = inference_memory * 4 / (1024**2) # shown as MB unit + module_stats.inference_memory = inference_memory + module_stats.MAdd = compute_madd(module, inputs, outputs) + module_stats.Flops = compute_flops(module, inputs, outputs) + module_stats.Memory = compute_memory(module, inputs, outputs) + + return output + + +def get_parent_node(root_node, stat_node_name): + assert isinstance(root_node, StatNode) + + node = root_node + names = stat_node_name.split('.') + for i in range(len(names) - 1): + node_name = '.'.join(names[0:i + 1]) + child_index = node.find_child_index(node_name) + assert child_index != -1 + node = node.children[child_index] + return node + + +def _convert_leaf_modules_to_stat_tree(leaf_modules): + assert isinstance(leaf_modules, OrderedDict) + + create_index = 1 + root_node = StatNode(name='root', parent=None) + for name, module_stats in leaf_modules.items(): + names = name.split('.') + for i in range(len(names)): + create_index += 1 + stat_node_name = '.'.join(names[0:i + 1]) + parent_node = get_parent_node(root_node, stat_node_name) + node = StatNode(name=stat_node_name, parent=parent_node) + parent_node.add_child(node) + if i == len(names) - 1: # leaf module itself + input_shape = module_stats.input_shape + output_shape = module_stats.output_shape + node.input_shape = input_shape + node.output_shape = output_shape + node.parameter_quantity = module_stats.parameter_quantity + node.inference_memory = module_stats.inference_memory + node.MAdd = module_stats.MAdd + node.Flops = module_stats.Flops + node.duration = module_stats.duration + node.Memory = module_stats.Memory + return StatTree(root_node) diff --git a/tools/torchstat/compute_flops.py b/tools/torchstat/compute_flops.py new file mode 100644 index 0000000..e9910da --- /dev/null +++ b/tools/torchstat/compute_flops.py @@ -0,0 +1,180 @@ +import torch.nn as nn +import torch +import numpy as np +import math + + +def compute_flops(module, inp, out): + if isinstance(module, nn.Conv2d): + return compute_Conv2d_flops(module, inp[0], out[0]) + elif type(module).__name__ == 'ConvFunction': + return compute_Conv2d_flops(module, inp[0], out[0]) + elif type(module).__name__ == 'SplitKernelConvFunction': + return compute_Conv2d_flops(module, inp[0], out[0]) + elif isinstance(module, nn.ConvTranspose2d): + return compute_ConvTranspose2d_flops(module, inp[0], out[0]) + elif isinstance(module, nn.BatchNorm2d): + return compute_BatchNorm2d_flops(module, inp[0], out[0]) + elif isinstance(module, (nn.AvgPool2d, nn.MaxPool2d)): + return compute_Pool2d_flops(module, inp[0], out[0]) + elif isinstance(module, (nn.AdaptiveAvgPool2d, nn.AdaptiveMaxPool2d)): + return compute_adaptivepool_flops(module, inp[0], out[0]) + elif isinstance(module, + (nn.ReLU, nn.ReLU6, nn.PReLU, nn.ELU, nn.LeakyReLU)): + return compute_ReLU_flops(module, inp[0], out[0]) + elif isinstance(module, nn.Upsample): + return compute_Upsample_flops(module, inp[0], out[0]) + elif isinstance(module, nn.Linear): + return compute_Linear_flops(module, inp[0], out[0]) + elif type(module).__name__ == 'MatMul': + 
return compute_matmul_flops(module, inp, out) + else: + #print(f"[Flops]: {type(module).__name__} is not supported!") + return 0 + pass + + +def compute_matmul_flops(moudle, inp, out): + x, y = inp + batch_size = x.size(0) + _, l, m = x.size() + _, _, n = y.size() + return batch_size * 2 * l * m * n + + +def compute_Conv2d_flops(module, inp, out): + # Can have multiple inputs, getting the first one + # assert isinstance(module, nn.Conv2d) + assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) + + batch_size = inp.size()[0] + in_c = inp.size()[1] + k_h, k_w = module.kernel_size + out_c, out_h, out_w = out.size()[1:] + groups = module.groups + + filters_per_channel = out_c // groups + conv_per_position_flops = k_h * k_w * in_c * filters_per_channel + active_elements_count = batch_size * out_h * out_w + + total_conv_flops = conv_per_position_flops * active_elements_count + + bias_flops = 0 + if module.bias is not None: + bias_flops = out_c * active_elements_count + + total_flops = total_conv_flops + bias_flops + return total_flops + + +def compute_ConvTranspose2d_flops(module, inp, out): + # Can have multiple inputs, getting the first one + assert isinstance(module, nn.ConvTranspose2d) + assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) + + batch_size = inp.size()[0] + in_h, in_w = inp.size()[2:] + + k_h, k_w = module.kernel_size + in_c = module.in_channels + out_c = module.out_channels + groups = module.groups + + filters_per_channel = out_c // groups + conv_per_position_flops = k_h * k_w * in_c * filters_per_channel + active_elements_count = batch_size * in_h * in_w + + total_conv_flops = conv_per_position_flops * active_elements_count + + bias_flops = 0 + if module.bias is not None: + out_h, out_w = out.size()[2:] + bias_flops = out_c * batch_size * out_h * out_w + + total_flops = total_conv_flops + bias_flops + + return total_flops + + +def compute_adaptivepool_flops(module, input, output): + # credits: https://github.com/xternalz/SDPoint/blob/master/utils/flops.py + batch_size = input.size(0) + input_planes = input.size(1) + input_height = input.size(2) + input_width = input.size(3) + + flops = 0 + for i in range(output.size(2)): + y_start = int(math.floor(float(i * input_height) / output.size(2))) + y_end = int(math.ceil(float((i + 1) * input_height) / output.size(2))) + for j in range(output.size(3)): + x_start = int(math.floor(float(j * input_width) / output.size(3))) + x_end = int( + math.ceil(float((j + 1) * input_width) / output.size(3))) + + flops += batch_size * input_planes * (y_end - y_start + 1) * ( + x_end - x_start + 1) + return flops + + +def compute_BatchNorm2d_flops(module, inp, out): + assert isinstance(module, nn.BatchNorm2d) + assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) + in_c, in_h, in_w = inp.size()[1:] + batch_flops = np.prod(inp.shape) + if module.affine: + batch_flops *= 2 + return batch_flops + + +def compute_ReLU_flops(module, inp, out): + assert isinstance(module, + (nn.ReLU, nn.ReLU6, nn.PReLU, nn.ELU, nn.LeakyReLU)) + batch_size = inp.size()[0] + active_elements_count = batch_size + + for s in inp.size()[1:]: + active_elements_count *= s + + return active_elements_count + + +def compute_Pool2d_flops(module, input, out): + batch_size = input.size(0) + input_planes = input.size(1) + input_height = input.size(2) + input_width = input.size(3) + kernel_size = ('int' in str(type(module.kernel_size))) and [ + module.kernel_size, module.kernel_size + ] or module.kernel_size + kernel_ops = kernel_size[0] * 
kernel_size[1] + stride = ('int' in str(type( + module.stride))) and [module.stride, module.stride] or module.stride + padding = ('int' in str(type(module.padding))) and [ + module.padding, module.padding + ] or module.padding + + output_width = math.floor((input_width + 2 * padding[0] - kernel_size[0]) / + float(stride[0]) + 1) + output_height = math.floor( + (input_height + 2 * padding[1] - kernel_size[1]) / float(stride[0]) + + 1) + return batch_size * input_planes * output_width * output_height * kernel_ops + + +def compute_Linear_flops(module, inp, out): + assert isinstance(module, nn.Linear) + assert len(inp.size()) == 2 and len(out.size()) == 2 + batch_size = inp.size()[0] + return batch_size * inp.size()[1] * out.size()[1] + + +def compute_Upsample_flops(module, inp, out): + assert isinstance(module, nn.Upsample) + output_size = out[0] + batch_size = inp.size()[0] + output_elements_count = batch_size + for s in output_size.shape[1:]: + output_elements_count *= s + + return output_elements_count diff --git a/tools/torchstat/compute_madd.py b/tools/torchstat/compute_madd.py new file mode 100644 index 0000000..ddcb1af --- /dev/null +++ b/tools/torchstat/compute_madd.py @@ -0,0 +1,162 @@ +""" +compute Multiply-Adds(MAdd) of each leaf module +""" + +import torch.nn as nn + + +def compute_Conv2d_madd(module, inp, out): + assert isinstance(module, nn.Conv2d) + assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) + + in_c = inp.size()[1] + k_h, k_w = module.kernel_size + out_c, out_h, out_w = out.size()[1:] + groups = module.groups + + # ops per output element + kernel_mul = k_h * k_w * (in_c // groups) + kernel_add = kernel_mul - 1 + (0 if module.bias is None else 1) + + kernel_mul_group = kernel_mul * out_h * out_w * (out_c // groups) + kernel_add_group = kernel_add * out_h * out_w * (out_c // groups) + + total_mul = kernel_mul_group * groups + total_add = kernel_add_group * groups + + return total_mul + total_add + + +def compute_ConvTranspose2d_madd(module, inp, out): + assert isinstance(module, nn.ConvTranspose2d) + assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) + + in_c, in_h, in_w = inp.size()[1:] + k_h, k_w = module.kernel_size + out_c, out_h, out_w = out.size()[1:] + groups = module.groups + + kernel_mul = k_h * k_w * (in_c // groups) + kernel_add = kernel_mul - 1 + (0 if module.bias is None else 1) + + kernel_mul_group = kernel_mul * in_h * in_w * (out_c // groups) + kernel_add_group = kernel_add * in_h * in_w * (out_c // groups) + + total_mul = kernel_mul_group * groups + total_add = kernel_add_group * groups + + return total_mul + total_add + + +def compute_BatchNorm2d_madd(module, inp, out): + assert isinstance(module, nn.BatchNorm2d) + assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) + + in_c, in_h, in_w = inp.size()[1:] + + # 1. sub mean + # 2. div standard deviation + # 3. mul alpha + # 4. 
add beta + return 4 * in_c * in_h * in_w + + +def compute_MaxPool2d_madd(module, inp, out): + assert isinstance(module, nn.MaxPool2d) + assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) + + if isinstance(module.kernel_size, (tuple, list)): + k_h, k_w = module.kernel_size + else: + k_h, k_w = module.kernel_size, module.kernel_size + out_c, out_h, out_w = out.size()[1:] + + return (k_h * k_w - 1) * out_h * out_w * out_c + + +def compute_AvgPool2d_madd(module, inp, out): + assert isinstance(module, nn.AvgPool2d) + assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) + + if isinstance(module.kernel_size, (tuple, list)): + k_h, k_w = module.kernel_size + else: + k_h, k_w = module.kernel_size, module.kernel_size + out_c, out_h, out_w = out.size()[1:] + + kernel_add = k_h * k_w - 1 + kernel_avg = 1 + + return (kernel_add + kernel_avg) * (out_h * out_w) * out_c + + +def compute_ReLU_madd(module, inp, out): + assert isinstance(module, (nn.ReLU, nn.ReLU6)) + + count = 1 + for i in inp.size()[1:]: + count *= i + return count + + +def compute_Softmax_madd(module, inp, out): + assert isinstance(module, nn.Softmax) + assert len(inp.size()) > 1 + + count = 1 + for s in inp.size()[1:]: + count *= s + exp = count + add = count - 1 + div = count + return exp + add + div + + +def compute_Linear_madd(module, inp, out): + assert isinstance(module, nn.Linear) + assert len(inp.size()) == 2 and len(out.size()) == 2 + + num_in_features = inp.size()[1] + num_out_features = out.size()[1] + + mul = num_in_features + add = num_in_features - 1 + return num_out_features * (mul + add) + + +def compute_Bilinear_madd(module, inp1, inp2, out): + assert isinstance(module, nn.Bilinear) + assert len(inp1.size()) == 2 and len(inp2.size()) == 2 and len( + out.size()) == 2 + + num_in_features_1 = inp1.size()[1] + num_in_features_2 = inp2.size()[1] + num_out_features = out.size()[1] + + mul = num_in_features_1 * num_in_features_2 + num_in_features_2 + add = num_in_features_1 * num_in_features_2 + num_in_features_2 - 1 + return num_out_features * (mul + add) + + +def compute_madd(module, inp, out): + if isinstance(module, nn.Conv2d): + return compute_Conv2d_madd(module, inp[0], out[0]) + elif isinstance(module, nn.ConvTranspose2d): + return compute_ConvTranspose2d_madd(module, inp[0], out[0]) + elif isinstance(module, nn.BatchNorm2d): + return compute_BatchNorm2d_madd(module, inp[0], out[0]) + elif isinstance(module, nn.MaxPool2d): + return compute_MaxPool2d_madd(module, inp[0], out[0]) + elif isinstance(module, nn.AvgPool2d): + return compute_AvgPool2d_madd(module, inp[0], out[0]) + elif isinstance(module, (nn.ReLU, nn.ReLU6)): + return compute_ReLU_madd(module, inp[0], out[0]) + elif isinstance(module, nn.Softmax): + return compute_Softmax_madd(module, inp[0], out[0]) + elif isinstance(module, nn.Linear): + return compute_Linear_madd(module, inp[0], out[0]) + elif isinstance(module, nn.Bilinear): + return compute_Bilinear_madd(module, inp[0], inp[1], out) + else: + #print(f"[MAdd]: {type(module).__name__} is not supported!") + return 0 diff --git a/tools/torchstat/compute_memory.py b/tools/torchstat/compute_memory.py new file mode 100644 index 0000000..4371017 --- /dev/null +++ b/tools/torchstat/compute_memory.py @@ -0,0 +1,93 @@ +import torch.nn as nn +import torch +import numpy as np + + +def compute_memory(module, inp, out): + if isinstance(module, (nn.ReLU, nn.ReLU6, nn.ELU, nn.LeakyReLU)): + return compute_ReLU_memory(module, inp[0], out[0]) + elif isinstance(module, nn.PReLU): + return 
compute_PReLU_memory(module, inp[0], out[0]) + elif isinstance(module, nn.Conv2d): + return compute_Conv2d_memory(module, inp[0], out[0]) + elif isinstance(module, nn.BatchNorm2d): + return compute_BatchNorm2d_memory(module, inp[0], out[0]) + elif isinstance(module, nn.Linear): + return compute_Linear_memory(module, inp[0], out[0]) + elif isinstance(module, (nn.AvgPool2d, nn.MaxPool2d)): + return compute_Pool2d_memory(module, inp[0], out[0]) + else: + #print(f"[Memory]: {type(module).__name__} is not supported!") + return 0, 0 + pass + + +def num_params(module): + return sum(p.numel() for p in module.parameters() if p.requires_grad) + + +def compute_ReLU_memory(module, inp, out): + assert isinstance(module, (nn.ReLU, nn.ReLU6, nn.ELU, nn.LeakyReLU)) + + mread = inp.numel() + mwrite = out.numel() + + return mread * inp.element_size(), mwrite * out.element_size() + + +def compute_PReLU_memory(module, inp, out): + assert isinstance(module, nn.PReLU) + + batch_size = inp.size()[0] + mread = batch_size * (inp[0].numel() + num_params(module)) + mwrite = out.numel() + + return mread * inp.element_size(), mwrite * out.element_size() + + +def compute_Conv2d_memory(module, inp, out): + # Can have multiple inputs, getting the first one + assert isinstance(module, nn.Conv2d) + assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) + + batch_size = inp.size()[0] + + # This includes weights with bias if the module contains it. + mread = batch_size * (inp[0].numel() + num_params(module)) + mwrite = out.numel() + + return mread * inp.element_size(), mwrite * out.element_size() + + +def compute_BatchNorm2d_memory(module, inp, out): + assert isinstance(module, nn.BatchNorm2d) + assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) + + batch_size, in_c, in_h, in_w = inp.size() + mread = batch_size * (inp[0].numel() + 2 * in_c) + mwrite = out.numel() + + return mread * inp.element_size(), mwrite * out.element_size() + + +def compute_Linear_memory(module, inp, out): + assert isinstance(module, nn.Linear) + assert len(inp.size()) == 2 and len(out.size()) == 2 + + batch_size = inp.size()[0] + + # This includes weights with bias if the module contains it. + mread = batch_size * (inp[0].numel() + num_params(module)) + mwrite = out.numel() + + return mread * inp.element_size(), mwrite * out.element_size() + + +def compute_Pool2d_memory(module, inp, out): + assert isinstance(module, (nn.MaxPool2d, nn.AvgPool2d)) + assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) + + mread = inp.numel() + mwrite = out.numel() + + return mread * inp.element_size(), mwrite * out.element_size() diff --git a/tools/torchstat/reporter.py b/tools/torchstat/reporter.py new file mode 100644 index 0000000..dc95dcc --- /dev/null +++ b/tools/torchstat/reporter.py @@ -0,0 +1,90 @@ +import pandas as pd + +pd.set_option('display.width', 1000) +pd.set_option('display.max_rows', 10000) +pd.set_option('display.max_columns', 10000) + + +def round_value(value, binary=False): + divisor = 1024. if binary else 1000. 
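+    # Express the value with the largest unit it reaches (K/M/G/T); binary=True
+    # uses 1024-based steps (used below for the MemR+W byte total), 1000 otherwise.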
+ + if value // divisor**4 > 0: + return str(round(value / divisor**4, 2)) + 'T' + elif value // divisor**3 > 0: + return str(round(value / divisor**3, 2)) + 'G' + elif value // divisor**2 > 0: + return str(round(value / divisor**2, 2)) + 'M' + elif value // divisor > 0: + return str(round(value / divisor, 2)) + 'K' + return str(value) + + +def report_format(collected_nodes): + data = list() + for node in collected_nodes: + name = node.name + input_shape = ' '.join( + ['{:>3d}'] * + len(node.input_shape)).format(*[e for e in node.input_shape]) + output_shape = ' '.join( + ['{:>3d}'] * + len(node.output_shape)).format(*[e for e in node.output_shape]) + parameter_quantity = node.parameter_quantity + inference_memory = node.inference_memory + MAdd = node.MAdd + Flops = node.Flops + mread, mwrite = [i for i in node.Memory] + duration = node.duration + data.append([ + name, input_shape, output_shape, parameter_quantity, + inference_memory, MAdd, duration, Flops, mread, mwrite + ]) + df = pd.DataFrame(data) + df.columns = [ + 'module name', 'input shape', 'output shape', 'params', 'memory(MB)', + 'MAdd', 'duration', 'Flops', 'MemRead(B)', 'MemWrite(B)' + ] + df['duration[%]'] = df['duration'] / (df['duration'].sum() + 1e-7) + df['MemR+W(B)'] = df['MemRead(B)'] + df['MemWrite(B)'] + total_parameters_quantity = df['params'].sum() + total_memory = df['memory(MB)'].sum() + total_operation_quantity = df['MAdd'].sum() + total_flops = df['Flops'].sum() + total_duration = df['duration[%]'].sum() + total_mread = df['MemRead(B)'].sum() + total_mwrite = df['MemWrite(B)'].sum() + total_memrw = df['MemR+W(B)'].sum() + del df['duration'] + + # Add Total row + total_df = pd.Series([ + total_parameters_quantity, total_memory, total_operation_quantity, + total_flops, total_duration, mread, mwrite, total_memrw + ], + index=[ + 'params', 'memory(MB)', 'MAdd', 'Flops', + 'duration[%]', 'MemRead(B)', 'MemWrite(B)', + 'MemR+W(B)' + ], + name='total') + df = df.append(total_df) + + df = df.fillna(' ') + df['memory(MB)'] = df['memory(MB)'].apply(lambda x: '{:.2f}'.format(x)) + df['duration[%]'] = df['duration[%]'].apply(lambda x: '{:.2%}'.format(x)) + df['MAdd'] = df['MAdd'].apply(lambda x: '{:,}'.format(x)) + df['Flops'] = df['Flops'].apply(lambda x: '{:,}'.format(x)) + + summary = str(df) + '\n' + summary += "=" * len(str(df).split('\n')[0]) + summary += '\n' + summary += "Total params: {:,}\n".format(total_parameters_quantity) + + summary += "-" * len(str(df).split('\n')[0]) + summary += '\n' + summary += "Total memory: {:.2f}MB\n".format(total_memory) + summary += "Total MAdd: {}MAdd\n".format( + round_value(total_operation_quantity)) + summary += "Total Flops: {}Flops\n".format(round_value(total_flops)) + summary += "Total MemR+W: {}B\n".format(round_value(total_memrw, True)) + return summary diff --git a/tools/torchstat/stat_tree.py b/tools/torchstat/stat_tree.py new file mode 100644 index 0000000..12845a1 --- /dev/null +++ b/tools/torchstat/stat_tree.py @@ -0,0 +1,194 @@ +import queue + + +class StatTree(object): + + def __init__(self, root_node): + assert isinstance(root_node, StatNode) + + self.root_node = root_node + + def get_same_level_max_node_depth(self, query_node): + if query_node.name == self.root_node.name: + return 0 + same_level_depth = max( + [child.depth for child in query_node.parent.children]) + return same_level_depth + + def update_stat_nodes_granularity(self): + q = queue.Queue() + q.put(self.root_node) + while not q.empty(): + node = q.get() + node.granularity = 
self.get_same_level_max_node_depth(node) + for child in node.children: + q.put(child) + + def get_collected_stat_nodes(self, query_granularity): + self.update_stat_nodes_granularity() + + collected_nodes = [] + stack = list() + stack.append(self.root_node) + while len(stack) > 0: + node = stack.pop() + for child in reversed(node.children): + stack.append(child) + if node.depth == query_granularity: + collected_nodes.append(node) + if node.depth < query_granularity <= node.granularity: + collected_nodes.append(node) + return collected_nodes + + +class StatNode(object): + + def __init__(self, name=str(), parent=None): + self._name = name + self._input_shape = None + self._output_shape = None + self._parameter_quantity = 0 + self._inference_memory = 0 + self._MAdd = 0 + self._Memory = (0, 0) + self._Flops = 0 + self._duration = 0 + self._duration_percent = 0 + + self._granularity = 1 + self._depth = 1 + self.parent = parent + self.children = list() + + @property + def name(self): + return self._name + + @name.setter + def name(self, name): + self._name = name + + @property + def granularity(self): + return self._granularity + + @granularity.setter + def granularity(self, g): + self._granularity = g + + @property + def depth(self): + d = self._depth + if len(self.children) > 0: + d += max([child.depth for child in self.children]) + return d + + @property + def input_shape(self): + if len(self.children) == 0: # leaf + return self._input_shape + else: + return self.children[0].input_shape + + @input_shape.setter + def input_shape(self, input_shape): + assert isinstance(input_shape, (list, tuple)) + self._input_shape = input_shape + + @property + def output_shape(self): + if len(self.children) == 0: # leaf + return self._output_shape + else: + return self.children[-1].output_shape + + @output_shape.setter + def output_shape(self, output_shape): + assert isinstance(output_shape, (list, tuple)) + self._output_shape = output_shape + + @property + def parameter_quantity(self): + # return self.parameters_quantity + total_parameter_quantity = self._parameter_quantity + for child in self.children: + total_parameter_quantity += child.parameter_quantity + return total_parameter_quantity + + @parameter_quantity.setter + def parameter_quantity(self, parameter_quantity): + assert parameter_quantity >= 0 + self._parameter_quantity = parameter_quantity + + @property + def inference_memory(self): + total_inference_memory = self._inference_memory + for child in self.children: + total_inference_memory += child.inference_memory + return total_inference_memory + + @inference_memory.setter + def inference_memory(self, inference_memory): + self._inference_memory = inference_memory + + @property + def MAdd(self): + total_MAdd = self._MAdd + for child in self.children: + total_MAdd += child.MAdd + return total_MAdd + + @MAdd.setter + def MAdd(self, MAdd): + self._MAdd = MAdd + + @property + def Flops(self): + total_Flops = self._Flops + for child in self.children: + total_Flops += child.Flops + return total_Flops + + @Flops.setter + def Flops(self, Flops): + self._Flops = Flops + + @property + def Memory(self): + total_Memory = self._Memory + for child in self.children: + total_Memory[0] += child.Memory[0] + total_Memory[1] += child.Memory[1] + print(total_Memory) + return total_Memory + + @Memory.setter + def Memory(self, Memory): + assert isinstance(Memory, (list, tuple)) + self._Memory = Memory + + @property + def duration(self): + total_duration = self._duration + for child in self.children: + total_duration += 
child.duration + return total_duration + + @duration.setter + def duration(self, duration): + self._duration = duration + + def find_child_index(self, child_name): + assert isinstance(child_name, str) + + index = -1 + for i in range(len(self.children)): + if child_name == self.children[i].name: + index = i + return index + + def add_child(self, node): + assert isinstance(node, StatNode) + + if self.find_child_index(node.name) == -1: # not exist + self.children.append(node) diff --git a/tools/torchstat_utils.py b/tools/torchstat_utils.py new file mode 100644 index 0000000..1105d71 --- /dev/null +++ b/tools/torchstat_utils.py @@ -0,0 +1,108 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +from .torchstat import analyze +import pandas as pd +import copy + + +class LayerStats: + + def __init__(self, node) -> None: + self.name = node.name + self.input_shape = node.input_shape + self.output_shape = node.output_shape + self.parameters = node.parameter_quantity + self.inference_memory = node.inference_memory + self.MAdd = node.MAdd + self.Flops = node.Flops + self.mread, self.mwrite = node.Memory[0], node.Memory[1] + self.duration = node.duration + + +class ModelStats(LayerStats): + + def __init__(self, model, input_shape, clone_model=False) -> None: + if clone_model: + model = copy.deepcopy(model) + collected_nodes = analyze(model, input_shape, 1) + self.layer_stats = [] + for node in collected_nodes: + self.layer_stats.append(LayerStats(node)) + + self.name = 'Model' + self.input_shape = input_shape + self.output_shape = self.layer_stats[-1].output_shape + self.parameters = sum((l.parameters for l in self.layer_stats)) + self.inference_memory = sum( + (l.inference_memory for l in self.layer_stats)) + self.MAdd = sum((l.MAdd for l in self.layer_stats)) + self.Flops = sum((l.Flops for l in self.layer_stats)) + self.mread = sum((l.mread for l in self.layer_stats)) + self.mwrite = sum((l.mwrite for l in self.layer_stats)) + self.duration = sum((l.duration for l in self.layer_stats)) + + +def model_stats(model, input_shape): + ms = ModelStats(model, input_shape) + return model_stats2df(ms) + + +def _round_value(value, binary=False): + divisor = 1024. if binary else 1000. 
+
+    if value // divisor**4 > 0:
+        return str(round(value / divisor**4, 2)) + 'T'
+    elif value // divisor**3 > 0:
+        return str(round(value / divisor**3, 2)) + 'G'
+    elif value // divisor**2 > 0:
+        return str(round(value / divisor**2, 2)) + 'M'
+    elif value // divisor > 0:
+        return str(round(value / divisor, 2)) + 'K'
+    return str(value)
+
+
+def model_stats2df(model_stats: ModelStats):
+    pd.set_option('display.width', 1000)
+    pd.set_option('display.max_rows', 10000)
+    pd.set_option('display.max_columns', 10000)
+
+    df = pd.DataFrame([l.__dict__ for l in model_stats.layer_stats])
+    total_df = pd.Series(model_stats.__dict__, name='Total')
+    df = df.append(total_df[df.columns], ignore_index=True)
+
+    df = df.fillna(' ')
+    for c in [
+            'MAdd', 'Flops', 'parameters', 'inference_memory', 'mread',
+            'mwrite'
+    ]:
+        if c == 'Flops':
+            df[c] = df[c].apply(lambda x: _round_value(x, True))
+        elif c == 'parameters':
+            df[c] = df[c].apply(lambda x: _round_value(x))
+        else:
+            df[c] = df[c].apply(lambda x: '{:,}'.format(x))
+
+    df.rename(
+        columns={
+            'name': 'module name',
+            'input_shape': 'input shape',
+            'output_shape': 'output shape',
+            'inference_memory': 'infer memory(MB)',
+            'mread': 'MemRead(B)',
+            'mwrite': 'MemWrite(B)'
+        },
+        inplace=True)
+
+    return df
\ No newline at end of file
diff --git a/tools/train.py b/tools/train.py
new file mode 100644
index 0000000..986ab27
--- /dev/null
+++ b/tools/train.py
@@ -0,0 +1,166 @@
+import argparse
+import copy
+import os
+import os.path as osp
+import time
+
+import mmcv
+import torch
+from mmcv import Config, DictAction
+from mmcv.runner import init_dist, set_random_seed
+from mmcv.utils import get_git_hash
+
+from mmpose import __version__
+from mmpose.apis import train_model
+from mmpose.datasets import build_dataset
+from models import build_posenet
+from mmpose.utils import collect_env, get_root_logger
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='Train a pose model')
+    parser.add_argument('config', help='train config file path')
+    parser.add_argument('--work-dir', help='the dir to save logs and models')
+    parser.add_argument(
+        '--resume-from', help='the checkpoint file to resume from')
+    parser.add_argument(
+        '--no-validate',
+        action='store_true',
+        help='whether not to evaluate the checkpoint during training')
+    group_gpus = parser.add_mutually_exclusive_group()
+    group_gpus.add_argument(
+        '--gpus',
+        type=int,
+        help='number of gpus to use '
+        '(only applicable to non-distributed training)')
+    group_gpus.add_argument(
+        '--gpu-ids',
+        type=int,
+        nargs='+',
+        help='ids of gpus to use '
+        '(only applicable to non-distributed training)')
+    parser.add_argument('--seed', type=int, default=None, help='random seed')
+    parser.add_argument(
+        '--deterministic',
+        action='store_true',
+        help='whether to set deterministic options for CUDNN backend.')
+    parser.add_argument(
+        '--cfg-options',
+        nargs='+',
+        action=DictAction,
+        default={},
+        
help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. For example, ' + "'--cfg-options model.backbone.depth=18 model.backbone.with_cp=True'") + parser.add_argument( + '--launcher', + choices=['none', 'pytorch', 'slurm', 'mpi'], + default='none', + help='job launcher') + parser.add_argument('--local_rank', type=int, default=0) + parser.add_argument( + '--autoscale-lr', + action='store_true', + help='automatically scale lr with the number of gpus') + args = parser.parse_args() + if 'LOCAL_RANK' not in os.environ: + os.environ['LOCAL_RANK'] = str(args.local_rank) + + return args + + +def main(): + args = parse_args() + + cfg = Config.fromfile(args.config) + + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + + # set cudnn_benchmark + if cfg.get('cudnn_benchmark', False): + torch.backends.cudnn.benchmark = True + + # work_dir is determined in this priority: CLI > segment in file > filename + if args.work_dir is not None: + # update configs according to CLI args if args.work_dir is not None + cfg.work_dir = args.work_dir + elif cfg.get('work_dir', None) is None: + # use config filename as default work_dir if cfg.work_dir is None + cfg.work_dir = osp.join('./work_dirs', + osp.splitext(osp.basename(args.config))[0]) + if args.resume_from is not None: + cfg.resume_from = args.resume_from + if args.gpu_ids is not None: + cfg.gpu_ids = args.gpu_ids + else: + cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus) + + if args.autoscale_lr: + # apply the linear scaling rule (https://arxiv.org/abs/1706.02677) + cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8 + + # init distributed env first, since logger depends on the dist info. + if args.launcher == 'none': + distributed = False + else: + distributed = True + init_dist(args.launcher, **cfg.dist_params) + + # create work_dir + mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir)) + # init the logger before other steps + timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) + log_file = osp.join(cfg.work_dir, f'{timestamp}.log') + logger = get_root_logger(log_file=log_file, log_level=cfg.log_level) + + # init the meta dict to record some important information such as + # environment info and seed, which will be logged + meta = dict() + # log env info + env_info_dict = collect_env() + env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()]) + dash_line = '-' * 60 + '\n' + logger.info('Environment info:\n' + dash_line + env_info + '\n' + + dash_line) + meta['env_info'] = env_info + + # log some basic info + logger.info(f'Distributed training: {distributed}') + logger.info(f'Config:\n{cfg.pretty_text}') + + # set random seeds + if args.seed is not None: + logger.info(f'Set random seed to {args.seed}, ' + f'deterministic: {args.deterministic}') + set_random_seed(args.seed, deterministic=args.deterministic) + cfg.seed = args.seed + meta['seed'] = args.seed + + model = build_posenet(cfg.model) + datasets = [build_dataset(cfg.data.train)] + + if len(cfg.workflow) == 2: + val_dataset = copy.deepcopy(cfg.data.val) + val_dataset.pipeline = cfg.data.train.pipeline + datasets.append(build_dataset(val_dataset)) + + if cfg.checkpoint_config is not None: + # save mmpose version, config file content + # checkpoints as meta data + cfg.checkpoint_config.meta = dict( + mmpose_version=__version__ + get_git_hash(digits=7), + config=cfg.pretty_text, + ) + train_model( + model, + datasets, + cfg, + distributed=distributed, + validate=(not 
args.no_validate), + timestamp=timestamp, + meta=meta) + + +if __name__ == '__main__': + main()